/* Source: clang 6.0.0, avx512fintrin.h (recovered from documentation export). */
1 /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  *
21  *===-----------------------------------------------------------------------===
22  */
23 #ifndef __IMMINTRIN_H
24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25 #endif
26 
27 #ifndef __AVX512FINTRIN_H
28 #define __AVX512FINTRIN_H
29 
/* 512-bit vector element-view types (signed). */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* Public 512-bit vector types. */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* AVX-512 write-mask types: one bit per lane. */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
49 
/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF     0x01
#define _MM_FROUND_TO_POS_INF     0x02
#define _MM_FROUND_TO_ZERO        0x03
#define _MM_FROUND_CUR_DIRECTION  0x04
56 
/* Constants for integer comparison predicates.
   NOTE: the reserved slot between LE and NE is significant — NE must be 4. */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,  /* Reserved predicate slot (value 3) */
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;
69 
/* 2-bit-per-lane shuffle selectors: _MM_PERM_XYZW encodes (X<<6)|(Y<<4)|(Z<<2)|W
   with A=0, B=1, C=2, D=3; covers all 256 combinations 0x00..0xFF. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
159 
/* Normalization intervals for getmant operations. */
typedef enum
{
  _MM_MANT_NORM_1_2,      /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,     /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,     /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANT_NORM_ENUM;
167 
/* Sign-control options for getmant operations. */
typedef enum
{
  _MM_MANT_SIGN_src,      /* sign = sign(SRC)                  */
  _MM_MANT_SIGN_zero,     /* sign = 0                          */
  _MM_MANT_SIGN_nan       /* DEST = NaN if sign(SRC) = 1       */
} _MM_MANT_SIGN_ENUM;
174 
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
177 
178 /* Create vectors with repeated elements */
179 
180 static __inline __m512i __DEFAULT_FN_ATTRS
182 {
183  return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
184 }
185 
186 #define _mm512_setzero_epi32 _mm512_setzero_si512
187 
188 static __inline__ __m512d __DEFAULT_FN_ATTRS
190 {
191  return (__m512d)__builtin_ia32_undef512();
192 }
193 
194 static __inline__ __m512 __DEFAULT_FN_ATTRS
196 {
197  return (__m512)__builtin_ia32_undef512();
198 }
199 
200 static __inline__ __m512 __DEFAULT_FN_ATTRS
202 {
203  return (__m512)__builtin_ia32_undef512();
204 }
205 
206 static __inline__ __m512i __DEFAULT_FN_ATTRS
208 {
209  return (__m512i)__builtin_ia32_undef512();
210 }
211 
212 static __inline__ __m512i __DEFAULT_FN_ATTRS
214 {
215  return (__m512i)__builtin_shufflevector((__v4si) __A,
216  (__v4si)_mm_undefined_si128(),
217  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
218 }
219 
220 static __inline__ __m512i __DEFAULT_FN_ATTRS
221 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
222 {
223  return (__m512i)__builtin_ia32_selectd_512(__M,
224  (__v16si) _mm512_broadcastd_epi32(__A),
225  (__v16si) __O);
226 }
227 
228 static __inline__ __m512i __DEFAULT_FN_ATTRS
229 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
230 {
231  return (__m512i)__builtin_ia32_selectd_512(__M,
232  (__v16si) _mm512_broadcastd_epi32(__A),
233  (__v16si) _mm512_setzero_si512());
234 }
235 
236 static __inline__ __m512i __DEFAULT_FN_ATTRS
238 {
239  return (__m512i)__builtin_shufflevector((__v2di) __A,
240  (__v2di) _mm_undefined_si128(),
241  0, 0, 0, 0, 0, 0, 0, 0);
242 }
243 
244 static __inline__ __m512i __DEFAULT_FN_ATTRS
245 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
246 {
247  return (__m512i)__builtin_ia32_selectq_512(__M,
248  (__v8di) _mm512_broadcastq_epi64(__A),
249  (__v8di) __O);
250 
251 }
252 
253 static __inline__ __m512i __DEFAULT_FN_ATTRS
254 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
255 {
256  return (__m512i)__builtin_ia32_selectq_512(__M,
257  (__v8di) _mm512_broadcastq_epi64(__A),
258  (__v8di) _mm512_setzero_si512());
259 }
260 
261 
262 static __inline __m512 __DEFAULT_FN_ATTRS
264 {
265  return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
266  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
267 }
268 
269 #define _mm512_setzero _mm512_setzero_ps
270 
271 static __inline __m512d __DEFAULT_FN_ATTRS
273 {
274  return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
275 }
276 
277 static __inline __m512 __DEFAULT_FN_ATTRS
278 _mm512_set1_ps(float __w)
279 {
280  return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
281  __w, __w, __w, __w, __w, __w, __w, __w };
282 }
283 
284 static __inline __m512d __DEFAULT_FN_ATTRS
285 _mm512_set1_pd(double __w)
286 {
287  return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
288 }
289 
290 static __inline __m512i __DEFAULT_FN_ATTRS
292 {
293  return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
294  __w, __w, __w, __w, __w, __w, __w, __w,
295  __w, __w, __w, __w, __w, __w, __w, __w,
296  __w, __w, __w, __w, __w, __w, __w, __w,
297  __w, __w, __w, __w, __w, __w, __w, __w,
298  __w, __w, __w, __w, __w, __w, __w, __w,
299  __w, __w, __w, __w, __w, __w, __w, __w,
300  __w, __w, __w, __w, __w, __w, __w, __w };
301 }
302 
303 static __inline __m512i __DEFAULT_FN_ATTRS
305 {
306  return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
307  __w, __w, __w, __w, __w, __w, __w, __w,
308  __w, __w, __w, __w, __w, __w, __w, __w,
309  __w, __w, __w, __w, __w, __w, __w, __w };
310 }
311 
312 static __inline __m512i __DEFAULT_FN_ATTRS
314 {
315  return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
316  __s, __s, __s, __s, __s, __s, __s, __s };
317 }
318 
319 static __inline __m512i __DEFAULT_FN_ATTRS
320 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
321 {
322  return (__m512i)__builtin_ia32_selectd_512(__M,
323  (__v16si)_mm512_set1_epi32(__A),
324  (__v16si)_mm512_setzero_si512());
325 }
326 
327 static __inline __m512i __DEFAULT_FN_ATTRS
328 _mm512_set1_epi64(long long __d)
329 {
330  return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
331 }
332 
333 #ifdef __x86_64__
334 static __inline __m512i __DEFAULT_FN_ATTRS
335 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
336 {
337  return (__m512i)__builtin_ia32_selectq_512(__M,
338  (__v8di)_mm512_set1_epi64(__A),
339  (__v8di)_mm512_setzero_si512());
340 }
341 #endif
342 
343 static __inline__ __m512 __DEFAULT_FN_ATTRS
345 {
346  return (__m512)__builtin_shufflevector((__v4sf) __A,
347  (__v4sf)_mm_undefined_ps(),
348  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
349 }
350 
351 static __inline __m512i __DEFAULT_FN_ATTRS
352 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
353 {
354  return (__m512i)(__v16si)
355  { __D, __C, __B, __A, __D, __C, __B, __A,
356  __D, __C, __B, __A, __D, __C, __B, __A };
357 }
358 
359 static __inline __m512i __DEFAULT_FN_ATTRS
360 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
361  long long __D)
362 {
363  return (__m512i) (__v8di)
364  { __D, __C, __B, __A, __D, __C, __B, __A };
365 }
366 
367 static __inline __m512d __DEFAULT_FN_ATTRS
368 _mm512_set4_pd (double __A, double __B, double __C, double __D)
369 {
370  return (__m512d)
371  { __D, __C, __B, __A, __D, __C, __B, __A };
372 }
373 
374 static __inline __m512 __DEFAULT_FN_ATTRS
375 _mm512_set4_ps (float __A, float __B, float __C, float __D)
376 {
377  return (__m512)
378  { __D, __C, __B, __A, __D, __C, __B, __A,
379  __D, __C, __B, __A, __D, __C, __B, __A };
380 }
381 
382 #define _mm512_setr4_epi32(e0,e1,e2,e3) \
383  _mm512_set4_epi32((e3),(e2),(e1),(e0))
384 
385 #define _mm512_setr4_epi64(e0,e1,e2,e3) \
386  _mm512_set4_epi64((e3),(e2),(e1),(e0))
387 
388 #define _mm512_setr4_pd(e0,e1,e2,e3) \
389  _mm512_set4_pd((e3),(e2),(e1),(e0))
390 
391 #define _mm512_setr4_ps(e0,e1,e2,e3) \
392  _mm512_set4_ps((e3),(e2),(e1),(e0))
393 
394 static __inline__ __m512d __DEFAULT_FN_ATTRS
396 {
397  return (__m512d)__builtin_shufflevector((__v2df) __A,
398  (__v2df) _mm_undefined_pd(),
399  0, 0, 0, 0, 0, 0, 0, 0);
400 }
401 
402 /* Cast between vector types */
403 
404 static __inline __m512d __DEFAULT_FN_ATTRS
406 {
407  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
408 }
409 
410 static __inline __m512 __DEFAULT_FN_ATTRS
412 {
413  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
414  -1, -1, -1, -1, -1, -1, -1, -1);
415 }
416 
417 static __inline __m128d __DEFAULT_FN_ATTRS
419 {
420  return __builtin_shufflevector(__a, __a, 0, 1);
421 }
422 
423 static __inline __m256d __DEFAULT_FN_ATTRS
425 {
426  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
427 }
428 
429 static __inline __m128 __DEFAULT_FN_ATTRS
431 {
432  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
433 }
434 
435 static __inline __m256 __DEFAULT_FN_ATTRS
437 {
438  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
439 }
440 
441 static __inline __m512 __DEFAULT_FN_ATTRS
442 _mm512_castpd_ps (__m512d __A)
443 {
444  return (__m512) (__A);
445 }
446 
447 static __inline __m512i __DEFAULT_FN_ATTRS
448 _mm512_castpd_si512 (__m512d __A)
449 {
450  return (__m512i) (__A);
451 }
452 
453 static __inline__ __m512d __DEFAULT_FN_ATTRS
455 {
456  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
457 }
458 
459 static __inline __m512d __DEFAULT_FN_ATTRS
460 _mm512_castps_pd (__m512 __A)
461 {
462  return (__m512d) (__A);
463 }
464 
465 static __inline __m512i __DEFAULT_FN_ATTRS
467 {
468  return (__m512i) (__A);
469 }
470 
471 static __inline__ __m512 __DEFAULT_FN_ATTRS
473 {
474  return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
475 }
476 
477 static __inline__ __m512i __DEFAULT_FN_ATTRS
479 {
480  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
481 }
482 
483 static __inline__ __m512i __DEFAULT_FN_ATTRS
485 {
486  return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
487 }
488 
489 static __inline __m512 __DEFAULT_FN_ATTRS
490 _mm512_castsi512_ps (__m512i __A)
491 {
492  return (__m512) (__A);
493 }
494 
495 static __inline __m512d __DEFAULT_FN_ATTRS
496 _mm512_castsi512_pd (__m512i __A)
497 {
498  return (__m512d) (__A);
499 }
500 
501 static __inline __m128i __DEFAULT_FN_ATTRS
503 {
504  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
505 }
506 
507 static __inline __m256i __DEFAULT_FN_ATTRS
509 {
510  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
511 }
512 
513 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
515 {
516  return (__mmask16)__a;
517 }
518 
519 static __inline__ int __DEFAULT_FN_ATTRS
520 _mm512_mask2int(__mmask16 __a)
521 {
522  return (int)__a;
523 }
524 
525 /// \brief Constructs a 512-bit floating-point vector of [8 x double] from a
526 /// 128-bit floating-point vector of [2 x double]. The lower 128 bits
527 /// contain the value of the source vector. The upper 384 bits are set
528 /// to zero.
529 ///
530 /// \headerfile <x86intrin.h>
531 ///
532 /// This intrinsic has no corresponding instruction.
533 ///
534 /// \param __a
535 /// A 128-bit vector of [2 x double].
536 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
537 /// contain the value of the parameter. The upper 384 bits are set to zero.
538 static __inline __m512d __DEFAULT_FN_ATTRS
540 {
541  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
542 }
543 
544 /// \brief Constructs a 512-bit floating-point vector of [8 x double] from a
545 /// 256-bit floating-point vector of [4 x double]. The lower 256 bits
546 /// contain the value of the source vector. The upper 256 bits are set
547 /// to zero.
548 ///
549 /// \headerfile <x86intrin.h>
550 ///
551 /// This intrinsic has no corresponding instruction.
552 ///
553 /// \param __a
554 /// A 256-bit vector of [4 x double].
555 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
556 /// contain the value of the parameter. The upper 256 bits are set to zero.
557 static __inline __m512d __DEFAULT_FN_ATTRS
559 {
560  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
561 }
562 
563 /// \brief Constructs a 512-bit floating-point vector of [16 x float] from a
564 /// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
565 /// the value of the source vector. The upper 384 bits are set to zero.
566 ///
567 /// \headerfile <x86intrin.h>
568 ///
569 /// This intrinsic has no corresponding instruction.
570 ///
571 /// \param __a
572 /// A 128-bit vector of [4 x float].
573 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
574 /// contain the value of the parameter. The upper 384 bits are set to zero.
575 static __inline __m512 __DEFAULT_FN_ATTRS
577 {
578  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
579 }
580 
581 /// \brief Constructs a 512-bit floating-point vector of [16 x float] from a
582 /// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
583 /// the value of the source vector. The upper 256 bits are set to zero.
584 ///
585 /// \headerfile <x86intrin.h>
586 ///
587 /// This intrinsic has no corresponding instruction.
588 ///
589 /// \param __a
590 /// A 256-bit vector of [8 x float].
591 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
592 /// contain the value of the parameter. The upper 256 bits are set to zero.
593 static __inline __m512 __DEFAULT_FN_ATTRS
595 {
596  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
597 }
598 
599 /// \brief Constructs a 512-bit integer vector from a 128-bit integer vector.
600 /// The lower 128 bits contain the value of the source vector. The upper
601 /// 384 bits are set to zero.
602 ///
603 /// \headerfile <x86intrin.h>
604 ///
605 /// This intrinsic has no corresponding instruction.
606 ///
607 /// \param __a
608 /// A 128-bit integer vector.
609 /// \returns A 512-bit integer vector. The lower 128 bits contain the value of
610 /// the parameter. The upper 384 bits are set to zero.
611 static __inline __m512i __DEFAULT_FN_ATTRS
613 {
614  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
615 }
616 
617 /// \brief Constructs a 512-bit integer vector from a 256-bit integer vector.
618 /// The lower 256 bits contain the value of the source vector. The upper
619 /// 256 bits are set to zero.
620 ///
621 /// \headerfile <x86intrin.h>
622 ///
623 /// This intrinsic has no corresponding instruction.
624 ///
625 /// \param __a
626 /// A 256-bit integer vector.
627 /// \returns A 512-bit integer vector. The lower 256 bits contain the value of
628 /// the parameter. The upper 256 bits are set to zero.
629 static __inline __m512i __DEFAULT_FN_ATTRS
631 {
632  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
633 }
634 
635 /* Bitwise operators */
636 static __inline__ __m512i __DEFAULT_FN_ATTRS
637 _mm512_and_epi32(__m512i __a, __m512i __b)
638 {
639  return (__m512i)((__v16su)__a & (__v16su)__b);
640 }
641 
642 static __inline__ __m512i __DEFAULT_FN_ATTRS
643 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
644 {
645  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
646  (__v16si) _mm512_and_epi32(__a, __b),
647  (__v16si) __src);
648 }
649 
650 static __inline__ __m512i __DEFAULT_FN_ATTRS
651 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
652 {
653  return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
654  __k, __a, __b);
655 }
656 
657 static __inline__ __m512i __DEFAULT_FN_ATTRS
658 _mm512_and_epi64(__m512i __a, __m512i __b)
659 {
660  return (__m512i)((__v8du)__a & (__v8du)__b);
661 }
662 
663 static __inline__ __m512i __DEFAULT_FN_ATTRS
664 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
665 {
666  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
667  (__v8di) _mm512_and_epi64(__a, __b),
668  (__v8di) __src);
669 }
670 
671 static __inline__ __m512i __DEFAULT_FN_ATTRS
672 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
673 {
674  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
675  __k, __a, __b);
676 }
677 
678 static __inline__ __m512i __DEFAULT_FN_ATTRS
679 _mm512_andnot_si512 (__m512i __A, __m512i __B)
680 {
681  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
682 }
683 
684 static __inline__ __m512i __DEFAULT_FN_ATTRS
685 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
686 {
687  return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
688 }
689 
690 static __inline__ __m512i __DEFAULT_FN_ATTRS
691 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
692 {
693  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
694  (__v16si)_mm512_andnot_epi32(__A, __B),
695  (__v16si)__W);
696 }
697 
698 static __inline__ __m512i __DEFAULT_FN_ATTRS
699 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
700 {
702  __U, __A, __B);
703 }
704 
705 static __inline__ __m512i __DEFAULT_FN_ATTRS
706 _mm512_andnot_epi64(__m512i __A, __m512i __B)
707 {
708  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
709 }
710 
711 static __inline__ __m512i __DEFAULT_FN_ATTRS
712 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
713 {
714  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
715  (__v8di)_mm512_andnot_epi64(__A, __B),
716  (__v8di)__W);
717 }
718 
719 static __inline__ __m512i __DEFAULT_FN_ATTRS
720 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
721 {
723  __U, __A, __B);
724 }
725 
726 static __inline__ __m512i __DEFAULT_FN_ATTRS
727 _mm512_or_epi32(__m512i __a, __m512i __b)
728 {
729  return (__m512i)((__v16su)__a | (__v16su)__b);
730 }
731 
732 static __inline__ __m512i __DEFAULT_FN_ATTRS
733 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
734 {
735  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
736  (__v16si)_mm512_or_epi32(__a, __b),
737  (__v16si)__src);
738 }
739 
740 static __inline__ __m512i __DEFAULT_FN_ATTRS
741 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
742 {
743  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
744 }
745 
746 static __inline__ __m512i __DEFAULT_FN_ATTRS
747 _mm512_or_epi64(__m512i __a, __m512i __b)
748 {
749  return (__m512i)((__v8du)__a | (__v8du)__b);
750 }
751 
752 static __inline__ __m512i __DEFAULT_FN_ATTRS
753 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
754 {
755  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
756  (__v8di)_mm512_or_epi64(__a, __b),
757  (__v8di)__src);
758 }
759 
760 static __inline__ __m512i __DEFAULT_FN_ATTRS
761 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
762 {
763  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
764 }
765 
766 static __inline__ __m512i __DEFAULT_FN_ATTRS
767 _mm512_xor_epi32(__m512i __a, __m512i __b)
768 {
769  return (__m512i)((__v16su)__a ^ (__v16su)__b);
770 }
771 
772 static __inline__ __m512i __DEFAULT_FN_ATTRS
773 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
774 {
775  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
776  (__v16si)_mm512_xor_epi32(__a, __b),
777  (__v16si)__src);
778 }
779 
780 static __inline__ __m512i __DEFAULT_FN_ATTRS
781 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
782 {
783  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
784 }
785 
786 static __inline__ __m512i __DEFAULT_FN_ATTRS
787 _mm512_xor_epi64(__m512i __a, __m512i __b)
788 {
789  return (__m512i)((__v8du)__a ^ (__v8du)__b);
790 }
791 
792 static __inline__ __m512i __DEFAULT_FN_ATTRS
793 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
794 {
795  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
796  (__v8di)_mm512_xor_epi64(__a, __b),
797  (__v8di)__src);
798 }
799 
800 static __inline__ __m512i __DEFAULT_FN_ATTRS
801 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
802 {
803  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
804 }
805 
806 static __inline__ __m512i __DEFAULT_FN_ATTRS
807 _mm512_and_si512(__m512i __a, __m512i __b)
808 {
809  return (__m512i)((__v8du)__a & (__v8du)__b);
810 }
811 
812 static __inline__ __m512i __DEFAULT_FN_ATTRS
813 _mm512_or_si512(__m512i __a, __m512i __b)
814 {
815  return (__m512i)((__v8du)__a | (__v8du)__b);
816 }
817 
818 static __inline__ __m512i __DEFAULT_FN_ATTRS
819 _mm512_xor_si512(__m512i __a, __m512i __b)
820 {
821  return (__m512i)((__v8du)__a ^ (__v8du)__b);
822 }
823 
824 /* Arithmetic */
825 
826 static __inline __m512d __DEFAULT_FN_ATTRS
827 _mm512_add_pd(__m512d __a, __m512d __b)
828 {
829  return (__m512d)((__v8df)__a + (__v8df)__b);
830 }
831 
832 static __inline __m512 __DEFAULT_FN_ATTRS
833 _mm512_add_ps(__m512 __a, __m512 __b)
834 {
835  return (__m512)((__v16sf)__a + (__v16sf)__b);
836 }
837 
838 static __inline __m512d __DEFAULT_FN_ATTRS
839 _mm512_mul_pd(__m512d __a, __m512d __b)
840 {
841  return (__m512d)((__v8df)__a * (__v8df)__b);
842 }
843 
844 static __inline __m512 __DEFAULT_FN_ATTRS
845 _mm512_mul_ps(__m512 __a, __m512 __b)
846 {
847  return (__m512)((__v16sf)__a * (__v16sf)__b);
848 }
849 
850 static __inline __m512d __DEFAULT_FN_ATTRS
851 _mm512_sub_pd(__m512d __a, __m512d __b)
852 {
853  return (__m512d)((__v8df)__a - (__v8df)__b);
854 }
855 
856 static __inline __m512 __DEFAULT_FN_ATTRS
857 _mm512_sub_ps(__m512 __a, __m512 __b)
858 {
859  return (__m512)((__v16sf)__a - (__v16sf)__b);
860 }
861 
862 static __inline__ __m512i __DEFAULT_FN_ATTRS
863 _mm512_add_epi64 (__m512i __A, __m512i __B)
864 {
865  return (__m512i) ((__v8du) __A + (__v8du) __B);
866 }
867 
/* 512-bit integer add/sub family. The unmasked forms lower to plain vector
   arithmetic (performed on unsigned lane types so wraparound is well
   defined); the mask/maskz forms blend the result against __W or zero via
   the per-lane select builtins, one mask bit per lane. */

/* Add packed 64-bit integers; lanes with a 0 bit in __U keep __W's lane. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Add packed 64-bit integers; lanes with a 0 bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Subtract packed 64-bit integers (B from A), all lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A - (__v8du) __B);
}

/* Subtract packed 64-bit integers; merge-masked with __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Subtract packed 64-bit integers; zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Add packed 32-bit integers, all 16 lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A + (__v16su) __B);
}

/* Add packed 32-bit integers; merge-masked with __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Add packed 32-bit integers; zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Subtract packed 32-bit integers (B from A), all lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A - (__v16su) __B);
}

/* Subtract packed 32-bit integers; merge-masked with __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Subtract packed 32-bit integers; zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
949 
/* Packed double-precision max with an explicit rounding/exception-control
   argument R (one of the _MM_FROUND_* values). Macros, not functions,
   because R must be a compile-time immediate for the builtin. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

#define _mm512_max_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, (int)(R)); })
967 
968 static __inline__ __m512d __DEFAULT_FN_ATTRS
969 _mm512_max_pd(__m512d __A, __m512d __B)
970 {
971  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
972  (__v8df) __B,
973  (__v8df)
975  (__mmask8) -1,
977 }
978 
979 static __inline__ __m512d __DEFAULT_FN_ATTRS
980 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
981 {
982  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
983  (__v8df) __B,
984  (__v8df) __W,
985  (__mmask8) __U,
987 }
988 
989 static __inline__ __m512d __DEFAULT_FN_ATTRS
990 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
991 {
992  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
993  (__v8df) __B,
994  (__v8df)
996  (__mmask8) __U,
998 }
999 
/* Packed single-precision max with an explicit rounding/exception-control
   argument R; R must be a compile-time immediate. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })

#define _mm512_max_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })
1017 
1018 static __inline__ __m512 __DEFAULT_FN_ATTRS
1019 _mm512_max_ps(__m512 __A, __m512 __B)
1020 {
1021  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1022  (__v16sf) __B,
1023  (__v16sf)
1024  _mm512_setzero_ps (),
1025  (__mmask16) -1,
1027 }
1028 
1029 static __inline__ __m512 __DEFAULT_FN_ATTRS
1030 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1031 {
1032  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1033  (__v16sf) __B,
1034  (__v16sf) __W,
1035  (__mmask16) __U,
1037 }
1038 
1039 static __inline__ __m512 __DEFAULT_FN_ATTRS
1040 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1041 {
1042  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1043  (__v16sf) __B,
1044  (__v16sf)
1045  _mm512_setzero_ps (),
1046  (__mmask16) __U,
1048 }
1049 
1050 static __inline__ __m128 __DEFAULT_FN_ATTRS
1051 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1052  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1053  (__v4sf) __B,
1054  (__v4sf) __W,
1055  (__mmask8) __U,
1057 }
1058 
1059 static __inline__ __m128 __DEFAULT_FN_ATTRS
1060 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1061  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1062  (__v4sf) __B,
1063  (__v4sf) _mm_setzero_ps (),
1064  (__mmask8) __U,
1066 }
1067 
/* Scalar single-precision max with an explicit rounding argument R. */
#define _mm_max_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
1085 
1086 static __inline__ __m128d __DEFAULT_FN_ATTRS
1087 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1088  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1089  (__v2df) __B,
1090  (__v2df) __W,
1091  (__mmask8) __U,
1093 }
1094 
1095 static __inline__ __m128d __DEFAULT_FN_ATTRS
1096 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1097  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1098  (__v2df) __B,
1099  (__v2df) _mm_setzero_pd (),
1100  (__mmask8) __U,
1102 }
1103 
/* Scalar double-precision max with an explicit rounding argument R. */
#define _mm_max_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
1121 
1122 static __inline __m512i
1124 _mm512_max_epi32(__m512i __A, __m512i __B)
1125 {
1126  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1127  (__v16si) __B,
1128  (__v16si)
1130  (__mmask16) -1);
1131 }
1132 
1133 static __inline__ __m512i __DEFAULT_FN_ATTRS
1134 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1135 {
1136  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1137  (__v16si) __B,
1138  (__v16si) __W, __M);
1139 }
1140 
1141 static __inline__ __m512i __DEFAULT_FN_ATTRS
1142 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1143 {
1144  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1145  (__v16si) __B,
1146  (__v16si)
1148  __M);
1149 }
1150 
1151 static __inline __m512i __DEFAULT_FN_ATTRS
1152 _mm512_max_epu32(__m512i __A, __m512i __B)
1153 {
1154  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1155  (__v16si) __B,
1156  (__v16si)
1158  (__mmask16) -1);
1159 }
1160 
1161 static __inline__ __m512i __DEFAULT_FN_ATTRS
1162 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1163 {
1164  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1165  (__v16si) __B,
1166  (__v16si) __W, __M);
1167 }
1168 
1169 static __inline__ __m512i __DEFAULT_FN_ATTRS
1170 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1171 {
1172  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1173  (__v16si) __B,
1174  (__v16si)
1176  __M);
1177 }
1178 
1179 static __inline __m512i __DEFAULT_FN_ATTRS
1180 _mm512_max_epi64(__m512i __A, __m512i __B)
1181 {
1182  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1183  (__v8di) __B,
1184  (__v8di)
1186  (__mmask8) -1);
1187 }
1188 
1189 static __inline__ __m512i __DEFAULT_FN_ATTRS
1190 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1191 {
1192  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1193  (__v8di) __B,
1194  (__v8di) __W, __M);
1195 }
1196 
1197 static __inline__ __m512i __DEFAULT_FN_ATTRS
1198 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1199 {
1200  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1201  (__v8di) __B,
1202  (__v8di)
1204  __M);
1205 }
1206 
1207 static __inline __m512i __DEFAULT_FN_ATTRS
1208 _mm512_max_epu64(__m512i __A, __m512i __B)
1209 {
1210  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1211  (__v8di) __B,
1212  (__v8di)
1214  (__mmask8) -1);
1215 }
1216 
1217 static __inline__ __m512i __DEFAULT_FN_ATTRS
1218 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1219 {
1220  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1221  (__v8di) __B,
1222  (__v8di) __W, __M);
1223 }
1224 
1225 static __inline__ __m512i __DEFAULT_FN_ATTRS
1226 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1227 {
1228  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1229  (__v8di) __B,
1230  (__v8di)
1232  __M);
1233 }
1234 
/* Packed double-precision min with an explicit rounding argument R. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

#define _mm512_min_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, (int)(R)); })
1252 
1253 static __inline__ __m512d __DEFAULT_FN_ATTRS
1254 _mm512_min_pd(__m512d __A, __m512d __B)
1255 {
1256  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1257  (__v8df) __B,
1258  (__v8df)
1259  _mm512_setzero_pd (),
1260  (__mmask8) -1,
1262 }
1263 
1264 static __inline__ __m512d __DEFAULT_FN_ATTRS
1265 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1266 {
1267  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1268  (__v8df) __B,
1269  (__v8df) __W,
1270  (__mmask8) __U,
1272 }
1273 
/* Packed single-precision min with an explicit rounding argument R. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })

#define _mm512_min_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })
1291 
1292 static __inline__ __m512d __DEFAULT_FN_ATTRS
1293 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1294 {
1295  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1296  (__v8df) __B,
1297  (__v8df)
1298  _mm512_setzero_pd (),
1299  (__mmask8) __U,
1301 }
1302 
1303 static __inline__ __m512 __DEFAULT_FN_ATTRS
1304 _mm512_min_ps(__m512 __A, __m512 __B)
1305 {
1306  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1307  (__v16sf) __B,
1308  (__v16sf)
1309  _mm512_setzero_ps (),
1310  (__mmask16) -1,
1312 }
1313 
1314 static __inline__ __m512 __DEFAULT_FN_ATTRS
1315 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1316 {
1317  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1318  (__v16sf) __B,
1319  (__v16sf) __W,
1320  (__mmask16) __U,
1322 }
1323 
1324 static __inline__ __m512 __DEFAULT_FN_ATTRS
1325 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1326 {
1327  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1328  (__v16sf) __B,
1329  (__v16sf)
1330  _mm512_setzero_ps (),
1331  (__mmask16) __U,
1333 }
1334 
1335 static __inline__ __m128 __DEFAULT_FN_ATTRS
1336 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1337  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1338  (__v4sf) __B,
1339  (__v4sf) __W,
1340  (__mmask8) __U,
1342 }
1343 
1344 static __inline__ __m128 __DEFAULT_FN_ATTRS
1345 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1346  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1347  (__v4sf) __B,
1348  (__v4sf) _mm_setzero_ps (),
1349  (__mmask8) __U,
1351 }
1352 
/* Scalar single-precision min with an explicit rounding argument R. */
#define _mm_min_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
1370 
1371 static __inline__ __m128d __DEFAULT_FN_ATTRS
1372 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1373  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1374  (__v2df) __B,
1375  (__v2df) __W,
1376  (__mmask8) __U,
1378 }
1379 
1380 static __inline__ __m128d __DEFAULT_FN_ATTRS
1381 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1382  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1383  (__v2df) __B,
1384  (__v2df) _mm_setzero_pd (),
1385  (__mmask8) __U,
1387 }
1388 
/* Scalar double-precision min with an explicit rounding argument R. */
#define _mm_min_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
1406 
1407 static __inline __m512i
1409 _mm512_min_epi32(__m512i __A, __m512i __B)
1410 {
1411  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1412  (__v16si) __B,
1413  (__v16si)
1415  (__mmask16) -1);
1416 }
1417 
1418 static __inline__ __m512i __DEFAULT_FN_ATTRS
1419 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1420 {
1421  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1422  (__v16si) __B,
1423  (__v16si) __W, __M);
1424 }
1425 
1426 static __inline__ __m512i __DEFAULT_FN_ATTRS
1427 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1428 {
1429  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1430  (__v16si) __B,
1431  (__v16si)
1433  __M);
1434 }
1435 
1436 static __inline __m512i __DEFAULT_FN_ATTRS
1437 _mm512_min_epu32(__m512i __A, __m512i __B)
1438 {
1439  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1440  (__v16si) __B,
1441  (__v16si)
1443  (__mmask16) -1);
1444 }
1445 
1446 static __inline__ __m512i __DEFAULT_FN_ATTRS
1447 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1448 {
1449  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1450  (__v16si) __B,
1451  (__v16si) __W, __M);
1452 }
1453 
1454 static __inline__ __m512i __DEFAULT_FN_ATTRS
1455 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1456 {
1457  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1458  (__v16si) __B,
1459  (__v16si)
1461  __M);
1462 }
1463 
1464 static __inline __m512i __DEFAULT_FN_ATTRS
1465 _mm512_min_epi64(__m512i __A, __m512i __B)
1466 {
1467  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1468  (__v8di) __B,
1469  (__v8di)
1471  (__mmask8) -1);
1472 }
1473 
1474 static __inline__ __m512i __DEFAULT_FN_ATTRS
1475 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1476 {
1477  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1478  (__v8di) __B,
1479  (__v8di) __W, __M);
1480 }
1481 
1482 static __inline__ __m512i __DEFAULT_FN_ATTRS
1483 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1484 {
1485  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1486  (__v8di) __B,
1487  (__v8di)
1489  __M);
1490 }
1491 
1492 static __inline __m512i __DEFAULT_FN_ATTRS
1493 _mm512_min_epu64(__m512i __A, __m512i __B)
1494 {
1495  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1496  (__v8di) __B,
1497  (__v8di)
1499  (__mmask8) -1);
1500 }
1501 
1502 static __inline__ __m512i __DEFAULT_FN_ATTRS
1503 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1504 {
1505  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1506  (__v8di) __B,
1507  (__v8di) __W, __M);
1508 }
1509 
1510 static __inline__ __m512i __DEFAULT_FN_ATTRS
1511 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1512 {
1513  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1514  (__v8di) __B,
1515  (__v8di)
1517  __M);
1518 }
1519 
/* Integer multiply family. mul_epi32 / mul_epu32 multiply the low (even)
   32-bit element of each 64-bit lane, signed resp. unsigned, producing
   eight 64-bit products; mullo_epi32 keeps the low 32 bits of each of the
   sixteen 32-bit products. */

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
}

/* Signed widening multiply, merge-masked per 64-bit result lane. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)__W);
}

/* Signed widening multiply, zero-masked. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epu32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}

/* Unsigned widening multiply, merge-masked per 64-bit result lane. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)__W);
}

/* Unsigned widening multiply, zero-masked. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}

/* Low 32 bits of each 32x32 product; unsigned lanes for defined wrap. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A * (__v16su) __B);
}

/* Low multiply, zero-masked. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Low multiply, merge-masked with __W. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)__W);
}
1585 
/* Packed double-precision square root with an explicit rounding mode R. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
                                         (int)(R)); })

#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(U), (int)(R)); })

#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_undefined_pd(), \
                                         (__mmask8)-1, (int)(R)); })
1600 
1601 static __inline__ __m512d __DEFAULT_FN_ATTRS
1602 _mm512_sqrt_pd(__m512d __a)
1603 {
1604  return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
1605  (__v8df) _mm512_setzero_pd (),
1606  (__mmask8) -1,
1608 }
1609 
1610 static __inline__ __m512d __DEFAULT_FN_ATTRS
1611 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1612 {
1613  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1614  (__v8df) __W,
1615  (__mmask8) __U,
1617 }
1618 
1619 static __inline__ __m512d __DEFAULT_FN_ATTRS
1620 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1621 {
1622  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1623  (__v8df)
1624  _mm512_setzero_pd (),
1625  (__mmask8) __U,
1627 }
1628 
/* Packed single-precision square root with an explicit rounding mode R. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
                                        (int)(R)); })

#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(U), (int)(R)); })

#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_undefined_ps(), \
                                        (__mmask16)-1, (int)(R)); })
1643 
1644 static __inline__ __m512 __DEFAULT_FN_ATTRS
1645 _mm512_sqrt_ps(__m512 __a)
1646 {
1647  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
1648  (__v16sf) _mm512_setzero_ps (),
1649  (__mmask16) -1,
1651 }
1652 
1653 static __inline__ __m512 __DEFAULT_FN_ATTRS
1654 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1655 {
1656  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1657  (__v16sf) __W,
1658  (__mmask16) __U,
1660 }
1661 
1662 static __inline__ __m512 __DEFAULT_FN_ATTRS
1663 _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1664 {
1665  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1666  (__v16sf) _mm512_setzero_ps (),
1667  (__mmask16) __U,
1669 }
1670 
/* Approximate reciprocal square root of packed doubles (relative error
   <= 2^-14; no rounding-mode argument). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rsqrt14_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                                                     (__v8df)
                                                     _mm512_setzero_pd (),
                                                     (__mmask8) -1);}

/* Merge-masked rsqrt14: unselected lanes come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                                                     (__v8df) __W,
                                                     (__mmask8) __U);
}

/* Zero-masked rsqrt14: unselected lanes are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                                                     (__v8df)
                                                     _mm512_setzero_pd (),
                                                     (__mmask8) __U);
}
1695 
1696 static __inline__ __m512 __DEFAULT_FN_ATTRS
1698 {
1699  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1700  (__v16sf)
1701  _mm512_setzero_ps (),
1702  (__mmask16) -1);
1703 }
1704 
/* Merge-masked rsqrt14: unselected lanes come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                                                    (__v16sf) __W,
                                                    (__mmask16) __U);
}

/* Zero-masked rsqrt14: unselected lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                                                    (__v16sf)
                                                    _mm512_setzero_ps (),
                                                    (__mmask16) __U);
}
1721 
/* Scalar approximate reciprocal square root: computes rsqrt of the low
   float of __B, upper elements copied from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rsqrt14_ss(__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
                                                 (__v4sf) __B,
                                                 (__v4sf)
                                                 _mm_setzero_ps (),
                                                 (__mmask8) -1);
}

/* Merge-masked scalar rsqrt14: low lane from __W when mask bit 0 is 0. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
                                                 (__v4sf) __B,
                                                 (__v4sf) __W,
                                                 (__mmask8) __U);
}

/* Zero-masked scalar rsqrt14. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
                                                 (__v4sf) __B,
                                                 (__v4sf) _mm_setzero_ps (),
                                                 (__mmask8) __U);
}
1749 
/* Scalar approximate reciprocal square root, double precision: low lane
   of __B, upper element copied from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rsqrt14_sd(__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
                                                  (__v2df) __B,
                                                  (__v2df)
                                                  _mm_setzero_pd (),
                                                  (__mmask8) -1);
}

/* Merge-masked scalar rsqrt14: low lane from __W when mask bit 0 is 0. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
                                                   (__v2df) __B,
                                                   (__v2df) __W,
                                                   (__mmask8) __U);
}

/* Zero-masked scalar rsqrt14. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
                                                   (__v2df) __B,
                                                   (__v2df) _mm_setzero_pd (),
                                                   (__mmask8) __U);
}
1777 
/* Approximate reciprocal (relative error <= 2^-14) of packed doubles. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rcp14_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) -1);
}

/* Merge-masked rcp14: unselected lanes come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
                                                   (__v8df) __W,
                                                   (__mmask8) __U);
}

/* Zero-masked rcp14: unselected lanes are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}

/* Approximate reciprocal of packed floats. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rcp14_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) -1);
}

/* Merge-masked rcp14: unselected lanes come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
                                                  (__v16sf) __W,
                                                  (__mmask16) __U);
}

/* Zero-masked rcp14: unselected lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}
1829 
/* Scalar approximate reciprocal of the low float of __B; upper elements
   copied from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rcp14_ss(__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
                                               (__v4sf) __B,
                                               (__v4sf)
                                               _mm_setzero_ps (),
                                               (__mmask8) -1);
}

/* Merge-masked scalar rcp14: low lane from __W when mask bit 0 is 0. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
                                               (__v4sf) __B,
                                               (__v4sf) __W,
                                               (__mmask8) __U);
}

/* Zero-masked scalar rcp14. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
                                               (__v4sf) __B,
                                               (__v4sf) _mm_setzero_ps (),
                                               (__mmask8) __U);
}

/* Scalar approximate reciprocal, double precision. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rcp14_sd(__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
                                                (__v2df) __B,
                                                (__v2df)
                                                _mm_setzero_pd (),
                                                (__mmask8) -1);
}

/* Merge-masked scalar rcp14: low lane from __W when mask bit 0 is 0. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
                                                 (__v2df) __B,
                                                 (__v2df) __W,
                                                 (__mmask8) __U);
}

/* Zero-masked scalar rcp14. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
                                                 (__v2df) __B,
                                                 (__v2df) _mm_setzero_pd (),
                                                 (__mmask8) __U);
}
1885 
1886 static __inline __m512 __DEFAULT_FN_ATTRS
1887 _mm512_floor_ps(__m512 __A)
1888 {
1889  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1891  (__v16sf) __A, -1,
1893 }
1894 
1895 static __inline__ __m512 __DEFAULT_FN_ATTRS
1896 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1897 {
1898  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1900  (__v16sf) __W, __U,
1902 }
1903 
1904 static __inline __m512d __DEFAULT_FN_ATTRS
1905 _mm512_floor_pd(__m512d __A)
1906 {
1907  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1909  (__v8df) __A, -1,
1911 }
1912 
1913 static __inline__ __m512d __DEFAULT_FN_ATTRS
1914 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1915 {
1916  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1918  (__v8df) __W, __U,
1920 }
1921 
1922 static __inline__ __m512 __DEFAULT_FN_ATTRS
1923 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1924 {
1925  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1927  (__v16sf) __W, __U,
1929 }
1930 
1931 static __inline __m512 __DEFAULT_FN_ATTRS
1932 _mm512_ceil_ps(__m512 __A)
1933 {
1934  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1936  (__v16sf) __A, -1,
1938 }
1939 
1940 static __inline __m512d __DEFAULT_FN_ATTRS
1941 _mm512_ceil_pd(__m512d __A)
1942 {
1943  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1945  (__v8df) __A, -1,
1947 }
1948 
1949 static __inline__ __m512d __DEFAULT_FN_ATTRS
1950 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1951 {
1952  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1954  (__v8df) __W, __U,
1956 }
1957 
1958 static __inline __m512i __DEFAULT_FN_ATTRS
1959 _mm512_abs_epi64(__m512i __A)
1960 {
1961  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1962  (__v8di)
1964  (__mmask8) -1);
1965 }
1966 
1967 static __inline__ __m512i __DEFAULT_FN_ATTRS
1968 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1969 {
1970  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1971  (__v8di) __W,
1972  (__mmask8) __U);
1973 }
1974 
1975 static __inline__ __m512i __DEFAULT_FN_ATTRS
1976 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1977 {
1978  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1979  (__v8di)
1981  (__mmask8) __U);
1982 }
1983 
1984 static __inline __m512i __DEFAULT_FN_ATTRS
1985 _mm512_abs_epi32(__m512i __A)
1986 {
1987  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1988  (__v16si)
1990  (__mmask16) -1);
1991 }
1992 
1993 static __inline__ __m512i __DEFAULT_FN_ATTRS
1994 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1995 {
1996  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1997  (__v16si) __W,
1998  (__mmask16) __U);
1999 }
2000 
2001 static __inline__ __m512i __DEFAULT_FN_ATTRS
2002 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
2003 {
2004  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
2005  (__v16si)
2007  (__mmask16) __U);
2008 }
2009 
2010 static __inline__ __m128 __DEFAULT_FN_ATTRS
2011 _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2012  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
2013  (__v4sf) __B,
2014  (__v4sf) __W,
2015  (__mmask8) __U,
2017 }
2018 
2019 static __inline__ __m128 __DEFAULT_FN_ATTRS
2020 _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2021  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
2022  (__v4sf) __B,
2023  (__v4sf) _mm_setzero_ps (),
2024  (__mmask8) __U,
2026 }
2027 
/* Scalar single-precision add with explicit rounding mode R
   (unmasked / merge-masked / zero-masked variants). */
#define _mm_add_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
2045 
2046 static __inline__ __m128d __DEFAULT_FN_ATTRS
2047 _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2048  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
2049  (__v2df) __B,
2050  (__v2df) __W,
2051  (__mmask8) __U,
2053 }
2054 
2055 static __inline__ __m128d __DEFAULT_FN_ATTRS
2056 _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2057  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
2058  (__v2df) __B,
2059  (__v2df) _mm_setzero_pd (),
2060  (__mmask8) __U,
2062 }
/* Scalar double-precision add with explicit rounding mode R. */
#define _mm_add_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
2080 
2081 static __inline__ __m512d __DEFAULT_FN_ATTRS
2082 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2083  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2084  (__v8df)_mm512_add_pd(__A, __B),
2085  (__v8df)__W);
2086 }
2087 
2088 static __inline__ __m512d __DEFAULT_FN_ATTRS
2089 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2090  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2091  (__v8df)_mm512_add_pd(__A, __B),
2092  (__v8df)_mm512_setzero_pd());
2093 }
2094 
2095 static __inline__ __m512 __DEFAULT_FN_ATTRS
2096 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2097  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2098  (__v16sf)_mm512_add_ps(__A, __B),
2099  (__v16sf)__W);
2100 }
2101 
2102 static __inline__ __m512 __DEFAULT_FN_ATTRS
2103 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2104  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2105  (__v16sf)_mm512_add_ps(__A, __B),
2106  (__v16sf)_mm512_setzero_ps());
2107 }
2108 
/* 512-bit packed add with explicit rounding mode R (pd and ps variants). */
#define _mm512_add_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

#define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

#define _mm512_add_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })

#define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

#define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2144 
2145 static __inline__ __m128 __DEFAULT_FN_ATTRS
2146 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2147  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2148  (__v4sf) __B,
2149  (__v4sf) __W,
2150  (__mmask8) __U,
2152 }
2153 
2154 static __inline__ __m128 __DEFAULT_FN_ATTRS
2155 _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2156  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2157  (__v4sf) __B,
2158  (__v4sf) _mm_setzero_ps (),
2159  (__mmask8) __U,
2161 }
/* Scalar single-precision subtract with explicit rounding mode R. */
#define _mm_sub_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
2179 
2180 static __inline__ __m128d __DEFAULT_FN_ATTRS
2181 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2182  return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2183  (__v2df) __B,
2184  (__v2df) __W,
2185  (__mmask8) __U,
2187 }
2188 
2189 static __inline__ __m128d __DEFAULT_FN_ATTRS
2190 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2191  return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2192  (__v2df) __B,
2193  (__v2df) _mm_setzero_pd (),
2194  (__mmask8) __U,
2196 }
2197 
/* Scalar double-precision subtract with explicit rounding mode R. */
#define _mm_sub_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
2215 
2216 static __inline__ __m512d __DEFAULT_FN_ATTRS
2217 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2218  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2219  (__v8df)_mm512_sub_pd(__A, __B),
2220  (__v8df)__W);
2221 }
2222 
2223 static __inline__ __m512d __DEFAULT_FN_ATTRS
2224 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2225  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2226  (__v8df)_mm512_sub_pd(__A, __B),
2227  (__v8df)_mm512_setzero_pd());
2228 }
2229 
2230 static __inline__ __m512 __DEFAULT_FN_ATTRS
2231 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2232  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2233  (__v16sf)_mm512_sub_ps(__A, __B),
2234  (__v16sf)__W);
2235 }
2236 
2237 static __inline__ __m512 __DEFAULT_FN_ATTRS
2238 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2239  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2240  (__v16sf)_mm512_sub_ps(__A, __B),
2241  (__v16sf)_mm512_setzero_ps());
2242 }
2243 
/* 512-bit packed double subtract with explicit rounding mode R. */
#define _mm512_sub_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

#define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })
2261 
/* 512-bit packed float subtract with explicit rounding mode R.
   NOTE: the mask/maskz variants previously ended with a stray ';' after
   the closing '})', which broke use of the macro inside an expression;
   the trailing semicolons have been removed. */
#define _mm512_sub_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })

#define _mm512_mask_sub_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

#define _mm512_maskz_sub_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2279 
2280 static __inline__ __m128 __DEFAULT_FN_ATTRS
2281 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2282  return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2283  (__v4sf) __B,
2284  (__v4sf) __W,
2285  (__mmask8) __U,
2287 }
2288 
2289 static __inline__ __m128 __DEFAULT_FN_ATTRS
2290 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2291  return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2292  (__v4sf) __B,
2293  (__v4sf) _mm_setzero_ps (),
2294  (__mmask8) __U,
2296 }
/* Scalar single-precision multiply with explicit rounding mode R. */
#define _mm_mul_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
2314 
2315 static __inline__ __m128d __DEFAULT_FN_ATTRS
2316 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2317  return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2318  (__v2df) __B,
2319  (__v2df) __W,
2320  (__mmask8) __U,
2322 }
2323 
2324 static __inline__ __m128d __DEFAULT_FN_ATTRS
2325 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2326  return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2327  (__v2df) __B,
2328  (__v2df) _mm_setzero_pd (),
2329  (__mmask8) __U,
2331 }
2332 
/* Scalar double-precision multiply with explicit rounding mode R. */
#define _mm_mul_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
2350 
2351 static __inline__ __m512d __DEFAULT_FN_ATTRS
2352 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2353  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2354  (__v8df)_mm512_mul_pd(__A, __B),
2355  (__v8df)__W);
2356 }
2357 
2358 static __inline__ __m512d __DEFAULT_FN_ATTRS
2359 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2360  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2361  (__v8df)_mm512_mul_pd(__A, __B),
2362  (__v8df)_mm512_setzero_pd());
2363 }
2364 
2365 static __inline__ __m512 __DEFAULT_FN_ATTRS
2366 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2367  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2368  (__v16sf)_mm512_mul_ps(__A, __B),
2369  (__v16sf)__W);
2370 }
2371 
2372 static __inline__ __m512 __DEFAULT_FN_ATTRS
2373 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2374  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2375  (__v16sf)_mm512_mul_ps(__A, __B),
2376  (__v16sf)_mm512_setzero_ps());
2377 }
2378 
/* 512-bit packed double multiply with explicit rounding mode R. */
#define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

#define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })
2396 
/* 512-bit packed float multiply with explicit rounding mode R.
   NOTE: stray ';' after the closing '})' of the mask/maskz variants removed —
   it broke use of these macros inside expressions. */
#define _mm512_mul_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })

#define _mm512_mask_mul_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

#define _mm512_maskz_mul_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2414 
2415 static __inline__ __m128 __DEFAULT_FN_ATTRS
2416 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2417  return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2418  (__v4sf) __B,
2419  (__v4sf) __W,
2420  (__mmask8) __U,
2422 }
2423 
2424 static __inline__ __m128 __DEFAULT_FN_ATTRS
2425 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2426  return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2427  (__v4sf) __B,
2428  (__v4sf) _mm_setzero_ps (),
2429  (__mmask8) __U,
2431 }
2432 
/* Scalar single-precision divide with explicit rounding mode R. */
#define _mm_div_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
2450 
2451 static __inline__ __m128d __DEFAULT_FN_ATTRS
2452 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2453  return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2454  (__v2df) __B,
2455  (__v2df) __W,
2456  (__mmask8) __U,
2458 }
2459 
2460 static __inline__ __m128d __DEFAULT_FN_ATTRS
2461 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2462  return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2463  (__v2df) __B,
2464  (__v2df) _mm_setzero_pd (),
2465  (__mmask8) __U,
2467 }
2468 
/* Scalar double-precision divide with explicit rounding mode R. */
#define _mm_div_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
2486 
2487 static __inline __m512d __DEFAULT_FN_ATTRS
2488 _mm512_div_pd(__m512d __a, __m512d __b)
2489 {
2490  return (__m512d)((__v8df)__a/(__v8df)__b);
2491 }
2492 
2493 static __inline__ __m512d __DEFAULT_FN_ATTRS
2494 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2495  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2496  (__v8df)_mm512_div_pd(__A, __B),
2497  (__v8df)__W);
2498 }
2499 
2500 static __inline__ __m512d __DEFAULT_FN_ATTRS
2501 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2502  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2503  (__v8df)_mm512_div_pd(__A, __B),
2504  (__v8df)_mm512_setzero_pd());
2505 }
2506 
2507 static __inline __m512 __DEFAULT_FN_ATTRS
2508 _mm512_div_ps(__m512 __a, __m512 __b)
2509 {
2510  return (__m512)((__v16sf)__a/(__v16sf)__b);
2511 }
2512 
2513 static __inline__ __m512 __DEFAULT_FN_ATTRS
2514 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2515  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2516  (__v16sf)_mm512_div_ps(__A, __B),
2517  (__v16sf)__W);
2518 }
2519 
2520 static __inline__ __m512 __DEFAULT_FN_ATTRS
2521 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2522  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2523  (__v16sf)_mm512_div_ps(__A, __B),
2524  (__v16sf)_mm512_setzero_ps());
2525 }
2526 
/* 512-bit packed double divide with explicit rounding mode R. */
#define _mm512_div_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })
2544 
/* 512-bit packed float divide with explicit rounding mode R.
   NOTE: stray ';' after the closing '})' of the mask/maskz variants removed —
   it broke use of these macros inside expressions. */
#define _mm512_div_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })

#define _mm512_mask_div_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

#define _mm512_maskz_div_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2562 
/* Round packed floats to the precision selected by the immediate; the
   *_round_* variants additionally take an explicit rounding mode R. */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)(__m512)(A), (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         (int)(R)); })

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), (int)(R)); })

#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, (int)(R)); })
2593 
/* Round packed doubles to the precision selected by the immediate; the
   *_round_* variants additionally take an explicit rounding mode R. */
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                          (__v8df)(__m512d)(A), (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          (int)(R)); })

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), (int)(R)); })

#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, (int)(R)); })
2624 
/* FMA family on packed doubles with explicit rounding mode R.
   fmsub/fnmadd/fnmsub are expressed by negating the C and/or A operands
   of the fused-multiply-add builtin. */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), (__mmask8)-1, \
                                           (int)(R)); })


#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R)); })


#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R)); })


#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R)); })


#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), (__mmask8)-1, \
                                           (int)(R)); })


#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R)); })


#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })
2707 
2708 
2709 static __inline__ __m512d __DEFAULT_FN_ATTRS
2710 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2711 {
2712  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2713  (__v8df) __B,
2714  (__v8df) __C,
2715  (__mmask8) -1,
2717 }
2718 
2719 static __inline__ __m512d __DEFAULT_FN_ATTRS
2720 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2721 {
2722  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2723  (__v8df) __B,
2724  (__v8df) __C,
2725  (__mmask8) __U,
2727 }
2728 
2729 static __inline__ __m512d __DEFAULT_FN_ATTRS
2730 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2731 {
2732  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2733  (__v8df) __B,
2734  (__v8df) __C,
2735  (__mmask8) __U,
2737 }
2738 
2739 static __inline__ __m512d __DEFAULT_FN_ATTRS
2740 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2741 {
2742  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2743  (__v8df) __B,
2744  (__v8df) __C,
2745  (__mmask8) __U,
2747 }
2748 
2749 static __inline__ __m512d __DEFAULT_FN_ATTRS
2750 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2751 {
2752  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2753  (__v8df) __B,
2754  -(__v8df) __C,
2755  (__mmask8) -1,
2757 }
2758 
2759 static __inline__ __m512d __DEFAULT_FN_ATTRS
2760 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2761 {
2762  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2763  (__v8df) __B,
2764  -(__v8df) __C,
2765  (__mmask8) __U,
2767 }
2768 
2769 static __inline__ __m512d __DEFAULT_FN_ATTRS
2770 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2771 {
2772  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2773  (__v8df) __B,
2774  -(__v8df) __C,
2775  (__mmask8) __U,
2777 }
2778 
2779 static __inline__ __m512d __DEFAULT_FN_ATTRS
2780 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2781 {
2782  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2783  (__v8df) __B,
2784  (__v8df) __C,
2785  (__mmask8) -1,
2787 }
2788 
2789 static __inline__ __m512d __DEFAULT_FN_ATTRS
2790 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2791 {
2792  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2793  (__v8df) __B,
2794  (__v8df) __C,
2795  (__mmask8) __U,
2797 }
2798 
2799 static __inline__ __m512d __DEFAULT_FN_ATTRS
2800 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2801 {
2802  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2803  (__v8df) __B,
2804  (__v8df) __C,
2805  (__mmask8) __U,
2807 }
2808 
2809 static __inline__ __m512d __DEFAULT_FN_ATTRS
2810 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2811 {
2812  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2813  (__v8df) __B,
2814  -(__v8df) __C,
2815  (__mmask8) -1,
2817 }
2818 
2819 static __inline__ __m512d __DEFAULT_FN_ATTRS
2820 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2821 {
2822  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2823  (__v8df) __B,
2824  -(__v8df) __C,
2825  (__mmask8) __U,
2827 }
2828 
/* FMA family on packed floats with explicit rounding mode R.
   fmsub/fnmadd/fnmsub are expressed by negating the C and/or A operands
   of the fused-multiply-add builtin. */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), (__mmask16)-1, \
                                          (int)(R)); })


#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R)); })


#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R)); })


#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R)); })


#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), (__mmask16)-1, \
                                          (int)(R)); })


#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R)); })


#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })
2911 
2912 
2913 static __inline__ __m512 __DEFAULT_FN_ATTRS
2914 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2915 {
2916  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2917  (__v16sf) __B,
2918  (__v16sf) __C,
2919  (__mmask16) -1,
2921 }
2922 
2923 static __inline__ __m512 __DEFAULT_FN_ATTRS
2924 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2925 {
2926  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2927  (__v16sf) __B,
2928  (__v16sf) __C,
2929  (__mmask16) __U,
2931 }
2932 
2933 static __inline__ __m512 __DEFAULT_FN_ATTRS
2934 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2935 {
2936  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2937  (__v16sf) __B,
2938  (__v16sf) __C,
2939  (__mmask16) __U,
2941 }
2942 
2943 static __inline__ __m512 __DEFAULT_FN_ATTRS
2944 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2945 {
2946  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2947  (__v16sf) __B,
2948  (__v16sf) __C,
2949  (__mmask16) __U,
2951 }
2952 
2953 static __inline__ __m512 __DEFAULT_FN_ATTRS
2954 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2955 {
2956  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2957  (__v16sf) __B,
2958  -(__v16sf) __C,
2959  (__mmask16) -1,
2961 }
2962 
2963 static __inline__ __m512 __DEFAULT_FN_ATTRS
2964 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2965 {
2966  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2967  (__v16sf) __B,
2968  -(__v16sf) __C,
2969  (__mmask16) __U,
2971 }
2972 
2973 static __inline__ __m512 __DEFAULT_FN_ATTRS
2974 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2975 {
2976  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2977  (__v16sf) __B,
2978  -(__v16sf) __C,
2979  (__mmask16) __U,
2981 }
2982 
2983 static __inline__ __m512 __DEFAULT_FN_ATTRS
2984 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2985 {
2986  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2987  (__v16sf) __B,
2988  (__v16sf) __C,
2989  (__mmask16) -1,
2991 }
2992 
2993 static __inline__ __m512 __DEFAULT_FN_ATTRS
2994 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2995 {
2996  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2997  (__v16sf) __B,
2998  (__v16sf) __C,
2999  (__mmask16) __U,
3001 }
3002 
3003 static __inline__ __m512 __DEFAULT_FN_ATTRS
3004 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3005 {
3006  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3007  (__v16sf) __B,
3008  (__v16sf) __C,
3009  (__mmask16) __U,
3011 }
3012 
3013 static __inline__ __m512 __DEFAULT_FN_ATTRS
3014 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
3015 {
3016  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3017  (__v16sf) __B,
3018  -(__v16sf) __C,
3019  (__mmask16) -1,
3021 }
3022 
3023 static __inline__ __m512 __DEFAULT_FN_ATTRS
3024 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3025 {
3026  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3027  (__v16sf) __B,
3028  -(__v16sf) __C,
3029  (__mmask16) __U,
3031 }
3032 
/* 512-bit double-precision fmaddsub/fmsubadd with explicit rounding mode R.
   fmsubadd is expressed by negating C before the vfmaddsubpd512 builtin. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)-1, (int)(R)); })


#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R)); })


#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })


#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })


#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)-1, (int)(R)); })


#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R)); })


#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })
3080 
3081 
3082 static __inline__ __m512d __DEFAULT_FN_ATTRS
3083 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
3084 {
3085  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3086  (__v8df) __B,
3087  (__v8df) __C,
3088  (__mmask8) -1,
3090 }
3091 
3092 static __inline__ __m512d __DEFAULT_FN_ATTRS
3093 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3094 {
3095  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3096  (__v8df) __B,
3097  (__v8df) __C,
3098  (__mmask8) __U,
3100 }
3101 
3102 static __inline__ __m512d __DEFAULT_FN_ATTRS
3103 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3104 {
3105  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3106  (__v8df) __B,
3107  (__v8df) __C,
3108  (__mmask8) __U,
3110 }
3111 
3112 static __inline__ __m512d __DEFAULT_FN_ATTRS
3113 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3114 {
3115  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3116  (__v8df) __B,
3117  (__v8df) __C,
3118  (__mmask8) __U,
3120 }
3121 
3122 static __inline__ __m512d __DEFAULT_FN_ATTRS
3123 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
3124 {
3125  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3126  (__v8df) __B,
3127  -(__v8df) __C,
3128  (__mmask8) -1,
3130 }
3131 
3132 static __inline__ __m512d __DEFAULT_FN_ATTRS
3133 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3134 {
3135  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3136  (__v8df) __B,
3137  -(__v8df) __C,
3138  (__mmask8) __U,
3140 }
3141 
3142 static __inline__ __m512d __DEFAULT_FN_ATTRS
3143 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3144 {
3145  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3146  (__v8df) __B,
3147  -(__v8df) __C,
3148  (__mmask8) __U,
3150 }
3151 
/* 512-bit single-precision fmaddsub/fmsubadd with explicit rounding mode R. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R)); })


#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R)); })


#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })


#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })


#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R)); })


#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R)); })


#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })
3199 
3200 
3201 static __inline__ __m512 __DEFAULT_FN_ATTRS
3202 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3203 {
3204  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3205  (__v16sf) __B,
3206  (__v16sf) __C,
3207  (__mmask16) -1,
3209 }
3210 
3211 static __inline__ __m512 __DEFAULT_FN_ATTRS
3212 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3213 {
3214  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3215  (__v16sf) __B,
3216  (__v16sf) __C,
3217  (__mmask16) __U,
3219 }
3220 
3221 static __inline__ __m512 __DEFAULT_FN_ATTRS
3222 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3223 {
3224  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3225  (__v16sf) __B,
3226  (__v16sf) __C,
3227  (__mmask16) __U,
3229 }
3230 
3231 static __inline__ __m512 __DEFAULT_FN_ATTRS
3232 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3233 {
3234  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3235  (__v16sf) __B,
3236  (__v16sf) __C,
3237  (__mmask16) __U,
3239 }
3240 
3241 static __inline__ __m512 __DEFAULT_FN_ATTRS
3242 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3243 {
3244  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3245  (__v16sf) __B,
3246  -(__v16sf) __C,
3247  (__mmask16) -1,
3249 }
3250 
3251 static __inline__ __m512 __DEFAULT_FN_ATTRS
3252 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3253 {
3254  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3255  (__v16sf) __B,
3256  -(__v16sf) __C,
3257  (__mmask16) __U,
3259 }
3260 
3261 static __inline__ __m512 __DEFAULT_FN_ATTRS
3262 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3263 {
3264  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3265  (__v16sf) __B,
3266  -(__v16sf) __C,
3267  (__mmask16) __U,
3269 }
3270 
3271 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
3272  (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
3273  (__v8df)(__m512d)(B), \
3274  (__v8df)(__m512d)(C), \
3275  (__mmask8)(U), (int)(R)); })
3276 
3277 
3278 static __inline__ __m512d __DEFAULT_FN_ATTRS
3279 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3280 {
3281  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3282  (__v8df) __B,
3283  (__v8df) __C,
3284  (__mmask8) __U,
3286 }
3287 
3288 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
3289  (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
3290  (__v16sf)(__m512)(B), \
3291  (__v16sf)(__m512)(C), \
3292  (__mmask16)(U), (int)(R)); })
3293 
3294 
3295 static __inline__ __m512 __DEFAULT_FN_ATTRS
3296 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3297 {
3298  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3299  (__v16sf) __B,
3300  (__v16sf) __C,
3301  (__mmask16) __U,
3303 }
3304 
3305 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
3306  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
3307  (__v8df)(__m512d)(B), \
3308  (__v8df)(__m512d)(C), \
3309  (__mmask8)(U), (int)(R)); })
3310 
3311 
3312 static __inline__ __m512d __DEFAULT_FN_ATTRS
3313 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3314 {
3315  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3316  (__v8df) __B,
3317  (__v8df) __C,
3318  (__mmask8) __U,
3320 }
3321 
3322 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
3323  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
3324  (__v16sf)(__m512)(B), \
3325  (__v16sf)(__m512)(C), \
3326  (__mmask16)(U), (int)(R)); })
3327 
3328 
3329 static __inline__ __m512 __DEFAULT_FN_ATTRS
3330 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3331 {
3332  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3333  (__v16sf) __B,
3334  (__v16sf) __C,
3335  (__mmask16) __U,
3337 }
3338 
3339 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
3340  (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \
3341  (__v8df)(__m512d)(B), \
3342  (__v8df)(__m512d)(C), \
3343  (__mmask8)(U), (int)(R)); })
3344 
3345 
3346 static __inline__ __m512d __DEFAULT_FN_ATTRS
3347 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3348 {
3349  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3350  (__v8df) __B,
3351  (__v8df) __C,
3352  (__mmask8) __U,
3354 }
3355 
3356 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
3357  (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \
3358  (__v16sf)(__m512)(B), \
3359  (__v16sf)(__m512)(C), \
3360  (__mmask16)(U), (int)(R)); })
3361 
3362 
3363 static __inline__ __m512 __DEFAULT_FN_ATTRS
3364 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3365 {
3366  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3367  (__v16sf) __B,
3368  (__v16sf) __C,
3369  (__mmask16) __U,
3371 }
3372 
3373 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
3374  (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \
3375  (__v8df)(__m512d)(B), \
3376  (__v8df)(__m512d)(C), \
3377  (__mmask8)(U), (int)(R)); })
3378 
3379 
3380 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
3381  (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \
3382  (__v8df)(__m512d)(B), \
3383  (__v8df)(__m512d)(C), \
3384  (__mmask8)(U), (int)(R)); })
3385 
3386 
3387 static __inline__ __m512d __DEFAULT_FN_ATTRS
3388 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3389 {
3390  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3391  (__v8df) __B,
3392  (__v8df) __C,
3393  (__mmask8) __U,
3395 }
3396 
3397 static __inline__ __m512d __DEFAULT_FN_ATTRS
3398 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3399 {
3400  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3401  (__v8df) __B,
3402  (__v8df) __C,
3403  (__mmask8) __U,
3405 }
3406 
3407 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
3408  (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \
3409  (__v16sf)(__m512)(B), \
3410  (__v16sf)(__m512)(C), \
3411  (__mmask16)(U), (int)(R)); })
3412 
3413 
3414 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
3415  (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \
3416  (__v16sf)(__m512)(B), \
3417  (__v16sf)(__m512)(C), \
3418  (__mmask16)(U), (int)(R)); })
3419 
3420 
3421 static __inline__ __m512 __DEFAULT_FN_ATTRS
3422 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3423 {
3424  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3425  (__v16sf) __B,
3426  (__v16sf) __C,
3427  (__mmask16) __U,
3429 }
3430 
3431 static __inline__ __m512 __DEFAULT_FN_ATTRS
3432 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3433 {
3434  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3435  (__v16sf) __B,
3436  (__v16sf) __C,
3437  (__mmask16) __U,
3439 }
3440 
3441 
3442 
3443 /* Vector permutations */
3444 
3445 static __inline __m512i __DEFAULT_FN_ATTRS
3446 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3447 {
3448  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3449  /* idx */ ,
3450  (__v16si) __A,
3451  (__v16si) __B,
3452  (__mmask16) -1);
3453 }
3454 
3455 static __inline__ __m512i __DEFAULT_FN_ATTRS
3456 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
3457  __m512i __I, __m512i __B)
3458 {
3459  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3460  /* idx */ ,
3461  (__v16si) __A,
3462  (__v16si) __B,
3463  (__mmask16) __U);
3464 }
3465 
3466 static __inline__ __m512i __DEFAULT_FN_ATTRS
3467 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
3468  __m512i __I, __m512i __B)
3469 {
3470  return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
3471  /* idx */ ,
3472  (__v16si) __A,
3473  (__v16si) __B,
3474  (__mmask16) __U);
3475 }
3476 
3477 static __inline __m512i __DEFAULT_FN_ATTRS
3478 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3479 {
3480  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3481  /* idx */ ,
3482  (__v8di) __A,
3483  (__v8di) __B,
3484  (__mmask8) -1);
3485 }
3486 
3487 static __inline__ __m512i __DEFAULT_FN_ATTRS
3488 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
3489  __m512i __B)
3490 {
3491  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3492  /* idx */ ,
3493  (__v8di) __A,
3494  (__v8di) __B,
3495  (__mmask8) __U);
3496 }
3497 
3498 
3499 static __inline__ __m512i __DEFAULT_FN_ATTRS
3500 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
3501  __m512i __I, __m512i __B)
3502 {
3503  return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
3504  /* idx */ ,
3505  (__v8di) __A,
3506  (__v8di) __B,
3507  (__mmask8) __U);
3508 }
3509 
/* Lane-crossing alignr: concatenate A:B and shift right by I elements,
   implemented as a shufflevector with B first.  The shift count is reduced
   modulo the element count (& 0x7 / & 0xf). */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \
                                   (__v8di)(__m512i)(A), \
                                   ((int)(I) & 0x7) + 0, \
                                   ((int)(I) & 0x7) + 1, \
                                   ((int)(I) & 0x7) + 2, \
                                   ((int)(I) & 0x7) + 3, \
                                   ((int)(I) & 0x7) + 4, \
                                   ((int)(I) & 0x7) + 5, \
                                   ((int)(I) & 0x7) + 6, \
                                   ((int)(I) & 0x7) + 7); })

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)); })

#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()); })

#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \
                                   (__v16si)(__m512i)(A), \
                                   ((int)(I) & 0xf) + 0, \
                                   ((int)(I) & 0xf) + 1, \
                                   ((int)(I) & 0xf) + 2, \
                                   ((int)(I) & 0xf) + 3, \
                                   ((int)(I) & 0xf) + 4, \
                                   ((int)(I) & 0xf) + 5, \
                                   ((int)(I) & 0xf) + 6, \
                                   ((int)(I) & 0xf) + 7, \
                                   ((int)(I) & 0xf) + 8, \
                                   ((int)(I) & 0xf) + 9, \
                                   ((int)(I) & 0xf) + 10, \
                                   ((int)(I) & 0xf) + 11, \
                                   ((int)(I) & 0xf) + 12, \
                                   ((int)(I) & 0xf) + 13, \
                                   ((int)(I) & 0xf) + 14, \
                                   ((int)(I) & 0xf) + 15); })

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)); })

#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()); })
3561 /* Vector Extract */
3562 
/* Vector Extract: pull a 256-bit (f64x4) or 128-bit (f32x4) sub-vector out of
   a 512-bit register via shufflevector; masked forms merge/zero afterwards. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
  (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   ((I) & 1) ? 4 : 0, \
                                   ((I) & 1) ? 5 : 1, \
                                   ((I) & 1) ? 6 : 2, \
                                   ((I) & 1) ? 7 : 3); })

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                   (__v4df)(W)); })

#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                   (__v4df)_mm256_setzero_pd()); })

#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
  (__m128)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_undefined_ps(), \
                                  0 + ((I) & 0x3) * 4, \
                                  1 + ((I) & 0x3) * 4, \
                                  2 + ((I) & 0x3) * 4, \
                                  3 + ((I) & 0x3) * 4); })

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                    (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                    (__v4sf)(W)); })

#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                    (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                    (__v4sf)_mm_setzero_ps()); })
3598 
3599 /* Vector Blend */
3600 
3601 static __inline __m512d __DEFAULT_FN_ATTRS
3602 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3603 {
3604  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3605  (__v8df) __W,
3606  (__v8df) __A);
3607 }
3608 
3609 static __inline __m512 __DEFAULT_FN_ATTRS
3610 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3611 {
3612  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3613  (__v16sf) __W,
3614  (__v16sf) __A);
3615 }
3616 
3617 static __inline __m512i __DEFAULT_FN_ATTRS
3618 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3619 {
3620  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3621  (__v8di) __W,
3622  (__v8di) __A);
3623 }
3624 
3625 static __inline __m512i __DEFAULT_FN_ATTRS
3626 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3627 {
3628  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3629  (__v16si) __W,
3630  (__v16si) __A);
3631 }
3632 
3633 /* Compare */
3634 
/* Compare: generic cmp with predicate P (and optional rounding R), plus the
   named-predicate conveniences, which expand to _CMP_* constants. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)

#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)(U), (int)(R)); })

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3744 
3745 /* Conversion */
3746 
3747 #define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
3748  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3749  (__v16si)_mm512_undefined_epi32(), \
3750  (__mmask16)-1, (int)(R)); })
3751 
3752 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
3753  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3754  (__v16si)(__m512i)(W), \
3755  (__mmask16)(U), (int)(R)); })
3756 
3757 #define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
3758  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3759  (__v16si)_mm512_setzero_si512(), \
3760  (__mmask16)(U), (int)(R)); })
3761 
3762 
3763 static __inline __m512i __DEFAULT_FN_ATTRS
3765 {
3766  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3767  (__v16si)
3769  (__mmask16) -1,
3771 }
3772 
3773 static __inline__ __m512i __DEFAULT_FN_ATTRS
3774 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3775 {
3776  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3777  (__v16si) __W,
3778  (__mmask16) __U,
3780 }
3781 
3782 static __inline__ __m512i __DEFAULT_FN_ATTRS
3783 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3784 {
3785  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3786  (__v16si) _mm512_setzero_si512 (),
3787  (__mmask16) __U,
3789 }
3790 
/* Signed / unsigned 32-bit int -> float conversion with explicit rounding. */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })
3820 
3821 static __inline__ __m512 __DEFAULT_FN_ATTRS
3822 _mm512_cvtepu32_ps (__m512i __A)
3823 {
3824  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3825  (__v16sf) _mm512_undefined_ps (),
3826  (__mmask16) -1,
3828 }
3829 
3830 static __inline__ __m512 __DEFAULT_FN_ATTRS
3831 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3832 {
3833  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3834  (__v16sf) __W,
3835  (__mmask16) __U,
3837 }
3838 
3839 static __inline__ __m512 __DEFAULT_FN_ATTRS
3840 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3841 {
3842  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3843  (__v16sf) _mm512_setzero_ps (),
3844  (__mmask16) __U,
3846 }
3847 
3848 static __inline __m512d __DEFAULT_FN_ATTRS
3850 {
3851  return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3852 }
3853 
3854 static __inline__ __m512d __DEFAULT_FN_ATTRS
3855 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3856 {
3857  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3858  (__v8df)_mm512_cvtepi32_pd(__A),
3859  (__v8df)__W);
3860 }
3861 
3862 static __inline__ __m512d __DEFAULT_FN_ATTRS
3863 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3864 {
3865  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3866  (__v8df)_mm512_cvtepi32_pd(__A),
3867  (__v8df)_mm512_setzero_pd());
3868 }
3869 
3870 static __inline__ __m512d __DEFAULT_FN_ATTRS
3872 {
3873  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3874 }
3875 
3876 static __inline__ __m512d __DEFAULT_FN_ATTRS
3877 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3878 {
3879  return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3880 }
3881 
3882 static __inline__ __m512 __DEFAULT_FN_ATTRS
3883 _mm512_cvtepi32_ps (__m512i __A)
3884 {
3885  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3886  (__v16sf) _mm512_undefined_ps (),
3887  (__mmask16) -1,
3889 }
3890 
3891 static __inline__ __m512 __DEFAULT_FN_ATTRS
3892 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3893 {
3894  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3895  (__v16sf) __W,
3896  (__mmask16) __U,
3898 }
3899 
3900 static __inline__ __m512 __DEFAULT_FN_ATTRS
3901 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3902 {
3903  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3904  (__v16sf) _mm512_setzero_ps (),
3905  (__mmask16) __U,
3907 }
3908 
3909 static __inline __m512d __DEFAULT_FN_ATTRS
3911 {
3912  return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3913 }
3914 
3915 static __inline__ __m512d __DEFAULT_FN_ATTRS
3916 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3917 {
3918  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3919  (__v8df)_mm512_cvtepu32_pd(__A),
3920  (__v8df)__W);
3921 }
3922 
3923 static __inline__ __m512d __DEFAULT_FN_ATTRS
3924 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3925 {
3926  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3927  (__v8df)_mm512_cvtepu32_pd(__A),
3928  (__v8df)_mm512_setzero_pd());
3929 }
3930 
3931 static __inline__ __m512d __DEFAULT_FN_ATTRS
3933 {
3934  return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3935 }
3936 
3937 static __inline__ __m512d __DEFAULT_FN_ATTRS
3938 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3939 {
3940  return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3941 }
3942 
/* Convert 8 doubles to 8 single-precision floats with rounding control R. */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
3957 
3958 static __inline__ __m256 __DEFAULT_FN_ATTRS
3959 _mm512_cvtpd_ps (__m512d __A)
3960 {
3961  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3962  (__v8sf) _mm256_undefined_ps (),
3963  (__mmask8) -1,
3965 }
3966 
3967 static __inline__ __m256 __DEFAULT_FN_ATTRS
3968 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3969 {
3970  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3971  (__v8sf) __W,
3972  (__mmask8) __U,
3974 }
3975 
3976 static __inline__ __m256 __DEFAULT_FN_ATTRS
3977 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3978 {
3979  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3980  (__v8sf) _mm256_setzero_ps (),
3981  (__mmask8) __U,
3983 }
3984 
3985 static __inline__ __m512 __DEFAULT_FN_ATTRS
3986 _mm512_cvtpd_pslo (__m512d __A)
3987 {
3988  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3989  (__v8sf) _mm256_setzero_ps (),
3990  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3991 }
3992 
3993 static __inline__ __m512 __DEFAULT_FN_ATTRS
3994 _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3995 {
3996  return (__m512) __builtin_shufflevector (
3998  __U, __A),
3999  (__v8sf) _mm256_setzero_ps (),
4000  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
4001 }
4002 
/* Convert 16 single-precision floats to 16 half-precision values; I encodes
   the rounding/immediate control.  Note: in the mask/maskz forms the first
   macro parameter is the destination/merge operand and the second the mask. */
#define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_undefined_si256(), \
                                            (__mmask16)-1); })

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(U), \
                                            (__mmask16)(W)); })

#define _mm512_maskz_cvt_roundps_ph(W, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(W)); })

#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)-1); })

#define _mm512_mask_cvtps_ph(U, W, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(U), \
                                            (__mmask16)(W)); })

#define _mm512_maskz_cvtps_ph(W, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(W)); })
4032 
/* Convert 16 half-precision values to single-precision with rounding R. */
#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })
4047 
4048 
4049 static __inline __m512 __DEFAULT_FN_ATTRS
4050 _mm512_cvtph_ps(__m256i __A)
4051 {
4052  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4053  (__v16sf)
4054  _mm512_setzero_ps (),
4055  (__mmask16) -1,
4057 }
4058 
4059 static __inline__ __m512 __DEFAULT_FN_ATTRS
4060 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
4061 {
4062  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4063  (__v16sf) __W,
4064  (__mmask16) __U,
4066 }
4067 
4068 static __inline__ __m512 __DEFAULT_FN_ATTRS
4069 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
4070 {
4071  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4072  (__v16sf) _mm512_setzero_ps (),
4073  (__mmask16) __U,
4075 }
4076 
/* Truncating convert 8 doubles to 8 signed 32-bit integers (round toward
   zero), with SAE/rounding control R. */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)); })
4091 
4092 static __inline __m256i __DEFAULT_FN_ATTRS
4094 {
4095  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
4096  (__v8si)_mm256_setzero_si256(),
4097  (__mmask8) -1,
4099 }
4100 
4101 static __inline__ __m256i __DEFAULT_FN_ATTRS
4102 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4103 {
4104  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4105  (__v8si) __W,
4106  (__mmask8) __U,
4108 }
4109 
4110 static __inline__ __m256i __DEFAULT_FN_ATTRS
4111 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
4112 {
4113  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4114  (__v8si) _mm256_setzero_si256 (),
4115  (__mmask8) __U,
4117 }
4118 
/* Truncating convert 16 floats to 16 signed 32-bit integers, control R. */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)); })
4133 
4134 static __inline __m512i __DEFAULT_FN_ATTRS
4136 {
4137  return (__m512i)
4138  __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
4139  (__v16si) _mm512_setzero_si512 (),
4140  (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
4141 }
4142 
4143 static __inline__ __m512i __DEFAULT_FN_ATTRS
4144 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
4145 {
4146  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4147  (__v16si) __W,
4148  (__mmask16) __U,
4150 }
4151 
4152 static __inline__ __m512i __DEFAULT_FN_ATTRS
4153 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
4154 {
4155  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4156  (__v16si) _mm512_setzero_si512 (),
4157  (__mmask16) __U,
4159 }
4160 
/* Convert 16 floats to 16 signed 32-bit integers with rounding control R. */
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)(__m512i)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)(U), (int)(R)); })
4175 
4176 static __inline__ __m512i __DEFAULT_FN_ATTRS
4178 {
4179  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4180  (__v16si) _mm512_undefined_epi32 (),
4181  (__mmask16) -1,
4183 }
4184 
4185 static __inline__ __m512i __DEFAULT_FN_ATTRS
4186 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
4187 {
4188  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4189  (__v16si) __W,
4190  (__mmask16) __U,
4192 }
4193 
4194 static __inline__ __m512i __DEFAULT_FN_ATTRS
4195 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
4196 {
4197  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4198  (__v16si)
4200  (__mmask16) __U,
4202 }
4203 
/* Convert 8 doubles to 8 signed 32-bit integers with rounding control R. */
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)(__m256i)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)(U), (int)(R)); })
4218 
4219 static __inline__ __m256i __DEFAULT_FN_ATTRS
4220 _mm512_cvtpd_epi32 (__m512d __A)
4221 {
4222  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4223  (__v8si)
4225  (__mmask8) -1,
4227 }
4228 
4229 static __inline__ __m256i __DEFAULT_FN_ATTRS
4230 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4231 {
4232  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4233  (__v8si) __W,
4234  (__mmask8) __U,
4236 }
4237 
4238 static __inline__ __m256i __DEFAULT_FN_ATTRS
4239 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
4240 {
4241  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4242  (__v8si)
4244  (__mmask8) __U,
4246 }
4247 
/* Convert 16 floats to 16 unsigned 32-bit integers with rounding control R. */
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)); })
4262 
4263 static __inline__ __m512i __DEFAULT_FN_ATTRS
4264 _mm512_cvtps_epu32 ( __m512 __A)
4265 {
4266  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4267  (__v16si)\
4269  (__mmask16) -1,\
4271 }
4272 
4273 static __inline__ __m512i __DEFAULT_FN_ATTRS
4274 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4275 {
4276  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4277  (__v16si) __W,
4278  (__mmask16) __U,
4280 }
4281 
4282 static __inline__ __m512i __DEFAULT_FN_ATTRS
4283 _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4284 {
4285  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4286  (__v16si)
4288  (__mmask16) __U ,
4290 }
4291 
/* Convert 8 doubles to 8 unsigned 32-bit integers with rounding control R.
   Added the (__m256i) cast on (W) for consistency with the epi32 variant. */
#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)); })
4306 
4307 static __inline__ __m256i __DEFAULT_FN_ATTRS
4308 _mm512_cvtpd_epu32 (__m512d __A)
4309 {
4310  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4311  (__v8si)
4313  (__mmask8) -1,
4315 }
4316 
4317 static __inline__ __m256i __DEFAULT_FN_ATTRS
4318 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4319 {
4320  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4321  (__v8si) __W,
4322  (__mmask8) __U,
4324 }
4325 
4326 static __inline__ __m256i __DEFAULT_FN_ATTRS
4327 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4328 {
4329  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4330  (__v8si)
4332  (__mmask8) __U,
4334 }
4335 
4336 static __inline__ double __DEFAULT_FN_ATTRS
4337 _mm512_cvtsd_f64(__m512d __a)
4338 {
4339  return __a[0];
4340 }
4341 
4342 static __inline__ float __DEFAULT_FN_ATTRS
4343 _mm512_cvtss_f32(__m512 __a)
4344 {
4345  return __a[0];
4346 }
4347 
4348 /* Unpack and Interleave */
4349 
4350 static __inline __m512d __DEFAULT_FN_ATTRS
4351 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
4352 {
4353  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4354  1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4355 }
4356 
4357 static __inline__ __m512d __DEFAULT_FN_ATTRS
4358 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4359 {
4360  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4361  (__v8df)_mm512_unpackhi_pd(__A, __B),
4362  (__v8df)__W);
4363 }
4364 
4365 static __inline__ __m512d __DEFAULT_FN_ATTRS
4366 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4367 {
4368  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4369  (__v8df)_mm512_unpackhi_pd(__A, __B),
4370  (__v8df)_mm512_setzero_pd());
4371 }
4372 
4373 static __inline __m512d __DEFAULT_FN_ATTRS
4374 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
4375 {
4376  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4377  0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4378 }
4379 
4380 static __inline__ __m512d __DEFAULT_FN_ATTRS
4381 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4382 {
4383  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4384  (__v8df)_mm512_unpacklo_pd(__A, __B),
4385  (__v8df)__W);
4386 }
4387 
4388 static __inline__ __m512d __DEFAULT_FN_ATTRS
4389 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4390 {
4391  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4392  (__v8df)_mm512_unpacklo_pd(__A, __B),
4393  (__v8df)_mm512_setzero_pd());
4394 }
4395 
4396 static __inline __m512 __DEFAULT_FN_ATTRS
4397 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
4398 {
4399  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4400  2, 18, 3, 19,
4401  2+4, 18+4, 3+4, 19+4,
4402  2+8, 18+8, 3+8, 19+8,
4403  2+12, 18+12, 3+12, 19+12);
4404 }
4405 
4406 static __inline__ __m512 __DEFAULT_FN_ATTRS
4407 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4408 {
4409  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4410  (__v16sf)_mm512_unpackhi_ps(__A, __B),
4411  (__v16sf)__W);
4412 }
4413 
4414 static __inline__ __m512 __DEFAULT_FN_ATTRS
4415 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4416 {
4417  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4418  (__v16sf)_mm512_unpackhi_ps(__A, __B),
4419  (__v16sf)_mm512_setzero_ps());
4420 }
4421 
4422 static __inline __m512 __DEFAULT_FN_ATTRS
4423 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
4424 {
4425  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4426  0, 16, 1, 17,
4427  0+4, 16+4, 1+4, 17+4,
4428  0+8, 16+8, 1+8, 17+8,
4429  0+12, 16+12, 1+12, 17+12);
4430 }
4431 
4432 static __inline__ __m512 __DEFAULT_FN_ATTRS
4433 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4434 {
4435  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4436  (__v16sf)_mm512_unpacklo_ps(__A, __B),
4437  (__v16sf)__W);
4438 }
4439 
4440 static __inline__ __m512 __DEFAULT_FN_ATTRS
4441 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4442 {
4443  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4444  (__v16sf)_mm512_unpacklo_ps(__A, __B),
4445  (__v16sf)_mm512_setzero_ps());
4446 }
4447 
4448 static __inline__ __m512i __DEFAULT_FN_ATTRS
4449 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4450 {
4451  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4452  2, 18, 3, 19,
4453  2+4, 18+4, 3+4, 19+4,
4454  2+8, 18+8, 3+8, 19+8,
4455  2+12, 18+12, 3+12, 19+12);
4456 }
4457 
4458 static __inline__ __m512i __DEFAULT_FN_ATTRS
4459 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4460 {
4461  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4462  (__v16si)_mm512_unpackhi_epi32(__A, __B),
4463  (__v16si)__W);
4464 }
4465 
4466 static __inline__ __m512i __DEFAULT_FN_ATTRS
4467 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4468 {
4469  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4470  (__v16si)_mm512_unpackhi_epi32(__A, __B),
4471  (__v16si)_mm512_setzero_si512());
4472 }
4473 
4474 static __inline__ __m512i __DEFAULT_FN_ATTRS
4475 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4476 {
4477  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4478  0, 16, 1, 17,
4479  0+4, 16+4, 1+4, 17+4,
4480  0+8, 16+8, 1+8, 17+8,
4481  0+12, 16+12, 1+12, 17+12);
4482 }
4483 
4484 static __inline__ __m512i __DEFAULT_FN_ATTRS
4485 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4486 {
4487  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4488  (__v16si)_mm512_unpacklo_epi32(__A, __B),
4489  (__v16si)__W);
4490 }
4491 
4492 static __inline__ __m512i __DEFAULT_FN_ATTRS
4493 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4494 {
4495  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4496  (__v16si)_mm512_unpacklo_epi32(__A, __B),
4497  (__v16si)_mm512_setzero_si512());
4498 }
4499 
4500 static __inline__ __m512i __DEFAULT_FN_ATTRS
4501 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4502 {
4503  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4504  1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4505 }
4506 
4507 static __inline__ __m512i __DEFAULT_FN_ATTRS
4508 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4509 {
4510  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4511  (__v8di)_mm512_unpackhi_epi64(__A, __B),
4512  (__v8di)__W);
4513 }
4514 
4515 static __inline__ __m512i __DEFAULT_FN_ATTRS
4516 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4517 {
4518  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4519  (__v8di)_mm512_unpackhi_epi64(__A, __B),
4520  (__v8di)_mm512_setzero_si512());
4521 }
4522 
4523 static __inline__ __m512i __DEFAULT_FN_ATTRS
4524 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4525 {
4526  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4527  0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4528 }
4529 
4530 static __inline__ __m512i __DEFAULT_FN_ATTRS
4531 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4532 {
4533  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4534  (__v8di)_mm512_unpacklo_epi64(__A, __B),
4535  (__v8di)__W);
4536 }
4537 
4538 static __inline__ __m512i __DEFAULT_FN_ATTRS
4539 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4540 {
4541  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4542  (__v8di)_mm512_unpacklo_epi64(__A, __B),
4543  (__v8di)_mm512_setzero_si512());
4544 }
4545 
4546 
4547 /* SIMD load ops */
4548 
4549 static __inline __m512i __DEFAULT_FN_ATTRS
4550 _mm512_loadu_si512 (void const *__P)
4551 {
4552  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4553  (__v16si)
4555  (__mmask16) -1);
4556 }
4557 
4558 static __inline __m512i __DEFAULT_FN_ATTRS
4559 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4560 {
4561  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4562  (__v16si) __W,
4563  (__mmask16) __U);
4564 }
4565 
4566 
4567 static __inline __m512i __DEFAULT_FN_ATTRS
4568 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
4569 {
4570  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4571  (__v16si)
4573  (__mmask16) __U);
4574 }
4575 
4576 static __inline __m512i __DEFAULT_FN_ATTRS
4577 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4578 {
4579  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4580  (__v8di) __W,
4581  (__mmask8) __U);
4582 }
4583 
4584 static __inline __m512i __DEFAULT_FN_ATTRS
4585 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
4586 {
4587  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4588  (__v8di)
4590  (__mmask8) __U);
4591 }
4592 
4593 static __inline __m512 __DEFAULT_FN_ATTRS
4594 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4595 {
4596  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4597  (__v16sf) __W,
4598  (__mmask16) __U);
4599 }
4600 
4601 static __inline __m512 __DEFAULT_FN_ATTRS
4602 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
4603 {
4604  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4605  (__v16sf)
4606  _mm512_setzero_ps (),
4607  (__mmask16) __U);
4608 }
4609 
4610 static __inline __m512d __DEFAULT_FN_ATTRS
4611 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4612 {
4613  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4614  (__v8df) __W,
4615  (__mmask8) __U);
4616 }
4617 
4618 static __inline __m512d __DEFAULT_FN_ATTRS
4619 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
4620 {
4621  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4622  (__v8df)
4623  _mm512_setzero_pd (),
4624  (__mmask8) __U);
4625 }
4626 
4627 static __inline __m512d __DEFAULT_FN_ATTRS
4628 _mm512_loadu_pd(void const *__p)
4629 {
4630  struct __loadu_pd {
4631  __m512d __v;
4632  } __attribute__((__packed__, __may_alias__));
4633  return ((struct __loadu_pd*)__p)->__v;
4634 }
4635 
4636 static __inline __m512 __DEFAULT_FN_ATTRS
4637 _mm512_loadu_ps(void const *__p)
4638 {
4639  struct __loadu_ps {
4640  __m512 __v;
4641  } __attribute__((__packed__, __may_alias__));
4642  return ((struct __loadu_ps*)__p)->__v;
4643 }
4644 
4645 static __inline __m512 __DEFAULT_FN_ATTRS
4646 _mm512_load_ps(void const *__p)
4647 {
4648  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
4649  (__v16sf)
4650  _mm512_setzero_ps (),
4651  (__mmask16) -1);
4652 }
4653 
4654 static __inline __m512 __DEFAULT_FN_ATTRS
4655 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4656 {
4657  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4658  (__v16sf) __W,
4659  (__mmask16) __U);
4660 }
4661 
4662 static __inline __m512 __DEFAULT_FN_ATTRS
4663 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4664 {
4665  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4666  (__v16sf)
4667  _mm512_setzero_ps (),
4668  (__mmask16) __U);
4669 }
4670 
4671 static __inline __m512d __DEFAULT_FN_ATTRS
4672 _mm512_load_pd(void const *__p)
4673 {
4674  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
4675  (__v8df)
4676  _mm512_setzero_pd (),
4677  (__mmask8) -1);
4678 }
4679 
4680 static __inline __m512d __DEFAULT_FN_ATTRS
4681 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4682 {
4683  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4684  (__v8df) __W,
4685  (__mmask8) __U);
4686 }
4687 
4688 static __inline __m512d __DEFAULT_FN_ATTRS
4689 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4690 {
4691  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4692  (__v8df)
4693  _mm512_setzero_pd (),
4694  (__mmask8) __U);
4695 }
4696 
4697 static __inline __m512i __DEFAULT_FN_ATTRS
4698 _mm512_load_si512 (void const *__P)
4699 {
4700  return *(__m512i *) __P;
4701 }
4702 
4703 static __inline __m512i __DEFAULT_FN_ATTRS
4704 _mm512_load_epi32 (void const *__P)
4705 {
4706  return *(__m512i *) __P;
4707 }
4708 
4709 static __inline __m512i __DEFAULT_FN_ATTRS
4710 _mm512_load_epi64 (void const *__P)
4711 {
4712  return *(__m512i *) __P;
4713 }
4714 
4715 /* SIMD store ops */
4716 
4717 static __inline void __DEFAULT_FN_ATTRS
4718 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4719 {
4720  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4721  (__mmask8) __U);
4722 }
4723 
4724 static __inline void __DEFAULT_FN_ATTRS
4725 _mm512_storeu_si512 (void *__P, __m512i __A)
4726 {
4727  __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
4728  (__mmask16) -1);
4729 }
4730 
4731 static __inline void __DEFAULT_FN_ATTRS
4732 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
4733 {
4734  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4735  (__mmask16) __U);
4736 }
4737 
4738 static __inline void __DEFAULT_FN_ATTRS
4739 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4740 {
4741  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4742 }
4743 
4744 static __inline void __DEFAULT_FN_ATTRS
4745 _mm512_storeu_pd(void *__P, __m512d __A)
4746 {
4747  __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
4748 }
4749 
4750 static __inline void __DEFAULT_FN_ATTRS
4751 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4752 {
4753  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4754  (__mmask16) __U);
4755 }
4756 
4757 static __inline void __DEFAULT_FN_ATTRS
4758 _mm512_storeu_ps(void *__P, __m512 __A)
4759 {
4760  __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
4761 }
4762 
4763 static __inline void __DEFAULT_FN_ATTRS
4764 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4765 {
4766  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4767 }
4768 
4769 static __inline void __DEFAULT_FN_ATTRS
4770 _mm512_store_pd(void *__P, __m512d __A)
4771 {
4772  *(__m512d*)__P = __A;
4773 }
4774 
4775 static __inline void __DEFAULT_FN_ATTRS
4776 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4777 {
4778  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4779  (__mmask16) __U);
4780 }
4781 
4782 static __inline void __DEFAULT_FN_ATTRS
4783 _mm512_store_ps(void *__P, __m512 __A)
4784 {
4785  *(__m512*)__P = __A;
4786 }
4787 
4788 static __inline void __DEFAULT_FN_ATTRS
4789 _mm512_store_si512 (void *__P, __m512i __A)
4790 {
4791  *(__m512i *) __P = __A;
4792 }
4793 
4794 static __inline void __DEFAULT_FN_ATTRS
4795 _mm512_store_epi32 (void *__P, __m512i __A)
4796 {
4797  *(__m512i *) __P = __A;
4798 }
4799 
4800 static __inline void __DEFAULT_FN_ATTRS
4801 _mm512_store_epi64 (void *__P, __m512i __A)
4802 {
4803  *(__m512i *) __P = __A;
4804 }
4805 
4806 /* Mask ops */
4807 
4808 static __inline __mmask16 __DEFAULT_FN_ATTRS
4809 _mm512_knot(__mmask16 __M)
4810 {
4811  return __builtin_ia32_knothi(__M);
4812 }
4813 
4814 /* Integer compare */
4815 
/* Signed 32-bit comparisons, expressed via the generic _mm512_cmp_epi32_mask
   with the appropriate _MM_CMPINT predicate. */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
4840 
/* Unsigned 32-bit comparisons. */
#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
4865 
4866 #define _mm512_cmpeq_epi64_mask(A, B) \
4867  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
4868 #define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
4869  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
4870 #define _mm512_cmpge_epi64_mask(A, B) \
4871  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
4872 #define _mm512_mask_cmpge_epi64_mask(k, A, B) \
4873  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
4874 #define _mm512_cmpgt_epi64_mask(A, B) \
4875  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
4876 #define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
4877  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
4878 #define _mm512_cmple_epi64_mask(A, B) \
4879  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
4880 #define _mm512_mask_cmple_epi64_mask(k, A, B) \
4881  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
4882 #define _mm512_cmplt_epi64_mask(A, B) \
4883  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
4884 #define _mm512_mask_cmplt_epi64_mask(k, A, B) \
4885  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
4886 #define _mm512_cmpneq_epi64_mask(A, B) \
4887  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
4888 #define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
4889  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
4890 
4891 #define _mm512_cmpeq_epu64_mask(A, B) \
4892  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
4893 #define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
4894  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
4895 #define _mm512_cmpge_epu64_mask(A, B) \
4896  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
4897 #define _mm512_mask_cmpge_epu64_mask(k, A, B) \
4898  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
4899 #define _mm512_cmpgt_epu64_mask(A, B) \
4900  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
4901 #define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
4902  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
4903 #define _mm512_cmple_epu64_mask(A, B) \
4904  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
4905 #define _mm512_mask_cmple_epu64_mask(k, A, B) \
4906  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
4907 #define _mm512_cmplt_epu64_mask(A, B) \
4908  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
4909 #define _mm512_mask_cmplt_epu64_mask(k, A, B) \
4910  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
4911 #define _mm512_cmpneq_epu64_mask(A, B) \
4912  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
4913 #define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
4914  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4915 
4916 static __inline__ __m512i __DEFAULT_FN_ATTRS
4918 {
4919  /* This function always performs a signed extension, but __v16qi is a char
4920  which may be signed or unsigned, so use __v16qs. */
4921  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4922 }
4923 
4924 static __inline__ __m512i __DEFAULT_FN_ATTRS
4925 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4926 {
4927  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4928  (__v16si)_mm512_cvtepi8_epi32(__A),
4929  (__v16si)__W);
4930 }
4931 
4932 static __inline__ __m512i __DEFAULT_FN_ATTRS
4933 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
4934 {
4935  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4936  (__v16si)_mm512_cvtepi8_epi32(__A),
4937  (__v16si)_mm512_setzero_si512());
4938 }
4939 
4940 static __inline__ __m512i __DEFAULT_FN_ATTRS
4942 {
4943  /* This function always performs a signed extension, but __v16qi is a char
4944  which may be signed or unsigned, so use __v16qs. */
4945  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4946 }
4947 
4948 static __inline__ __m512i __DEFAULT_FN_ATTRS
4949 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4950 {
4951  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4952  (__v8di)_mm512_cvtepi8_epi64(__A),
4953  (__v8di)__W);
4954 }
4955 
4956 static __inline__ __m512i __DEFAULT_FN_ATTRS
4957 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4958 {
4959  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4960  (__v8di)_mm512_cvtepi8_epi64(__A),
4961  (__v8di)_mm512_setzero_si512 ());
4962 }
4963 
4964 static __inline__ __m512i __DEFAULT_FN_ATTRS
4966 {
4967  return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4968 }
4969 
4970 static __inline__ __m512i __DEFAULT_FN_ATTRS
4971 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4972 {
4973  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4974  (__v8di)_mm512_cvtepi32_epi64(__X),
4975  (__v8di)__W);
4976 }
4977 
4978 static __inline__ __m512i __DEFAULT_FN_ATTRS
4979 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
4980 {
4981  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4982  (__v8di)_mm512_cvtepi32_epi64(__X),
4983  (__v8di)_mm512_setzero_si512());
4984 }
4985 
4986 static __inline__ __m512i __DEFAULT_FN_ATTRS
4988 {
4989  return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4990 }
4991 
4992 static __inline__ __m512i __DEFAULT_FN_ATTRS
4993 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4994 {
4995  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4996  (__v16si)_mm512_cvtepi16_epi32(__A),
4997  (__v16si)__W);
4998 }
4999 
5000 static __inline__ __m512i __DEFAULT_FN_ATTRS
5001 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
5002 {
5003  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5004  (__v16si)_mm512_cvtepi16_epi32(__A),
5005  (__v16si)_mm512_setzero_si512 ());
5006 }
5007 
5008 static __inline__ __m512i __DEFAULT_FN_ATTRS
5010 {
5011  return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
5012 }
5013 
5014 static __inline__ __m512i __DEFAULT_FN_ATTRS
5015 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5016 {
5017  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5018  (__v8di)_mm512_cvtepi16_epi64(__A),
5019  (__v8di)__W);
5020 }
5021 
5022 static __inline__ __m512i __DEFAULT_FN_ATTRS
5023 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
5024 {
5025  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5026  (__v8di)_mm512_cvtepi16_epi64(__A),
5027  (__v8di)_mm512_setzero_si512());
5028 }
5029 
5030 static __inline__ __m512i __DEFAULT_FN_ATTRS
5032 {
5033  return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
5034 }
5035 
5036 static __inline__ __m512i __DEFAULT_FN_ATTRS
5037 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
5038 {
5039  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5040  (__v16si)_mm512_cvtepu8_epi32(__A),
5041  (__v16si)__W);
5042 }
5043 
5044 static __inline__ __m512i __DEFAULT_FN_ATTRS
5045 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
5046 {
5047  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5048  (__v16si)_mm512_cvtepu8_epi32(__A),
5049  (__v16si)_mm512_setzero_si512());
5050 }
5051 
5052 static __inline__ __m512i __DEFAULT_FN_ATTRS
5054 {
5055  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
5056 }
5057 
5058 static __inline__ __m512i __DEFAULT_FN_ATTRS
5059 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5060 {
5061  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5062  (__v8di)_mm512_cvtepu8_epi64(__A),
5063  (__v8di)__W);
5064 }
5065 
5066 static __inline__ __m512i __DEFAULT_FN_ATTRS
5067 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
5068 {
5069  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5070  (__v8di)_mm512_cvtepu8_epi64(__A),
5071  (__v8di)_mm512_setzero_si512());
5072 }
5073 
5074 static __inline__ __m512i __DEFAULT_FN_ATTRS
5076 {
5077  return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
5078 }
5079 
5080 static __inline__ __m512i __DEFAULT_FN_ATTRS
5081 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
5082 {
5083  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5084  (__v8di)_mm512_cvtepu32_epi64(__X),
5085  (__v8di)__W);
5086 }
5087 
5088 static __inline__ __m512i __DEFAULT_FN_ATTRS
5089 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
5090 {
5091  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5092  (__v8di)_mm512_cvtepu32_epi64(__X),
5093  (__v8di)_mm512_setzero_si512());
5094 }
5095 
5096 static __inline__ __m512i __DEFAULT_FN_ATTRS
5098 {
5099  return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
5100 }
5101 
5102 static __inline__ __m512i __DEFAULT_FN_ATTRS
5103 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
5104 {
5105  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5106  (__v16si)_mm512_cvtepu16_epi32(__A),
5107  (__v16si)__W);
5108 }
5109 
5110 static __inline__ __m512i __DEFAULT_FN_ATTRS
5111 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
5112 {
5113  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5114  (__v16si)_mm512_cvtepu16_epi32(__A),
5115  (__v16si)_mm512_setzero_si512());
5116 }
5117 
5118 static __inline__ __m512i __DEFAULT_FN_ATTRS
5120 {
5121  return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
5122 }
5123 
5124 static __inline__ __m512i __DEFAULT_FN_ATTRS
5125 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5126 {
5127  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5128  (__v8di)_mm512_cvtepu16_epi64(__A),
5129  (__v8di)__W);
5130 }
5131 
5132 static __inline__ __m512i __DEFAULT_FN_ATTRS
5133 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
5134 {
5135  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5136  (__v8di)_mm512_cvtepu16_epi64(__A),
5137  (__v8di)_mm512_setzero_si512());
5138 }
5139 
5140 static __inline__ __m512i __DEFAULT_FN_ATTRS
5141 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
5142 {
5143  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5144  (__v16si) __B,
5145  (__v16si)
5147  (__mmask16) -1);
5148 }
5149 
5150 static __inline__ __m512i __DEFAULT_FN_ATTRS
5151 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5152 {
5153  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5154  (__v16si) __B,
5155  (__v16si) __W,
5156  (__mmask16) __U);
5157 }
5158 
5159 static __inline__ __m512i __DEFAULT_FN_ATTRS
5160 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5161 {
5162  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5163  (__v16si) __B,
5164  (__v16si)
5166  (__mmask16) __U);
5167 }
5168 
5169 static __inline__ __m512i __DEFAULT_FN_ATTRS
5170 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
5171 {
5172  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5173  (__v8di) __B,
5174  (__v8di)
5176  (__mmask8) -1);
5177 }
5178 
5179 static __inline__ __m512i __DEFAULT_FN_ATTRS
5180 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5181 {
5182  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5183  (__v8di) __B,
5184  (__v8di) __W,
5185  (__mmask8) __U);
5186 }
5187 
5188 static __inline__ __m512i __DEFAULT_FN_ATTRS
5189 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5190 {
5191  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5192  (__v8di) __B,
5193  (__v8di)
5195  (__mmask8) __U);
5196 }
5197 
5198 
5199 
/* Generic predicate comparisons (VPCMPD/VPCMPUD/VPCMPQ/VPCMPUQ).  p is one
   of the _MM_CMPINT_* constants and must be a compile-time immediate; the
   result is one mask bit per element.  The mask_ forms AND with m. */
#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)-1); })

#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1); })

#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)(m)); })

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)); })

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)); })
5239 
/* Immediate left-rotate of packed 32/64-bit integers (VPROLD/VPROLQ).
   b must be a compile-time constant rotate count. */
#define _mm512_rol_epi32(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })

#define _mm512_rol_epi64(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)(__m512i)(W), (__mmask8)(U)); })

#define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
5268 static __inline__ __m512i __DEFAULT_FN_ATTRS
5269 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
5270 {
5271  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5272  (__v16si) __B,
5273  (__v16si)
5275  (__mmask16) -1);
5276 }
5277 
5278 static __inline__ __m512i __DEFAULT_FN_ATTRS
5279 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5280 {
5281  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5282  (__v16si) __B,
5283  (__v16si) __W,
5284  (__mmask16) __U);
5285 }
5286 
5287 static __inline__ __m512i __DEFAULT_FN_ATTRS
5288 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5289 {
5290  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5291  (__v16si) __B,
5292  (__v16si)
5294  (__mmask16) __U);
5295 }
5296 
5297 static __inline__ __m512i __DEFAULT_FN_ATTRS
5298 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
5299 {
5300  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5301  (__v8di) __B,
5302  (__v8di)
5304  (__mmask8) -1);
5305 }
5306 
5307 static __inline__ __m512i __DEFAULT_FN_ATTRS
5308 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5309 {
5310  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5311  (__v8di) __B,
5312  (__v8di) __W,
5313  (__mmask8) __U);
5314 }
5315 
5316 static __inline__ __m512i __DEFAULT_FN_ATTRS
5317 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5318 {
5319  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5320  (__v8di) __B,
5321  (__v8di)
5323  (__mmask8) __U);
5324 }
5325 
/* Immediate right-rotate of packed 32/64-bit integers (VPRORD/VPRORQ).
   B must be a compile-time constant rotate count. */
#define _mm512_ror_epi32(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })

#define _mm512_ror_epi64(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)(__m512i)(W), (__mmask8)(U)); })

#define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
5354 
5355 static __inline__ __m512i __DEFAULT_FN_ATTRS
5356 _mm512_slli_epi32(__m512i __A, int __B)
5357 {
5358  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5359 }
5360 
5361 static __inline__ __m512i __DEFAULT_FN_ATTRS
5362 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5363 {
5364  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5365  (__v16si)_mm512_slli_epi32(__A, __B),
5366  (__v16si)__W);
5367 }
5368 
5369 static __inline__ __m512i __DEFAULT_FN_ATTRS
5370 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
5371  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5372  (__v16si)_mm512_slli_epi32(__A, __B),
5373  (__v16si)_mm512_setzero_si512());
5374 }
5375 
5376 static __inline__ __m512i __DEFAULT_FN_ATTRS
5377 _mm512_slli_epi64(__m512i __A, int __B)
5378 {
5379  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5380 }
5381 
5382 static __inline__ __m512i __DEFAULT_FN_ATTRS
5383 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5384 {
5385  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5386  (__v8di)_mm512_slli_epi64(__A, __B),
5387  (__v8di)__W);
5388 }
5389 
5390 static __inline__ __m512i __DEFAULT_FN_ATTRS
5391 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
5392 {
5393  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5394  (__v8di)_mm512_slli_epi64(__A, __B),
5395  (__v8di)_mm512_setzero_si512());
5396 }
5397 
5398 static __inline__ __m512i __DEFAULT_FN_ATTRS
5399 _mm512_srli_epi32(__m512i __A, int __B)
5400 {
5401  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5402 }
5403 
5404 static __inline__ __m512i __DEFAULT_FN_ATTRS
5405 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5406 {
5407  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5408  (__v16si)_mm512_srli_epi32(__A, __B),
5409  (__v16si)__W);
5410 }
5411 
5412 static __inline__ __m512i __DEFAULT_FN_ATTRS
5413 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
5414  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5415  (__v16si)_mm512_srli_epi32(__A, __B),
5416  (__v16si)_mm512_setzero_si512());
5417 }
5418 
5419 static __inline__ __m512i __DEFAULT_FN_ATTRS
5420 _mm512_srli_epi64(__m512i __A, int __B)
5421 {
5422  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5423 }
5424 
5425 static __inline__ __m512i __DEFAULT_FN_ATTRS
5426 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5427 {
5428  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5429  (__v8di)_mm512_srli_epi64(__A, __B),
5430  (__v8di)__W);
5431 }
5432 
5433 static __inline__ __m512i __DEFAULT_FN_ATTRS
5434 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
5435 {
5436  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5437  (__v8di)_mm512_srli_epi64(__A, __B),
5438  (__v8di)_mm512_setzero_si512());
5439 }
5440 
5441 static __inline__ __m512i __DEFAULT_FN_ATTRS
5442 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5443 {
5444  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5445  (__v16si) __W,
5446  (__mmask16) __U);
5447 }
5448 
5449 static __inline__ __m512i __DEFAULT_FN_ATTRS
5450 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5451 {
5452  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5453  (__v16si)
5455  (__mmask16) __U);
5456 }
5457 
5458 static __inline__ void __DEFAULT_FN_ATTRS
5459 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5460 {
5461  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5462  (__mmask16) __U);
5463 }
5464 
5465 static __inline__ __m512i __DEFAULT_FN_ATTRS
5466 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5467 {
5468  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5469  (__v16si) __A,
5470  (__v16si) __W);
5471 }
5472 
5473 static __inline__ __m512i __DEFAULT_FN_ATTRS
5474 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5475 {
5476  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5477  (__v16si) __A,
5478  (__v16si) _mm512_setzero_si512 ());
5479 }
5480 
5481 static __inline__ __m512i __DEFAULT_FN_ATTRS
5482 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5483 {
5484  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5485  (__v8di) __A,
5486  (__v8di) __W);
5487 }
5488 
5489 static __inline__ __m512i __DEFAULT_FN_ATTRS
5490 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5491 {
5492  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5493  (__v8di) __A,
5494  (__v8di) _mm512_setzero_si512 ());
5495 }
5496 
5497 static __inline__ __m512i __DEFAULT_FN_ATTRS
5498 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5499 {
5500  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5501  (__v8di) __W,
5502  (__mmask8) __U);
5503 }
5504 
5505 static __inline__ __m512i __DEFAULT_FN_ATTRS
5506 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5507 {
5508  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5509  (__v8di)
5511  (__mmask8) __U);
5512 }
5513 
5514 static __inline__ void __DEFAULT_FN_ATTRS
5515 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5516 {
5517  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5518  (__mmask8) __U);
5519 }
5520 
5521 static __inline__ __m512d __DEFAULT_FN_ATTRS
5522 _mm512_movedup_pd (__m512d __A)
5523 {
5524  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5525  0, 0, 2, 2, 4, 4, 6, 6);
5526 }
5527 
5528 static __inline__ __m512d __DEFAULT_FN_ATTRS
5529 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5530 {
5531  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5532  (__v8df)_mm512_movedup_pd(__A),
5533  (__v8df)__W);
5534 }
5535 
5536 static __inline__ __m512d __DEFAULT_FN_ATTRS
5537 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5538 {
5539  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5540  (__v8df)_mm512_movedup_pd(__A),
5541  (__v8df)_mm512_setzero_pd());
5542 }
5543 
/* VFIXUPIMMPD: fix up special double values per the lookup table in C and
   the immediate imm.  imm and R (rounding/SAE control) must be
   compile-time constants; non-_round_ forms use the current direction. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              (int)(R)); })

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })
5583 
/* VFIXUPIMMPS: single-precision counterparts of the fixupimm_pd macros. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, (int)(R)); })

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), (int)(R)); })

#define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             (int)(R)); })

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION); })
5623 
/* VFIXUPIMMSD: scalar-double fixup of the low element; upper element is
   passed through from A. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)); })

#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })
5662 
/* VFIXUPIMMSS: scalar-single fixup of the low element; upper elements are
   passed through from A. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)-1, (int)(R)); })

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)(U), (int)(R)); })

#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)); })

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })
5701 
5702 #define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
5703  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5704  (__v2df)(__m128d)(B), \
5705  (__v2df)_mm_setzero_pd(), \
5706  (__mmask8)-1, (int)(R)); })
5707 
5708 
5709 static __inline__ __m128d __DEFAULT_FN_ATTRS
5710 _mm_getexp_sd (__m128d __A, __m128d __B)
5711 {
5712  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5713  (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5714 }
5715 
5716 static __inline__ __m128d __DEFAULT_FN_ATTRS
5717 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5718 {
5719  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5720  (__v2df) __B,
5721  (__v2df) __W,
5722  (__mmask8) __U,
5724 }
5725 
5726 #define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\
5727  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5728  (__v2df)(__m128d)(B), \
5729  (__v2df)(__m128d)(W), \
5730  (__mmask8)(U), (int)(R)); })
5731 
5732 static __inline__ __m128d __DEFAULT_FN_ATTRS
5733 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5734 {
5735  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5736  (__v2df) __B,
5737  (__v2df) _mm_setzero_pd (),
5738  (__mmask8) __U,
5740 }
5741 
/* Rounding-control variants of scalar getexp: zero-masked double form and
   unmasked float form. */
5742 #define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\
5743  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5744  (__v2df)(__m128d)(B), \
5745  (__v2df)_mm_setzero_pd(), \
5746  (__mmask8)(U), (int)(R)); })
5747
5748 #define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
5749  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5750  (__v4sf)(__m128)(B), \
5751  (__v4sf)_mm_setzero_ps(), \
5752  (__mmask8)-1, (int)(R)); })
5753
/* Extract the biased exponent of the low float of __B; upper lanes from __A. */
5754 static __inline__ __m128 __DEFAULT_FN_ATTRS
5755 _mm_getexp_ss (__m128 __A, __m128 __B)
5756 {
5757  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5758  (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5759 }
5760 
/* Write-masked scalar getexp (float): low lane from getexp(__B) or __W per
   bit 0 of __U; upper lanes from __A.
   Restored the trailing _MM_FROUND_CUR_DIRECTION argument (doc line 5768)
   dropped from this listing. */
5761 static __inline__ __m128 __DEFAULT_FN_ATTRS
5762 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5763 {
5764  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5765  (__v4sf) __B,
5766  (__v4sf) __W,
5767  (__mmask8) __U,
5768  _MM_FROUND_CUR_DIRECTION);
5769 }
5770 
/* Write-masked scalar getexp (float) with explicit rounding control R. */
5771 #define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\
5772  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5773  (__v4sf)(__m128)(B), \
5774  (__v4sf)(__m128)(W), \
5775  (__mmask8)(U), (int)(R)); })
5776 
/* Zero-masked scalar getexp (float): low lane is getexp(__B) or 0.0f per
   bit 0 of __U; upper lanes from __A.
   Restored the trailing _MM_FROUND_CUR_DIRECTION argument (doc line 5784)
   dropped from this listing, and use _mm_setzero_ps(): the float intrinsic
   should take a float zero vector, not _mm_setzero_pd() (same bits, wrong
   element type; fixed the same way upstream). */
5777 static __inline__ __m128 __DEFAULT_FN_ATTRS
5778 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5779 {
5780  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5781  (__v4sf) __B,
5782  (__v4sf) _mm_setzero_ps (),
5783  (__mmask8) __U,
5784  _MM_FROUND_CUR_DIRECTION);
5785 }
5786 
/* Zero-masked scalar getexp (float) with explicit rounding control R. */
5787 #define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\
5788  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5789  (__v4sf)(__m128)(B), \
5790  (__v4sf)_mm_setzero_ps(), \
5791  (__mmask8)(U), (int)(R)); })
5792 
/* Scalar getmant (double): normalize the mantissa of the low element of B
   per interval C and sign-control D (packed as (D<<2)|C); round/current
   rounding and unmasked/write-masked variants. */
5793 #define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
5794  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5795  (__v2df)(__m128d)(B), \
5796  (int)(((D)<<2) | (C)), \
5797  (__v2df)_mm_setzero_pd(), \
5798  (__mmask8)-1, (int)(R)); })
5799
5800 #define _mm_getmant_sd(A, B, C, D) __extension__ ({ \
5801  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5802  (__v2df)(__m128d)(B), \
5803  (int)(((D)<<2) | (C)), \
5804  (__v2df)_mm_setzero_pd(), \
5805  (__mmask8)-1, \
5806  _MM_FROUND_CUR_DIRECTION); })
5807
5808 #define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\
5809  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5810  (__v2df)(__m128d)(B), \
5811  (int)(((D)<<2) | (C)), \
5812  (__v2df)(__m128d)(W), \
5813  (__mmask8)(U), \
5814  _MM_FROUND_CUR_DIRECTION); })
5815 
/* Write-masked scalar getmant (double) with explicit rounding control R.
   Added the __extension__ wrapper in front of the GNU statement expression
   so this macro does not warn under -pedantic — every sibling macro in this
   family already carries it. */
5816 #define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) __extension__ ({\
5817  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5818  (__v2df)(__m128d)(B), \
5819  (int)(((D)<<2) | (C)), \
5820  (__v2df)(__m128d)(W), \
5821  (__mmask8)(U), (int)(R)); })
5822 
/* Remaining getmant variants: zero-masked double forms, then the float
   family (unmasked, current-direction, write-masked). */
5823 #define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\
5824  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5825  (__v2df)(__m128d)(B), \
5826  (int)(((D)<<2) | (C)), \
5827  (__v2df)_mm_setzero_pd(), \
5828  (__mmask8)(U), \
5829  _MM_FROUND_CUR_DIRECTION); })
5830
5831 #define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\
5832  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5833  (__v2df)(__m128d)(B), \
5834  (int)(((D)<<2) | (C)), \
5835  (__v2df)_mm_setzero_pd(), \
5836  (__mmask8)(U), (int)(R)); })
5837
5838 #define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
5839  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5840  (__v4sf)(__m128)(B), \
5841  (int)(((D)<<2) | (C)), \
5842  (__v4sf)_mm_setzero_ps(), \
5843  (__mmask8)-1, (int)(R)); })
5844
5845 #define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
5846  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5847  (__v4sf)(__m128)(B), \
5848  (int)(((D)<<2) | (C)), \
5849  (__v4sf)_mm_setzero_ps(), \
5850  (__mmask8)-1, \
5851  _MM_FROUND_CUR_DIRECTION); })
5852
5853 #define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\
5854  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5855  (__v4sf)(__m128)(B), \
5856  (int)(((D)<<2) | (C)), \
5857  (__v4sf)(__m128)(W), \
5858  (__mmask8)(U), \
5859  _MM_FROUND_CUR_DIRECTION); })
5860 
/* Write-masked scalar getmant (float) with explicit rounding control R.
   Added the __extension__ wrapper in front of the GNU statement expression
   so this macro does not warn under -pedantic, matching its siblings. */
5861 #define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) __extension__ ({\
5862  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5863  (__v4sf)(__m128)(B), \
5864  (int)(((D)<<2) | (C)), \
5865  (__v4sf)(__m128)(W), \
5866  (__mmask8)(U), (int)(R)); })
5867 
/* Zero-masked scalar getmant (float), current rounding direction.
   Use _mm_setzero_ps() for the zero passthrough: this is a float intrinsic
   and the original _mm_setzero_pd() relied on a vector cast between element
   types (same bits, wrong type; fixed the same way upstream). */
5868 #define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\
5869  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5870  (__v4sf)(__m128)(B), \
5871  (int)(((D)<<2) | (C)), \
5872  (__v4sf)_mm_setzero_ps(), \
5873  (__mmask8)(U), \
5874  _MM_FROUND_CUR_DIRECTION); })
5875 
/* Zero-masked scalar getmant (float) with explicit rounding control R. */
5876 #define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\
5877  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5878  (__v4sf)(__m128)(B), \
5879  (int)(((D)<<2) | (C)), \
5880  (__v4sf)_mm_setzero_ps(), \
5881  (__mmask8)(U), (int)(R)); })
5882 
/* Copy a 16-bit mask register value; a plain value copy at the C level. */
5883 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5884 _mm512_kmov (__mmask16 __A)
5885 {
5886  return __A;
5887 }
5888 
/* Scalar ordered/unordered compares with predicate P and SAE control R,
   then the 64-bit cvt-with-rounding macro (x86-64 only). */
5889 #define _mm_comi_round_sd(A, B, P, R) __extension__ ({\
5890  (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
5891  (int)(P), (int)(R)); })
5892
5893 #define _mm_comi_round_ss(A, B, P, R) __extension__ ({\
5894  (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
5895  (int)(P), (int)(R)); })
5896
5897 #ifdef __x86_64__
5898 #define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
5899  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
5900 #endif
5901
/* Two-source permute of 32-bit elements; the "mask2" form merges from the
   index operand __I where __U bits are clear. */
5902 static __inline__ __m512i __DEFAULT_FN_ATTRS
5903 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5904  __mmask16 __U, __m512i __B)
5905 {
5906  return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5907  (__v16si) __I
5908  /* idx */ ,
5909  (__v16si) __B,
5910  (__mmask16) __U);
5911 }
5912 
/* Shift-left-logical of 32-bit lanes by the count in the low 64 bits of
   __B; plus merge-masked and zero-masked forms built on selectd. */
5913 static __inline__ __m512i __DEFAULT_FN_ATTRS
5914 _mm512_sll_epi32(__m512i __A, __m128i __B)
5915 {
5916  return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5917 }
5918
5919 static __inline__ __m512i __DEFAULT_FN_ATTRS
5920 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5921 {
5922  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5923  (__v16si)_mm512_sll_epi32(__A, __B),
5924  (__v16si)__W);
5925 }
5926
5927 static __inline__ __m512i __DEFAULT_FN_ATTRS
5928 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5929 {
5930  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5931  (__v16si)_mm512_sll_epi32(__A, __B),
5932  (__v16si)_mm512_setzero_si512());
5933 }
5934
/* Same shift family for 64-bit lanes (8 elements, __mmask8). */
5935 static __inline__ __m512i __DEFAULT_FN_ATTRS
5936 _mm512_sll_epi64(__m512i __A, __m128i __B)
5937 {
5938  return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5939 }
5940
5941 static __inline__ __m512i __DEFAULT_FN_ATTRS
5942 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5943 {
5944  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5945  (__v8di)_mm512_sll_epi64(__A, __B),
5946  (__v8di)__W);
5947 }
5948
5949 static __inline__ __m512i __DEFAULT_FN_ATTRS
5950 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5951 {
5952  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5953  (__v8di)_mm512_sll_epi64(__A, __B),
5954  (__v8di)_mm512_setzero_si512());
5955 }
5956
/* Variable shift-left: each lane of __X shifted by the matching lane of __Y. */
5957 static __inline__ __m512i __DEFAULT_FN_ATTRS
5958 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
5959 {
5960  return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5961 }
5962
5963 static __inline__ __m512i __DEFAULT_FN_ATTRS
5964 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5965 {
5966  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5967  (__v16si)_mm512_sllv_epi32(__X, __Y),
5968  (__v16si)__W);
5969 }
5970
5971 static __inline__ __m512i __DEFAULT_FN_ATTRS
5972 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5973 {
5974  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5975  (__v16si)_mm512_sllv_epi32(__X, __Y),
5976  (__v16si)_mm512_setzero_si512());
5977 }
5978
5979 static __inline__ __m512i __DEFAULT_FN_ATTRS
5980 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
5981 {
5982  return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5983 }
5984
5985 static __inline__ __m512i __DEFAULT_FN_ATTRS
5986 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5987 {
5988  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5989  (__v8di)_mm512_sllv_epi64(__X, __Y),
5990  (__v8di)__W);
5991 }
5992
5993 static __inline__ __m512i __DEFAULT_FN_ATTRS
5994 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5995 {
5996  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5997  (__v8di)_mm512_sllv_epi64(__X, __Y),
5998  (__v8di)_mm512_setzero_si512());
5999 }
6000 
/* Arithmetic right shift by the count in the low 64 bits of __B, 32-bit
   lanes; merge- and zero-masked variants via selectd. */
6001 static __inline__ __m512i __DEFAULT_FN_ATTRS
6002 _mm512_sra_epi32(__m512i __A, __m128i __B)
6003 {
6004  return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
6005 }
6006
6007 static __inline__ __m512i __DEFAULT_FN_ATTRS
6008 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6009 {
6010  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6011  (__v16si)_mm512_sra_epi32(__A, __B),
6012  (__v16si)__W);
6013 }
6014
6015 static __inline__ __m512i __DEFAULT_FN_ATTRS
6016 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
6017 {
6018  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6019  (__v16si)_mm512_sra_epi32(__A, __B),
6020  (__v16si)_mm512_setzero_si512());
6021 }
6022
/* 64-bit arithmetic right shift family (vpsraq is new in AVX-512). */
6023 static __inline__ __m512i __DEFAULT_FN_ATTRS
6024 _mm512_sra_epi64(__m512i __A, __m128i __B)
6025 {
6026  return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
6027 }
6028
6029 static __inline__ __m512i __DEFAULT_FN_ATTRS
6030 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6031 {
6032  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6033  (__v8di)_mm512_sra_epi64(__A, __B),
6034  (__v8di)__W);
6035 }
6036
6037 static __inline__ __m512i __DEFAULT_FN_ATTRS
6038 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
6039 {
6040  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6041  (__v8di)_mm512_sra_epi64(__A, __B),
6042  (__v8di)_mm512_setzero_si512());
6043 }
6044
/* Per-lane variable arithmetic right shifts. */
6045 static __inline__ __m512i __DEFAULT_FN_ATTRS
6046 _mm512_srav_epi32(__m512i __X, __m512i __Y)
6047 {
6048  return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
6049 }
6050
6051 static __inline__ __m512i __DEFAULT_FN_ATTRS
6052 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6053 {
6054  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6055  (__v16si)_mm512_srav_epi32(__X, __Y),
6056  (__v16si)__W);
6057 }
6058
6059 static __inline__ __m512i __DEFAULT_FN_ATTRS
6060 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
6061 {
6062  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6063  (__v16si)_mm512_srav_epi32(__X, __Y),
6064  (__v16si)_mm512_setzero_si512());
6065 }
6066
6067 static __inline__ __m512i __DEFAULT_FN_ATTRS
6068 _mm512_srav_epi64(__m512i __X, __m512i __Y)
6069 {
6070  return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
6071 }
6072
6073 static __inline__ __m512i __DEFAULT_FN_ATTRS
6074 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6075 {
6076  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6077  (__v8di)_mm512_srav_epi64(__X, __Y),
6078  (__v8di)__W);
6079 }
6080
6081 static __inline__ __m512i __DEFAULT_FN_ATTRS
6082 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
6083 {
6084  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6085  (__v8di)_mm512_srav_epi64(__X, __Y),
6086  (__v8di)_mm512_setzero_si512());
6087 }
6088 
/* Logical right shift by the count in the low 64 bits of __B, 32-bit lanes;
   merge- and zero-masked variants via selectd. */
6089 static __inline__ __m512i __DEFAULT_FN_ATTRS
6090 _mm512_srl_epi32(__m512i __A, __m128i __B)
6091 {
6092  return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
6093 }
6094
6095 static __inline__ __m512i __DEFAULT_FN_ATTRS
6096 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6097 {
6098  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6099  (__v16si)_mm512_srl_epi32(__A, __B),
6100  (__v16si)__W);
6101 }
6102
6103 static __inline__ __m512i __DEFAULT_FN_ATTRS
6104 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
6105 {
6106  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6107  (__v16si)_mm512_srl_epi32(__A, __B),
6108  (__v16si)_mm512_setzero_si512());
6109 }
6110
/* 64-bit logical right shift family. */
6111 static __inline__ __m512i __DEFAULT_FN_ATTRS
6112 _mm512_srl_epi64(__m512i __A, __m128i __B)
6113 {
6114  return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
6115 }
6116
6117 static __inline__ __m512i __DEFAULT_FN_ATTRS
6118 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6119 {
6120  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6121  (__v8di)_mm512_srl_epi64(__A, __B),
6122  (__v8di)__W);
6123 }
6124
6125 static __inline__ __m512i __DEFAULT_FN_ATTRS
6126 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
6127 {
6128  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6129  (__v8di)_mm512_srl_epi64(__A, __B),
6130  (__v8di)_mm512_setzero_si512());
6131 }
6132
/* Per-lane variable logical right shifts. */
6133 static __inline__ __m512i __DEFAULT_FN_ATTRS
6134 _mm512_srlv_epi32(__m512i __X, __m512i __Y)
6135 {
6136  return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
6137 }
6138
6139 static __inline__ __m512i __DEFAULT_FN_ATTRS
6140 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6141 {
6142  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6143  (__v16si)_mm512_srlv_epi32(__X, __Y),
6144  (__v16si)__W);
6145 }
6146
6147 static __inline__ __m512i __DEFAULT_FN_ATTRS
6148 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
6149 {
6150  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6151  (__v16si)_mm512_srlv_epi32(__X, __Y),
6152  (__v16si)_mm512_setzero_si512());
6153 }
6154
6155 static __inline__ __m512i __DEFAULT_FN_ATTRS
6156 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
6157 {
6158  return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
6159 }
6160
6161 static __inline__ __m512i __DEFAULT_FN_ATTRS
6162 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6163 {
6164  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6165  (__v8di)_mm512_srlv_epi64(__X, __Y),
6166  (__v8di)__W);
6167 }
6168
6169 static __inline__ __m512i __DEFAULT_FN_ATTRS
6170 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
6171 {
6172  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6173  (__v8di)_mm512_srlv_epi64(__X, __Y),
6174  (__v8di)_mm512_setzero_si512());
6175 }
6176 
/* vpternlog: each result bit is looked up in the 8-bit truth table imm using
   the corresponding bits of A, B and C; 32- and 64-bit masked forms. */
6177 #define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
6178  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
6179  (__v16si)(__m512i)(B), \
6180  (__v16si)(__m512i)(C), (int)(imm), \
6181  (__mmask16)-1); })
6182
6183 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
6184  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
6185  (__v16si)(__m512i)(B), \
6186  (__v16si)(__m512i)(C), (int)(imm), \
6187  (__mmask16)(U)); })
6188
6189 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
6190  (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
6191  (__v16si)(__m512i)(B), \
6192  (__v16si)(__m512i)(C), \
6193  (int)(imm), (__mmask16)(U)); })
6194
6195 #define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
6196  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
6197  (__v8di)(__m512i)(B), \
6198  (__v8di)(__m512i)(C), (int)(imm), \
6199  (__mmask8)-1); })
6200
6201 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
6202  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
6203  (__v8di)(__m512i)(B), \
6204  (__v8di)(__m512i)(C), (int)(imm), \
6205  (__mmask8)(U)); })
6206
6207 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
6208  (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
6209  (__v8di)(__m512i)(B), \
6210  (__v8di)(__m512i)(C), (int)(imm), \
6211  (__mmask8)(U)); })
6212
/* Scalar double -> integer conversions with explicit rounding control R. */
6213 #ifdef __x86_64__
6214 #define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
6215  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
6216 #endif
6217
6218 #define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \
6219  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })
6220
6221 #define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \
6222  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })
6223
6224 #define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \
6225  (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); })
6226 
/* Convert the low double of __A to unsigned 32-bit.  Restored the rounding
   argument (doc line 6231) dropped from this listing. */
6227 static __inline__ unsigned __DEFAULT_FN_ATTRS
6228 _mm_cvtsd_u32 (__m128d __A)
6229 {
6230  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
6231  _MM_FROUND_CUR_DIRECTION);
6232 }
6233 
6234 #ifdef __x86_64__
/* Low double of A to unsigned 64-bit with explicit rounding control R. */
6235 #define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \
6236  (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
6237  (int)(R)); })
6238
/* Convert the low double of __A to unsigned 64-bit.  Restored the rounding
   argument (doc line 6244) dropped from this listing. */
6239 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6240 _mm_cvtsd_u64 (__m128d __A)
6241 {
6242  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
6243  __A,
6244  _MM_FROUND_CUR_DIRECTION);
6245 }
6246 #endif
6247 
/* Scalar float -> integer conversions with explicit rounding control R. */
6248 #define _mm_cvt_roundss_si32(A, R) __extension__ ({ \
6249  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })
6250
6251 #define _mm_cvt_roundss_i32(A, R) __extension__ ({ \
6252  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })
6253
6254 #ifdef __x86_64__
6255 #define _mm_cvt_roundss_si64(A, R) __extension__ ({ \
6256  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
6257
6258 #define _mm_cvt_roundss_i64(A, R) __extension__ ({ \
6259  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
6260 #endif
6261
6262 #define _mm_cvt_roundss_u32(A, R) __extension__ ({ \
6263  (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); })
6264 
/* Convert the low float of __A to unsigned 32-bit.  Restored the rounding
   argument (doc line 6269) dropped from this listing. */
6265 static __inline__ unsigned __DEFAULT_FN_ATTRS
6266 _mm_cvtss_u32 (__m128 __A)
6267 {
6268  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6269  _MM_FROUND_CUR_DIRECTION);
6270 }
6271 
6272 #ifdef __x86_64__
/* Low float of A to unsigned 64-bit with explicit rounding control R. */
6273 #define _mm_cvt_roundss_u64(A, R) __extension__ ({ \
6274  (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
6275  (int)(R)); })
6276 
/* Convert the low float of __A to unsigned 64-bit.  Restored the rounding
   argument (doc line 6282) dropped from this listing. */
6277 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6278 _mm_cvtss_u64 (__m128 __A)
6279 {
6280  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6281  __A,
6282  _MM_FROUND_CUR_DIRECTION);
6283 }
6284 #endif
6285 
/* Truncating scalar double -> signed 32-bit conversions with SAE control R. */
6286 #define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \
6287  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })
6288
6289 #define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \
6290  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })
6291 
/* Truncate the low double of __A to signed 32-bit.  Restored the rounding
   argument (doc line 6296) dropped from this listing. */
6292 static __inline__ int __DEFAULT_FN_ATTRS
6293 _mm_cvttsd_i32 (__m128d __A)
6294 {
6295  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6296  _MM_FROUND_CUR_DIRECTION);
6297 }
6298 
6299 #ifdef __x86_64__
/* Truncating scalar double -> signed 64-bit conversions with SAE control R. */
6300 #define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \
6301  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
6302
6303 #define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \
6304  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
6305 
/* Truncate the low double of __A to signed 64-bit.  Restored the rounding
   argument (doc line 6310) dropped from this listing. */
6306 static __inline__ long long __DEFAULT_FN_ATTRS
6307 _mm_cvttsd_i64 (__m128d __A)
6308 {
6309  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6310  _MM_FROUND_CUR_DIRECTION);
6311 }
6312 #endif
6313 
/* Truncating scalar double -> unsigned 32-bit with SAE control R. */
6314 #define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \
6315  (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); })
6316 
/* Truncate the low double of __A to unsigned 32-bit.  Restored the rounding
   argument (doc line 6321) dropped from this listing. */
6317 static __inline__ unsigned __DEFAULT_FN_ATTRS
6318 _mm_cvttsd_u32 (__m128d __A)
6319 {
6320  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6321  _MM_FROUND_CUR_DIRECTION);
6322 }
6323 
6324 #ifdef __x86_64__
/* Truncating scalar double -> unsigned 64-bit with SAE control R. */
6325 #define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \
6326  (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
6327  (int)(R)); })
6328 
/* Truncate the low double of __A to unsigned 64-bit.  Restored the rounding
   argument (doc line 6334) dropped from this listing. */
6329 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6330 _mm_cvttsd_u64 (__m128d __A)
6331 {
6332  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6333  __A,
6334  _MM_FROUND_CUR_DIRECTION);
6335 }
6336 #endif
6337 
/* Truncating scalar float -> signed 32-bit conversions with SAE control R. */
6338 #define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \
6339  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })
6340
6341 #define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \
6342  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })
6343 
/* Truncate the low float of __A to signed 32-bit.  Restored the rounding
   argument (doc line 6348) dropped from this listing. */
6344 static __inline__ int __DEFAULT_FN_ATTRS
6345 _mm_cvttss_i32 (__m128 __A)
6346 {
6347  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6348  _MM_FROUND_CUR_DIRECTION);
6349 }
6350 
6351 #ifdef __x86_64__
/* Truncating scalar float -> signed 64-bit conversions with SAE control R. */
6352 #define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \
6353  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
6354
6355 #define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \
6356  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
6357 
/* Truncate the low float of __A to signed 64-bit.  Restored the rounding
   argument (doc line 6362) dropped from this listing. */
6358 static __inline__ long long __DEFAULT_FN_ATTRS
6359 _mm_cvttss_i64 (__m128 __A)
6360 {
6361  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6362  _MM_FROUND_CUR_DIRECTION);
6363 }
6364 #endif
6365 
/* Truncating scalar float -> unsigned 32-bit with SAE control R. */
6366 #define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \
6367  (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); })
6368 
/* Truncate the low float of __A to unsigned 32-bit.  Restored the rounding
   argument (doc line 6373) dropped from this listing. */
6369 static __inline__ unsigned __DEFAULT_FN_ATTRS
6370 _mm_cvttss_u32 (__m128 __A)
6371 {
6372  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6373  _MM_FROUND_CUR_DIRECTION);
6374 }
6375 
6376 #ifdef __x86_64__
/* Truncating scalar float -> unsigned 64-bit with SAE control R. */
6377 #define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \
6378  (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
6379  (int)(R)); })
6380 
/* Truncate the low float of __A to unsigned 64-bit.  Restored the rounding
   argument (doc line 6386) dropped from this listing. */
6381 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6382 _mm_cvttss_u64 (__m128 __A)
6383 {
6384  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6385  __A,
6386  _MM_FROUND_CUR_DIRECTION);
6387 }
6388 #endif
6389 
/* "mask2" two-source permutes for pd/ps/epi64: lanes with a clear __U bit
   are taken from the index operand __I. */
6390 static __inline__ __m512d __DEFAULT_FN_ATTRS
6391 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6392  __m512d __B)
6393 {
6394  return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6395  (__v8di) __I
6396  /* idx */ ,
6397  (__v8df) __B,
6398  (__mmask8) __U);
6399 }
6400
6401 static __inline__ __m512 __DEFAULT_FN_ATTRS
6402 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6403  __m512 __B)
6404 {
6405  return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6406  (__v16si) __I
6407  /* idx */ ,
6408  (__v16sf) __B,
6409  (__mmask16) __U);
6410 }
6411
6412 static __inline__ __m512i __DEFAULT_FN_ATTRS
6413 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6414  __mmask8 __U, __m512i __B)
6415 {
6416  return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6417  (__v8di) __I
6418  /* idx */ ,
6419  (__v8di) __B,
6420  (__mmask8) __U);
6421 }
6422
/* In-lane immediate permute of doubles: one select bit per element, applied
   within each 128-bit lane; expressed as a compile-time shufflevector. */
6423 #define _mm512_permute_pd(X, C) __extension__ ({ \
6424  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
6425  (__v8df)_mm512_undefined_pd(), \
6426  0 + (((C) >> 0) & 0x1), \
6427  0 + (((C) >> 1) & 0x1), \
6428  2 + (((C) >> 2) & 0x1), \
6429  2 + (((C) >> 3) & 0x1), \
6430  4 + (((C) >> 4) & 0x1), \
6431  4 + (((C) >> 5) & 0x1), \
6432  6 + (((C) >> 6) & 0x1), \
6433  6 + (((C) >> 7) & 0x1)); })
6434
6435 #define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \
6436  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6437  (__v8df)_mm512_permute_pd((X), (C)), \
6438  (__v8df)(__m512d)(W)); })
6439
6440 #define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \
6441  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6442  (__v8df)_mm512_permute_pd((X), (C)), \
6443  (__v8df)_mm512_setzero_pd()); })
6444
/* In-lane immediate permute of floats: the same 2-bit selector pattern is
   replayed in each 128-bit lane. */
6445 #define _mm512_permute_ps(X, C) __extension__ ({ \
6446  (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \
6447  (__v16sf)_mm512_undefined_ps(), \
6448  0 + (((C) >> 0) & 0x3), \
6449  0 + (((C) >> 2) & 0x3), \
6450  0 + (((C) >> 4) & 0x3), \
6451  0 + (((C) >> 6) & 0x3), \
6452  4 + (((C) >> 0) & 0x3), \
6453  4 + (((C) >> 2) & 0x3), \
6454  4 + (((C) >> 4) & 0x3), \
6455  4 + (((C) >> 6) & 0x3), \
6456  8 + (((C) >> 0) & 0x3), \
6457  8 + (((C) >> 2) & 0x3), \
6458  8 + (((C) >> 4) & 0x3), \
6459  8 + (((C) >> 6) & 0x3), \
6460  12 + (((C) >> 0) & 0x3), \
6461  12 + (((C) >> 2) & 0x3), \
6462  12 + (((C) >> 4) & 0x3), \
6463  12 + (((C) >> 6) & 0x3)); })
6464
6465 #define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \
6466  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6467  (__v16sf)_mm512_permute_ps((X), (C)), \
6468  (__v16sf)(__m512)(W)); })
6469
6470 #define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \
6471  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6472  (__v16sf)_mm512_permute_ps((X), (C)), \
6473  (__v16sf)_mm512_setzero_ps()); })
6474 
/* Variable in-lane permute (vpermilvar): per-element selectors come from
   the integer vector __C; merge- and zero-masked forms via select. */
6475 static __inline__ __m512d __DEFAULT_FN_ATTRS
6476 _mm512_permutevar_pd(__m512d __A, __m512i __C)
6477 {
6478  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
6479 }
6480
6481 static __inline__ __m512d __DEFAULT_FN_ATTRS
6482 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6483 {
6484  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6485  (__v8df)_mm512_permutevar_pd(__A, __C),
6486  (__v8df)__W);
6487 }
6488
6489 static __inline__ __m512d __DEFAULT_FN_ATTRS
6490 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
6491 {
6492  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6493  (__v8df)_mm512_permutevar_pd(__A, __C),
6494  (__v8df)_mm512_setzero_pd());
6495 }
6496
6497 static __inline__ __m512 __DEFAULT_FN_ATTRS
6498 _mm512_permutevar_ps(__m512 __A, __m512i __C)
6499 {
6500  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6501 }
6502
6503 static __inline__ __m512 __DEFAULT_FN_ATTRS
6504 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6505 {
6506  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6507  (__v16sf)_mm512_permutevar_ps(__A, __C),
6508  (__v16sf)__W);
6509 }
6510
6511 static __inline__ __m512 __DEFAULT_FN_ATTRS
6512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
6513 {
6514  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6515  (__v16sf)_mm512_permutevar_ps(__A, __C),
6516  (__v16sf)_mm512_setzero_ps());
6517 }
6518
/* Two-source full permutes (vpermt2var): indices in __I select elements
   across the concatenation of __A and __B; the masked form merges from __A,
   the maskz form zeroes. */
6519 static __inline __m512d __DEFAULT_FN_ATTRS
6520 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
6521 {
6522  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6523  /* idx */ ,
6524  (__v8df) __A,
6525  (__v8df) __B,
6526  (__mmask8) -1);
6527 }
6528
6529 static __inline__ __m512d __DEFAULT_FN_ATTRS
6530 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
6531 {
6532  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6533  /* idx */ ,
6534  (__v8df) __A,
6535  (__v8df) __B,
6536  (__mmask8) __U);
6537 }
6538
6539 static __inline__ __m512d __DEFAULT_FN_ATTRS
6540 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6541  __m512d __B)
6542 {
6543  return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6544  /* idx */ ,
6545  (__v8df) __A,
6546  (__v8df) __B,
6547  (__mmask8) __U);
6548 }
6549
6550 static __inline __m512 __DEFAULT_FN_ATTRS
6551 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
6552 {
6553  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6554  /* idx */ ,
6555  (__v16sf) __A,
6556  (__v16sf) __B,
6557  (__mmask16) -1);
6558 }
6559
6560 static __inline__ __m512 __DEFAULT_FN_ATTRS
6561 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6562 {
6563  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6564  /* idx */ ,
6565  (__v16sf) __A,
6566  (__v16sf) __B,
6567  (__mmask16) __U);
6568 }
6569
6570 static __inline__ __m512 __DEFAULT_FN_ATTRS
6571 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6572  __m512 __B)
6573 {
6574  return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6575  /* idx */ ,
6576  (__v16sf) __A,
6577  (__v16sf) __B,
6578  (__mmask16) __U);
6579 }
6580 
6581 
/* Truncating packed double -> unsigned 32-bit conversions with SAE control
   R: unmasked, write-masked and zero-masked forms. */
6582 #define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \
6583  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6584  (__v8si)_mm256_undefined_si256(), \
6585  (__mmask8)-1, (int)(R)); })
6586
6587 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \
6588  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6589  (__v8si)(__m256i)(W), \
6590  (__mmask8)(U), (int)(R)); })
6591
6592 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \
6593  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6594  (__v8si)_mm256_setzero_si256(), \
6595  (__mmask8)(U), (int)(R)); })
6596 
/* Truncate 8 doubles to 8 unsigned 32-bit integers.  Restored two arguments
   dropped from this listing: the undefined passthrough vector (doc line
   6602) and the rounding argument (doc line 6604). */
6597 static __inline__ __m256i __DEFAULT_FN_ATTRS
6598 _mm512_cvttpd_epu32 (__m512d __A)
6599 {
6600  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6601  (__v8si)
6602  _mm256_undefined_si256 (),
6603  (__mmask8) -1,
6604  _MM_FROUND_CUR_DIRECTION);
6605 }
6606 
/* Write-masked truncating pd -> epu32 conversion; lanes with a clear __U
   bit come from __W.  Restored the rounding argument (doc line 6613)
   dropped from this listing. */
6607 static __inline__ __m256i __DEFAULT_FN_ATTRS
6608 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6609 {
6610  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6611  (__v8si) __W,
6612  (__mmask8) __U,
6613  _MM_FROUND_CUR_DIRECTION);
6614 }
6615 
/* Zero-masked truncating pd -> epu32 conversion.  Restored two arguments
   dropped from this listing: the zero passthrough vector (doc line 6621)
   and the rounding argument (doc line 6623). */
6616 static __inline__ __m256i __DEFAULT_FN_ATTRS
6617 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
6618 {
6619  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6620  (__v8si)
6621  _mm256_setzero_si256 (),
6622  (__mmask8) __U,
6623  _MM_FROUND_CUR_DIRECTION);
6624 }
6625 
/* Scalar roundscale (round to 2^-imm[7:4] precision with mode imm[3:0]):
   double variants with explicit rounding, current direction, write-masked
   and zero-masked forms, then the same family for float. */
6626 #define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \
6627  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6628  (__v2df)(__m128d)(B), \
6629  (__v2df)_mm_setzero_pd(), \
6630  (__mmask8)-1, (int)(imm), \
6631  (int)(R)); })
6632
6633 #define _mm_roundscale_sd(A, B, imm) __extension__ ({ \
6634  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6635  (__v2df)(__m128d)(B), \
6636  (__v2df)_mm_setzero_pd(), \
6637  (__mmask8)-1, (int)(imm), \
6638  _MM_FROUND_CUR_DIRECTION); })
6639
6640 #define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \
6641  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6642  (__v2df)(__m128d)(B), \
6643  (__v2df)(__m128d)(W), \
6644  (__mmask8)(U), (int)(imm), \
6645  _MM_FROUND_CUR_DIRECTION); })
6646
6647 #define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \
6648  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6649  (__v2df)(__m128d)(B), \
6650  (__v2df)(__m128d)(W), \
6651  (__mmask8)(U), (int)(I), \
6652  (int)(R)); })
6653
6654 #define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \
6655  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6656  (__v2df)(__m128d)(B), \
6657  (__v2df)_mm_setzero_pd(), \
6658  (__mmask8)(U), (int)(I), \
6659  _MM_FROUND_CUR_DIRECTION); })
6660
6661 #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \
6662  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6663  (__v2df)(__m128d)(B), \
6664  (__v2df)_mm_setzero_pd(), \
6665  (__mmask8)(U), (int)(I), \
6666  (int)(R)); })
6667
6668 #define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \
6669  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6670  (__v4sf)(__m128)(B), \
6671  (__v4sf)_mm_setzero_ps(), \
6672  (__mmask8)-1, (int)(imm), \
6673  (int)(R)); })
6674
6675 #define _mm_roundscale_ss(A, B, imm) __extension__ ({ \
6676  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6677  (__v4sf)(__m128)(B), \
6678  (__v4sf)_mm_setzero_ps(), \
6679  (__mmask8)-1, (int)(imm), \
6680  _MM_FROUND_CUR_DIRECTION); })
6681
6682 #define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \
6683  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6684  (__v4sf)(__m128)(B), \
6685  (__v4sf)(__m128)(W), \
6686  (__mmask8)(U), (int)(I), \
6687  _MM_FROUND_CUR_DIRECTION); })
6688
6689 #define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \
6690  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6691  (__v4sf)(__m128)(B), \
6692  (__v4sf)(__m128)(W), \
6693  (__mmask8)(U), (int)(I), \
6694  (int)(R)); })
6695
6696 #define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \
6697  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6698  (__v4sf)(__m128)(B), \
6699  (__v4sf)_mm_setzero_ps(), \
6700  (__mmask8)(U), (int)(I), \
6701  _MM_FROUND_CUR_DIRECTION); })
6702
6703 #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \
6704  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6705  (__v4sf)(__m128)(B), \
6706  (__v4sf)_mm_setzero_ps(), \
6707  (__mmask8)(U), (int)(I), \
6708  (int)(R)); })
6709 
/* VSCALEFPD with explicit rounding mode R: A * 2^floor(B), per double lane.
 * Unmasked form uses an undefined passthrough (all lanes written).  */
#define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
6727 
6728 static __inline__ __m512d __DEFAULT_FN_ATTRS
6729 _mm512_scalef_pd (__m512d __A, __m512d __B)
6730 {
6731  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6732  (__v8df) __B,
6733  (__v8df)
6735  (__mmask8) -1,
6737 }
6738 
6739 static __inline__ __m512d __DEFAULT_FN_ATTRS
6740 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6741 {
6742  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6743  (__v8df) __B,
6744  (__v8df) __W,
6745  (__mmask8) __U,
6747 }
6748 
6749 static __inline__ __m512d __DEFAULT_FN_ATTRS
6750 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6751 {
6752  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6753  (__v8df) __B,
6754  (__v8df)
6755  _mm512_setzero_pd (),
6756  (__mmask8) __U,
6758 }
6759 
/* VSCALEFPS with explicit rounding mode R: A * 2^floor(B), per float lane.  */
#define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })
6777 
6778 static __inline__ __m512 __DEFAULT_FN_ATTRS
6779 _mm512_scalef_ps (__m512 __A, __m512 __B)
6780 {
6781  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6782  (__v16sf) __B,
6783  (__v16sf)
6785  (__mmask16) -1,
6787 }
6788 
6789 static __inline__ __m512 __DEFAULT_FN_ATTRS
6790 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6791 {
6792  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6793  (__v16sf) __B,
6794  (__v16sf) __W,
6795  (__mmask16) __U,
6797 }
6798 
6799 static __inline__ __m512 __DEFAULT_FN_ATTRS
6800 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6801 {
6802  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6803  (__v16sf) __B,
6804  (__v16sf)
6805  _mm512_setzero_ps (),
6806  (__mmask16) __U,
6808 }
6809 
6810 #define _mm_scalef_round_sd(A, B, R) __extension__ ({ \
6811  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6812  (__v2df)(__m128d)(B), \
6813  (__v2df)_mm_setzero_pd(), \
6814  (__mmask8)-1, (int)(R)); })
6815 
6816 static __inline__ __m128d __DEFAULT_FN_ATTRS
6817 _mm_scalef_sd (__m128d __A, __m128d __B)
6818 {
6819  return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6820  (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6821  (__mmask8) -1,
6823 }
6824 
6825 static __inline__ __m128d __DEFAULT_FN_ATTRS
6826 _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6827 {
6828  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6829  (__v2df) __B,
6830  (__v2df) __W,
6831  (__mmask8) __U,
6833 }
6834 
6835 #define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \
6836  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6837  (__v2df)(__m128d)(B), \
6838  (__v2df)(__m128d)(W), \
6839  (__mmask8)(U), (int)(R)); })
6840 
6841 static __inline__ __m128d __DEFAULT_FN_ATTRS
6842 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6843 {
6844  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6845  (__v2df) __B,
6846  (__v2df) _mm_setzero_pd (),
6847  (__mmask8) __U,
6849 }
6850 
6851 #define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \
6852  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6853  (__v2df)(__m128d)(B), \
6854  (__v2df)_mm_setzero_pd(), \
6855  (__mmask8)(U), (int)(R)); })
6856 
6857 #define _mm_scalef_round_ss(A, B, R) __extension__ ({ \
6858  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6859  (__v4sf)(__m128)(B), \
6860  (__v4sf)_mm_setzero_ps(), \
6861  (__mmask8)-1, (int)(R)); })
6862 
6863 static __inline__ __m128 __DEFAULT_FN_ATTRS
6864 _mm_scalef_ss (__m128 __A, __m128 __B)
6865 {
6866  return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6867  (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6868  (__mmask8) -1,
6870 }
6871 
6872 static __inline__ __m128 __DEFAULT_FN_ATTRS
6873 _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6874 {
6875  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6876  (__v4sf) __B,
6877  (__v4sf) __W,
6878  (__mmask8) __U,
6880 }
6881 
6882 #define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \
6883  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6884  (__v4sf)(__m128)(B), \
6885  (__v4sf)(__m128)(W), \
6886  (__mmask8)(U), (int)(R)); })
6887 
6888 static __inline__ __m128 __DEFAULT_FN_ATTRS
6889 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6890 {
6891  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6892  (__v4sf) __B,
6893  (__v4sf) _mm_setzero_ps (),
6894  (__mmask8) __U,
6896 }
6897 
6898 #define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \
6899  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6900  (__v4sf)(__m128)(B), \
6901  (__v4sf)_mm_setzero_ps(), \
6902  (__mmask8)(U), \
6903  _MM_FROUND_CUR_DIRECTION); })
6904 
6905 static __inline__ __m512i __DEFAULT_FN_ATTRS
6906 _mm512_srai_epi32(__m512i __A, int __B)
6907 {
6908  return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
6909 }
6910 
6911 static __inline__ __m512i __DEFAULT_FN_ATTRS
6912 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
6913 {
6914  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
6915  (__v16si)_mm512_srai_epi32(__A, __B), \
6916  (__v16si)__W);
6917 }
6918 
6919 static __inline__ __m512i __DEFAULT_FN_ATTRS
6920 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
6921  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
6922  (__v16si)_mm512_srai_epi32(__A, __B), \
6923  (__v16si)_mm512_setzero_si512());
6924 }
6925 
6926 static __inline__ __m512i __DEFAULT_FN_ATTRS
6927 _mm512_srai_epi64(__m512i __A, int __B)
6928 {
6929  return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
6930 }
6931 
6932 static __inline__ __m512i __DEFAULT_FN_ATTRS
6933 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
6934 {
6935  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
6936  (__v8di)_mm512_srai_epi64(__A, __B), \
6937  (__v8di)__W);
6938 }
6939 
6940 static __inline__ __m512i __DEFAULT_FN_ATTRS
6941 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
6942 {
6943  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
6944  (__v8di)_mm512_srai_epi64(__A, __B), \
6945  (__v8di)_mm512_setzero_si512());
6946 }
6947 
/* VSHUFF32X4: pick four 128-bit (4-float) lanes — the low two result lanes
 * come from A, the high two from B, each selected by a 2-bit field of imm.  */
#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), \
                                  0  + ((((imm) >> 0) & 0x3) * 4), \
                                  1  + ((((imm) >> 0) & 0x3) * 4), \
                                  2  + ((((imm) >> 0) & 0x3) * 4), \
                                  3  + ((((imm) >> 0) & 0x3) * 4), \
                                  0  + ((((imm) >> 2) & 0x3) * 4), \
                                  1  + ((((imm) >> 2) & 0x3) * 4), \
                                  2  + ((((imm) >> 2) & 0x3) * 4), \
                                  3  + ((((imm) >> 2) & 0x3) * 4), \
                                  16 + ((((imm) >> 4) & 0x3) * 4), \
                                  17 + ((((imm) >> 4) & 0x3) * 4), \
                                  18 + ((((imm) >> 4) & 0x3) * 4), \
                                  19 + ((((imm) >> 4) & 0x3) * 4), \
                                  16 + ((((imm) >> 6) & 0x3) * 4), \
                                  17 + ((((imm) >> 6) & 0x3) * 4), \
                                  18 + ((((imm) >> 6) & 0x3) * 4), \
                                  19 + ((((imm) >> 6) & 0x3) * 4)); })

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                      (__v16sf)(__m512)(W)); })

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                      (__v16sf)_mm512_setzero_ps()); })
6977 
/* VSHUFF64X2: pick four 128-bit (2-double) lanes — low two result lanes from
 * A, high two from B, each selected by a 2-bit field of imm.  */
#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), \
                                   0 + ((((imm) >> 0) & 0x3) * 2), \
                                   1 + ((((imm) >> 0) & 0x3) * 2), \
                                   0 + ((((imm) >> 2) & 0x3) * 2), \
                                   1 + ((((imm) >> 2) & 0x3) * 2), \
                                   8 + ((((imm) >> 4) & 0x3) * 2), \
                                   9 + ((((imm) >> 4) & 0x3) * 2), \
                                   8 + ((((imm) >> 6) & 0x3) * 2), \
                                   9 + ((((imm) >> 6) & 0x3) * 2)); })

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                       (__v8df)(__m512d)(W)); })

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                       (__v8df)_mm512_setzero_pd()); })
6999 
/* VSHUFI32X4.  The shuffle is expressed on __v8di (64-bit pairs) since each
 * selected unit is a whole 128-bit lane; masking is applied per 32-bit lane.  */
#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                   (__v8di)(__m512i)(B), \
                                   0 + ((((imm) >> 0) & 0x3) * 2), \
                                   1 + ((((imm) >> 0) & 0x3) * 2), \
                                   0 + ((((imm) >> 2) & 0x3) * 2), \
                                   1 + ((((imm) >> 2) & 0x3) * 2), \
                                   8 + ((((imm) >> 4) & 0x3) * 2), \
                                   9 + ((((imm) >> 4) & 0x3) * 2), \
                                   8 + ((((imm) >> 6) & 0x3) * 2), \
                                   9 + ((((imm) >> 6) & 0x3) * 2)); })

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                      (__v16si)(__m512i)(W)); })

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                      (__v16si)_mm512_setzero_si512()); })
7021 
/* VSHUFI64X2: 128-bit lane shuffle with per-64-bit-lane masking.  */
#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                   (__v8di)(__m512i)(B), \
                                   0 + ((((imm) >> 0) & 0x3) * 2), \
                                   1 + ((((imm) >> 0) & 0x3) * 2), \
                                   0 + ((((imm) >> 2) & 0x3) * 2), \
                                   1 + ((((imm) >> 2) & 0x3) * 2), \
                                   8 + ((((imm) >> 4) & 0x3) * 2), \
                                   9 + ((((imm) >> 4) & 0x3) * 2), \
                                   8 + ((((imm) >> 6) & 0x3) * 2), \
                                   9 + ((((imm) >> 6) & 0x3) * 2)); })

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                      (__v8di)(__m512i)(W)); })

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                      (__v8di)_mm512_setzero_si512()); })
7043 
/* VSHUFPD: per 128-bit lane, pick one double from A (even result slots) and
 * one from B (odd result slots), each chosen by one bit of M.  */
#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), \
                                   0  + (((M) >> 0) & 0x1), \
                                   8  + (((M) >> 1) & 0x1), \
                                   2  + (((M) >> 2) & 0x1), \
                                   10 + (((M) >> 3) & 0x1), \
                                   4  + (((M) >> 4) & 0x1), \
                                   12 + (((M) >> 5) & 0x1), \
                                   6  + (((M) >> 6) & 0x1), \
                                   14 + (((M) >> 7) & 0x1)); })

#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)(__m512d)(W)); })

#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)_mm512_setzero_pd()); })
7065 
/* VSHUFPS: per 128-bit lane, two floats from A then two from B, each chosen
 * by a 2-bit field of M.
 * Fix: the result of the float shuffle was cast to (__m512d); it must be
 * (__m512) — the operands and result are 16 floats, not 8 doubles.  */
#define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), \
                                  0  + (((M) >> 0) & 0x3), \
                                  0  + (((M) >> 2) & 0x3), \
                                  16 + (((M) >> 4) & 0x3), \
                                  16 + (((M) >> 6) & 0x3), \
                                  4  + (((M) >> 0) & 0x3), \
                                  4  + (((M) >> 2) & 0x3), \
                                  20 + (((M) >> 4) & 0x3), \
                                  20 + (((M) >> 6) & 0x3), \
                                  8  + (((M) >> 0) & 0x3), \
                                  8  + (((M) >> 2) & 0x3), \
                                  24 + (((M) >> 4) & 0x3), \
                                  24 + (((M) >> 6) & 0x3), \
                                  12 + (((M) >> 0) & 0x3), \
                                  12 + (((M) >> 2) & 0x3), \
                                  28 + (((M) >> 4) & 0x3), \
                                  28 + (((M) >> 6) & 0x3)); })

#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)(__m512)(W)); })

#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)_mm512_setzero_ps()); })
7095 
7096 #define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
7097  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
7098  (__v2df)(__m128d)(B), \
7099  (__v2df)_mm_setzero_pd(), \
7100  (__mmask8)-1, (int)(R)); })
7101 
7102 static __inline__ __m128d __DEFAULT_FN_ATTRS
7103 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7104 {
7105  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7106  (__v2df) __B,
7107  (__v2df) __W,
7108  (__mmask8) __U,
7110 }
7111 
7112 #define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \
7113  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
7114  (__v2df)(__m128d)(B), \
7115  (__v2df)(__m128d)(W), \
7116  (__mmask8)(U), (int)(R)); })
7117 
7118 static __inline__ __m128d __DEFAULT_FN_ATTRS
7119 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
7120 {
7121  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7122  (__v2df) __B,
7123  (__v2df) _mm_setzero_pd (),
7124  (__mmask8) __U,
7126 }
7127 
7128 #define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \
7129  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
7130  (__v2df)(__m128d)(B), \
7131  (__v2df)_mm_setzero_pd(), \
7132  (__mmask8)(U), (int)(R)); })
7133 
7134 #define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \
7135  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
7136  (__v4sf)(__m128)(B), \
7137  (__v4sf)_mm_setzero_ps(), \
7138  (__mmask8)-1, (int)(R)); })
7139 
7140 static __inline__ __m128 __DEFAULT_FN_ATTRS
7141 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7142 {
7143  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7144  (__v4sf) __B,
7145  (__v4sf) __W,
7146  (__mmask8) __U,
7148 }
7149 
7150 #define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \
7151  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
7152  (__v4sf)(__m128)(B), \
7153  (__v4sf)(__m128)(W), (__mmask8)(U), \
7154  (int)(R)); })
7155 
7156 static __inline__ __m128 __DEFAULT_FN_ATTRS
7157 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
7158 {
7159  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7160  (__v4sf) __B,
7161  (__v4sf) _mm_setzero_ps (),
7162  (__mmask8) __U,
7164 }
7165 
7166 #define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \
7167  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
7168  (__v4sf)(__m128)(B), \
7169  (__v4sf)_mm_setzero_ps(), \
7170  (__mmask8)(U), (int)(R)); })
7171 
7172 static __inline__ __m512 __DEFAULT_FN_ATTRS
7174 {
7175  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
7176  0, 1, 2, 3, 0, 1, 2, 3,
7177  0, 1, 2, 3, 0, 1, 2, 3);
7178 }
7179 
7180 static __inline__ __m512 __DEFAULT_FN_ATTRS
7181 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
7182 {
7183  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
7184  (__v16sf)_mm512_broadcast_f32x4(__A),
7185  (__v16sf)__O);
7186 }
7187 
7188 static __inline__ __m512 __DEFAULT_FN_ATTRS
7189 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
7190 {
7191  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
7192  (__v16sf)_mm512_broadcast_f32x4(__A),
7193  (__v16sf)_mm512_setzero_ps());
7194 }
7195 
7196 static __inline__ __m512d __DEFAULT_FN_ATTRS
7198 {
7199  return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
7200  0, 1, 2, 3, 0, 1, 2, 3);
7201 }
7202 
7203 static __inline__ __m512d __DEFAULT_FN_ATTRS
7204 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
7205 {
7206  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
7207  (__v8df)_mm512_broadcast_f64x4(__A),
7208  (__v8df)__O);
7209 }
7210 
7211 static __inline__ __m512d __DEFAULT_FN_ATTRS
7212 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
7213 {
7214  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
7215  (__v8df)_mm512_broadcast_f64x4(__A),
7216  (__v8df)_mm512_setzero_pd());
7217 }
7218 
7219 static __inline__ __m512i __DEFAULT_FN_ATTRS
7221 {
7222  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
7223  0, 1, 2, 3, 0, 1, 2, 3,
7224  0, 1, 2, 3, 0, 1, 2, 3);
7225 }
7226 
7227 static __inline__ __m512i __DEFAULT_FN_ATTRS
7228 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
7229 {
7230  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7231  (__v16si)_mm512_broadcast_i32x4(__A),
7232  (__v16si)__O);
7233 }
7234 
7235 static __inline__ __m512i __DEFAULT_FN_ATTRS
7236 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
7237 {
7238  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7239  (__v16si)_mm512_broadcast_i32x4(__A),
7240  (__v16si)_mm512_setzero_si512());
7241 }
7242 
7243 static __inline__ __m512i __DEFAULT_FN_ATTRS
7245 {
7246  return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
7247  0, 1, 2, 3, 0, 1, 2, 3);
7248 }
7249 
7250 static __inline__ __m512i __DEFAULT_FN_ATTRS
7251 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
7252 {
7253  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7254  (__v8di)_mm512_broadcast_i64x4(__A),
7255  (__v8di)__O);
7256 }
7257 
7258 static __inline__ __m512i __DEFAULT_FN_ATTRS
7259 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
7260 {
7261  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7262  (__v8di)_mm512_broadcast_i64x4(__A),
7263  (__v8di)_mm512_setzero_si512());
7264 }
7265 
7266 static __inline__ __m512d __DEFAULT_FN_ATTRS
7267 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
7268 {
7269  return (__m512d)__builtin_ia32_selectpd_512(__M,
7270  (__v8df) _mm512_broadcastsd_pd(__A),
7271  (__v8df) __O);
7272 }
7273 
7274 static __inline__ __m512d __DEFAULT_FN_ATTRS
7275 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
7276 {
7277  return (__m512d)__builtin_ia32_selectpd_512(__M,
7278  (__v8df) _mm512_broadcastsd_pd(__A),
7279  (__v8df) _mm512_setzero_pd());
7280 }
7281 
7282 static __inline__ __m512 __DEFAULT_FN_ATTRS
7283 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
7284 {
7285  return (__m512)__builtin_ia32_selectps_512(__M,
7286  (__v16sf) _mm512_broadcastss_ps(__A),
7287  (__v16sf) __O);
7288 }
7289 
7290 static __inline__ __m512 __DEFAULT_FN_ATTRS
7291 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
7292 {
7293  return (__m512)__builtin_ia32_selectps_512(__M,
7294  (__v16sf) _mm512_broadcastss_ps(__A),
7295  (__v16sf) _mm512_setzero_ps());
7296 }
7297 
7298 static __inline__ __m128i __DEFAULT_FN_ATTRS
7300 {
7301  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7302  (__v16qi) _mm_undefined_si128 (),
7303  (__mmask16) -1);
7304 }
7305 
7306 static __inline__ __m128i __DEFAULT_FN_ATTRS
7307 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7308 {
7309  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7310  (__v16qi) __O, __M);
7311 }
7312 
7313 static __inline__ __m128i __DEFAULT_FN_ATTRS
7314 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
7315 {
7316  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7317  (__v16qi) _mm_setzero_si128 (),
7318  __M);
7319 }
7320 
7321 static __inline__ void __DEFAULT_FN_ATTRS
7322 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7323 {
7324  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7325 }
7326 
7327 static __inline__ __m256i __DEFAULT_FN_ATTRS
7329 {
7330  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7331  (__v16hi) _mm256_undefined_si256 (),
7332  (__mmask16) -1);
7333 }
7334 
7335 static __inline__ __m256i __DEFAULT_FN_ATTRS
7336 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7337 {
7338  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7339  (__v16hi) __O, __M);
7340 }
7341 
7342 static __inline__ __m256i __DEFAULT_FN_ATTRS
7343 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
7344 {
7345  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7346  (__v16hi) _mm256_setzero_si256 (),
7347  __M);
7348 }
7349 
7350 static __inline__ void __DEFAULT_FN_ATTRS
7351 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7352 {
7353  __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7354 }
7355 
7356 static __inline__ __m128i __DEFAULT_FN_ATTRS
7358 {
7359  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7360  (__v16qi) _mm_undefined_si128 (),
7361  (__mmask8) -1);
7362 }
7363 
7364 static __inline__ __m128i __DEFAULT_FN_ATTRS
7365 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7366 {
7367  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7368  (__v16qi) __O, __M);
7369 }
7370 
7371 static __inline__ __m128i __DEFAULT_FN_ATTRS
7372 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
7373 {
7374  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7375  (__v16qi) _mm_setzero_si128 (),
7376  __M);
7377 }
7378 
7379 static __inline__ void __DEFAULT_FN_ATTRS
7380 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7381 {
7382  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7383 }
7384 
7385 static __inline__ __m256i __DEFAULT_FN_ATTRS
7387 {
7388  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7389  (__v8si) _mm256_undefined_si256 (),
7390  (__mmask8) -1);
7391 }
7392 
7393 static __inline__ __m256i __DEFAULT_FN_ATTRS
7394 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7395 {
7396  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7397  (__v8si) __O, __M);
7398 }
7399 
7400 static __inline__ __m256i __DEFAULT_FN_ATTRS
7401 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
7402 {
7403  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7404  (__v8si) _mm256_setzero_si256 (),
7405  __M);
7406 }
7407 
7408 static __inline__ void __DEFAULT_FN_ATTRS
7409 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
7410 {
7411  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7412 }
7413 
7414 static __inline__ __m128i __DEFAULT_FN_ATTRS
7416 {
7417  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7418  (__v8hi) _mm_undefined_si128 (),
7419  (__mmask8) -1);
7420 }
7421 
7422 static __inline__ __m128i __DEFAULT_FN_ATTRS
7423 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7424 {
7425  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7426  (__v8hi) __O, __M);
7427 }
7428 
7429 static __inline__ __m128i __DEFAULT_FN_ATTRS
7430 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
7431 {
7432  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7433  (__v8hi) _mm_setzero_si128 (),
7434  __M);
7435 }
7436 
7437 static __inline__ void __DEFAULT_FN_ATTRS
7438 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
7439 {
7440  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7441 }
7442 
7443 static __inline__ __m128i __DEFAULT_FN_ATTRS
7445 {
7446  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7447  (__v16qi) _mm_undefined_si128 (),
7448  (__mmask16) -1);
7449 }
7450 
7451 static __inline__ __m128i __DEFAULT_FN_ATTRS
7452 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7453 {
7454  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7455  (__v16qi) __O,
7456  __M);
7457 }
7458 
7459 static __inline__ __m128i __DEFAULT_FN_ATTRS
7460 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
7461 {
7462  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7463  (__v16qi) _mm_setzero_si128 (),
7464  __M);
7465 }
7466 
7467 static __inline__ void __DEFAULT_FN_ATTRS
7468 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7469 {
7470  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7471 }
7472 
7473 static __inline__ __m256i __DEFAULT_FN_ATTRS
7475 {
7476  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7477  (__v16hi) _mm256_undefined_si256 (),
7478  (__mmask16) -1);
7479 }
7480 
7481 static __inline__ __m256i __DEFAULT_FN_ATTRS
7482 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7483 {
7484  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7485  (__v16hi) __O,
7486  __M);
7487 }
7488 
7489 static __inline__ __m256i __DEFAULT_FN_ATTRS
7490 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
7491 {
7492  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7493  (__v16hi) _mm256_setzero_si256 (),
7494  __M);
7495 }
7496 
7497 static __inline__ void __DEFAULT_FN_ATTRS
7498 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7499 {
7500  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7501 }
7502 
7503 static __inline__ __m128i __DEFAULT_FN_ATTRS
7505 {
7506  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7507  (__v16qi) _mm_undefined_si128 (),
7508  (__mmask8) -1);
7509 }
7510 
7511 static __inline__ __m128i __DEFAULT_FN_ATTRS
7512 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7513 {
7514  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7515  (__v16qi) __O,
7516  __M);
7517 }
7518 
7519 static __inline__ __m128i __DEFAULT_FN_ATTRS
7520 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
7521 {
7522  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7523  (__v16qi) _mm_setzero_si128 (),
7524  __M);
7525 }
7526 
7527 static __inline__ void __DEFAULT_FN_ATTRS
7528 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7529 {
7530  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7531 }
7532 
7533 static __inline__ __m256i __DEFAULT_FN_ATTRS
7535 {
7536  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7537  (__v8si) _mm256_undefined_si256 (),
7538  (__mmask8) -1);
7539 }
7540 
7541 static __inline__ __m256i __DEFAULT_FN_ATTRS
7542 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7543 {
7544  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7545  (__v8si) __O, __M);
7546 }
7547 
7548 static __inline__ __m256i __DEFAULT_FN_ATTRS
7549 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
7550 {
7551  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7552  (__v8si) _mm256_setzero_si256 (),
7553  __M);
7554 }
7555 
7556 static __inline__ void __DEFAULT_FN_ATTRS
7557 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7558 {
7559  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7560 }
7561 
7562 static __inline__ __m128i __DEFAULT_FN_ATTRS
7564 {
7565  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7566  (__v8hi) _mm_undefined_si128 (),
7567  (__mmask8) -1);
7568 }
7569 
7570 static __inline__ __m128i __DEFAULT_FN_ATTRS
7571 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7572 {
7573  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7574  (__v8hi) __O, __M);
7575 }
7576 
7577 static __inline__ __m128i __DEFAULT_FN_ATTRS
7578 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
7579 {
7580  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7581  (__v8hi) _mm_setzero_si128 (),
7582  __M);
7583 }
7584 
7585 static __inline__ void __DEFAULT_FN_ATTRS
7586 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7587 {
7588  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7589 }
7590 
7591 static __inline__ __m128i __DEFAULT_FN_ATTRS
7593 {
7594  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7595  (__v16qi) _mm_undefined_si128 (),
7596  (__mmask16) -1);
7597 }
7598 
7599 static __inline__ __m128i __DEFAULT_FN_ATTRS
7600 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7601 {
7602  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7603  (__v16qi) __O, __M);
7604 }
7605 
7606 static __inline__ __m128i __DEFAULT_FN_ATTRS
7607 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
7608 {
7609  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7610  (__v16qi) _mm_setzero_si128 (),
7611  __M);
7612 }
7613 
7614 static __inline__ void __DEFAULT_FN_ATTRS
7615 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7616 {
7617  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7618 }
7619 
7620 static __inline__ __m256i __DEFAULT_FN_ATTRS
7622 {
7623  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7624  (__v16hi) _mm256_undefined_si256 (),
7625  (__mmask16) -1);
7626 }
7627 
7628 static __inline__ __m256i __DEFAULT_FN_ATTRS
7629 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7630 {
7631  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7632  (__v16hi) __O, __M);
7633 }
7634 
7635 static __inline__ __m256i __DEFAULT_FN_ATTRS
7636 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
7637 {
7638  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7639  (__v16hi) _mm256_setzero_si256 (),
7640  __M);
7641 }
7642 
7643 static __inline__ void __DEFAULT_FN_ATTRS
7644 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
7645 {
7646  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7647 }
7648 
7649 static __inline__ __m128i __DEFAULT_FN_ATTRS
7651 {
7652  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7653  (__v16qi) _mm_undefined_si128 (),
7654  (__mmask8) -1);
7655 }
7656 
7657 static __inline__ __m128i __DEFAULT_FN_ATTRS
7658 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7659 {
7660  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7661  (__v16qi) __O, __M);
7662 }
7663 
7664 static __inline__ __m128i __DEFAULT_FN_ATTRS
7665 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
7666 {
7667  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7668  (__v16qi) _mm_setzero_si128 (),
7669  __M);
7670 }
7671 
7672 static __inline__ void __DEFAULT_FN_ATTRS
7673 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7674 {
7675  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7676 }
7677 
7678 static __inline__ __m256i __DEFAULT_FN_ATTRS
7680 {
7681  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7682  (__v8si) _mm256_undefined_si256 (),
7683  (__mmask8) -1);
7684 }
7685 
7686 static __inline__ __m256i __DEFAULT_FN_ATTRS
7687 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7688 {
7689  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7690  (__v8si) __O, __M);
7691 }
7692 
7693 static __inline__ __m256i __DEFAULT_FN_ATTRS
7694 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
7695 {
7696  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7697  (__v8si) _mm256_setzero_si256 (),
7698  __M);
7699 }
7700 
7701 static __inline__ void __DEFAULT_FN_ATTRS
7702 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7703 {
7704  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7705 }
7706 
7707 static __inline__ __m128i __DEFAULT_FN_ATTRS
7709 {
7710  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7711  (__v8hi) _mm_undefined_si128 (),
7712  (__mmask8) -1);
7713 }
7714 
7715 static __inline__ __m128i __DEFAULT_FN_ATTRS
7716 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7717 {
7718  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7719  (__v8hi) __O, __M);
7720 }
7721 
7722 static __inline__ __m128i __DEFAULT_FN_ATTRS
7723 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
7724 {
7725  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7726  (__v8hi) _mm_setzero_si128 (),
7727  __M);
7728 }
7729 
7730 static __inline__ void __DEFAULT_FN_ATTRS
7731 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7732 {
7733  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7734 }
7735 
/* Extract the 128-bit lane selected by imm[1:0] from a 512-bit integer vector
   (VEXTRACTI32X4), with merge- and zero-masked variants. */
#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({            \
  (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A),             \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   0 + ((imm) & 0x3) * 4,             \
                                   1 + ((imm) & 0x3) * 4,             \
                                   2 + ((imm) & 0x3) * 4,             \
                                   3 + ((imm) & 0x3) * 4); })

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({     \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U),                      \
                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
                                (__v4si)(W)); })

#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({       \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U),                      \
                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
                                (__v4si)_mm_setzero_si128()); })
7753 
/* Extract the 256-bit half selected by imm[0] from a 512-bit integer vector
   (VEXTRACTI64X4), with merge- and zero-masked variants. */
#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({            \
  (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A),              \
                                   (__v8di)_mm512_undefined_epi32(),  \
                                   ((imm) & 1) ? 4 : 0,               \
                                   ((imm) & 1) ? 5 : 1,               \
                                   ((imm) & 1) ? 6 : 2,               \
                                   ((imm) & 1) ? 7 : 3); })

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({     \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U),                      \
                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                                (__v4di)(W)); })

#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({       \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U),                      \
                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                                (__v4di)_mm256_setzero_si256()); })
7771 
/* Insert a 256-bit double vector into the half of A selected by imm[0]
   (VINSERTF64X4); indices >= 8 pick lanes from B (B occupies the low half of
   the concatenated shuffle source). */
#define _mm512_insertf64x4(A, B, imm) __extension__ ({                 \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A),               \
                                   (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
                                   ((imm) & 0x1) ?  0 : 8,             \
                                   ((imm) & 0x1) ?  1 : 9,             \
                                   ((imm) & 0x1) ?  2 : 10,            \
                                   ((imm) & 0x1) ?  3 : 11,            \
                                   ((imm) & 0x1) ?  8 : 4,             \
                                   ((imm) & 0x1) ?  9 : 5,             \
                                   ((imm) & 0x1) ? 10 : 6,             \
                                   ((imm) & 0x1) ? 11 : 7); })

#define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({      \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U),                  \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)(W)); })

#define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({        \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U),                  \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd()); })
7793 
/* Insert a 256-bit integer vector into the half of A selected by imm[0]
   (VINSERTI64X4). */
#define _mm512_inserti64x4(A, B, imm) __extension__ ({                 \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A),               \
                                   (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
                                   ((imm) & 0x1) ?  0 : 8,             \
                                   ((imm) & 0x1) ?  1 : 9,             \
                                   ((imm) & 0x1) ?  2 : 10,            \
                                   ((imm) & 0x1) ?  3 : 11,            \
                                   ((imm) & 0x1) ?  8 : 4,             \
                                   ((imm) & 0x1) ?  9 : 5,             \
                                   ((imm) & 0x1) ? 10 : 6,             \
                                   ((imm) & 0x1) ? 11 : 7); })

#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({      \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U),                   \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)(W)); })

#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({        \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U),                   \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()); })
7815 
/* Insert a 128-bit float vector into the 128-bit lane of A selected by
   imm[1:0] (VINSERTF32X4); indices 16..19 pick lanes from B. */
#define _mm512_insertf32x4(A, B, imm) __extension__ ({                 \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A),                \
                                  (__v16sf)_mm512_castps128_ps512((__m128)(B)),\
                                  (((imm) & 0x3) == 0) ? 16 :  0,      \
                                  (((imm) & 0x3) == 0) ? 17 :  1,      \
                                  (((imm) & 0x3) == 0) ? 18 :  2,      \
                                  (((imm) & 0x3) == 0) ? 19 :  3,      \
                                  (((imm) & 0x3) == 1) ? 16 :  4,      \
                                  (((imm) & 0x3) == 1) ? 17 :  5,      \
                                  (((imm) & 0x3) == 1) ? 18 :  6,      \
                                  (((imm) & 0x3) == 1) ? 19 :  7,      \
                                  (((imm) & 0x3) == 2) ? 16 :  8,      \
                                  (((imm) & 0x3) == 2) ? 17 :  9,      \
                                  (((imm) & 0x3) == 2) ? 18 : 10,      \
                                  (((imm) & 0x3) == 2) ? 19 : 11,      \
                                  (((imm) & 0x3) == 3) ? 16 : 12,      \
                                  (((imm) & 0x3) == 3) ? 17 : 13,      \
                                  (((imm) & 0x3) == 3) ? 18 : 14,      \
                                  (((imm) & 0x3) == 3) ? 19 : 15); })

#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({      \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U),                  \
                                  (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                  (__v16sf)(W)); })

#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({        \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U),                  \
                                  (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                  (__v16sf)_mm512_setzero_ps()); })
7845 
/* Insert a 128-bit integer vector into the 128-bit lane of A selected by
   imm[1:0] (VINSERTI32X4); indices 16..19 pick lanes from B. */
#define _mm512_inserti32x4(A, B, imm) __extension__ ({                 \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A),              \
                                  (__v16si)_mm512_castsi128_si512((__m128i)(B)),\
                                  (((imm) & 0x3) == 0) ? 16 :  0,      \
                                  (((imm) & 0x3) == 0) ? 17 :  1,      \
                                  (((imm) & 0x3) == 0) ? 18 :  2,      \
                                  (((imm) & 0x3) == 0) ? 19 :  3,      \
                                  (((imm) & 0x3) == 1) ? 16 :  4,      \
                                  (((imm) & 0x3) == 1) ? 17 :  5,      \
                                  (((imm) & 0x3) == 1) ? 18 :  6,      \
                                  (((imm) & 0x3) == 1) ? 19 :  7,      \
                                  (((imm) & 0x3) == 2) ? 16 :  8,      \
                                  (((imm) & 0x3) == 2) ? 17 :  9,      \
                                  (((imm) & 0x3) == 2) ? 18 : 10,      \
                                  (((imm) & 0x3) == 2) ? 19 : 11,      \
                                  (((imm) & 0x3) == 3) ? 16 : 12,      \
                                  (((imm) & 0x3) == 3) ? 17 : 13,      \
                                  (((imm) & 0x3) == 3) ? 18 : 14,      \
                                  (((imm) & 0x3) == 3) ? 19 : 15); })

#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({      \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U),                  \
                                  (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                  (__v16si)(W)); })

#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({        \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U),                  \
                                  (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                  (__v16si)_mm512_setzero_si512()); })
7875 
/* VGETMANTPD: extract the normalized mantissa of each double. B selects the
   interval, C the sign control; the builtin packs them as (C << 2) | B. */
#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({           \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A),      \
                                            (int)(((C)<<2) | (B)),     \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A),      \
                                            (int)(((C)<<2) | (B)),     \
                                            (__v8df)(__m512d)(W),      \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({  \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A),      \
                                            (int)(((C)<<2) | (B)),     \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_getmant_pd(A, B, C) __extension__ ({                    \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A),      \
                                            (int)(((C)<<2) | (B)),     \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1,              \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({         \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A),      \
                                            (int)(((C)<<2) | (B)),     \
                                            (__v8df)(__m512d)(W),      \
                                            (__mmask8)(U),             \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({           \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A),      \
                                            (int)(((C)<<2) | (B)),     \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U),             \
                                            _MM_FROUND_CUR_DIRECTION); })
7914 
/* VGETMANTPS: extract the normalized mantissa of each float; interval/sign
   controls packed as (C << 2) | B. */
#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({           \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A),       \
                                           (int)(((C)<<2) | (B)),      \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A),       \
                                           (int)(((C)<<2) | (B)),      \
                                           (__v16sf)(__m512)(W),       \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({  \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A),       \
                                           (int)(((C)<<2) | (B)),      \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_getmant_ps(A, B, C) __extension__ ({                    \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A),       \
                                           (int)(((C)<<2)|(B)),        \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1,              \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({         \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A),       \
                                           (int)(((C)<<2)|(B)),        \
                                           (__v16sf)(__m512)(W),       \
                                           (__mmask16)(U),             \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({           \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A),       \
                                           (int)(((C)<<2)|(B)),        \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U),             \
                                           _MM_FROUND_CUR_DIRECTION); })
7953 
/* VGETEXPPD with explicit rounding/SAE control R. */
#define _mm512_getexp_round_pd(A, R) __extension__ ({                  \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),       \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({       \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),       \
                                           (__v8df)(__m512d)(W),       \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({         \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),       \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
7968 
7969 static __inline__ __m512d __DEFAULT_FN_ATTRS
7970 _mm512_getexp_pd (__m512d __A)
7971 {
7972  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7973  (__v8df) _mm512_undefined_pd (),
7974  (__mmask8) -1,
7976 }
7977 
7978 static __inline__ __m512d __DEFAULT_FN_ATTRS
7979 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7980 {
7981  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7982  (__v8df) __W,
7983  (__mmask8) __U,
7985 }
7986 
7987 static __inline__ __m512d __DEFAULT_FN_ATTRS
7988 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
7989 {
7990  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7991  (__v8df) _mm512_setzero_pd (),
7992  (__mmask8) __U,
7994 }
7995 
/* VGETEXPPS with explicit rounding/SAE control R. */
#define _mm512_getexp_round_ps(A, R) __extension__ ({                  \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),        \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({       \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),        \
                                          (__v16sf)(__m512)(W),        \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({         \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),        \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })
8010 
8011 static __inline__ __m512 __DEFAULT_FN_ATTRS
8012 _mm512_getexp_ps (__m512 __A)
8013 {
8014  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8015  (__v16sf) _mm512_undefined_ps (),
8016  (__mmask16) -1,
8018 }
8019 
8020 static __inline__ __m512 __DEFAULT_FN_ATTRS
8021 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
8022 {
8023  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8024  (__v16sf) __W,
8025  (__mmask16) __U,
8027 }
8028 
8029 static __inline__ __m512 __DEFAULT_FN_ATTRS
8030 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
8031 {
8032  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8033  (__v16sf) _mm512_setzero_ps (),
8034  (__mmask16) __U,
8036 }
8037 
/* Gather 8 floats at addr + index[i]*scale using 64-bit indices (VGATHERQPS). */
#define _mm512_i64gather_ps(index, addr, scale) __extension__ ({       \
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(),  \
                                       (float const *)(addr),          \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__({\
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),       \
                                       (float const *)(addr),          \
                                       (__v8di)(__m512i)(index),       \
                                       (__mmask8)(mask), (int)(scale)); })
8049 
/* Gather 8 x 32-bit ints using 64-bit indices (VPGATHERQD).
   Fixed: seed the merge source from _mm256_undefined_si256() instead of the
   FP intrinsic _mm256_undefined_ps() (integer gather; matches later clang). */
#define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({    \
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                        (int const *)(addr),           \
                                        (__v8di)(__m512i)(index),      \
                                        (__mmask8)-1, (int)(scale)); })

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old),     \
                                        (int const *)(addr),           \
                                        (__v8di)(__m512i)(index),      \
                                        (__mmask8)(mask), (int)(scale)); })
8061 
/* Gather 8 doubles using 64-bit indices (VGATHERQPD). */
#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({       \
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(),  \
                                       (double const *)(addr),         \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old),      \
                                       (double const *)(addr),         \
                                       (__v8di)(__m512i)(index),       \
                                       (__mmask8)(mask), (int)(scale)); })
8073 
/* Gather 8 x 64-bit ints using 64-bit indices (VPGATHERQQ).
   Fixed: seed the merge source from _mm512_undefined_epi32() instead of the
   FP intrinsic _mm512_undefined_pd() (integer gather; matches later clang). */
#define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({    \
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (long long const *)(addr),      \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old),      \
                                       (long long const *)(addr),      \
                                       (__v8di)(__m512i)(index),       \
                                       (__mmask8)(mask), (int)(scale)); })
8085 
/* Gather 16 floats using 32-bit indices (VGATHERDPS).
   NOTE(review): the index is cast through __v16sf to match this builtin's
   clang-6 signature; later releases take __v16si — confirm before changing. */
#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({       \
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                       (float const *)(addr),          \
                                       (__v16sf)(__m512)(index),       \
                                       (__mmask16)-1, (int)(scale)); })

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old),      \
                                       (float const *)(addr),          \
                                       (__v16sf)(__m512)(index),       \
                                       (__mmask16)(mask), (int)(scale)); })
8097 
/* Gather 16 x 32-bit ints using 32-bit indices (VPGATHERDD). */
#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({    \
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                        (int const *)(addr),           \
                                        (__v16si)(__m512i)(index),     \
                                        (__mmask16)-1, (int)(scale)); })

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old),    \
                                        (int const *)(addr),           \
                                        (__v16si)(__m512i)(index),     \
                                        (__mmask16)(mask), (int)(scale)); })
8109 
/* Gather 8 doubles using 32-bit indices (VGATHERDPD). */
#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({       \
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(),  \
                                       (double const *)(addr),         \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old),      \
                                       (double const *)(addr),         \
                                       (__v8si)(__m256i)(index),       \
                                       (__mmask8)(mask), (int)(scale)); })
8121 
/* Gather 8 x 64-bit ints using 32-bit indices (VPGATHERDQ). */
#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({    \
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (long long const *)(addr),      \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old),      \
                                       (long long const *)(addr),      \
                                       (__v8si)(__m256i)(index),       \
                                       (__mmask8)(mask), (int)(scale)); })
8133 
/* Scatter with 64-bit indices: store element i of v1 to addr + index[i]*scale
   (VSCATTERQPS / VPSCATTERQD / VSCATTERQPD / VPSCATTERQQ); masked forms skip
   lanes whose mask bit is zero. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({  \
  __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1,         \
                                (__v8di)(__m512i)(index),              \
                                (__v8sf)(__m256)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask),     \
                                (__v8di)(__m512i)(index),              \
                                (__v8sf)(__m256)(v1), (int)(scale)); })

#define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1,           \
                                (__v8di)(__m512i)(index),              \
                                (__v8si)(__m256i)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask),       \
                                (__v8di)(__m512i)(index),              \
                                (__v8si)(__m256i)(v1), (int)(scale)); })

#define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({  \
  __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1,         \
                               (__v8di)(__m512i)(index),               \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask),     \
                               (__v8di)(__m512i)(index),               \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1,      \
                               (__v8di)(__m512i)(index),               \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask),  \
                               (__v8di)(__m512i)(index),               \
                               (__v8di)(__m512i)(v1), (int)(scale)); })
8173 
/* Scatter with 32-bit indices: store element i of v1 to addr + index[i]*scale
   (VSCATTERDPS / VPSCATTERDD / VSCATTERDPD / VPSCATTERDQ); masked forms skip
   lanes whose mask bit is zero. */
#define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({  \
  __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1,        \
                                (__v16si)(__m512i)(index),             \
                                (__v16sf)(__m512)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask),    \
                                (__v16si)(__m512i)(index),             \
                                (__v16sf)(__m512)(v1), (int)(scale)); })

#define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1,          \
                                (__v16si)(__m512i)(index),             \
                                (__v16si)(__m512i)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask),      \
                                (__v16si)(__m512i)(index),             \
                                (__v16si)(__m512i)(v1), (int)(scale)); })

#define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({  \
  __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1,         \
                               (__v8si)(__m256i)(index),               \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask),     \
                               (__v8si)(__m256i)(index),               \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1,      \
                               (__v8si)(__m256i)(index),               \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask),  \
                               (__v8si)(__m256i)(index),               \
                               (__v8di)(__m512i)(v1), (int)(scale)); })
8213 
8214 static __inline__ __m128 __DEFAULT_FN_ATTRS
8215 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8216 {
8217  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8218  (__v4sf) __A,
8219  (__v4sf) __B,
8220  (__mmask8) __U,
8222 }
8223 
8224 #define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\
8225  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
8226  (__v4sf)(__m128)(A), \
8227  (__v4sf)(__m128)(B), (__mmask8)(U), \
8228  (int)(R)); })
8229 
8230 static __inline__ __m128 __DEFAULT_FN_ATTRS
8231 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8232 {
8233  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8234  (__v4sf) __B,
8235  (__v4sf) __C,
8236  (__mmask8) __U,
8238 }
8239 
8240 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\
8241  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
8242  (__v4sf)(__m128)(B), \
8243  (__v4sf)(__m128)(C), (__mmask8)(U), \
8244  _MM_FROUND_CUR_DIRECTION); })
8245 
8246 static __inline__ __m128 __DEFAULT_FN_ATTRS
8247 _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8248 {
8249  return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
8250  (__v4sf) __X,
8251  (__v4sf) __Y,
8252  (__mmask8) __U,
8254 }
8255 
8256 #define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\
8257  (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
8258  (__v4sf)(__m128)(X), \
8259  (__v4sf)(__m128)(Y), (__mmask8)(U), \
8260  (int)(R)); })
8261 
8262 static __inline__ __m128 __DEFAULT_FN_ATTRS
8263 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8264 {
8265  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8266  (__v4sf) __A,
8267  -(__v4sf) __B,
8268  (__mmask8) __U,
8270 }
8271 
8272 #define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\
8273  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
8274  (__v4sf)(__m128)(A), \
8275  (__v4sf)(__m128)(B), (__mmask8)(U), \
8276  (int)(R)); })
8277 
8278 static __inline__ __m128 __DEFAULT_FN_ATTRS
8279 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8280 {
8281  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8282  (__v4sf) __B,
8283  -(__v4sf) __C,
8284  (__mmask8) __U,
8286 }
8287 
8288 #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\
8289  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
8290  (__v4sf)(__m128)(B), \
8291  -(__v4sf)(__m128)(C), (__mmask8)(U), \
8292  (int)(R)); })
8293 
8294 static __inline__ __m128 __DEFAULT_FN_ATTRS
8295 _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8296 {
8297  return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
8298  (__v4sf) __X,
8299  (__v4sf) __Y,
8300  (__mmask8) __U,
8302 }
8303 
8304 #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\
8305  (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
8306  (__v4sf)(__m128)(X), \
8307  (__v4sf)(__m128)(Y), (__mmask8)(U), \
8308  (int)(R)); })
8309 
8310 static __inline__ __m128 __DEFAULT_FN_ATTRS
8311 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8312 {
8313  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8314  -(__v4sf) __A,
8315  (__v4sf) __B,
8316  (__mmask8) __U,
8318 }
8319 
8320 #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\
8321  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
8322  -(__v4sf)(__m128)(A), \
8323  (__v4sf)(__m128)(B), (__mmask8)(U), \
8324  (int)(R)); })
8325 
8326 static __inline__ __m128 __DEFAULT_FN_ATTRS
8327 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8328 {
8329  return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8330  (__v4sf) __B,
8331  (__v4sf) __C,
8332  (__mmask8) __U,
8334 }
8335 
8336 #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\
8337  (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
8338  (__v4sf)(__m128)(B), \
8339  (__v4sf)(__m128)(C), (__mmask8)(U), \
8340  (int)(R)); })
8341 
8342 static __inline__ __m128 __DEFAULT_FN_ATTRS
8343 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8344 {
8345  return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
8346  (__v4sf) __X,
8347  (__v4sf) __Y,
8348  (__mmask8) __U,
8350 }
8351 
8352 #define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\
8353  (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \
8354  (__v4sf)(__m128)(X), \
8355  (__v4sf)(__m128)(Y), (__mmask8)(U), \
8356  (int)(R)); })
8357 
8358 static __inline__ __m128 __DEFAULT_FN_ATTRS
8359 _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8360 {
8361  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8362  -(__v4sf) __A,
8363  -(__v4sf) __B,
8364  (__mmask8) __U,
8366 }
8367 
8368 #define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\
8369  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
8370  -(__v4sf)(__m128)(A), \
8371  -(__v4sf)(__m128)(B), (__mmask8)(U), \
8372  (int)(R)); })
8373 
8374 static __inline__ __m128 __DEFAULT_FN_ATTRS
8375 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8376 {
8377  return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8378  (__v4sf) __B,
8379  -(__v4sf) __C,
8380  (__mmask8) __U,
8382 }
8383 
8384 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\
8385  (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
8386  (__v4sf)(__m128)(B), \
8387  -(__v4sf)(__m128)(C), (__mmask8)(U), \
8388  _MM_FROUND_CUR_DIRECTION); })
8389 
8390 static __inline__ __m128 __DEFAULT_FN_ATTRS
8391 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8392 {
8393  return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W,
8394  (__v4sf) __X,
8395  (__v4sf) __Y,
8396  (__mmask8) __U,
8398 }
8399 
8400 #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\
8401  (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \
8402  (__v4sf)(__m128)(X), \
8403  (__v4sf)(__m128)(Y), (__mmask8)(U), \
8404  (int)(R)); })
8405 
8406 static __inline__ __m128d __DEFAULT_FN_ATTRS
8407 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8408 {
8409  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8410  (__v2df) __A,
8411  (__v2df) __B,
8412  (__mmask8) __U,
8414 }
8415 
8416 #define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\
8417  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8418  (__v2df)(__m128d)(A), \
8419  (__v2df)(__m128d)(B), (__mmask8)(U), \
8420  (int)(R)); })
8421 
8422 static __inline__ __m128d __DEFAULT_FN_ATTRS
8423 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8424 {
8425  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8426  (__v2df) __B,
8427  (__v2df) __C,
8428  (__mmask8) __U,
8430 }
8431 
8432 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\
8433  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8434  (__v2df)(__m128d)(B), \
8435  (__v2df)(__m128d)(C), (__mmask8)(U), \
8436  _MM_FROUND_CUR_DIRECTION); })
8437 
8438 static __inline__ __m128d __DEFAULT_FN_ATTRS
8439 _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8440 {
8441  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
8442  (__v2df) __X,
8443  (__v2df) __Y,
8444  (__mmask8) __U,
8446 }
8447 
8448 #define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\
8449  (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
8450  (__v2df)(__m128d)(X), \
8451  (__v2df)(__m128d)(Y), (__mmask8)(U), \
8452  (int)(R)); })
8453 
8454 static __inline__ __m128d __DEFAULT_FN_ATTRS
8455 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8456 {
8457  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8458  (__v2df) __A,
8459  -(__v2df) __B,
8460  (__mmask8) __U,
8462 }
8463 
8464 #define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\
8465  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8466  (__v2df)(__m128d)(A), \
8467  -(__v2df)(__m128d)(B), (__mmask8)(U), \
8468  (int)(R)); })
8469 
8470 static __inline__ __m128d __DEFAULT_FN_ATTRS
8471 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8472 {
8473  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8474  (__v2df) __B,
8475  -(__v2df) __C,
8476  (__mmask8) __U,
8478 }
8479 
/* Rounding-control variant of _mm_maskz_fmsub_sd (addend C negated). */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), \
                                          (__mmask8)(U), (int)(R)); })
8485 
8486 static __inline__ __m128d __DEFAULT_FN_ATTRS
8487 _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8488 {
8489  return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
8490  (__v2df) __X,
8491  (__v2df) __Y,
8492  (__mmask8) __U,
8494 }
8495 
/* Rounding-control variant of _mm_mask3_fmsub_sd. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), \
                                          (__mmask8)(U), (int)(R)); })
8501 
8502 static __inline__ __m128d __DEFAULT_FN_ATTRS
8503 _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8504 {
8505  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8506  -(__v2df) __A,
8507  (__v2df) __B,
8508  (__mmask8) __U,
8510 }
8511 
/* Rounding-control variant of _mm_mask_fnmadd_sd (multiplicand A negated). */
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         -(__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })
8517 
8518 static __inline__ __m128d __DEFAULT_FN_ATTRS
8519 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8520 {
8521  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8522  (__v2df) __B,
8523  (__v2df) __C,
8524  (__mmask8) __U,
8526 }
8527 
/* Rounding-control variant of _mm_maskz_fnmadd_sd. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)(U), \
                                          (int)(R)); })
8533 
8534 static __inline__ __m128d __DEFAULT_FN_ATTRS
8535 _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8536 {
8537  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
8538  (__v2df) __X,
8539  (__v2df) __Y,
8540  (__mmask8) __U,
8542 }
8543 
/* Rounding-control variant of _mm_mask3_fnmadd_sd. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                          (int)(R)); })
8549 
8550 static __inline__ __m128d __DEFAULT_FN_ATTRS
8551 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8552 {
8553  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8554  -(__v2df) __A,
8555  -(__v2df) __B,
8556  (__mmask8) __U,
8558 }
8559 
/* Rounding-control variant of _mm_mask_fnmsub_sd (A and B negated). */
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         -(__v2df)(__m128d)(A), \
                                         -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })
8565 
8566 static __inline__ __m128d __DEFAULT_FN_ATTRS
8567 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8568 {
8569  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8570  (__v2df) __B,
8571  -(__v2df) __C,
8572  (__mmask8) __U,
8574 }
8575 
/* Rounding-control variant of _mm_maskz_fnmsub_sd.  Bug fix: the original
   ignored its R argument and hard-coded _MM_FROUND_CUR_DIRECTION; R must be
   forwarded to the builtin (matching the other *_round_* macros). */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), \
                                          (__mmask8)(U), \
                                          (int)(R)); })
8582 
8583 static __inline__ __m128d __DEFAULT_FN_ATTRS
8584 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8585 {
8586  return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W),
8587  (__v2df) __X,
8588  (__v2df) (__Y),
8589  (__mmask8) __U,
8591 }
8592 
/* Rounding-control variant of _mm_mask3_fnmsub_sd. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\
  (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \
                                           (__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R)); })
8598 
/* Permute the four 64-bit lanes inside each 256-bit half of X; each 2-bit
   field of the immediate C picks the source lane within that half. */
#define _mm512_permutex_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   0 + (((C) >> 0) & 0x3), \
                                   0 + (((C) >> 2) & 0x3), \
                                   0 + (((C) >> 4) & 0x3), \
                                   0 + (((C) >> 6) & 0x3), \
                                   4 + (((C) >> 0) & 0x3), \
                                   4 + (((C) >> 2) & 0x3), \
                                   4 + (((C) >> 4) & 0x3), \
                                   4 + (((C) >> 6) & 0x3)); })

/* Merge-masked form: blends the permuted result with W per mask bit U. */
#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)(__m512d)(W)); })

/* Zero-masked form: blends the permuted result with zero per mask bit U. */
#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd()); })
8620 
/* Integer counterpart of _mm512_permutex_pd: permute 64-bit lanes within
   each 256-bit half of X under immediate control C. */
#define _mm512_permutex_epi64(X, C) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \
                                   (__v8di)_mm512_undefined_epi32(), \
                                   0 + (((C) >> 0) & 0x3), \
                                   0 + (((C) >> 2) & 0x3), \
                                   0 + (((C) >> 4) & 0x3), \
                                   0 + (((C) >> 6) & 0x3), \
                                   4 + (((C) >> 0) & 0x3), \
                                   4 + (((C) >> 2) & 0x3), \
                                   4 + (((C) >> 4) & 0x3), \
                                   4 + (((C) >> 6) & 0x3)); })

/* Merge-masked form: blends the permuted result with W per mask bit U. */
#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)(__m512i)(W)); })

/* Zero-masked form: blends the permuted result with zero per mask bit U. */
#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)_mm512_setzero_si512()); })
8642 
/* Variable permute of doubles: index vector __X selects elements of __Y.
   Note the builtin takes data first, indices second. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                                                     (__v8di) __X,
                                                     (__v8df) _mm512_undefined_pd (),
                                                     (__mmask8) -1);
}

/* Merge-masked variant: pass-through vector __W, mask __U. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                                                     (__v8di) __X,
                                                     (__v8df) __W,
                                                     (__mmask8) __U);
}

/* Zero-masked variant. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                                                     (__v8di) __X,
                                                     (__v8df) _mm512_setzero_pd (),
                                                     (__mmask8) __U);
}
8669 
/* Zero-masked variable permute of 64-bit integers (indices in __X,
   data in __Y; builtin argument order is data, indices). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                                                     (__v8di) __X,
                                                     (__v8di) _mm512_setzero_si512 (),
                                                     __M);
}

/* Unmasked variant (all-ones mask, undefined pass-through). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                                                     (__v8di) __X,
                                                     (__v8di) _mm512_undefined_epi32 (),
                                                     (__mmask8) -1);
}

/* Merge-masked variant with pass-through __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
                               __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                                                     (__v8di) __X,
                                                     (__v8di) __W,
                                                     __M);
}
8697 
/* Variable permute of 16 floats: indices in __X, data in __Y. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                                                    (__v16si) __X,
                                                    (__v16sf) _mm512_undefined_ps (),
                                                    (__mmask16) -1);
}

/* Merge-masked variant with pass-through __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                                                    (__v16si) __X,
                                                    (__v16sf) __W,
                                                    (__mmask16) __U);
}

/* Zero-masked variant. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                                                    (__v16si) __X,
                                                    (__v16sf) _mm512_setzero_ps (),
                                                    (__mmask16) __U);
}
8724 
/* Zero-masked variable permute of 16 32-bit integers. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                                                     (__v16si) __X,
                                                     (__v16si) _mm512_setzero_si512 (),
                                                     __M);
}

/* Unmasked variant. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                                                     (__v16si) __X,
                                                     (__v16si) _mm512_undefined_epi32 (),
                                                     (__mmask16) -1);
}

/* Legacy alias kept for source compatibility. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32

/* Merge-masked variant with pass-through __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
                               __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                                                     (__v16si) __X,
                                                     (__v16si) __W,
                                                     __M);
}

/* Legacy alias kept for source compatibility. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8756 
/* 16-bit mask-register logic, thin wrappers over the k-instruction
   builtins. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kand (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
}

/* AND-NOT of two mask registers. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kandn (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
}

/* KORTEST carry-flag result. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestc (__mmask16 __A, __mmask16 __B)
{
  return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
}

/* KORTEST zero-flag result. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestz (__mmask16 __A, __mmask16 __B)
{
  return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
}
8786 
/* Byte-unpack of two mask registers, computed with plain integer ops:
   result low byte = __A[7:0], high byte = __B[7:0].
   NOTE(review): Intel's intrinsics guide describes KUNPCKBW as placing
   b's low byte in dst[7:0] and a's low byte in dst[15:8] — the opposite
   operand placement of this code.  Verify against the upstream header /
   Intel SDM before relying on the byte order here. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) (( __A & 0xFF) | ( __B << 8));
}
8792 
/* XNOR of two 16-bit mask registers. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxnor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
}

/* XOR of two 16-bit mask registers. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
}
8804 
/* Non-temporal 64-byte store of an integer vector.  The local typedef
   carries the 64-byte alignment requirement of the streaming store. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_si512 (__m512i * __P, __m512i __A)
{
  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
  __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
}

/* Non-temporal 64-byte load; __P must be 64-byte aligned. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_stream_load_si512 (void const *__P)
{
  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
  return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
}

/* Non-temporal 64-byte store of a double vector. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_pd (double *__P, __m512d __A)
{
  typedef __v8df __v8df_aligned __attribute__((aligned(64)));
  __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
}

/* Non-temporal 64-byte store of a float vector. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_ps (float *__P, __m512 __A)
{
  typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
  __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
}
8832 
/* Compress selected doubles of __A toward element 0; unselected result
   elements come from the pass-through __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
                                                      (__v8df) __W,
                                                      (__mmask8) __U);
}

/* Zero-masked compress: unselected result elements are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
                                                      (__v8df)
                                                      _mm512_setzero_pd (),
                                                      (__mmask8) __U);
}
8849 
/* Merge-masked compress of 64-bit integers; pass-through __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
                                                      (__v8di) __W,
                                                      (__mmask8) __U);
}
8857 
8858 static __inline__ __m512i __DEFAULT_FN_ATTRS
8859 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
8860 {
8861  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8862  (__v8di)
8864  (__mmask8) __U);
8865 }
8866 
/* Merge-masked compress of floats; pass-through __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
                                                     (__v16sf) __W,
                                                     (__mmask16) __U);
}

/* Zero-masked compress of floats. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
                                                     (__v16sf)
                                                     _mm512_setzero_ps (),
                                                     (__mmask16) __U);
}
8883 
/* Merge-masked compress of 32-bit integers; pass-through __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
                                                      (__v16si) __W,
                                                      (__mmask16) __U);
}
8891 
8892 static __inline__ __m512i __DEFAULT_FN_ATTRS
8893 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
8894 {
8895  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8896  (__v16si)
8898  (__mmask16) __U);
8899 }
8900 
/* Scalar compare-to-mask: compare the low elements of X and Y with
   predicate P; _round_ forms take an explicit rounding/SAE argument R,
   plain forms use the current direction; _mask_ forms AND with mask M. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R)); })

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R)); })

#define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), \
                                      _MM_FROUND_CUR_DIRECTION); })

/* Double-precision counterparts of the ss forms above. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R)); })

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R)); })

#define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)(M), \
                                      _MM_FROUND_CUR_DIRECTION); })
8944 
8945 /* Bit Test */
8946 
8947 static __inline __mmask16 __DEFAULT_FN_ATTRS
8948 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
8949 {
8950  return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
8952 }
8953 
8954 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8955 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8956 {
8957  return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8959 }
8960 
8961 static __inline __mmask8 __DEFAULT_FN_ATTRS
8962 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
8963 {
8964  return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8966 }
8967 
8968 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
8969 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8970 {
8971  return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8973 }
8974 
8975 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8976 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8977 {
8978  return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8980 }
8981 
8982 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8983 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8984 {
8985  return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8987 }
8988 
8989 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
8990 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8991 {
8992  return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8994 }
8995 
8996 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
8997 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8998 {
8999  return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
9001 }
9002 
9003 static __inline__ __m512 __DEFAULT_FN_ATTRS
9005 {
9006  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9007  1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
9008 }
9009 
/* Merge-masked movehdup: blend the duplicated result with __W per __U. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_movehdup_ps(__A),
                                             (__v16sf)__W);
}

/* Zero-masked movehdup. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_movehdup_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}
9025 
9026 static __inline__ __m512 __DEFAULT_FN_ATTRS
9028 {
9029  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9030  0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
9031 }
9032 
/* Merge-masked moveldup: blend the duplicated result with __W per __U. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_moveldup_ps(__A),
                                             (__v16sf)__W);
}

/* Zero-masked moveldup. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_moveldup_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}
9048 
9049 static __inline__ __m128 __DEFAULT_FN_ATTRS
9050 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
9051 {
9052  __m128 res = __A;
9053  res[0] = (__U & 1) ? __B[0] : __W[0];
9054  return res;
9055 }
9056 
9057 static __inline__ __m128 __DEFAULT_FN_ATTRS
9058 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
9059 {
9060  __m128 res = __A;
9061  res[0] = (__U & 1) ? __B[0] : 0;
9062  return res;
9063 }
9064 
9065 static __inline__ __m128d __DEFAULT_FN_ATTRS
9066 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
9067 {
9068  __m128d res = __A;
9069  res[0] = (__U & 1) ? __B[0] : __W[0];
9070  return res;
9071 }
9072 
9073 static __inline__ __m128d __DEFAULT_FN_ATTRS
9074 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
9075 {
9076  __m128d res = __A;
9077  res[0] = (__U & 1) ? __B[0] : 0;
9078  return res;
9079 }
9080 
/* Masked scalar store: widen the 128-bit value to 512 bits and use the
   masked-store builtin with only bit 0 of the mask set; nothing is written
   when that bit is clear. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
{
  __builtin_ia32_storess128_mask ((__v16sf *)__W,
                (__v16sf) _mm512_castps128_ps512(__A),
                (__mmask16) __U & (__mmask16)1);
}

/* Double-precision counterpart of _mm_mask_store_ss. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
{
  __builtin_ia32_storesd128_mask ((__v8df *)__W,
                (__v8df) _mm512_castpd128_pd512(__A),
                (__mmask8) __U & 1);
}
9096 
/* Masked scalar load: only bit 0 of the mask is used.  The pass-through is
   built from __W[0] with the upper lanes zeroed, widened to 512 bits for
   the builtin, then the low 128 bits are extracted again. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
{
  /* Lane 0 from __W, lanes 1-3 zero (index 4 selects the zero vector). */
  __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
                                                (__v4sf) {0.0, 0.0, 0.0, 0.0},
                                                0, 4, 4, 4);

  return (__m128) __builtin_shufflevector(
                __builtin_ia32_loadss128_mask ((__v16sf *) __A,
                                               (__v16sf) _mm512_castps128_ps512(src),
                                               (__mmask16) __U & 1),
                _mm512_undefined_ps(), 0, 1, 2, 3);
}

/* Zero-masked scalar float load. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_load_ss (__mmask8 __U, const float* __A)
{
  return (__m128) __builtin_shufflevector(
                __builtin_ia32_loadss128_mask ((__v16sf *) __A,
                                               (__v16sf) _mm512_setzero_ps(),
                                               (__mmask16) __U & 1),
                _mm512_undefined_ps(), 0, 1, 2, 3);
}

/* Merge-masked scalar double load; same widen/narrow pattern. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
{
  /* Lane 0 from __W, lane 1 zero (index 2 selects the zero vector). */
  __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
                                                 (__v2df) {0.0, 0.0}, 0, 2);

  return (__m128d) __builtin_shufflevector(
                __builtin_ia32_loadsd128_mask ((__v8df *) __A,
                                               (__v8df) _mm512_castpd128_pd512(src),
                                               (__mmask8) __U & 1),
                _mm512_undefined_pd(), 0, 1);
}

/* Zero-masked scalar double load. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_load_sd (__mmask8 __U, const double* __A)
{
  return (__m128d) __builtin_shufflevector(
                __builtin_ia32_loadsd128_mask ((__v8df *) __A,
                                               (__v8df) _mm512_setzero_pd(),
                                               (__mmask8) __U & 1),
                _mm512_undefined_pd(), 0, 1);
}
9143 
/* Shuffle 32-bit elements within each 128-bit lane of A; each 2-bit field
   of the immediate I picks the source element inside its lane. */
#define _mm512_shuffle_epi32(A, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   0  + (((I) >> 0) & 0x3), \
                                   0  + (((I) >> 2) & 0x3), \
                                   0  + (((I) >> 4) & 0x3), \
                                   0  + (((I) >> 6) & 0x3), \
                                   4  + (((I) >> 0) & 0x3), \
                                   4  + (((I) >> 2) & 0x3), \
                                   4  + (((I) >> 4) & 0x3), \
                                   4  + (((I) >> 6) & 0x3), \
                                   8  + (((I) >> 0) & 0x3), \
                                   8  + (((I) >> 2) & 0x3), \
                                   8  + (((I) >> 4) & 0x3), \
                                   8  + (((I) >> 6) & 0x3), \
                                   12 + (((I) >> 0) & 0x3), \
                                   12 + (((I) >> 2) & 0x3), \
                                   12 + (((I) >> 4) & 0x3), \
                                   12 + (((I) >> 6) & 0x3)); })

/* Merge-masked form: blends the shuffled result with W per mask bit U. */
#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)(__m512i)(W)); })

/* Zero-masked form. */
#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)_mm512_setzero_si512()); })
9173 
/* Expand of doubles: merge-masked form with pass-through __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
                                                    (__v8df) __W,
                                                    (__mmask8) __U);
}

/* Zero-masked expand of doubles. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
                                                    (__v8df) _mm512_setzero_pd (),
                                                    (__mmask8) __U);
}
9189 
9190 static __inline__ __m512i __DEFAULT_FN_ATTRS
9191 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9192 {
9193  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9194  (__v8di) __W,
9195  (__mmask8) __U);
9196 }
9197 
9198 static __inline__ __m512i __DEFAULT_FN_ATTRS
9199 _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
9200 {
9201  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9202  (__v8di) _mm512_setzero_pd (),
9203  (__mmask8) __U);
9204 }
9205 
/* Expand-load of doubles from an unaligned pointer; pass-through __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
                                                        (__v8df) __W,
                                                        (__mmask8) __U);
}

/* Zero-masked expand-load of doubles. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
                                                        (__v8df) _mm512_setzero_pd(),
                                                        (__mmask8) __U);
}
9221 
9222 static __inline__ __m512i __DEFAULT_FN_ATTRS
9223 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
9224 {
9225  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9226  (__v8di) __W,
9227  (__mmask8) __U);
9228 }
9229 
9230 static __inline__ __m512i __DEFAULT_FN_ATTRS
9231 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
9232 {
9233  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9234  (__v8di) _mm512_setzero_pd(),
9235  (__mmask8) __U);
9236 }
9237 
/* Expand-load of floats from an unaligned pointer; pass-through __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
                                                       (__v16sf) __W,
                                                       (__mmask16) __U);
}

/* Zero-masked expand-load of floats. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
                                                       (__v16sf) _mm512_setzero_ps(),
                                                       (__mmask16) __U);
}
9253 
9254 static __inline__ __m512i __DEFAULT_FN_ATTRS
9255 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
9256 {
9257  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9258  (__v16si) __W,
9259  (__mmask16) __U);
9260 }
9261 
9262 static __inline__ __m512i __DEFAULT_FN_ATTRS
9263 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
9264 {
9265  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9266  (__v16si) _mm512_setzero_ps(),
9267  (__mmask16) __U);
9268 }
9269 
/* Expand of floats: merge-masked form with pass-through __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
                                                   (__v16sf) __W,
                                                   (__mmask16) __U);
}

/* Zero-masked expand of floats. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
                                                   (__v16sf) _mm512_setzero_ps(),
                                                   (__mmask16) __U);
}
9285 
9286 static __inline__ __m512i __DEFAULT_FN_ATTRS
9287 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9288 {
9289  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9290  (__v16si) __W,
9291  (__mmask16) __U);
9292 }
9293 
9294 static __inline__ __m512i __DEFAULT_FN_ATTRS
9295 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9296 {
9297  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9298  (__v16si) _mm512_setzero_ps(),
9299  (__mmask16) __U);
9300 }
9301 
/* Convert 8 floats to 8 doubles with explicit rounding/SAE control R. */
#define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked form with pass-through W. */
#define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked form. */
#define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
9316 
9317 static __inline__ __m512d __DEFAULT_FN_ATTRS
9318 _mm512_cvtps_pd (__m256 __A)
9319 {
9320  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9321  (__v8df)
9323  (__mmask8) -1,
9325 }
9326 
9327 static __inline__ __m512d __DEFAULT_FN_ATTRS
9328 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
9329 {
9330  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9331  (__v8df) __W,
9332  (__mmask8) __U,
9334 }
9335 
9336 static __inline__ __m512d __DEFAULT_FN_ATTRS
9337 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
9338 {
9339  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9340  (__v8df)
9341  _mm512_setzero_pd (),
9342  (__mmask8) __U,
9344 }
9345 
9346 static __inline__ __m512 __DEFAULT_FN_ATTRS
9347 _mm512_cvtpslo_pd (__m512 __A)
9348 {
9349  return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
9350 }
9351 
9352 static __inline__ __m512 __DEFAULT_FN_ATTRS
9353 _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
9354 {
9355  return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
9356 }
9357 
/* Masked register-to-register moves, implemented as element selects. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
                                                (__v8df) __A,
                                                (__v8df) __W);
}

/* Zero-masked double move. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
                                                (__v8df) __A,
                                                (__v8df) _mm512_setzero_pd ());
}

/* Merge-masked float move. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
                                               (__v16sf) __A,
                                               (__v16sf) __W);
}

/* Zero-masked float move. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
                                               (__v16sf) __A,
                                               (__v16sf) _mm512_setzero_ps ());
}
9389 
/* Compress selected elements of __A and store them contiguously at the
   (unaligned) destination __P; only selected elements are written. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
                                          (__mmask8) __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
                                          (__mmask8) __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
                                          (__mmask16) __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
                                          (__mmask16) __U);
}
9417 
/* Convert the low double of B to a float in the low lane, upper lanes from
   A; explicit rounding control R. */
#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_undefined_ps(), \
                                             (__mmask8)-1, (int)(R)); })

/* Merge-masked form with pass-through W. */
#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(R)); })

/* Zero-masked form. */
#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R)); })
9435 
/* Non-round forms of the sd->ss conversion; current rounding direction. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
{
  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
                                             (__v2df)(__B),
                                             (__v4sf)(__W),
                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
{
  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
                                             (__v2df)(__B),
                                             (__v4sf)_mm_setzero_ps(),
                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}
9453 
/* Spelling aliases: the _i32/_i64 names map onto the corresponding _si32/_si64
   scalar-conversion intrinsics.  */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif
9464 
#ifdef __x86_64__
/* Convert signed 64-bit integer B to double in the low lane with rounding
   mode R; _i64 and _si64 spellings are equivalent.  */
#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R)); })

#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R)); })
#endif

/* Convert signed 32-bit integer B to float in the low lane with rounding
   mode R; _i32 and _si32 spellings are equivalent.  */
#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })

#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })

#ifdef __x86_64__
/* Convert signed 64-bit integer B to float in the low lane with rounding
   mode R.  */
#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R)); })

#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R)); })
#endif
9490 
/* Convert the low float of B to double with rounding mode R; upper lane is
   copied from A.  Unmasked, merge-masked and zero-masked forms.  */
#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_undefined_pd(), \
                                              (__mmask8)-1, (int)(R)); })

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(R)); })

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U), (int)(R)); })
9508 
/* Convert the low float of __B to double (current rounding mode); low lane
   comes from __W when bit 0 of __U is clear, upper lane from __A.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
{
  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
                                            (__v4sf)(__B),
                                            (__v2df)(__W),
                                            (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant of the conversion above.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
{
  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
                                            (__v4sf)(__B),
                                            (__v2df)_mm_setzero_pd(),
                                            (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}
9526 
/* Convert unsigned 32-bit integer __B to double in the low lane of __A.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtu32_sd (__m128d __A, unsigned __B)
{
  return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
}
9532 
9533 #ifdef __x86_64__
/* Convert unsigned 64-bit integer B to double in the low lane of A with
   rounding mode R.  */
#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                      (unsigned long long)(B), (int)(R)); })
9537 
9538 static __inline__ __m128d __DEFAULT_FN_ATTRS
9539 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9540 {
9541  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
9543 }
9544 #endif
9545 
/* Convert unsigned 32-bit integer B to float in the low lane of A with
   rounding mode R.  */
#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                     (int)(R)); })
9549 
9550 static __inline__ __m128 __DEFAULT_FN_ATTRS
9551 _mm_cvtu32_ss (__m128 __A, unsigned __B)
9552 {
9553  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
9555 }
9556 
9557 #ifdef __x86_64__
/* Convert unsigned 64-bit integer B to float in the low lane of A with
   rounding mode R.  */
#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                     (unsigned long long)(B), (int)(R)); })
9561 
9562 static __inline__ __m128 __DEFAULT_FN_ATTRS
9563 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9564 {
9565  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
9567 }
9568 #endif
9569 
/* Broadcast __A into the 32-bit lanes selected by __M; other lanes keep __O.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
{
  return (__m512i) __builtin_ia32_selectd_512(__M,
                                              (__v16si) _mm512_set1_epi32(__A),
                                              (__v16si) __O);
}

#ifdef __x86_64__
/* Broadcast __A into the 64-bit lanes selected by __M; other lanes keep __O.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
{
  return (__m512i) __builtin_ia32_selectq_512(__M,
                                              (__v8di) _mm512_set1_epi64(__A),
                                              (__v8di) __O);
}
#endif
9587 
/* Build a 512-bit vector from 64 bytes; __e0 is the lowest-addressed
   (least significant) element, matching the _mm512_set_* convention where
   arguments are listed from high element to low.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
    char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
    char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
    char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
    char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
    char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
    char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
    char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
    char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
    char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
    char __e4, char __e3, char __e2, char __e1, char __e0) {

  return __extension__ (__m512i)(__v64qi)
   {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
    __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
    __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
    __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
    __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
    __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
    __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
    __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
}
9611 
/* Build a 512-bit vector from 32 shorts; __e0 is the least significant
   element (arguments are listed high-to-low).  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
    short __e27, short __e26, short __e25, short __e24, short __e23,
    short __e22, short __e21, short __e20, short __e19, short __e18,
    short __e17, short __e16, short __e15, short __e14, short __e13,
    short __e12, short __e11, short __e10, short __e9, short __e8,
    short __e7, short __e6, short __e5, short __e4, short __e3,
    short __e2, short __e1, short __e0) {
  return __extension__ (__m512i)(__v32hi)
   {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
    __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
    __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
    __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
}
9626 
/* Build a 512-bit vector from 16 ints; the last argument (__P) becomes the
   least significant element.  The _mm512_setr_* macros below take arguments
   in memory (low-to-high) order and simply reverse onto the set form.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
                  int __E, int __F, int __G, int __H,
                  int __I, int __J, int __K, int __L,
                  int __M, int __N, int __O, int __P)
{
  return __extension__ (__m512i)(__v16si)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,           \
       e8,e9,e10,e11,e12,e13,e14,e15)          \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
                   (e5),(e4),(e3),(e2),(e1),(e0))

/* Build a 512-bit vector from 8 long longs (arguments high-to-low).  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi64 (long long __A, long long __B, long long __C,
                  long long __D, long long __E, long long __F,
                  long long __G, long long __H)
{
  return __extension__ (__m512i) (__v8di)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)           \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))

/* Build a 512-bit vector from 8 doubles (arguments high-to-low).  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_set_pd (double __A, double __B, double __C, double __D,
               double __E, double __F, double __G, double __H)
{
  return __extension__ (__m512d)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)              \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))

/* Build a 512-bit vector from 16 floats (arguments high-to-low).  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_set_ps (float __A, float __B, float __C, float __D,
               float __E, float __F, float __G, float __H,
               float __I, float __J, float __K, float __L,
               float __M, float __N, float __O, float __P)
{
  return __extension__ (__m512)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
                (e4),(e3),(e2),(e1),(e0))
9680 
/* Absolute value of packed floats: clear the sign bit of each 32-bit lane by
   ANDing with 0x7FFFFFFF.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_abs_ps(__m512 __A)
{
  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}

/* Masked |x| for floats: lanes with __K clear keep the value from __W.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
{
  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}

/* Absolute value of packed doubles: clear the sign bit of each 64-bit lane.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_abs_pd(__m512d __A)
{
  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
}

/* Masked |x| for doubles: lanes with __K clear keep the value from __W.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
{
  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
}
9704 
9705 // Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
9706 // outputs. This class of vector operation forms the basis of many scientific
// computations. In vector-reduction arithmetic, the evaluation of f is
// independent of the order of the input elements of V.
9709 
9710 // Used bisection method. At each step, we partition the vector with previous
9711 // step in half, and the operation is performed on its two halves.
9712 // This takes log2(n) steps where n is the number of elements in the vector.
9713 
9714 // Vec512 - Vector with size 512.
9715 // Operator - Can be one of following: +,*,&,|
9716 // T2 - Can get 'i' for int and 'f' for float.
9717 // T1 - Can get 'i' for int and 'd' for double.
9718 
/* Reduce an 8-element 64-bit vector to a scalar by repeatedly folding the
   vector in half with Operator: 8 -> 4 -> 2 -> 1 elements.  The final
   128-bit step shuffles element 0 against element 1 (the -1 lanes are
   don't-care) and returns lane 0.  Expands to a statement expression ending
   in `return`, so it must be used as the body of a wrapper function.  */
#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1)         \
  __extension__({                                                      \
    __m256##T1 Vec256 = __builtin_shufflevector(                       \
                                (__v8d##T2)Vec512,                     \
                                (__v8d##T2)Vec512,                     \
                                0, 1, 2, 3)                            \
                        Operator                                       \
                        __builtin_shufflevector(                       \
                                (__v8d##T2)Vec512,                     \
                                (__v8d##T2)Vec512,                     \
                                4, 5, 6, 7);                           \
    __m128##T1 Vec128 = __builtin_shufflevector(                       \
                                (__v4d##T2)Vec256,                     \
                                (__v4d##T2)Vec256,                     \
                                0, 1)                                  \
                        Operator                                       \
                        __builtin_shufflevector(                       \
                                (__v4d##T2)Vec256,                     \
                                (__v4d##T2)Vec256,                     \
                                2, 3);                                 \
    Vec128 = __builtin_shufflevector((__v2d##T2)Vec128,                \
                                     (__v2d##T2)Vec128, 0, -1)         \
             Operator                                                  \
             __builtin_shufflevector((__v2d##T2)Vec128,                \
                                     (__v2d##T2)Vec128, 1, -1);        \
    return Vec128[0];                                                  \
  })
9746 
/* Horizontal sum of the eight 64-bit integer elements of __W.  */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, +, i, i);
}

/* Horizontal product of the eight 64-bit integer elements of __W.  */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, *, i, i);
}

/* Bitwise AND of the eight 64-bit integer elements of __W.  */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, &, i, i);
}

/* Bitwise OR of the eight 64-bit integer elements of __W.  */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, |, i, i);
}

/* Horizontal sum of the eight doubles of __W (pairwise tree order, not
   left-to-right).  */
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) {
  _mm512_reduce_operator_64bit(__W, +, f, d);
}

/* Horizontal product of the eight doubles of __W.  */
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
  _mm512_reduce_operator_64bit(__W, *, f, d);
}
9770 
9771 // Vec512 - Vector with size 512.
9772 // Vec512Neutral - All vector elements set to the identity element.
9773 // Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0}
9774 // Operator - Can be one of following: +,*,&,|
9775 // Mask - Intrinsic Mask
9776 // T2 - Can get 'i' for int and 'f' for float.
9777 // T1 - Can get 'i' for int and 'd' for packed double-precision.
9778 // T3 - Can be Pd for packed double or q for q-word.
9779 
/* Masked 64-bit reduction: first replace unselected lanes with the
   operator's identity element (Vec512Neutral) via a select, then run the
   unmasked reduction above.  */
#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator, \
                                          Mask, T2, T1, T3)                \
  __extension__({                                                          \
    Vec512 = __builtin_ia32_select##T3##_512(                              \
                 (__mmask8)Mask,                                           \
                 (__v8d##T2)Vec512,                                        \
                 (__v8d##T2)Vec512Neutral);                                \
    _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1);                \
  })
9789 
/* Masked horizontal sum of 64-bit elements; identity 0 for unselected lanes.  */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q);
}

/* Masked horizontal product; identity 1 for unselected lanes.  */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q);
}

/* Masked horizontal AND; identity all-ones for unselected lanes.  */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
                                    &, __M,  i, i, q);
}
9805 
9806 static __inline__ long long __DEFAULT_FN_ATTRS
9807 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
9809  i, i, q);
9810 }
9811 
9812 static __inline__ double __DEFAULT_FN_ATTRS
9813 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
9815  f, d, pd);
9816 }
9817 
9818 static __inline__ double __DEFAULT_FN_ATTRS
9819 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
9821  f, d, pd);
9822 }
9823 
9824 // Vec512 - Vector with size 512.
9825 // Operator - Can be one of following: +,*,&,|
9826 // T2 - Can get 'i' for int and ' ' for packed single.
9827 // T1 - Can get 'i' for int and 'f' for float.
9828 
/* Reduce a 16-element 32-bit vector to a scalar by halving:
   16 -> 8 -> 4 -> 2 -> 1 elements, applying Operator at each step; -1 lanes
   are don't-care.  Ends in `return`, so it must form a wrapper's body.  */
#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \
    __m256##T1 Vec256 =                                                        \
            (__m256##T1)(__builtin_shufflevector(                              \
                                    (__v16s##T2)Vec512,                        \
                                    (__v16s##T2)Vec512,                        \
                                    0, 1, 2, 3, 4, 5, 6, 7)                    \
                                Operator                                       \
                         __builtin_shufflevector(                              \
                                    (__v16s##T2)Vec512,                        \
                                    (__v16s##T2)Vec512,                        \
                                    8, 9, 10, 11, 12, 13, 14, 15));            \
    __m128##T1 Vec128 =                                                        \
             (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v8s##T2)Vec256,                         \
                                    (__v8s##T2)Vec256,                         \
                                    0, 1, 2, 3)                                \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v8s##T2)Vec256,                         \
                                    (__v8s##T2)Vec256,                         \
                                    4, 5, 6, 7));                              \
    Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    0, 1, -1, -1)                              \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    2, 3, -1, -1));                            \
    Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    0, -1, -1, -1)                             \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    1, -1, -1, -1));                           \
    return Vec128[0];                                                          \
  })
9870 
9871 static __inline__ int __DEFAULT_FN_ATTRS
9873  _mm512_reduce_operator_32bit(__W, +, i, i);
9874 }
9875 
9876 static __inline__ int __DEFAULT_FN_ATTRS
9878  _mm512_reduce_operator_32bit(__W, *, i, i);
9879 }
9880 
9881 static __inline__ int __DEFAULT_FN_ATTRS
9883  _mm512_reduce_operator_32bit(__W, &, i, i);
9884 }
9885 
9886 static __inline__ int __DEFAULT_FN_ATTRS
9888  _mm512_reduce_operator_32bit(__W, |, i, i);
9889 }
9890 
9891 static __inline__ float __DEFAULT_FN_ATTRS
9893  _mm512_reduce_operator_32bit(__W, +, f, );
9894 }
9895 
9896 static __inline__ float __DEFAULT_FN_ATTRS
9898  _mm512_reduce_operator_32bit(__W, *, f, );
9899 }
9900 
9901 // Vec512 - Vector with size 512.
9902 // Vec512Neutral - All vector elements set to the identity element.
9903 // Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0}
9904 // Operator - Can be one of following: +,*,&,|
9905 // Mask - Intrinsic Mask
9906 // T2 - Can get 'i' for int and 'f' for float.
9907 // T1 - Can get 'i' for int and 'd' for double.
9908 // T3 - Can be Ps for packed single or d for d-word.
9909 
/* Masked 32-bit reduction: replace unselected lanes with the operator's
   identity element, then run the unmasked 32-bit reduction.  */
#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator, \
                                          Mask, T2, T1, T3)                \
  __extension__({                                                          \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                  \
                             (__mmask16)Mask,                              \
                             (__v16s##T2)Vec512,                           \
                             (__v16s##T2)Vec512Neutral);                   \
    _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1);                \
  })
9919 
/* Masked horizontal sum of 32-bit elements; identity 0.  */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d);
}

/* Masked horizontal product; identity 1.  */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d);
}

/* Masked horizontal AND; identity all-ones.  */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M,
                                    i, i, d);
}

/* Masked horizontal OR; identity 0.  */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d);
}

/* Masked horizontal sum of floats; identity 0.0f.  */
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps);
}

/* Masked horizontal product of floats; identity 1.0f.  */
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps);
}
9950 
9951 // Used bisection method. At each step, we partition the vector with previous
9952 // step in half, and the operation is performed on its two halves.
9953 // This takes log2(n) steps where n is the number of elements in the vector.
9954 // This macro uses only intrinsics from the AVX512F feature.
9955 
9956 // Vec512 - Vector with size of 512.
9957 // IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
9958 // __mm512_max_epi64
9959 // T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
9960 // T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
9961 
/* Max/min reduction over eight 64-bit elements using only AVX512F: keep the
   full 512-bit width and repeatedly combine the low half with the high half
   via the given _mm512_{max|min}_* intrinsic (upper -1 lanes are don't-care),
   then return lane 0.  Ends in `return`, so it must form a wrapper's body.  */
#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 0, 1, 2, 3, -1, -1, -1, -1),  \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 4, 5, 6, 7, -1, -1, -1, -1)); \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 0, 1, -1, -1, -1, -1, -1, -1),\
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 2, 3, -1, -1, -1, -1, -1,    \
                                                 -1));                         \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 0, -1, -1, -1, -1, -1, -1, -1),\
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 1, -1, -1, -1, -1, -1, -1, -1))\
                                                 ;                             \
    return Vec512[0];                                                          \
  })
9994 
9995 static __inline__ long long __DEFAULT_FN_ATTRS
9997  _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i);
9998 }
9999 
10000 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10002  _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i);
10003 }
10004 
/* Maximum of the eight doubles of __V.  */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_reduce_max_pd(__m512d __V) {
  _mm512_reduce_maxMin_64bit(__V, max_pd, d, f);
}
10009 
/* Minimum of the eight signed 64-bit elements of __V.  */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64
(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i);
}
10014 
10015 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10017  _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i);
10018 }
10019 
/* Minimum of the eight doubles of __V.  */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_reduce_min_pd(__m512d __V) {
  _mm512_reduce_maxMin_64bit(__V, min_pd, d, f);
}
10024 
10025 // Vec512 - Vector with size 512.
10026 // Vec512Neutral - A 512 length vector with elements set to the identity element
10027 // Identity element: {max_epi,0x8000000000000000}
10028 // {max_epu,0x0000000000000000}
10029 // {max_pd, 0xFFF0000000000000}
10030 // {min_epi,0x7FFFFFFFFFFFFFFF}
10031 // {min_epu,0xFFFFFFFFFFFFFFFF}
10032 // {min_pd, 0x7FF0000000000000}
10033 //
10034 // IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
10035 // __mm512_max_epi64
10036 // T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
10037 // T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
10038 // T3 - Can get 'q' q word and 'pd' for packed double.
10039 // [__builtin_ia32_select{q|pd}_512]
10040 // Mask - Intrinsic Mask
10041 
/* Masked 64-bit max/min reduction: replace unselected lanes with the
   operation's identity (see table above), then reduce.  */
#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask8)Mask,                                   \
                             (__v8d##T2)Vec512,                                \
                             (__v8d##T2)Vec512Neutral);                        \
    _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2);                    \
  })
10051 
/* Masked signed max; identity INT64_MIN for unselected lanes.  */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000),
                                  max_epi64, i, i, q, __M);
}

/* Masked unsigned max; identity 0 for unselected lanes.  */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000),
                                  max_epu64, i, i, q, __M);
}

/* Masked double max; identity -inf for unselected lanes.  */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()),
                                  max_pd, d, f, pd, __M);
}

/* Masked signed min; identity INT64_MAX for unselected lanes.  */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),
                                  min_epi64, i, i, q, __M);
}

/* Masked unsigned min; identity UINT64_MAX for unselected lanes.  */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
                                  min_epu64, i, i, q, __M);
}

/* Masked double min; identity +inf for unselected lanes.  */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()),
                                  min_pd, d, f, pd, __M);
}
10087 
10088 // Vec512 - Vector with size 512.
10089 // IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example:
10090 // __mm512_max_epi32
10091 // T1 - Can get 'i' for int and ' ' .[__m512{i|}]
10092 // T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}]
10093 
/* Max/min reduction over sixteen 32-bit elements using only AVX512F:
   combine halves 16 -> 8 -> 4 -> 2 -> 1 with the given _mm512_{max|min}_*
   intrinsic (-1 lanes are don't-care), then return lane 0.  Ends in
   `return`, so it must form a wrapper's body.  */
#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, 2, 3, 4, 5, 6, 7,                      \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  8, 9, 10, 11, 12, 13, 14, 15,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, 2, 3, -1, -1, -1, -1,                  \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  4, 5, 6, 7, -1, -1, -1, -1,                  \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, -1, -1, -1, -1, -1, -1,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  2, 3, -1, -1, -1, -1, -1, -1,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, -1, -1, -1, -1, -1, -1, -1,               \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  1, -1, -1, -1, -1, -1, -1, -1,               \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    return Vec512[0];                                                          \
  })
10141 
/* Maximum of the sixteen signed 32-bit elements of a.  */
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a) {
  _mm512_reduce_maxMin_32bit(a, max_epi32, i, i);
}
10145 
10146 static __inline__ unsigned int __DEFAULT_FN_ATTRS
10148  _mm512_reduce_maxMin_32bit(a, max_epu32, i, i);
10149 }
10150 
/* Maximum of the sixteen floats of a.  */
static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a) {
  _mm512_reduce_maxMin_32bit(a, max_ps, , f);
}
10154 
/* Minimum of the sixteen signed 32-bit elements of a.  */
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a) {
  _mm512_reduce_maxMin_32bit(a, min_epi32, i, i);
}
10158 
10159 static __inline__ unsigned int __DEFAULT_FN_ATTRS
10161  _mm512_reduce_maxMin_32bit(a, min_epu32, i, i);
10162 }
10163 
/* Minimum of the sixteen floats of a.  */
static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a) {
  _mm512_reduce_maxMin_32bit(a, min_ps, , f);
}
10167 
10168 // Vec512 - Vector with size 512.
10169 // Vec512Neutral - A 512 length vector with elements set to the identity element
10170 // Identity element: {max_epi,0x80000000}
10171 // {max_epu,0x00000000}
10172 // {max_ps, 0xFF800000}
10173 // {min_epi,0x7FFFFFFF}
10174 // {min_epu,0xFFFFFFFF}
10175 // {min_ps, 0x7F800000}
10176 //
10177 // IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example:
10178 // __mm512_max_epi32
10179 // T1 - Can get 'i' for int and ' ' .[__m512{i|}]
10180 // T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}]
10181 // T3 - Can get 'q' q word and 'pd' for packed double.
10182 // [__builtin_ia32_select{q|pd}_512]
10183 // Mask - Intrinsic Mask
10184 
/* Masked 32-bit max/min reduction: replace unselected lanes with the
   operation's identity (see table above), then reduce.  */
#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask16)Mask,                                  \
                             (__v16s##T2)Vec512,                               \
                             (__v16s##T2)Vec512Neutral);                       \
    _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2);                    \
  })
10194 
/* Masked signed max; identity INT32_MIN for unselected lanes.  */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32,
                                  i, i, d, __M);
}

/* Masked unsigned max; identity 0 for unselected lanes.  */
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32,
                                  i, i, d, __M);
}

/* Masked float max; identity -inf for unselected lanes.  */
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
  _mm512_mask_reduce_maxMin_32bit(__V,-_mm512_set1_ps(__builtin_inff()), max_ps, , f,
                                  ps, __M);
}

/* Masked signed min; identity INT32_MAX for unselected lanes.  */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32,
                                  i, i, d, __M);
}

/* Masked unsigned min; identity UINT32_MAX for unselected lanes.  */
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32,
                                  i, i, d, __M);
}

/* Masked float min; identity +inf for unselected lanes.  */
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f,
                                  ps, __M);
}
10230 
10231 #undef __DEFAULT_FN_ATTRS
10232 
10233 #endif // __AVX512FINTRIN_H
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_cvtph_ps(__m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepu8_epi64(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcast_i64x4(__m256i __A)
static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_mul_ps(__m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_cvtepu32_ps(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition: avxintrin.h:4345
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_setzero_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS _mm512_storeu_pd(void *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_floor_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m128 __DEFAULT_FN_ATTRS _mm512_castps512_ps128(__m512 __a)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtusepi64_epi32(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_stream_si512(__m512i *__P, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srai_epi64(__m512i __A, int __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
unsigned char __mmask8
Definition: avx512fintrin.h:47
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_and_epi32(__m512i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtsepi64_epi32(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline __m256 __DEFAULT_FN_ATTRS _mm512_castps512_ps256(__m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set1_epi8(char __w)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_stream_ps(float *__P, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_castps_si512(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm512_castsi512_si256(__m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS _mm512_cvtss_f32(__m512 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
#define _MM_FROUND_CEIL
Definition: smmintrin.h:44
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_slli_epi64(__m512i __A, int __B)
static __inline void __DEFAULT_FN_ATTRS _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4...
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_setzero_ps(void)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtsepi64_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set1_epi32(int __s)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi32(__m512i __W)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_undefined_ps(void)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline void __DEFAULT_FN_ATTRS _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_add_ps(__m512 __a, __m512 __b)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_castpd_ps(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_movehdup_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_sqrt_pd(__m512d __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_abs_pd(__m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_cvtps_pd(__m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_rcp14_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_undefined_ps(void)
Create a 128-bit vector of [4 x float] with undefined values.
Definition: xmmintrin.h:1754
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS _mm512_castsi512_si128(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_stream_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_set1_pd(double __w)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_castsi256_si512(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_slli_epi32(__m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition: avxintrin.h:3673
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_max_pd(__m512d __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
_MM_MANTISSA_NORM_ENUM
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_load_ps(void const *__p)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_castsi512_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttss_i32(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_loadu_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline unsigned char unsigned int unsigned int unsigned int * __p
Definition: adxintrin.h:38
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8...
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepi16_epi64(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
char __v64qi __attribute__((__vector_size__(64)))
Definition: avx512fintrin.h:30
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_knot(__mmask16 __M)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_undefined_epi32(void)
#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, T2, T3, Mask)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ float __DEFAULT_FN_ATTRS _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtps_epu32(__m512 __A)
_MM_CMPINT_ENUM
Definition: avx512fintrin.h:58
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm512_reduce_max_epu32(__m512i a)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition: xmmintrin.h:1881
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_cvtpd_ps(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS _mm_cvttss_u32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_moveldup_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS _mm512_storeu_ps(void *__P, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_abs_epi64(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition: emmintrin.h:1881
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
Definition: avxintrin.h:3660
static __inline void __DEFAULT_FN_ATTRS _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator, Mask, T2, T1, T3)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_set1_ps(float __w)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_sqrt_ps(__m512 __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ float __DEFAULT_FN_ATTRS _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS _mm_cvttsd_u32(__m128d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epi32(__m512 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_or_epi32(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_cvtsd_f64(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ float __DEFAULT_FN_ATTRS _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set1_epi16(short __w)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_int2mask(int __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ vector float vector float __b
Definition: altivec.h:534
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastq_epi64(__m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt14_ss(__m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_ceil_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepi8_epi32(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_rsqrt14_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_castsi512_ps(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
_MM_PERM_ENUM
Definition: avx512fintrin.h:70
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtusepi32_epi8(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_castps256_ps512(__m256 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srai_epi32(__m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepu16_epi64(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_load_pd(void const *__p)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W)
static __inline __m128d __DEFAULT_FN_ATTRS _mm512_castpd512_pd128(__m512d __a)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_castpd256_pd512(__m256d __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask2int(__mmask16 __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_load_epi32(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_castpd_si512(__m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_cvtepi32_pd(__m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srli_epi64(__m512i __A, int __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mm512_reduce_max_epu64(__m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_add_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srli_epi32(__m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_movedup_pd(__m512d __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm512_reduce_min_epu32(__m512i a)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition: emmintrin.h:3990
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline __m256d __DEFAULT_FN_ATTRS _mm512_castpd512_pd256(__m512d __A)
#define _MM_FROUND_CUR_DIRECTION
Definition: avx512fintrin.h:55
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_getexp_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
_MM_MANTISSA_SIGN_ENUM
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov(__mmask16 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, T2, T3, Mask)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_load_epi64(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W)
static __inline void __DEFAULT_FN_ATTRS _mm512_store_pd(void *__P, __m512d __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set1_epi64(long long __d)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_floor_ps(__m512 __A)
static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_cvtepu32lo_pd(__m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_loadu_si512(void const *__P)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition: emmintrin.h:3600
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_add_ps(__m512 __W)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_castps_pd(__m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_setzero_si512(void)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rsqrt14_sd(__m128d __A, __m128d __B)
#define _mm512_setzero_epi32
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_undefined_pd(void)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
#define __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_castps128_ps512(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition: avxintrin.h:4358
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_abs_ps(__m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_stream_pd(double *__P, __m512d __A)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_ceil_ps(__m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline void __DEFAULT_FN_ATTRS _mm512_store_epi32(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_rcp14_ps(__m512 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator, Mask, T2, T1, T3)
unsigned short __mmask16
Definition: avx512fintrin.h:48
static __inline void __DEFAULT_FN_ATTRS _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_add_epi32(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition: avxintrin.h:4331
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_undefined(void)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_broadcast_f32x4(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS _mm_cvtss_u32(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_i32(__m128d __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_abs_epi32(__m512i __A)
static __inline void __DEFAULT_FN_ATTRS _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] with unspecified content.
Definition: emmintrin.h:1769
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ float __DEFAULT_FN_ATTRS _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
#define _MM_FROUND_FLOOR
Definition: smmintrin.h:43
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline void __DEFAULT_FN_ATTRS _mm512_store_ps(void *__P, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestc(__mmask16 __A, __mmask16 __B)