clang  8.0.0
avx512fintrin.h
Go to the documentation of this file.
1 /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  *
21  *===-----------------------------------------------------------------------===
22  */
23 #ifndef __IMMINTRIN_H
24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25 #endif
26 
27 #ifndef __AVX512FINTRIN_H
28 #define __AVX512FINTRIN_H
29 
/* 512-bit element-view vector types (64 bytes each). */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* Public 512-bit vector types. */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Write-mask types: one bit per lane (8 x 64-bit or 16 x 32-bit lanes). */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
49 
/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00  /* round to nearest, ties to even */
#define _MM_FROUND_TO_NEG_INF 0x01      /* round toward -infinity */
#define _MM_FROUND_TO_POS_INF 0x02      /* round toward +infinity */
#define _MM_FROUND_TO_ZERO 0x03         /* truncate */
#define _MM_FROUND_CUR_DIRECTION 0x04   /* use MXCSR rounding mode */
56 
57 /* Constants for integer comparison predicates */
/* Constants for integer comparison predicates */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,  /* No corresponding instruction */
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;
69 
/* 2-bit-per-field shuffle selectors: _MM_PERM_WXYZ encodes W<<6|X<<4|Y<<2|Z
   with A=0, B=1, C=2, D=3; covers all 256 combinations 0x00..0xFF. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
159 
/* Normalization intervals for getmant operations. */
typedef enum
{
  _MM_MANT_NORM_1_2,      /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,     /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,     /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANT_NORM_ENUM;
/* Sign control for getmant operations. */
typedef enum
{
  _MM_MANT_SIGN_src,      /* sign = sign(SRC)             */
  _MM_MANT_SIGN_zero,     /* sign = 0                     */
  _MM_MANT_SIGN_nan       /* DEST = NaN if sign(SRC) = 1  */
} _MM_MANT_SIGN_ENUM;
/* Define the default attributes for the functions in this file.
   Every intrinsic is force-inlined and gated on the avx512f target feature;
   the 512/128 variants additionally declare their minimum vector width. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
179 
180 /* Create vectors with repeated elements */
181 
182 static __inline __m512i __DEFAULT_FN_ATTRS512
184 {
185  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
186 }
187 
188 #define _mm512_setzero_epi32 _mm512_setzero_si512
189 
190 static __inline__ __m512d __DEFAULT_FN_ATTRS512
192 {
193  return (__m512d)__builtin_ia32_undef512();
194 }
195 
196 static __inline__ __m512 __DEFAULT_FN_ATTRS512
198 {
199  return (__m512)__builtin_ia32_undef512();
200 }
201 
202 static __inline__ __m512 __DEFAULT_FN_ATTRS512
204 {
205  return (__m512)__builtin_ia32_undef512();
206 }
207 
208 static __inline__ __m512i __DEFAULT_FN_ATTRS512
210 {
211  return (__m512i)__builtin_ia32_undef512();
212 }
213 
214 static __inline__ __m512i __DEFAULT_FN_ATTRS512
216 {
217  return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
218  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
219 }
220 
221 static __inline__ __m512i __DEFAULT_FN_ATTRS512
222 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
223 {
224  return (__m512i)__builtin_ia32_selectd_512(__M,
225  (__v16si) _mm512_broadcastd_epi32(__A),
226  (__v16si) __O);
227 }
228 
229 static __inline__ __m512i __DEFAULT_FN_ATTRS512
230 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
231 {
232  return (__m512i)__builtin_ia32_selectd_512(__M,
233  (__v16si) _mm512_broadcastd_epi32(__A),
234  (__v16si) _mm512_setzero_si512());
235 }
236 
237 static __inline__ __m512i __DEFAULT_FN_ATTRS512
239 {
240  return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
241  0, 0, 0, 0, 0, 0, 0, 0);
242 }
243 
244 static __inline__ __m512i __DEFAULT_FN_ATTRS512
245 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
246 {
247  return (__m512i)__builtin_ia32_selectq_512(__M,
248  (__v8di) _mm512_broadcastq_epi64(__A),
249  (__v8di) __O);
250 
251 }
252 
253 static __inline__ __m512i __DEFAULT_FN_ATTRS512
254 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
255 {
256  return (__m512i)__builtin_ia32_selectq_512(__M,
257  (__v8di) _mm512_broadcastq_epi64(__A),
258  (__v8di) _mm512_setzero_si512());
259 }
260 
261 
262 static __inline __m512 __DEFAULT_FN_ATTRS512
264 {
265  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
266  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
267 }
268 
269 #define _mm512_setzero _mm512_setzero_ps
270 
271 static __inline __m512d __DEFAULT_FN_ATTRS512
273 {
274  return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
275 }
276 
277 static __inline __m512 __DEFAULT_FN_ATTRS512
278 _mm512_set1_ps(float __w)
279 {
280  return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
281  __w, __w, __w, __w, __w, __w, __w, __w };
282 }
283 
284 static __inline __m512d __DEFAULT_FN_ATTRS512
285 _mm512_set1_pd(double __w)
286 {
287  return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
288 }
289 
290 static __inline __m512i __DEFAULT_FN_ATTRS512
292 {
293  return __extension__ (__m512i)(__v64qi){
294  __w, __w, __w, __w, __w, __w, __w, __w,
295  __w, __w, __w, __w, __w, __w, __w, __w,
296  __w, __w, __w, __w, __w, __w, __w, __w,
297  __w, __w, __w, __w, __w, __w, __w, __w,
298  __w, __w, __w, __w, __w, __w, __w, __w,
299  __w, __w, __w, __w, __w, __w, __w, __w,
300  __w, __w, __w, __w, __w, __w, __w, __w,
301  __w, __w, __w, __w, __w, __w, __w, __w };
302 }
303 
304 static __inline __m512i __DEFAULT_FN_ATTRS512
306 {
307  return __extension__ (__m512i)(__v32hi){
308  __w, __w, __w, __w, __w, __w, __w, __w,
309  __w, __w, __w, __w, __w, __w, __w, __w,
310  __w, __w, __w, __w, __w, __w, __w, __w,
311  __w, __w, __w, __w, __w, __w, __w, __w };
312 }
313 
314 static __inline __m512i __DEFAULT_FN_ATTRS512
316 {
317  return __extension__ (__m512i)(__v16si){
318  __s, __s, __s, __s, __s, __s, __s, __s,
319  __s, __s, __s, __s, __s, __s, __s, __s };
320 }
321 
322 static __inline __m512i __DEFAULT_FN_ATTRS512
323 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
324 {
325  return (__m512i)__builtin_ia32_selectd_512(__M,
326  (__v16si)_mm512_set1_epi32(__A),
327  (__v16si)_mm512_setzero_si512());
328 }
329 
330 static __inline __m512i __DEFAULT_FN_ATTRS512
331 _mm512_set1_epi64(long long __d)
332 {
333  return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
334 }
335 
336 static __inline __m512i __DEFAULT_FN_ATTRS512
337 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
338 {
339  return (__m512i)__builtin_ia32_selectq_512(__M,
340  (__v8di)_mm512_set1_epi64(__A),
341  (__v8di)_mm512_setzero_si512());
342 }
343 
344 static __inline__ __m512 __DEFAULT_FN_ATTRS512
346 {
347  return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
348  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
349 }
350 
351 static __inline __m512i __DEFAULT_FN_ATTRS512
352 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
353 {
354  return __extension__ (__m512i)(__v16si)
355  { __D, __C, __B, __A, __D, __C, __B, __A,
356  __D, __C, __B, __A, __D, __C, __B, __A };
357 }
358 
359 static __inline __m512i __DEFAULT_FN_ATTRS512
360 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
361  long long __D)
362 {
363  return __extension__ (__m512i) (__v8di)
364  { __D, __C, __B, __A, __D, __C, __B, __A };
365 }
366 
367 static __inline __m512d __DEFAULT_FN_ATTRS512
368 _mm512_set4_pd (double __A, double __B, double __C, double __D)
369 {
370  return __extension__ (__m512d)
371  { __D, __C, __B, __A, __D, __C, __B, __A };
372 }
373 
374 static __inline __m512 __DEFAULT_FN_ATTRS512
375 _mm512_set4_ps (float __A, float __B, float __C, float __D)
376 {
377  return __extension__ (__m512)
378  { __D, __C, __B, __A, __D, __C, __B, __A,
379  __D, __C, __B, __A, __D, __C, __B, __A };
380 }
381 
/* setr4 variants: same as set4 but with arguments in memory (ascending) order. */
#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                \
  _mm512_set4_ps((e3),(e2),(e1),(e0))
393 
394 static __inline__ __m512d __DEFAULT_FN_ATTRS512
396 {
397  return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
398  0, 0, 0, 0, 0, 0, 0, 0);
399 }
400 
401 /* Cast between vector types */
402 
403 static __inline __m512d __DEFAULT_FN_ATTRS512
405 {
406  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
407 }
408 
409 static __inline __m512 __DEFAULT_FN_ATTRS512
411 {
412  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
413  -1, -1, -1, -1, -1, -1, -1, -1);
414 }
415 
416 static __inline __m128d __DEFAULT_FN_ATTRS512
418 {
419  return __builtin_shufflevector(__a, __a, 0, 1);
420 }
421 
422 static __inline __m256d __DEFAULT_FN_ATTRS512
424 {
425  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
426 }
427 
428 static __inline __m128 __DEFAULT_FN_ATTRS512
430 {
431  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
432 }
433 
434 static __inline __m256 __DEFAULT_FN_ATTRS512
436 {
437  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
438 }
439 
440 static __inline __m512 __DEFAULT_FN_ATTRS512
441 _mm512_castpd_ps (__m512d __A)
442 {
443  return (__m512) (__A);
444 }
445 
446 static __inline __m512i __DEFAULT_FN_ATTRS512
447 _mm512_castpd_si512 (__m512d __A)
448 {
449  return (__m512i) (__A);
450 }
451 
452 static __inline__ __m512d __DEFAULT_FN_ATTRS512
454 {
455  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
456 }
457 
458 static __inline __m512d __DEFAULT_FN_ATTRS512
459 _mm512_castps_pd (__m512 __A)
460 {
461  return (__m512d) (__A);
462 }
463 
464 static __inline __m512i __DEFAULT_FN_ATTRS512
466 {
467  return (__m512i) (__A);
468 }
469 
470 static __inline__ __m512 __DEFAULT_FN_ATTRS512
472 {
473  return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
474 }
475 
476 static __inline__ __m512i __DEFAULT_FN_ATTRS512
478 {
479  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
480 }
481 
482 static __inline__ __m512i __DEFAULT_FN_ATTRS512
484 {
485  return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
486 }
487 
488 static __inline __m512 __DEFAULT_FN_ATTRS512
489 _mm512_castsi512_ps (__m512i __A)
490 {
491  return (__m512) (__A);
492 }
493 
494 static __inline __m512d __DEFAULT_FN_ATTRS512
495 _mm512_castsi512_pd (__m512i __A)
496 {
497  return (__m512d) (__A);
498 }
499 
500 static __inline __m128i __DEFAULT_FN_ATTRS512
502 {
503  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
504 }
505 
506 static __inline __m256i __DEFAULT_FN_ATTRS512
508 {
509  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
510 }
511 
512 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
514 {
515  return (__mmask16)__a;
516 }
517 
518 static __inline__ int __DEFAULT_FN_ATTRS
520 {
521  return (int)__a;
522 }
523 
524 /// Constructs a 512-bit floating-point vector of [8 x double] from a
525 /// 128-bit floating-point vector of [2 x double]. The lower 128 bits
526 /// contain the value of the source vector. The upper 384 bits are set
527 /// to zero.
528 ///
529 /// \headerfile <x86intrin.h>
530 ///
531 /// This intrinsic has no corresponding instruction.
532 ///
533 /// \param __a
534 /// A 128-bit vector of [2 x double].
535 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
536 /// contain the value of the parameter. The upper 384 bits are set to zero.
537 static __inline __m512d __DEFAULT_FN_ATTRS512
539 {
540  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
541 }
542 
543 /// Constructs a 512-bit floating-point vector of [8 x double] from a
544 /// 256-bit floating-point vector of [4 x double]. The lower 256 bits
545 /// contain the value of the source vector. The upper 256 bits are set
546 /// to zero.
547 ///
548 /// \headerfile <x86intrin.h>
549 ///
550 /// This intrinsic has no corresponding instruction.
551 ///
552 /// \param __a
553 /// A 256-bit vector of [4 x double].
554 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
555 /// contain the value of the parameter. The upper 256 bits are set to zero.
556 static __inline __m512d __DEFAULT_FN_ATTRS512
558 {
559  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
560 }
561 
562 /// Constructs a 512-bit floating-point vector of [16 x float] from a
563 /// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
564 /// the value of the source vector. The upper 384 bits are set to zero.
565 ///
566 /// \headerfile <x86intrin.h>
567 ///
568 /// This intrinsic has no corresponding instruction.
569 ///
570 /// \param __a
571 /// A 128-bit vector of [4 x float].
572 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
573 /// contain the value of the parameter. The upper 384 bits are set to zero.
574 static __inline __m512 __DEFAULT_FN_ATTRS512
576 {
577  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
578 }
579 
580 /// Constructs a 512-bit floating-point vector of [16 x float] from a
581 /// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
582 /// the value of the source vector. The upper 256 bits are set to zero.
583 ///
584 /// \headerfile <x86intrin.h>
585 ///
586 /// This intrinsic has no corresponding instruction.
587 ///
588 /// \param __a
589 /// A 256-bit vector of [8 x float].
590 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
591 /// contain the value of the parameter. The upper 256 bits are set to zero.
592 static __inline __m512 __DEFAULT_FN_ATTRS512
594 {
595  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
596 }
597 
598 /// Constructs a 512-bit integer vector from a 128-bit integer vector.
599 /// The lower 128 bits contain the value of the source vector. The upper
600 /// 384 bits are set to zero.
601 ///
602 /// \headerfile <x86intrin.h>
603 ///
604 /// This intrinsic has no corresponding instruction.
605 ///
606 /// \param __a
607 /// A 128-bit integer vector.
608 /// \returns A 512-bit integer vector. The lower 128 bits contain the value of
609 /// the parameter. The upper 384 bits are set to zero.
610 static __inline __m512i __DEFAULT_FN_ATTRS512
612 {
613  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
614 }
615 
616 /// Constructs a 512-bit integer vector from a 256-bit integer vector.
617 /// The lower 256 bits contain the value of the source vector. The upper
618 /// 256 bits are set to zero.
619 ///
620 /// \headerfile <x86intrin.h>
621 ///
622 /// This intrinsic has no corresponding instruction.
623 ///
624 /// \param __a
625 /// A 256-bit integer vector.
626 /// \returns A 512-bit integer vector. The lower 256 bits contain the value of
627 /// the parameter. The upper 256 bits are set to zero.
628 static __inline __m512i __DEFAULT_FN_ATTRS512
630 {
631  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
632 }
633 
634 /* Bitwise operators */
635 static __inline__ __m512i __DEFAULT_FN_ATTRS512
636 _mm512_and_epi32(__m512i __a, __m512i __b)
637 {
638  return (__m512i)((__v16su)__a & (__v16su)__b);
639 }
640 
641 static __inline__ __m512i __DEFAULT_FN_ATTRS512
642 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
643 {
644  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
645  (__v16si) _mm512_and_epi32(__a, __b),
646  (__v16si) __src);
647 }
648 
649 static __inline__ __m512i __DEFAULT_FN_ATTRS512
650 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
651 {
652  return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
653  __k, __a, __b);
654 }
655 
656 static __inline__ __m512i __DEFAULT_FN_ATTRS512
657 _mm512_and_epi64(__m512i __a, __m512i __b)
658 {
659  return (__m512i)((__v8du)__a & (__v8du)__b);
660 }
661 
662 static __inline__ __m512i __DEFAULT_FN_ATTRS512
663 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
664 {
665  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
666  (__v8di) _mm512_and_epi64(__a, __b),
667  (__v8di) __src);
668 }
669 
670 static __inline__ __m512i __DEFAULT_FN_ATTRS512
671 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
672 {
673  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
674  __k, __a, __b);
675 }
676 
677 static __inline__ __m512i __DEFAULT_FN_ATTRS512
678 _mm512_andnot_si512 (__m512i __A, __m512i __B)
679 {
680  return (__m512i)(~(__v8du)__A & (__v8du)__B);
681 }
682 
683 static __inline__ __m512i __DEFAULT_FN_ATTRS512
684 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
685 {
686  return (__m512i)(~(__v16su)__A & (__v16su)__B);
687 }
688 
689 static __inline__ __m512i __DEFAULT_FN_ATTRS512
690 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
691 {
692  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
693  (__v16si)_mm512_andnot_epi32(__A, __B),
694  (__v16si)__W);
695 }
696 
697 static __inline__ __m512i __DEFAULT_FN_ATTRS512
698 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
699 {
701  __U, __A, __B);
702 }
703 
704 static __inline__ __m512i __DEFAULT_FN_ATTRS512
705 _mm512_andnot_epi64(__m512i __A, __m512i __B)
706 {
707  return (__m512i)(~(__v8du)__A & (__v8du)__B);
708 }
709 
710 static __inline__ __m512i __DEFAULT_FN_ATTRS512
711 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
712 {
713  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
714  (__v8di)_mm512_andnot_epi64(__A, __B),
715  (__v8di)__W);
716 }
717 
718 static __inline__ __m512i __DEFAULT_FN_ATTRS512
719 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
720 {
722  __U, __A, __B);
723 }
724 
725 static __inline__ __m512i __DEFAULT_FN_ATTRS512
726 _mm512_or_epi32(__m512i __a, __m512i __b)
727 {
728  return (__m512i)((__v16su)__a | (__v16su)__b);
729 }
730 
731 static __inline__ __m512i __DEFAULT_FN_ATTRS512
732 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
733 {
734  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
735  (__v16si)_mm512_or_epi32(__a, __b),
736  (__v16si)__src);
737 }
738 
739 static __inline__ __m512i __DEFAULT_FN_ATTRS512
740 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
741 {
742  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
743 }
744 
745 static __inline__ __m512i __DEFAULT_FN_ATTRS512
746 _mm512_or_epi64(__m512i __a, __m512i __b)
747 {
748  return (__m512i)((__v8du)__a | (__v8du)__b);
749 }
750 
751 static __inline__ __m512i __DEFAULT_FN_ATTRS512
752 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
753 {
754  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
755  (__v8di)_mm512_or_epi64(__a, __b),
756  (__v8di)__src);
757 }
758 
759 static __inline__ __m512i __DEFAULT_FN_ATTRS512
760 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
761 {
762  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
763 }
764 
765 static __inline__ __m512i __DEFAULT_FN_ATTRS512
766 _mm512_xor_epi32(__m512i __a, __m512i __b)
767 {
768  return (__m512i)((__v16su)__a ^ (__v16su)__b);
769 }
770 
771 static __inline__ __m512i __DEFAULT_FN_ATTRS512
772 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
773 {
774  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
775  (__v16si)_mm512_xor_epi32(__a, __b),
776  (__v16si)__src);
777 }
778 
779 static __inline__ __m512i __DEFAULT_FN_ATTRS512
780 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
781 {
782  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
783 }
784 
785 static __inline__ __m512i __DEFAULT_FN_ATTRS512
786 _mm512_xor_epi64(__m512i __a, __m512i __b)
787 {
788  return (__m512i)((__v8du)__a ^ (__v8du)__b);
789 }
790 
791 static __inline__ __m512i __DEFAULT_FN_ATTRS512
792 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
793 {
794  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
795  (__v8di)_mm512_xor_epi64(__a, __b),
796  (__v8di)__src);
797 }
798 
799 static __inline__ __m512i __DEFAULT_FN_ATTRS512
800 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
801 {
802  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
803 }
804 
805 static __inline__ __m512i __DEFAULT_FN_ATTRS512
806 _mm512_and_si512(__m512i __a, __m512i __b)
807 {
808  return (__m512i)((__v8du)__a & (__v8du)__b);
809 }
810 
811 static __inline__ __m512i __DEFAULT_FN_ATTRS512
812 _mm512_or_si512(__m512i __a, __m512i __b)
813 {
814  return (__m512i)((__v8du)__a | (__v8du)__b);
815 }
816 
817 static __inline__ __m512i __DEFAULT_FN_ATTRS512
818 _mm512_xor_si512(__m512i __a, __m512i __b)
819 {
820  return (__m512i)((__v8du)__a ^ (__v8du)__b);
821 }
822 
823 /* Arithmetic */
824 
825 static __inline __m512d __DEFAULT_FN_ATTRS512
826 _mm512_add_pd(__m512d __a, __m512d __b)
827 {
828  return (__m512d)((__v8df)__a + (__v8df)__b);
829 }
830 
831 static __inline __m512 __DEFAULT_FN_ATTRS512
832 _mm512_add_ps(__m512 __a, __m512 __b)
833 {
834  return (__m512)((__v16sf)__a + (__v16sf)__b);
835 }
836 
837 static __inline __m512d __DEFAULT_FN_ATTRS512
838 _mm512_mul_pd(__m512d __a, __m512d __b)
839 {
840  return (__m512d)((__v8df)__a * (__v8df)__b);
841 }
842 
843 static __inline __m512 __DEFAULT_FN_ATTRS512
844 _mm512_mul_ps(__m512 __a, __m512 __b)
845 {
846  return (__m512)((__v16sf)__a * (__v16sf)__b);
847 }
848 
849 static __inline __m512d __DEFAULT_FN_ATTRS512
850 _mm512_sub_pd(__m512d __a, __m512d __b)
851 {
852  return (__m512d)((__v8df)__a - (__v8df)__b);
853 }
854 
855 static __inline __m512 __DEFAULT_FN_ATTRS512
856 _mm512_sub_ps(__m512 __a, __m512 __b)
857 {
858  return (__m512)((__v16sf)__a - (__v16sf)__b);
859 }
860 
861 static __inline__ __m512i __DEFAULT_FN_ATTRS512
862 _mm512_add_epi64 (__m512i __A, __m512i __B)
863 {
864  return (__m512i) ((__v8du) __A + (__v8du) __B);
865 }
866 
867 static __inline__ __m512i __DEFAULT_FN_ATTRS512
868 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
869 {
870  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
871  (__v8di)_mm512_add_epi64(__A, __B),
872  (__v8di)__W);
873 }
874 
875 static __inline__ __m512i __DEFAULT_FN_ATTRS512
876 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
877 {
878  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
879  (__v8di)_mm512_add_epi64(__A, __B),
880  (__v8di)_mm512_setzero_si512());
881 }
882 
883 static __inline__ __m512i __DEFAULT_FN_ATTRS512
884 _mm512_sub_epi64 (__m512i __A, __m512i __B)
885 {
886  return (__m512i) ((__v8du) __A - (__v8du) __B);
887 }
888 
889 static __inline__ __m512i __DEFAULT_FN_ATTRS512
890 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
891 {
892  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
893  (__v8di)_mm512_sub_epi64(__A, __B),
894  (__v8di)__W);
895 }
896 
897 static __inline__ __m512i __DEFAULT_FN_ATTRS512
898 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
899 {
900  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
901  (__v8di)_mm512_sub_epi64(__A, __B),
902  (__v8di)_mm512_setzero_si512());
903 }
904 
905 static __inline__ __m512i __DEFAULT_FN_ATTRS512
906 _mm512_add_epi32 (__m512i __A, __m512i __B)
907 {
908  return (__m512i) ((__v16su) __A + (__v16su) __B);
909 }
910 
911 static __inline__ __m512i __DEFAULT_FN_ATTRS512
912 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
913 {
914  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
915  (__v16si)_mm512_add_epi32(__A, __B),
916  (__v16si)__W);
917 }
918 
919 static __inline__ __m512i __DEFAULT_FN_ATTRS512
920 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
921 {
922  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
923  (__v16si)_mm512_add_epi32(__A, __B),
924  (__v16si)_mm512_setzero_si512());
925 }
926 
927 static __inline__ __m512i __DEFAULT_FN_ATTRS512
928 _mm512_sub_epi32 (__m512i __A, __m512i __B)
929 {
930  return (__m512i) ((__v16su) __A - (__v16su) __B);
931 }
932 
933 static __inline__ __m512i __DEFAULT_FN_ATTRS512
934 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
935 {
936  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
937  (__v16si)_mm512_sub_epi32(__A, __B),
938  (__v16si)__W);
939 }
940 
941 static __inline__ __m512i __DEFAULT_FN_ATTRS512
942 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
943 {
944  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
945  (__v16si)_mm512_sub_epi32(__A, __B),
946  (__v16si)_mm512_setzero_si512());
947 }
948 
/* Lane-wise maximum of packed doubles with explicit rounding/exception
   control R (an _MM_FROUND_* value). */
#define _mm512_max_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

/* Merge-masked form: lanes with a clear bit in U come from W. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                       (__v8df)(W))

/* Zero-masked form: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())
962 
963 static __inline__ __m512d __DEFAULT_FN_ATTRS512
964 _mm512_max_pd(__m512d __A, __m512d __B)
965 {
966  return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
968 }
969 
970 static __inline__ __m512d __DEFAULT_FN_ATTRS512
971 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
972 {
973  return (__m512d)__builtin_ia32_selectpd_512(__U,
974  (__v8df)_mm512_max_pd(__A, __B),
975  (__v8df)__W);
976 }
977 
978 static __inline__ __m512d __DEFAULT_FN_ATTRS512
979 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
980 {
981  return (__m512d)__builtin_ia32_selectpd_512(__U,
982  (__v8df)_mm512_max_pd(__A, __B),
983  (__v8df)_mm512_setzero_pd());
984 }
985 
/* Lane-wise maximum of packed floats with explicit rounding/exception
   control R (an _MM_FROUND_* value). */
#define _mm512_max_round_ps(A, B, R) \
  (__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

/* Merge-masked form: lanes with a clear bit in U come from W. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                      (__v16sf)(W))

/* Zero-masked form: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
999 
1000 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1001 _mm512_max_ps(__m512 __A, __m512 __B)
1002 {
1003  return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
1005 }
1006 
1007 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1008 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1009 {
1010  return (__m512)__builtin_ia32_selectps_512(__U,
1011  (__v16sf)_mm512_max_ps(__A, __B),
1012  (__v16sf)__W);
1013 }
1014 
1015 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1016 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1017 {
1018  return (__m512)__builtin_ia32_selectps_512(__U,
1019  (__v16sf)_mm512_max_ps(__A, __B),
1020  (__v16sf)_mm512_setzero_ps());
1021 }
1022 
1023 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1024 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1025  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1026  (__v4sf) __B,
1027  (__v4sf) __W,
1028  (__mmask8) __U,
1030 }
1031 
1032 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1033 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1034  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1035  (__v4sf) __B,
1036  (__v4sf) _mm_setzero_ps (),
1037  (__mmask8) __U,
1039 }
1040 
/* Scalar float maximum (lane 0) with explicit rounding/exception control
   R; all-ones mask, so no blending occurs. */
#define _mm_max_round_ss(A, B, R) \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

/* Merge-masked form: lane 0 falls back to W when bit 0 of U is clear. */
#define _mm_mask_max_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

/* Zero-masked form: lane 0 is zeroed when bit 0 of U is clear. */
#define _mm_maskz_max_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
1058 
1059 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1060 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1061  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1062  (__v2df) __B,
1063  (__v2df) __W,
1064  (__mmask8) __U,
1066 }
1067 
1068 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1069 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1070  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1071  (__v2df) __B,
1072  (__v2df) _mm_setzero_pd (),
1073  (__mmask8) __U,
1075 }
1076 
/* Scalar double maximum (lane 0) with explicit rounding/exception control
   R; all-ones mask, so no blending occurs. */
#define _mm_max_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

/* Merge-masked form: lane 0 falls back to W when bit 0 of U is clear. */
#define _mm_mask_max_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

/* Zero-masked form: lane 0 is zeroed when bit 0 of U is clear. */
#define _mm_maskz_max_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
1094 
1095 static __inline __m512i
1097 _mm512_max_epi32(__m512i __A, __m512i __B)
1098 {
1099  return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
1100 }
1101 
1102 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1103 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1104 {
1105  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1106  (__v16si)_mm512_max_epi32(__A, __B),
1107  (__v16si)__W);
1108 }
1109 
1110 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1111 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1112 {
1113  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1114  (__v16si)_mm512_max_epi32(__A, __B),
1115  (__v16si)_mm512_setzero_si512());
1116 }
1117 
1118 static __inline __m512i __DEFAULT_FN_ATTRS512
1119 _mm512_max_epu32(__m512i __A, __m512i __B)
1120 {
1121  return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
1122 }
1123 
1124 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1125 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1126 {
1127  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1128  (__v16si)_mm512_max_epu32(__A, __B),
1129  (__v16si)__W);
1130 }
1131 
1132 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1133 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1134 {
1135  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1136  (__v16si)_mm512_max_epu32(__A, __B),
1137  (__v16si)_mm512_setzero_si512());
1138 }
1139 
1140 static __inline __m512i __DEFAULT_FN_ATTRS512
1141 _mm512_max_epi64(__m512i __A, __m512i __B)
1142 {
1143  return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
1144 }
1145 
1146 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1147 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1148 {
1149  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1150  (__v8di)_mm512_max_epi64(__A, __B),
1151  (__v8di)__W);
1152 }
1153 
1154 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1155 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1156 {
1157  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1158  (__v8di)_mm512_max_epi64(__A, __B),
1159  (__v8di)_mm512_setzero_si512());
1160 }
1161 
1162 static __inline __m512i __DEFAULT_FN_ATTRS512
1163 _mm512_max_epu64(__m512i __A, __m512i __B)
1164 {
1165  return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
1166 }
1167 
1168 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1169 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1170 {
1171  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1172  (__v8di)_mm512_max_epu64(__A, __B),
1173  (__v8di)__W);
1174 }
1175 
1176 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1177 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1178 {
1179  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1180  (__v8di)_mm512_max_epu64(__A, __B),
1181  (__v8di)_mm512_setzero_si512());
1182 }
1183 
/* Lane-wise minimum of packed doubles with explicit rounding/exception
   control R (an _MM_FROUND_* value). */
#define _mm512_min_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

/* Merge-masked form: lanes with a clear bit in U come from W. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                       (__v8df)(W))

/* Zero-masked form: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())
1197 
1198 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1199 _mm512_min_pd(__m512d __A, __m512d __B)
1200 {
1201  return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1203 }
1204 
1205 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1206 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1207 {
1208  return (__m512d)__builtin_ia32_selectpd_512(__U,
1209  (__v8df)_mm512_min_pd(__A, __B),
1210  (__v8df)__W);
1211 }
1212 
1213 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1214 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1215 {
1216  return (__m512d)__builtin_ia32_selectpd_512(__U,
1217  (__v8df)_mm512_min_pd(__A, __B),
1218  (__v8df)_mm512_setzero_pd());
1219 }
1220 
/* Lane-wise minimum of packed floats with explicit rounding/exception
   control R (an _MM_FROUND_* value). */
#define _mm512_min_round_ps(A, B, R) \
  (__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

/* Merge-masked form: lanes with a clear bit in U come from W. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                      (__v16sf)(W))

/* Zero-masked form: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
1234 
1235 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1236 _mm512_min_ps(__m512 __A, __m512 __B)
1237 {
1238  return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1240 }
1241 
1242 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1243 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1244 {
1245  return (__m512)__builtin_ia32_selectps_512(__U,
1246  (__v16sf)_mm512_min_ps(__A, __B),
1247  (__v16sf)__W);
1248 }
1249 
1250 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1251 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1252 {
1253  return (__m512)__builtin_ia32_selectps_512(__U,
1254  (__v16sf)_mm512_min_ps(__A, __B),
1255  (__v16sf)_mm512_setzero_ps());
1256 }
1257 
1258 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1259 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1260  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1261  (__v4sf) __B,
1262  (__v4sf) __W,
1263  (__mmask8) __U,
1265 }
1266 
1267 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1268 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1269  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1270  (__v4sf) __B,
1271  (__v4sf) _mm_setzero_ps (),
1272  (__mmask8) __U,
1274 }
1275 
/* Scalar float minimum (lane 0) with explicit rounding/exception control
   R; all-ones mask, so no blending occurs. */
#define _mm_min_round_ss(A, B, R) \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

/* Merge-masked form: lane 0 falls back to W when bit 0 of U is clear. */
#define _mm_mask_min_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

/* Zero-masked form: lane 0 is zeroed when bit 0 of U is clear. */
#define _mm_maskz_min_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
1293 
1294 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1295 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1296  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1297  (__v2df) __B,
1298  (__v2df) __W,
1299  (__mmask8) __U,
1301 }
1302 
1303 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1304 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1305  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1306  (__v2df) __B,
1307  (__v2df) _mm_setzero_pd (),
1308  (__mmask8) __U,
1310 }
1311 
/* Scalar double minimum (lane 0) with explicit rounding/exception control
   R; all-ones mask, so no blending occurs. */
#define _mm_min_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

/* Merge-masked form: lane 0 falls back to W when bit 0 of U is clear. */
#define _mm_mask_min_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

/* Zero-masked form: lane 0 is zeroed when bit 0 of U is clear. */
#define _mm_maskz_min_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
1329 
1330 static __inline __m512i
1332 _mm512_min_epi32(__m512i __A, __m512i __B)
1333 {
1334  return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
1335 }
1336 
1337 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1338 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1339 {
1340  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1341  (__v16si)_mm512_min_epi32(__A, __B),
1342  (__v16si)__W);
1343 }
1344 
1345 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1346 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1347 {
1348  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1349  (__v16si)_mm512_min_epi32(__A, __B),
1350  (__v16si)_mm512_setzero_si512());
1351 }
1352 
1353 static __inline __m512i __DEFAULT_FN_ATTRS512
1354 _mm512_min_epu32(__m512i __A, __m512i __B)
1355 {
1356  return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
1357 }
1358 
1359 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1360 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1361 {
1362  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1363  (__v16si)_mm512_min_epu32(__A, __B),
1364  (__v16si)__W);
1365 }
1366 
1367 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1368 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1369 {
1370  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1371  (__v16si)_mm512_min_epu32(__A, __B),
1372  (__v16si)_mm512_setzero_si512());
1373 }
1374 
1375 static __inline __m512i __DEFAULT_FN_ATTRS512
1376 _mm512_min_epi64(__m512i __A, __m512i __B)
1377 {
1378  return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
1379 }
1380 
1381 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1382 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1383 {
1384  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1385  (__v8di)_mm512_min_epi64(__A, __B),
1386  (__v8di)__W);
1387 }
1388 
1389 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1390 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1391 {
1392  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1393  (__v8di)_mm512_min_epi64(__A, __B),
1394  (__v8di)_mm512_setzero_si512());
1395 }
1396 
1397 static __inline __m512i __DEFAULT_FN_ATTRS512
1398 _mm512_min_epu64(__m512i __A, __m512i __B)
1399 {
1400  return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
1401 }
1402 
1403 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1404 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1405 {
1406  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1407  (__v8di)_mm512_min_epu64(__A, __B),
1408  (__v8di)__W);
1409 }
1410 
1411 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1412 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1413 {
1414  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1415  (__v8di)_mm512_min_epu64(__A, __B),
1416  (__v8di)_mm512_setzero_si512());
1417 }
1418 
1419 static __inline __m512i __DEFAULT_FN_ATTRS512
1420 _mm512_mul_epi32(__m512i __X, __m512i __Y)
1421 {
1422  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1423 }
1424 
1425 static __inline __m512i __DEFAULT_FN_ATTRS512
1426 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1427 {
1428  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1429  (__v8di)_mm512_mul_epi32(__X, __Y),
1430  (__v8di)__W);
1431 }
1432 
1433 static __inline __m512i __DEFAULT_FN_ATTRS512
1434 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1435 {
1436  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1437  (__v8di)_mm512_mul_epi32(__X, __Y),
1438  (__v8di)_mm512_setzero_si512 ());
1439 }
1440 
1441 static __inline __m512i __DEFAULT_FN_ATTRS512
1442 _mm512_mul_epu32(__m512i __X, __m512i __Y)
1443 {
1444  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1445 }
1446 
1447 static __inline __m512i __DEFAULT_FN_ATTRS512
1448 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1449 {
1450  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1451  (__v8di)_mm512_mul_epu32(__X, __Y),
1452  (__v8di)__W);
1453 }
1454 
1455 static __inline __m512i __DEFAULT_FN_ATTRS512
1456 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1457 {
1458  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1459  (__v8di)_mm512_mul_epu32(__X, __Y),
1460  (__v8di)_mm512_setzero_si512 ());
1461 }
1462 
1463 static __inline __m512i __DEFAULT_FN_ATTRS512
1464 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
1465 {
1466  return (__m512i) ((__v16su) __A * (__v16su) __B);
1467 }
1468 
1469 static __inline __m512i __DEFAULT_FN_ATTRS512
1470 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
1471 {
1472  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1473  (__v16si)_mm512_mullo_epi32(__A, __B),
1474  (__v16si)_mm512_setzero_si512());
1475 }
1476 
1477 static __inline __m512i __DEFAULT_FN_ATTRS512
1478 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1479 {
1480  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1481  (__v16si)_mm512_mullo_epi32(__A, __B),
1482  (__v16si)__W);
1483 }
1484 
1485 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1486 _mm512_mullox_epi64 (__m512i __A, __m512i __B) {
1487  return (__m512i) ((__v8du) __A * (__v8du) __B);
1488 }
1489 
1490 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1491 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1492  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1493  (__v8di)_mm512_mullox_epi64(__A, __B),
1494  (__v8di)__W);
1495 }
1496 
/* Square root of packed doubles with explicit rounding control R
   (an _MM_FROUND_* value). */
#define _mm512_sqrt_round_pd(A, R) \
  (__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R))

/* Merge-masked form: lanes with a clear bit in U come from W. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)(__m512d)(W))

/* Zero-masked form: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)_mm512_setzero_pd())
1509 
1510 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1511 _mm512_sqrt_pd(__m512d __A)
1512 {
1513  return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1515 }
1516 
1517 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1518 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1519 {
1520  return (__m512d)__builtin_ia32_selectpd_512(__U,
1521  (__v8df)_mm512_sqrt_pd(__A),
1522  (__v8df)__W);
1523 }
1524 
1525 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1526 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1527 {
1528  return (__m512d)__builtin_ia32_selectpd_512(__U,
1529  (__v8df)_mm512_sqrt_pd(__A),
1530  (__v8df)_mm512_setzero_pd());
1531 }
1532 
/* Square root of packed floats with explicit rounding control R
   (an _MM_FROUND_* value). */
#define _mm512_sqrt_round_ps(A, R) \
  (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R))

/* Merge-masked form: lanes with a clear bit in U come from W. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)(__m512)(W))

/* Zero-masked form: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
1545 
1546 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1547 _mm512_sqrt_ps(__m512 __A)
1548 {
1549  return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1551 }
1552 
1553 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1554 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1555 {
1556  return (__m512)__builtin_ia32_selectps_512(__U,
1557  (__v16sf)_mm512_sqrt_ps(__A),
1558  (__v16sf)__W);
1559 }
1560 
1561 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1562 _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1563 {
1564  return (__m512)__builtin_ia32_selectps_512(__U,
1565  (__v16sf)_mm512_sqrt_ps(__A),
1566  (__v16sf)_mm512_setzero_ps());
1567 }
1568 
1569 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1570 _mm512_rsqrt14_pd(__m512d __A)
1571 {
1572  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1573  (__v8df)
1574  _mm512_setzero_pd (),
1575  (__mmask8) -1);}
1576 
1577 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1578 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1579 {
1580  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1581  (__v8df) __W,
1582  (__mmask8) __U);
1583 }
1584 
1585 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1586 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1587 {
1588  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1589  (__v8df)
1590  _mm512_setzero_pd (),
1591  (__mmask8) __U);
1592 }
1593 
1594 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1596 {
1597  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1598  (__v16sf)
1599  _mm512_setzero_ps (),
1600  (__mmask16) -1);
1601 }
1602 
1603 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1604 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1605 {
1606  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1607  (__v16sf) __W,
1608  (__mmask16) __U);
1609 }
1610 
1611 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1612 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1613 {
1614  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1615  (__v16sf)
1616  _mm512_setzero_ps (),
1617  (__mmask16) __U);
1618 }
1619 
1620 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1621 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
1622 {
1623  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1624  (__v4sf) __B,
1625  (__v4sf)
1626  _mm_setzero_ps (),
1627  (__mmask8) -1);
1628 }
1629 
1630 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1631 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1632 {
1633  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1634  (__v4sf) __B,
1635  (__v4sf) __W,
1636  (__mmask8) __U);
1637 }
1638 
1639 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1640 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1641 {
1642  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1643  (__v4sf) __B,
1644  (__v4sf) _mm_setzero_ps (),
1645  (__mmask8) __U);
1646 }
1647 
1648 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1649 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
1650 {
1651  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1652  (__v2df) __B,
1653  (__v2df)
1654  _mm_setzero_pd (),
1655  (__mmask8) -1);
1656 }
1657 
1658 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1659 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1660 {
1661  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1662  (__v2df) __B,
1663  (__v2df) __W,
1664  (__mmask8) __U);
1665 }
1666 
1667 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1668 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1669 {
1670  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1671  (__v2df) __B,
1672  (__v2df) _mm_setzero_pd (),
1673  (__mmask8) __U);
1674 }
1675 
1676 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1677 _mm512_rcp14_pd(__m512d __A)
1678 {
1679  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1680  (__v8df)
1681  _mm512_setzero_pd (),
1682  (__mmask8) -1);
1683 }
1684 
1685 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1686 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1687 {
1688  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1689  (__v8df) __W,
1690  (__mmask8) __U);
1691 }
1692 
1693 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1694 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1695 {
1696  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1697  (__v8df)
1698  _mm512_setzero_pd (),
1699  (__mmask8) __U);
1700 }
1701 
1702 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1703 _mm512_rcp14_ps(__m512 __A)
1704 {
1705  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1706  (__v16sf)
1707  _mm512_setzero_ps (),
1708  (__mmask16) -1);
1709 }
1710 
1711 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1712 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1713 {
1714  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1715  (__v16sf) __W,
1716  (__mmask16) __U);
1717 }
1718 
1719 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1720 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1721 {
1722  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1723  (__v16sf)
1724  _mm512_setzero_ps (),
1725  (__mmask16) __U);
1726 }
1727 
1728 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1729 _mm_rcp14_ss(__m128 __A, __m128 __B)
1730 {
1731  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1732  (__v4sf) __B,
1733  (__v4sf)
1734  _mm_setzero_ps (),
1735  (__mmask8) -1);
1736 }
1737 
1738 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1739 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1740 {
1741  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1742  (__v4sf) __B,
1743  (__v4sf) __W,
1744  (__mmask8) __U);
1745 }
1746 
1747 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1748 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1749 {
1750  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1751  (__v4sf) __B,
1752  (__v4sf) _mm_setzero_ps (),
1753  (__mmask8) __U);
1754 }
1755 
1756 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1757 _mm_rcp14_sd(__m128d __A, __m128d __B)
1758 {
1759  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1760  (__v2df) __B,
1761  (__v2df)
1762  _mm_setzero_pd (),
1763  (__mmask8) -1);
1764 }
1765 
1766 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1767 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1768 {
1769  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1770  (__v2df) __B,
1771  (__v2df) __W,
1772  (__mmask8) __U);
1773 }
1774 
1775 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1776 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1777 {
1778  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1779  (__v2df) __B,
1780  (__v2df) _mm_setzero_pd (),
1781  (__mmask8) __U);
1782 }
1783 
1784 static __inline __m512 __DEFAULT_FN_ATTRS512
1785 _mm512_floor_ps(__m512 __A)
1786 {
1787  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1789  (__v16sf) __A, -1,
1791 }
1792 
1793 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1794 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1795 {
1796  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1798  (__v16sf) __W, __U,
1800 }
1801 
1802 static __inline __m512d __DEFAULT_FN_ATTRS512
1803 _mm512_floor_pd(__m512d __A)
1804 {
1805  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1807  (__v8df) __A, -1,
1809 }
1810 
1811 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1812 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1813 {
1814  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1816  (__v8df) __W, __U,
1818 }
1819 
1820 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1821 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1822 {
1823  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1825  (__v16sf) __W, __U,
1827 }
1828 
1829 static __inline __m512 __DEFAULT_FN_ATTRS512
1830 _mm512_ceil_ps(__m512 __A)
1831 {
1832  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1834  (__v16sf) __A, -1,
1836 }
1837 
1838 static __inline __m512d __DEFAULT_FN_ATTRS512
1839 _mm512_ceil_pd(__m512d __A)
1840 {
1841  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1843  (__v8df) __A, -1,
1845 }
1846 
1847 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1848 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1849 {
1850  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1852  (__v8df) __W, __U,
1854 }
1855 
1856 static __inline __m512i __DEFAULT_FN_ATTRS512
1857 _mm512_abs_epi64(__m512i __A)
1858 {
1859  return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
1860 }
1861 
1862 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1863 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1864 {
1865  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1866  (__v8di)_mm512_abs_epi64(__A),
1867  (__v8di)__W);
1868 }
1869 
1870 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1871 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1872 {
1873  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1874  (__v8di)_mm512_abs_epi64(__A),
1875  (__v8di)_mm512_setzero_si512());
1876 }
1877 
1878 static __inline __m512i __DEFAULT_FN_ATTRS512
1879 _mm512_abs_epi32(__m512i __A)
1880 {
1881  return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
1882 }
1883 
1884 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1885 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1886 {
1887  return (__m512i)__builtin_ia32_selectd_512(__U,
1888  (__v16si)_mm512_abs_epi32(__A),
1889  (__v16si)__W);
1890 }
1891 
1892 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1893 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
1894 {
1895  return (__m512i)__builtin_ia32_selectd_512(__U,
1896  (__v16si)_mm512_abs_epi32(__A),
1897  (__v16si)_mm512_setzero_si512());
1898 }
1899 
1900 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1901 _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1902  __A = _mm_add_ss(__A, __B);
1903  return __builtin_ia32_selectss_128(__U, __A, __W);
1904 }
1905 
1906 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1907 _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1908  __A = _mm_add_ss(__A, __B);
1909  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1910 }
1911 
/* Low-lane single-precision add with an explicit rounding mode R;
   unmasked form (all-ones writemask). */
#define _mm_add_round_ss(A, B, R) \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

/* Merge-masked form: when bit 0 of U is clear, the low lane comes from W. */
#define _mm_mask_add_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

/* Zero-masked form: when bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_add_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
1929 
1930 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1931 _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1932  __A = _mm_add_sd(__A, __B);
1933  return __builtin_ia32_selectsd_128(__U, __A, __W);
1934 }
1935 
1936 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1937 _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1938  __A = _mm_add_sd(__A, __B);
1939  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1940 }
/* Low-lane double-precision add with an explicit rounding mode R;
   unmasked form (all-ones writemask). */
#define _mm_add_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

/* Merge-masked form: when bit 0 of U is clear, the low lane comes from W. */
#define _mm_mask_add_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

/* Zero-masked form: when bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_add_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
1958 
1959 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1960 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1961  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1962  (__v8df)_mm512_add_pd(__A, __B),
1963  (__v8df)__W);
1964 }
1965 
1966 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1967 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1968  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1969  (__v8df)_mm512_add_pd(__A, __B),
1970  (__v8df)_mm512_setzero_pd());
1971 }
1972 
1973 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1974 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1975  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1976  (__v16sf)_mm512_add_ps(__A, __B),
1977  (__v16sf)__W);
1978 }
1979 
1980 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1981 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1982  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1983  (__v16sf)_mm512_add_ps(__A, __B),
1984  (__v16sf)_mm512_setzero_ps());
1985 }
1986 
/* 512-bit packed add with an explicit rounding mode R. */
#define _mm512_add_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

/* Merge-masked form; W supplies lanes whose bit in U is clear.
   Fixed: the expansion previously ended in a stray ';', which made the
   macro unusable inside a larger expression (e.g. as a call argument). */
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

/* Zero-masked form; lanes whose bit in U is clear are zeroed. */
#define _mm512_maskz_add_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_add_round_ps(A, B, R) \
  (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

/* Merge-masked form (stray trailing ';' removed, as above). */
#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

/* Zero-masked form (stray trailing ';' removed, as above). */
#define _mm512_maskz_add_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
2014 
2015 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2016 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2017  __A = _mm_sub_ss(__A, __B);
2018  return __builtin_ia32_selectss_128(__U, __A, __W);
2019 }
2020 
2021 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2022 _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2023  __A = _mm_sub_ss(__A, __B);
2024  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2025 }
/* Low-lane single-precision subtract with an explicit rounding mode R;
   unmasked form (all-ones writemask). */
#define _mm_sub_round_ss(A, B, R) \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

/* Merge-masked form: when bit 0 of U is clear, the low lane comes from W. */
#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

/* Zero-masked form: when bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_sub_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
2043 
2044 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2045 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2046  __A = _mm_sub_sd(__A, __B);
2047  return __builtin_ia32_selectsd_128(__U, __A, __W);
2048 }
2049 
2050 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2051 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2052  __A = _mm_sub_sd(__A, __B);
2053  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2054 }
2055 
/* Low-lane double-precision subtract with an explicit rounding mode R;
   unmasked form (all-ones writemask). */
#define _mm_sub_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

/* Merge-masked form: when bit 0 of U is clear, the low lane comes from W. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

/* Zero-masked form: when bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_sub_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
2073 
2074 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2075 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2076  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2077  (__v8df)_mm512_sub_pd(__A, __B),
2078  (__v8df)__W);
2079 }
2080 
2081 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2082 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2083  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2084  (__v8df)_mm512_sub_pd(__A, __B),
2085  (__v8df)_mm512_setzero_pd());
2086 }
2087 
2088 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2089 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2090  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2091  (__v16sf)_mm512_sub_ps(__A, __B),
2092  (__v16sf)__W);
2093 }
2094 
2095 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2096 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2097  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2098  (__v16sf)_mm512_sub_ps(__A, __B),
2099  (__v16sf)_mm512_setzero_ps());
2100 }
2101 
/* 512-bit packed subtract with an explicit rounding mode R. */
#define _mm512_sub_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

/* Merge-masked form; W supplies lanes whose bit in U is clear.
   Fixed: the expansion previously ended in a stray ';', which made the
   macro unusable inside a larger expression. */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

/* Zero-masked form; lanes whose bit in U is clear are zeroed. */
#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_sub_round_ps(A, B, R) \
  (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

/* Merge-masked form (stray trailing ';' removed, as above). */
#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

/* Zero-masked form (stray trailing ';' removed, as above). */
#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
2129 
2130 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2131 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2132  __A = _mm_mul_ss(__A, __B);
2133  return __builtin_ia32_selectss_128(__U, __A, __W);
2134 }
2135 
2136 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2137 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2138  __A = _mm_mul_ss(__A, __B);
2139  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2140 }
/* Low-lane single-precision multiply with an explicit rounding mode R;
   unmasked form (all-ones writemask). */
#define _mm_mul_round_ss(A, B, R) \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

/* Merge-masked form: when bit 0 of U is clear, the low lane comes from W. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

/* Zero-masked form: when bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_mul_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
2158 
2159 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2160 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2161  __A = _mm_mul_sd(__A, __B);
2162  return __builtin_ia32_selectsd_128(__U, __A, __W);
2163 }
2164 
2165 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2166 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2167  __A = _mm_mul_sd(__A, __B);
2168  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2169 }
2170 
/* Low-lane double-precision multiply with an explicit rounding mode R;
   unmasked form (all-ones writemask). */
#define _mm_mul_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

/* Merge-masked form: when bit 0 of U is clear, the low lane comes from W. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

/* Zero-masked form: when bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_mul_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
2188 
2189 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2190 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2191  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2192  (__v8df)_mm512_mul_pd(__A, __B),
2193  (__v8df)__W);
2194 }
2195 
2196 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2197 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2198  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2199  (__v8df)_mm512_mul_pd(__A, __B),
2200  (__v8df)_mm512_setzero_pd());
2201 }
2202 
2203 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2204 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2205  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2206  (__v16sf)_mm512_mul_ps(__A, __B),
2207  (__v16sf)__W);
2208 }
2209 
2210 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2211 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2212  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2213  (__v16sf)_mm512_mul_ps(__A, __B),
2214  (__v16sf)_mm512_setzero_ps());
2215 }
2216 
/* 512-bit packed multiply with an explicit rounding mode R. */
#define _mm512_mul_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

/* Merge-masked form; W supplies lanes whose bit in U is clear.
   Fixed: the expansion previously ended in a stray ';', which made the
   macro unusable inside a larger expression. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

/* Zero-masked form; lanes whose bit in U is clear are zeroed. */
#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_mul_round_ps(A, B, R) \
  (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

/* Merge-masked form (stray trailing ';' removed, as above). */
#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

/* Zero-masked form (stray trailing ';' removed, as above). */
#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
2244 
2245 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2246 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2247  __A = _mm_div_ss(__A, __B);
2248  return __builtin_ia32_selectss_128(__U, __A, __W);
2249 }
2250 
2251 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2252 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2253  __A = _mm_div_ss(__A, __B);
2254  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2255 }
2256 
/* Low-lane single-precision divide with an explicit rounding mode R;
   unmasked form (all-ones writemask). */
#define _mm_div_round_ss(A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

/* Merge-masked form: when bit 0 of U is clear, the low lane comes from W. */
#define _mm_mask_div_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

/* Zero-masked form: when bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_div_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
2274 
2275 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2276 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2277  __A = _mm_div_sd(__A, __B);
2278  return __builtin_ia32_selectsd_128(__U, __A, __W);
2279 }
2280 
2281 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2282 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2283  __A = _mm_div_sd(__A, __B);
2284  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2285 }
2286 
/* Low-lane double-precision divide with an explicit rounding mode R;
   unmasked form (all-ones writemask). */
#define _mm_div_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

/* Merge-masked form: when bit 0 of U is clear, the low lane comes from W. */
#define _mm_mask_div_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

/* Zero-masked form: when bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_div_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
2304 
2305 static __inline __m512d __DEFAULT_FN_ATTRS512
2306 _mm512_div_pd(__m512d __a, __m512d __b)
2307 {
2308  return (__m512d)((__v8df)__a/(__v8df)__b);
2309 }
2310 
2311 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2312 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2313  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2314  (__v8df)_mm512_div_pd(__A, __B),
2315  (__v8df)__W);
2316 }
2317 
2318 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2319 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2320  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2321  (__v8df)_mm512_div_pd(__A, __B),
2322  (__v8df)_mm512_setzero_pd());
2323 }
2324 
2325 static __inline __m512 __DEFAULT_FN_ATTRS512
2326 _mm512_div_ps(__m512 __a, __m512 __b)
2327 {
2328  return (__m512)((__v16sf)__a/(__v16sf)__b);
2329 }
2330 
2331 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2332 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2333  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2334  (__v16sf)_mm512_div_ps(__A, __B),
2335  (__v16sf)__W);
2336 }
2337 
2338 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2339 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2340  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2341  (__v16sf)_mm512_div_ps(__A, __B),
2342  (__v16sf)_mm512_setzero_ps());
2343 }
2344 
/* 512-bit packed divide with an explicit rounding mode R. */
#define _mm512_div_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

/* Merge-masked form; W supplies lanes whose bit in U is clear.
   Fixed: the expansion previously ended in a stray ';', which made the
   macro unusable inside a larger expression. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

/* Zero-masked form; lanes whose bit in U is clear are zeroed. */
#define _mm512_maskz_div_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_div_round_ps(A, B, R) \
  (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

/* Merge-masked form (stray trailing ';' removed, as above). */
#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

/* Zero-masked form (stray trailing ';' removed, as above). */
#define _mm512_maskz_div_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
2372 
/* Round each float lane of A using the imm8 control B (VRNDSCALEPS);
   unmasked, current rounding direction. */
#define _mm512_roundscale_ps(A, B) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION)

/* Merge-masked roundscale.  Note the parameter roles here: A = passthrough
   source, B = writemask, C = operand, imm = rounding control. */
#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         _MM_FROUND_CUR_DIRECTION)

/* Zero-masked roundscale: A = writemask, B = operand. */
#define _mm512_maskz_roundscale_ps(A, B, imm) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), \
                                         _MM_FROUND_CUR_DIRECTION)

/* Same three forms, but with an explicit rounding/SAE argument R. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         (int)(R))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), (int)(R))

#define _mm512_roundscale_round_ps(A, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, (int)(R))
2404 
/* Round each double lane of A using the imm8 control B (VRNDSCALEPD);
   unmasked, current rounding direction. */
#define _mm512_roundscale_pd(A, B) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION)

/* Merge-masked roundscale.  Note the parameter roles here: A = passthrough
   source, B = writemask, C = operand, imm = rounding control. */
#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          _MM_FROUND_CUR_DIRECTION)

/* Zero-masked roundscale: A = writemask, B = operand. */
#define _mm512_maskz_roundscale_pd(A, B, imm) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), \
                                          _MM_FROUND_CUR_DIRECTION)

/* Same three forms, but with an explicit rounding/SAE argument R. */
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          (int)(R))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), (int)(R))

#define _mm512_roundscale_round_pd(A, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, (int)(R))
2436 
/* Fused multiply-add family on packed doubles with an explicit rounding
   mode R.  All variants lower to the same vfmaddpd512 builtins; the
   fmsub/fnmadd/fnmsub forms are expressed by negating the C and/or A
   operand before the call. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))


/* Merge-masked: lanes with a clear bit in U keep A. */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))


/* mask3 form: lanes with a clear bit in U keep C. */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))


/* Zero-masked: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))


/* fmsub = fmadd with C negated: A*B - C. */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))


#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))


#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))


/* fnmadd = fmadd with A negated: -(A*B) + C. */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))


#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))


#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))


/* fnmsub = both A and C negated: -(A*B) - C. */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))


#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))
2519 
2520 
2521 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2522 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2523 {
2524  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2525  (__v8df) __B,
2526  (__v8df) __C,
2527  (__mmask8) -1,
2529 }
2530 
2531 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2532 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2533 {
2534  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2535  (__v8df) __B,
2536  (__v8df) __C,
2537  (__mmask8) __U,
2539 }
2540 
2541 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2542 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2543 {
2544  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2545  (__v8df) __B,
2546  (__v8df) __C,
2547  (__mmask8) __U,
2549 }
2550 
2551 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2552 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2553 {
2554  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2555  (__v8df) __B,
2556  (__v8df) __C,
2557  (__mmask8) __U,
2559 }
2560 
2561 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2562 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2563 {
2564  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2565  (__v8df) __B,
2566  -(__v8df) __C,
2567  (__mmask8) -1,
2569 }
2570 
2571 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2572 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2573 {
2574  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2575  (__v8df) __B,
2576  -(__v8df) __C,
2577  (__mmask8) __U,
2579 }
2580 
2581 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2582 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2583 {
2584  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2585  (__v8df) __B,
2586  -(__v8df) __C,
2587  (__mmask8) __U,
2589 }
2590 
2591 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2592 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2593 {
2594  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2595  -(__v8df) __B,
2596  (__v8df) __C,
2597  (__mmask8) -1,
2599 }
2600 
2601 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2602 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2603 {
2604  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2605  (__v8df) __B,
2606  (__v8df) __C,
2607  (__mmask8) __U,
2609 }
2610 
2611 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2612 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2613 {
2614  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2615  (__v8df) __B,
2616  (__v8df) __C,
2617  (__mmask8) __U,
2619 }
2620 
2621 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2622 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2623 {
2624  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2625  -(__v8df) __B,
2626  -(__v8df) __C,
2627  (__mmask8) -1,
2629 }
2630 
2631 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2632 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2633 {
2634  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2635  (__v8df) __B,
2636  -(__v8df) __C,
2637  (__mmask8) __U,
2639 }
2640 
/* Fused multiply-add family on packed floats with an explicit rounding
   mode R; same structure as the _pd macros above. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))


/* Merge-masked: lanes with a clear bit in U keep A. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))


/* mask3 form: lanes with a clear bit in U keep C. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))


/* Zero-masked: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))


/* fmsub = fmadd with C negated: A*B - C. */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))


#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))


#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))


/* fnmadd = fmadd with one multiplicand negated: -(A*B) + C. */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))


#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))


#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))


/* fnmsub: -(A*B) - C. */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))


#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))
2723 
2724 
2725 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2726 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2727 {
2728  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2729  (__v16sf) __B,
2730  (__v16sf) __C,
2731  (__mmask16) -1,
2733 }
2734 
2735 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2736 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2737 {
2738  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2739  (__v16sf) __B,
2740  (__v16sf) __C,
2741  (__mmask16) __U,
2743 }
2744 
2745 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2746 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2747 {
2748  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2749  (__v16sf) __B,
2750  (__v16sf) __C,
2751  (__mmask16) __U,
2753 }
2754 
2755 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2756 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2757 {
2758  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2759  (__v16sf) __B,
2760  (__v16sf) __C,
2761  (__mmask16) __U,
2763 }
2764 
2765 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2766 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2767 {
2768  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2769  (__v16sf) __B,
2770  -(__v16sf) __C,
2771  (__mmask16) -1,
2773 }
2774 
2775 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2776 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2777 {
2778  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2779  (__v16sf) __B,
2780  -(__v16sf) __C,
2781  (__mmask16) __U,
2783 }
2784 
2785 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2786 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2787 {
2788  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2789  (__v16sf) __B,
2790  -(__v16sf) __C,
2791  (__mmask16) __U,
2793 }
2794 
2795 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2796 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2797 {
2798  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2799  -(__v16sf) __B,
2800  (__v16sf) __C,
2801  (__mmask16) -1,
2803 }
2804 
2805 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2806 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2807 {
2808  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2809  (__v16sf) __B,
2810  (__v16sf) __C,
2811  (__mmask16) __U,
2813 }
2814 
2815 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2816 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2817 {
2818  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2819  (__v16sf) __B,
2820  (__v16sf) __C,
2821  (__mmask16) __U,
2823 }
2824 
2825 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2826 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2827 {
2828  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2829  -(__v16sf) __B,
2830  -(__v16sf) __C,
2831  (__mmask16) -1,
2833 }
2834 
2835 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2836 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2837 {
2838  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2839  (__v16sf) __B,
2840  -(__v16sf) __C,
2841  (__mmask16) __U,
2843 }
2844 
2845 #define _mm512_fmaddsub_round_pd(A, B, C, R) \
2846  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2847  (__v8df)(__m512d)(B), \
2848  (__v8df)(__m512d)(C), \
2849  (__mmask8)-1, (int)(R))
2850 
2851 
2852 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
2853  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2854  (__v8df)(__m512d)(B), \
2855  (__v8df)(__m512d)(C), \
2856  (__mmask8)(U), (int)(R))
2857 
2858 
2859 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
2860  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
2861  (__v8df)(__m512d)(B), \
2862  (__v8df)(__m512d)(C), \
2863  (__mmask8)(U), (int)(R))
2864 
2865 
2866 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
2867  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2868  (__v8df)(__m512d)(B), \
2869  (__v8df)(__m512d)(C), \
2870  (__mmask8)(U), (int)(R))
2871 
2872 
2873 #define _mm512_fmsubadd_round_pd(A, B, C, R) \
2874  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2875  (__v8df)(__m512d)(B), \
2876  -(__v8df)(__m512d)(C), \
2877  (__mmask8)-1, (int)(R))
2878 
2879 
2880 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
2881  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2882  (__v8df)(__m512d)(B), \
2883  -(__v8df)(__m512d)(C), \
2884  (__mmask8)(U), (int)(R))
2885 
2886 
2887 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
2888  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2889  (__v8df)(__m512d)(B), \
2890  -(__v8df)(__m512d)(C), \
2891  (__mmask8)(U), (int)(R))
2892 
2893 
2894 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2895 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2896 {
2897  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2898  (__v8df) __B,
2899  (__v8df) __C,
2900  (__mmask8) -1,
2902 }
2903 
2904 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2905 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2906 {
2907  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2908  (__v8df) __B,
2909  (__v8df) __C,
2910  (__mmask8) __U,
2912 }
2913 
2914 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2915 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2916 {
2917  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2918  (__v8df) __B,
2919  (__v8df) __C,
2920  (__mmask8) __U,
2922 }
2923 
2924 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2925 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2926 {
2927  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2928  (__v8df) __B,
2929  (__v8df) __C,
2930  (__mmask8) __U,
2932 }
2933 
2934 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2935 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2936 {
2937  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2938  (__v8df) __B,
2939  -(__v8df) __C,
2940  (__mmask8) -1,
2942 }
2943 
2944 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2945 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2946 {
2947  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2948  (__v8df) __B,
2949  -(__v8df) __C,
2950  (__mmask8) __U,
2952 }
2953 
2954 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2955 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2956 {
2957  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2958  (__v8df) __B,
2959  -(__v8df) __C,
2960  (__mmask8) __U,
2962 }
2963 
2964 #define _mm512_fmaddsub_round_ps(A, B, C, R) \
2965  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2966  (__v16sf)(__m512)(B), \
2967  (__v16sf)(__m512)(C), \
2968  (__mmask16)-1, (int)(R))
2969 
2970 
2971 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
2972  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2973  (__v16sf)(__m512)(B), \
2974  (__v16sf)(__m512)(C), \
2975  (__mmask16)(U), (int)(R))
2976 
2977 
2978 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
2979  (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
2980  (__v16sf)(__m512)(B), \
2981  (__v16sf)(__m512)(C), \
2982  (__mmask16)(U), (int)(R))
2983 
2984 
2985 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
2986  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
2987  (__v16sf)(__m512)(B), \
2988  (__v16sf)(__m512)(C), \
2989  (__mmask16)(U), (int)(R))
2990 
2991 
2992 #define _mm512_fmsubadd_round_ps(A, B, C, R) \
2993  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2994  (__v16sf)(__m512)(B), \
2995  -(__v16sf)(__m512)(C), \
2996  (__mmask16)-1, (int)(R))
2997 
2998 
2999 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
3000  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3001  (__v16sf)(__m512)(B), \
3002  -(__v16sf)(__m512)(C), \
3003  (__mmask16)(U), (int)(R))
3004 
3005 
3006 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
3007  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
3008  (__v16sf)(__m512)(B), \
3009  -(__v16sf)(__m512)(C), \
3010  (__mmask16)(U), (int)(R))
3011 
3012 
3013 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3014 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3015 {
3016  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3017  (__v16sf) __B,
3018  (__v16sf) __C,
3019  (__mmask16) -1,
3021 }
3022 
3023 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3024 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3025 {
3026  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3027  (__v16sf) __B,
3028  (__v16sf) __C,
3029  (__mmask16) __U,
3031 }
3032 
3033 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3034 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3035 {
3036  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3037  (__v16sf) __B,
3038  (__v16sf) __C,
3039  (__mmask16) __U,
3041 }
3042 
3043 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3044 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3045 {
3046  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3047  (__v16sf) __B,
3048  (__v16sf) __C,
3049  (__mmask16) __U,
3051 }
3052 
3053 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3054 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3055 {
3056  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3057  (__v16sf) __B,
3058  -(__v16sf) __C,
3059  (__mmask16) -1,
3061 }
3062 
3063 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3064 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3065 {
3066  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3067  (__v16sf) __B,
3068  -(__v16sf) __C,
3069  (__mmask16) __U,
3071 }
3072 
3073 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3074 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3075 {
3076  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3077  (__v16sf) __B,
3078  -(__v16sf) __C,
3079  (__mmask16) __U,
3081 }
3082 
3083 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3084  (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
3085  (__v8df)(__m512d)(B), \
3086  (__v8df)(__m512d)(C), \
3087  (__mmask8)(U), (int)(R))
3088 
3089 
3090 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3091 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3092 {
3093  return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3094  (__v8df) __B,
3095  (__v8df) __C,
3096  (__mmask8) __U,
3098 }
3099 
3100 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3101  (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
3102  (__v16sf)(__m512)(B), \
3103  (__v16sf)(__m512)(C), \
3104  (__mmask16)(U), (int)(R))
3105 
3106 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3107 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3108 {
3109  return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3110  (__v16sf) __B,
3111  (__v16sf) __C,
3112  (__mmask16) __U,
3114 }
3115 
3116 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3117  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
3118  (__v8df)(__m512d)(B), \
3119  (__v8df)(__m512d)(C), \
3120  (__mmask8)(U), (int)(R))
3121 
3122 
3123 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3124 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3125 {
3126  return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3127  (__v8df) __B,
3128  (__v8df) __C,
3129  (__mmask8) __U,
3131 }
3132 
3133 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3134  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
3135  (__v16sf)(__m512)(B), \
3136  (__v16sf)(__m512)(C), \
3137  (__mmask16)(U), (int)(R))
3138 
3139 
3140 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3141 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3142 {
3143  return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3144  (__v16sf) __B,
3145  (__v16sf) __C,
3146  (__mmask16) __U,
3148 }
3149 
3150 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3151  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3152  -(__v8df)(__m512d)(B), \
3153  (__v8df)(__m512d)(C), \
3154  (__mmask8)(U), (int)(R))
3155 
3156 
3157 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3158 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3159 {
3160  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3161  -(__v8df) __B,
3162  (__v8df) __C,
3163  (__mmask8) __U,
3165 }
3166 
3167 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3168  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3169  -(__v16sf)(__m512)(B), \
3170  (__v16sf)(__m512)(C), \
3171  (__mmask16)(U), (int)(R))
3172 
3173 
3174 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3175 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3176 {
3177  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3178  -(__v16sf) __B,
3179  (__v16sf) __C,
3180  (__mmask16) __U,
3182 }
3183 
3184 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3185  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3186  -(__v8df)(__m512d)(B), \
3187  -(__v8df)(__m512d)(C), \
3188  (__mmask8)(U), (int)(R))
3189 
3190 
3191 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3192  (__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
3193  (__v8df)(__m512d)(B), \
3194  (__v8df)(__m512d)(C), \
3195  (__mmask8)(U), (int)(R))
3196 
3197 
3198 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3199 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3200 {
3201  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3202  -(__v8df) __B,
3203  -(__v8df) __C,
3204  (__mmask8) __U,
3206 }
3207 
3208 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3209 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3210 {
3211  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
3212  (__v8df) __B,
3213  (__v8df) __C,
3214  (__mmask8) __U,
3216 }
3217 
3218 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3219  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3220  -(__v16sf)(__m512)(B), \
3221  -(__v16sf)(__m512)(C), \
3222  (__mmask16)(U), (int)(R))
3223 
3224 
3225 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3226  (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
3227  (__v16sf)(__m512)(B), \
3228  (__v16sf)(__m512)(C), \
3229  (__mmask16)(U), (int)(R))
3230 
3231 
3232 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3233 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3234 {
3235  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3236  -(__v16sf) __B,
3237  -(__v16sf) __C,
3238  (__mmask16) __U,
3240 }
3241 
3242 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3243 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3244 {
3245  return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
3246  (__v16sf) __B,
3247  (__v16sf) __C,
3248  (__mmask16) __U,
3250 }
3251 
3252 
3253 
3254 /* Vector permutations */
3255 
3256 static __inline __m512i __DEFAULT_FN_ATTRS512
3257 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3258 {
3259  return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3260  (__v16si) __B);
3261 }
3262 
3263 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3264 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3265  __m512i __B)
3266 {
3267  return (__m512i)__builtin_ia32_selectd_512(__U,
3268  (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3269  (__v16si)__A);
3270 }
3271 
3272 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3273 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3274  __m512i __B)
3275 {
3276  return (__m512i)__builtin_ia32_selectd_512(__U,
3277  (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3278  (__v16si)__I);
3279 }
3280 
3281 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3282 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3283  __m512i __B)
3284 {
3285  return (__m512i)__builtin_ia32_selectd_512(__U,
3286  (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3287  (__v16si)_mm512_setzero_si512());
3288 }
3289 
3290 static __inline __m512i __DEFAULT_FN_ATTRS512
3291 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3292 {
3293  return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3294  (__v8di) __B);
3295 }
3296 
3297 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3298 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3299  __m512i __B)
3300 {
3301  return (__m512i)__builtin_ia32_selectq_512(__U,
3302  (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3303  (__v8di)__A);
3304 }
3305 
3306 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3307 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
3308  __m512i __B)
3309 {
3310  return (__m512i)__builtin_ia32_selectq_512(__U,
3311  (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3312  (__v8di)__I);
3313 }
3314 
3315 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3316 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3317  __m512i __B)
3318 {
3319  return (__m512i)__builtin_ia32_selectq_512(__U,
3320  (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3321  (__v8di)_mm512_setzero_si512());
3322 }
3323 
3324 #define _mm512_alignr_epi64(A, B, I) \
3325  (__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
3326  (__v8di)(__m512i)(B), (int)(I))
3327 
3328 #define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
3329  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3330  (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3331  (__v8di)(__m512i)(W))
3332 
3333 #define _mm512_maskz_alignr_epi64(U, A, B, imm) \
3334  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3335  (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3336  (__v8di)_mm512_setzero_si512())
3337 
3338 #define _mm512_alignr_epi32(A, B, I) \
3339  (__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
3340  (__v16si)(__m512i)(B), (int)(I))
3341 
3342 #define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
3343  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3344  (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3345  (__v16si)(__m512i)(W))
3346 
3347 #define _mm512_maskz_alignr_epi32(U, A, B, imm) \
3348  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3349  (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3350  (__v16si)_mm512_setzero_si512())
3351 /* Vector Extract */
3352 
3353 #define _mm512_extractf64x4_pd(A, I) \
3354  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
3355  (__v4df)_mm256_undefined_pd(), \
3356  (__mmask8)-1)
3357 
3358 #define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
3359  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
3360  (__v4df)(__m256d)(W), \
3361  (__mmask8)(U))
3362 
3363 #define _mm512_maskz_extractf64x4_pd(U, A, imm) \
3364  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
3365  (__v4df)_mm256_setzero_pd(), \
3366  (__mmask8)(U))
3367 
3368 #define _mm512_extractf32x4_ps(A, I) \
3369  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
3370  (__v4sf)_mm_undefined_ps(), \
3371  (__mmask8)-1)
3372 
3373 #define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
3374  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
3375  (__v4sf)(__m128)(W), \
3376  (__mmask8)(U))
3377 
3378 #define _mm512_maskz_extractf32x4_ps(U, A, imm) \
3379  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
3380  (__v4sf)_mm_setzero_ps(), \
3381  (__mmask8)(U))
3382 
3383 /* Vector Blend */
3384 
3385 static __inline __m512d __DEFAULT_FN_ATTRS512
3386 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3387 {
3388  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3389  (__v8df) __W,
3390  (__v8df) __A);
3391 }
3392 
3393 static __inline __m512 __DEFAULT_FN_ATTRS512
3394 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3395 {
3396  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3397  (__v16sf) __W,
3398  (__v16sf) __A);
3399 }
3400 
3401 static __inline __m512i __DEFAULT_FN_ATTRS512
3402 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3403 {
3404  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3405  (__v8di) __W,
3406  (__v8di) __A);
3407 }
3408 
3409 static __inline __m512i __DEFAULT_FN_ATTRS512
3410 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3411 {
3412  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3413  (__v16si) __W,
3414  (__v16si) __A);
3415 }
3416 
3417 /* Compare */
3418 
3419 #define _mm512_cmp_round_ps_mask(A, B, P, R) \
3420  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
3421  (__v16sf)(__m512)(B), (int)(P), \
3422  (__mmask16)-1, (int)(R))
3423 
3424 #define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
3425  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
3426  (__v16sf)(__m512)(B), (int)(P), \
3427  (__mmask16)(U), (int)(R))
3428 
3429 #define _mm512_cmp_ps_mask(A, B, P) \
3430  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3431 #define _mm512_mask_cmp_ps_mask(U, A, B, P) \
3432  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3433 
3434 #define _mm512_cmpeq_ps_mask(A, B) \
3435  _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
3436 #define _mm512_mask_cmpeq_ps_mask(k, A, B) \
3437  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)
3438 
3439 #define _mm512_cmplt_ps_mask(A, B) \
3440  _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
3441 #define _mm512_mask_cmplt_ps_mask(k, A, B) \
3442  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)
3443 
3444 #define _mm512_cmple_ps_mask(A, B) \
3445  _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
3446 #define _mm512_mask_cmple_ps_mask(k, A, B) \
3447  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)
3448 
3449 #define _mm512_cmpunord_ps_mask(A, B) \
3450  _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
3451 #define _mm512_mask_cmpunord_ps_mask(k, A, B) \
3452  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)
3453 
3454 #define _mm512_cmpneq_ps_mask(A, B) \
3455  _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
3456 #define _mm512_mask_cmpneq_ps_mask(k, A, B) \
3457  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)
3458 
3459 #define _mm512_cmpnlt_ps_mask(A, B) \
3460  _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
3461 #define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
3462  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)
3463 
3464 #define _mm512_cmpnle_ps_mask(A, B) \
3465  _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
3466 #define _mm512_mask_cmpnle_ps_mask(k, A, B) \
3467  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)
3468 
3469 #define _mm512_cmpord_ps_mask(A, B) \
3470  _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
3471 #define _mm512_mask_cmpord_ps_mask(k, A, B) \
3472  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
3473 
3474 #define _mm512_cmp_round_pd_mask(A, B, P, R) \
3475  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
3476  (__v8df)(__m512d)(B), (int)(P), \
3477  (__mmask8)-1, (int)(R))
3478 
3479 #define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
3480  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
3481  (__v8df)(__m512d)(B), (int)(P), \
3482  (__mmask8)(U), (int)(R))
3483 
3484 #define _mm512_cmp_pd_mask(A, B, P) \
3485  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3486 #define _mm512_mask_cmp_pd_mask(U, A, B, P) \
3487  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3488 
3489 #define _mm512_cmpeq_pd_mask(A, B) \
3490  _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
3491 #define _mm512_mask_cmpeq_pd_mask(k, A, B) \
3492  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)
3493 
3494 #define _mm512_cmplt_pd_mask(A, B) \
3495  _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
3496 #define _mm512_mask_cmplt_pd_mask(k, A, B) \
3497  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)
3498 
3499 #define _mm512_cmple_pd_mask(A, B) \
3500  _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
3501 #define _mm512_mask_cmple_pd_mask(k, A, B) \
3502  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)
3503 
3504 #define _mm512_cmpunord_pd_mask(A, B) \
3505  _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
3506 #define _mm512_mask_cmpunord_pd_mask(k, A, B) \
3507  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)
3508 
3509 #define _mm512_cmpneq_pd_mask(A, B) \
3510  _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
3511 #define _mm512_mask_cmpneq_pd_mask(k, A, B) \
3512  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)
3513 
3514 #define _mm512_cmpnlt_pd_mask(A, B) \
3515  _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
3516 #define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
3517  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)
3518 
3519 #define _mm512_cmpnle_pd_mask(A, B) \
3520  _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
3521 #define _mm512_mask_cmpnle_pd_mask(k, A, B) \
3522  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)
3523 
3524 #define _mm512_cmpord_pd_mask(A, B) \
3525  _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
3526 #define _mm512_mask_cmpord_pd_mask(k, A, B) \
3527  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3528 
3529 /* Conversion */
3530 
3531 #define _mm512_cvtt_roundps_epu32(A, R) \
3532  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3533  (__v16si)_mm512_undefined_epi32(), \
3534  (__mmask16)-1, (int)(R))
3535 
3536 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
3537  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3538  (__v16si)(__m512i)(W), \
3539  (__mmask16)(U), (int)(R))
3540 
3541 #define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
3542  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3543  (__v16si)_mm512_setzero_si512(), \
3544  (__mmask16)(U), (int)(R))
3545 
3546 
3547 static __inline __m512i __DEFAULT_FN_ATTRS512
3549 {
3550  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3551  (__v16si)
3553  (__mmask16) -1,
3555 }
3556 
3557 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3558 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3559 {
3560  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3561  (__v16si) __W,
3562  (__mmask16) __U,
3564 }
3565 
3566 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3567 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3568 {
3569  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3570  (__v16si) _mm512_setzero_si512 (),
3571  (__mmask16) __U,
3573 }
3574 
3575 #define _mm512_cvt_roundepi32_ps(A, R) \
3576  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3577  (__v16sf)_mm512_setzero_ps(), \
3578  (__mmask16)-1, (int)(R))
3579 
3580 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
3581  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3582  (__v16sf)(__m512)(W), \
3583  (__mmask16)(U), (int)(R))
3584 
3585 #define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
3586  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3587  (__v16sf)_mm512_setzero_ps(), \
3588  (__mmask16)(U), (int)(R))
3589 
3590 #define _mm512_cvt_roundepu32_ps(A, R) \
3591  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3592  (__v16sf)_mm512_setzero_ps(), \
3593  (__mmask16)-1, (int)(R))
3594 
3595 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
3596  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3597  (__v16sf)(__m512)(W), \
3598  (__mmask16)(U), (int)(R))
3599 
3600 #define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
3601  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3602  (__v16sf)_mm512_setzero_ps(), \
3603  (__mmask16)(U), (int)(R))
3604 
3605 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3606 _mm512_cvtepu32_ps (__m512i __A)
3607 {
3608  return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3609 }
3610 
3611 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3612 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3613 {
3614  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3615  (__v16sf)_mm512_cvtepu32_ps(__A),
3616  (__v16sf)__W);
3617 }
3618 
3619 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3620 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3621 {
3622  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3623  (__v16sf)_mm512_cvtepu32_ps(__A),
3624  (__v16sf)_mm512_setzero_ps());
3625 }
3626 
3627 static __inline __m512d __DEFAULT_FN_ATTRS512
3629 {
3630  return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3631 }
3632 
3633 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3634 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3635 {
3636  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3637  (__v8df)_mm512_cvtepi32_pd(__A),
3638  (__v8df)__W);
3639 }
3640 
3641 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3642 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3643 {
3644  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3645  (__v8df)_mm512_cvtepi32_pd(__A),
3646  (__v8df)_mm512_setzero_pd());
3647 }
3648 
3649 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3651 {
3652  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3653 }
3654 
3655 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3656 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3657 {
3658  return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3659 }
3660 
3661 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3662 _mm512_cvtepi32_ps (__m512i __A)
3663 {
3664  return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3665 }
3666 
3667 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3668 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3669 {
3670  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3671  (__v16sf)_mm512_cvtepi32_ps(__A),
3672  (__v16sf)__W);
3673 }
3674 
3675 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3676 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3677 {
3678  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3679  (__v16sf)_mm512_cvtepi32_ps(__A),
3680  (__v16sf)_mm512_setzero_ps());
3681 }
3682 
3683 static __inline __m512d __DEFAULT_FN_ATTRS512
3685 {
3686  return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3687 }
3688 
3689 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3690 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3691 {
3692  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3693  (__v8df)_mm512_cvtepu32_pd(__A),
3694  (__v8df)__W);
3695 }
3696 
3697 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3698 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3699 {
3700  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3701  (__v8df)_mm512_cvtepu32_pd(__A),
3702  (__v8df)_mm512_setzero_pd());
3703 }
3704 
3705 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3707 {
3708  return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3709 }
3710 
3711 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3712 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3713 {
3714  return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3715 }
3716 
3717 #define _mm512_cvt_roundpd_ps(A, R) \
3718  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3719  (__v8sf)_mm256_setzero_ps(), \
3720  (__mmask8)-1, (int)(R))
3721 
3722 #define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
3723  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3724  (__v8sf)(__m256)(W), (__mmask8)(U), \
3725  (int)(R))
3726 
3727 #define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
3728  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3729  (__v8sf)_mm256_setzero_ps(), \
3730  (__mmask8)(U), (int)(R))
3731 
3732 static __inline__ __m256 __DEFAULT_FN_ATTRS512
3733 _mm512_cvtpd_ps (__m512d __A)
3734 {
3735  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3736  (__v8sf) _mm256_undefined_ps (),
3737  (__mmask8) -1,
3739 }
3740 
3741 static __inline__ __m256 __DEFAULT_FN_ATTRS512
3742 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3743 {
3744  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3745  (__v8sf) __W,
3746  (__mmask8) __U,
3748 }
3749 
3750 static __inline__ __m256 __DEFAULT_FN_ATTRS512
3751 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3752 {
3753  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3754  (__v8sf) _mm256_setzero_ps (),
3755  (__mmask8) __U,
3757 }
3758 
3759 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3760 _mm512_cvtpd_pslo (__m512d __A)
3761 {
3762  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3763  (__v8sf) _mm256_setzero_ps (),
3764  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3765 }
3766 
3767 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3768 _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3769 {
3770  return (__m512) __builtin_shufflevector (
3772  __U, __A),
3773  (__v8sf) _mm256_setzero_ps (),
3774  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3775 }
3776 
/* float <-> half conversions.  The ps->ph forms take an immediate rounding
   control I; the roundph_ps forms take an explicit rounding mode R. */
#define _mm512_cvt_roundps_ph(A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_undefined_si256(), \
                                            (__mmask16)-1)

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(U), \
                                            (__mmask16)(W))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(W))

#define _mm512_cvtps_ph(A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)-1)

#define _mm512_mask_cvtps_ph(U, W, A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(U), \
                                            (__mmask16)(W))

#define _mm512_maskz_cvtps_ph(W, A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(W))

#define _mm512_cvt_roundph_ps(A, R) \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R))
3821 
3822 
3823 static __inline __m512 __DEFAULT_FN_ATTRS512
3824 _mm512_cvtph_ps(__m256i __A)
3825 {
3826  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3827  (__v16sf)
3828  _mm512_setzero_ps (),
3829  (__mmask16) -1,
3831 }
3832 
3833 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3834 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3835 {
3836  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3837  (__v16sf) __W,
3838  (__mmask16) __U,
3840 }
3841 
3842 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3843 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
3844 {
3845  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3846  (__v16sf) _mm512_setzero_ps (),
3847  (__mmask16) __U,
3849 }
3850 
/* Truncating double -> int32 conversion with explicit rounding/SAE control. */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R))
3865 
3866 static __inline __m256i __DEFAULT_FN_ATTRS512
3868 {
3869  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3870  (__v8si)_mm256_setzero_si256(),
3871  (__mmask8) -1,
3873 }
3874 
3875 static __inline__ __m256i __DEFAULT_FN_ATTRS512
3876 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3877 {
3878  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3879  (__v8si) __W,
3880  (__mmask8) __U,
3882 }
3883 
3884 static __inline__ __m256i __DEFAULT_FN_ATTRS512
3885 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
3886 {
3887  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3888  (__v8si) _mm256_setzero_si256 (),
3889  (__mmask8) __U,
3891 }
3892 
/* Truncating float -> int32 conversion with explicit rounding/SAE control. */
#define _mm512_cvtt_roundps_epi32(A, R) \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R))
3907 
3908 static __inline __m512i __DEFAULT_FN_ATTRS512
3910 {
3911  return (__m512i)
3912  __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3913  (__v16si) _mm512_setzero_si512 (),
3914  (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
3915 }
3916 
3917 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3918 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3919 {
3920  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3921  (__v16si) __W,
3922  (__mmask16) __U,
3924 }
3925 
3926 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3927 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
3928 {
3929  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3930  (__v16si) _mm512_setzero_si512 (),
3931  (__mmask16) __U,
3933 }
3934 
/* Rounding float -> int32 conversion with explicit rounding control. */
#define _mm512_cvt_roundps_epi32(A, R) \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)(__m512i)(W), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)(U), (int)(R))
3949 
3950 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3952 {
3953  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3954  (__v16si) _mm512_undefined_epi32 (),
3955  (__mmask16) -1,
3957 }
3958 
3959 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3960 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3961 {
3962  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3963  (__v16si) __W,
3964  (__mmask16) __U,
3966 }
3967 
3968 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3969 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
3970 {
3971  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3972  (__v16si)
3974  (__mmask16) __U,
3976 }
3977 
/* Rounding double -> int32 conversion with explicit rounding control. */
#define _mm512_cvt_roundpd_epi32(A, R) \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)(__m256i)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)(U), (int)(R))
3992 
3993 static __inline__ __m256i __DEFAULT_FN_ATTRS512
3994 _mm512_cvtpd_epi32 (__m512d __A)
3995 {
3996  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3997  (__v8si)
3999  (__mmask8) -1,
4001 }
4002 
4003 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4004 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4005 {
4006  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4007  (__v8si) __W,
4008  (__mmask8) __U,
4010 }
4011 
4012 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4013 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
4014 {
4015  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4016  (__v8si)
4018  (__mmask8) __U,
4020 }
4021 
/* Rounding float -> uint32 conversion with explicit rounding control. */
#define _mm512_cvt_roundps_epu32(A, R) \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R))
4036 
4037 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4038 _mm512_cvtps_epu32 ( __m512 __A)
4039 {
4040  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4041  (__v16si)\
4043  (__mmask16) -1,\
4045 }
4046 
4047 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4048 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4049 {
4050  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4051  (__v16si) __W,
4052  (__mmask16) __U,
4054 }
4055 
4056 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4057 _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4058 {
4059  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4060  (__v16si)
4062  (__mmask16) __U ,
4064 }
4065 
/* Rounding double -> uint32 conversion with explicit rounding control. */
#define _mm512_cvt_roundpd_epu32(A, R) \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R))
4080 
4081 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4082 _mm512_cvtpd_epu32 (__m512d __A)
4083 {
4084  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4085  (__v8si)
4087  (__mmask8) -1,
4089 }
4090 
4091 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4092 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4093 {
4094  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4095  (__v8si) __W,
4096  (__mmask8) __U,
4098 }
4099 
4100 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4101 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4102 {
4103  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4104  (__v8si)
4106  (__mmask8) __U,
4108 }
4109 
4110 static __inline__ double __DEFAULT_FN_ATTRS512
4112 {
4113  return __a[0];
4114 }
4115 
4116 static __inline__ float __DEFAULT_FN_ATTRS512
4118 {
4119  return __a[0];
4120 }
4121 
4122 /* Unpack and Interleave */
4123 
4124 static __inline __m512d __DEFAULT_FN_ATTRS512
4125 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
4126 {
4127  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4128  1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4129 }
4130 
4131 static __inline__ __m512d __DEFAULT_FN_ATTRS512
4132 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4133 {
4134  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4135  (__v8df)_mm512_unpackhi_pd(__A, __B),
4136  (__v8df)__W);
4137 }
4138 
4139 static __inline__ __m512d __DEFAULT_FN_ATTRS512
4140 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4141 {
4142  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4143  (__v8df)_mm512_unpackhi_pd(__A, __B),
4144  (__v8df)_mm512_setzero_pd());
4145 }
4146 
4147 static __inline __m512d __DEFAULT_FN_ATTRS512
4148 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
4149 {
4150  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4151  0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4152 }
4153 
4154 static __inline__ __m512d __DEFAULT_FN_ATTRS512
4155 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4156 {
4157  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4158  (__v8df)_mm512_unpacklo_pd(__A, __B),
4159  (__v8df)__W);
4160 }
4161 
4162 static __inline__ __m512d __DEFAULT_FN_ATTRS512
4163 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4164 {
4165  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4166  (__v8df)_mm512_unpacklo_pd(__A, __B),
4167  (__v8df)_mm512_setzero_pd());
4168 }
4169 
4170 static __inline __m512 __DEFAULT_FN_ATTRS512
4171 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
4172 {
4173  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4174  2, 18, 3, 19,
4175  2+4, 18+4, 3+4, 19+4,
4176  2+8, 18+8, 3+8, 19+8,
4177  2+12, 18+12, 3+12, 19+12);
4178 }
4179 
4180 static __inline__ __m512 __DEFAULT_FN_ATTRS512
4181 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4182 {
4183  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4184  (__v16sf)_mm512_unpackhi_ps(__A, __B),
4185  (__v16sf)__W);
4186 }
4187 
4188 static __inline__ __m512 __DEFAULT_FN_ATTRS512
4189 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4190 {
4191  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4192  (__v16sf)_mm512_unpackhi_ps(__A, __B),
4193  (__v16sf)_mm512_setzero_ps());
4194 }
4195 
4196 static __inline __m512 __DEFAULT_FN_ATTRS512
4197 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
4198 {
4199  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4200  0, 16, 1, 17,
4201  0+4, 16+4, 1+4, 17+4,
4202  0+8, 16+8, 1+8, 17+8,
4203  0+12, 16+12, 1+12, 17+12);
4204 }
4205 
4206 static __inline__ __m512 __DEFAULT_FN_ATTRS512
4207 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4208 {
4209  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4210  (__v16sf)_mm512_unpacklo_ps(__A, __B),
4211  (__v16sf)__W);
4212 }
4213 
4214 static __inline__ __m512 __DEFAULT_FN_ATTRS512
4215 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4216 {
4217  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4218  (__v16sf)_mm512_unpacklo_ps(__A, __B),
4219  (__v16sf)_mm512_setzero_ps());
4220 }
4221 
4222 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4223 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4224 {
4225  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4226  2, 18, 3, 19,
4227  2+4, 18+4, 3+4, 19+4,
4228  2+8, 18+8, 3+8, 19+8,
4229  2+12, 18+12, 3+12, 19+12);
4230 }
4231 
4232 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4233 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4234 {
4235  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4236  (__v16si)_mm512_unpackhi_epi32(__A, __B),
4237  (__v16si)__W);
4238 }
4239 
4240 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4241 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4242 {
4243  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4244  (__v16si)_mm512_unpackhi_epi32(__A, __B),
4245  (__v16si)_mm512_setzero_si512());
4246 }
4247 
4248 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4249 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4250 {
4251  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4252  0, 16, 1, 17,
4253  0+4, 16+4, 1+4, 17+4,
4254  0+8, 16+8, 1+8, 17+8,
4255  0+12, 16+12, 1+12, 17+12);
4256 }
4257 
4258 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4259 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4260 {
4261  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4262  (__v16si)_mm512_unpacklo_epi32(__A, __B),
4263  (__v16si)__W);
4264 }
4265 
4266 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4267 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4268 {
4269  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4270  (__v16si)_mm512_unpacklo_epi32(__A, __B),
4271  (__v16si)_mm512_setzero_si512());
4272 }
4273 
4274 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4275 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4276 {
4277  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4278  1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4279 }
4280 
4281 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4282 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4283 {
4284  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4285  (__v8di)_mm512_unpackhi_epi64(__A, __B),
4286  (__v8di)__W);
4287 }
4288 
4289 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4290 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4291 {
4292  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4293  (__v8di)_mm512_unpackhi_epi64(__A, __B),
4294  (__v8di)_mm512_setzero_si512());
4295 }
4296 
4297 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4298 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4299 {
4300  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4301  0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4302 }
4303 
4304 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4305 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4306 {
4307  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4308  (__v8di)_mm512_unpacklo_epi64(__A, __B),
4309  (__v8di)__W);
4310 }
4311 
4312 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4313 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4314 {
4315  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4316  (__v8di)_mm512_unpacklo_epi64(__A, __B),
4317  (__v8di)_mm512_setzero_si512());
4318 }
4319 
4320 
4321 /* SIMD load ops */
4322 
4323 static __inline __m512i __DEFAULT_FN_ATTRS512
4324 _mm512_loadu_si512 (void const *__P)
4325 {
4326  struct __loadu_si512 {
4327  __m512i __v;
4328  } __attribute__((__packed__, __may_alias__));
4329  return ((struct __loadu_si512*)__P)->__v;
4330 }
4331 
4332 static __inline __m512i __DEFAULT_FN_ATTRS512
4333 _mm512_loadu_epi32 (void const *__P)
4334 {
4335  struct __loadu_epi32 {
4336  __m512i __v;
4337  } __attribute__((__packed__, __may_alias__));
4338  return ((struct __loadu_epi32*)__P)->__v;
4339 }
4340 
4341 static __inline __m512i __DEFAULT_FN_ATTRS512
4342 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4343 {
4344  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4345  (__v16si) __W,
4346  (__mmask16) __U);
4347 }
4348 
4349 
4350 static __inline __m512i __DEFAULT_FN_ATTRS512
4351 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
4352 {
4353  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4354  (__v16si)
4356  (__mmask16) __U);
4357 }
4358 
4359 static __inline __m512i __DEFAULT_FN_ATTRS512
4360 _mm512_loadu_epi64 (void const *__P)
4361 {
4362  struct __loadu_epi64 {
4363  __m512i __v;
4364  } __attribute__((__packed__, __may_alias__));
4365  return ((struct __loadu_epi64*)__P)->__v;
4366 }
4367 
4368 static __inline __m512i __DEFAULT_FN_ATTRS512
4369 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4370 {
4371  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4372  (__v8di) __W,
4373  (__mmask8) __U);
4374 }
4375 
4376 static __inline __m512i __DEFAULT_FN_ATTRS512
4377 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
4378 {
4379  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4380  (__v8di)
4382  (__mmask8) __U);
4383 }
4384 
4385 static __inline __m512 __DEFAULT_FN_ATTRS512
4386 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4387 {
4388  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4389  (__v16sf) __W,
4390  (__mmask16) __U);
4391 }
4392 
4393 static __inline __m512 __DEFAULT_FN_ATTRS512
4394 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
4395 {
4396  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4397  (__v16sf)
4398  _mm512_setzero_ps (),
4399  (__mmask16) __U);
4400 }
4401 
4402 static __inline __m512d __DEFAULT_FN_ATTRS512
4403 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4404 {
4405  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4406  (__v8df) __W,
4407  (__mmask8) __U);
4408 }
4409 
4410 static __inline __m512d __DEFAULT_FN_ATTRS512
4411 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
4412 {
4413  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4414  (__v8df)
4415  _mm512_setzero_pd (),
4416  (__mmask8) __U);
4417 }
4418 
4419 static __inline __m512d __DEFAULT_FN_ATTRS512
4420 _mm512_loadu_pd(void const *__p)
4421 {
4422  struct __loadu_pd {
4423  __m512d __v;
4424  } __attribute__((__packed__, __may_alias__));
4425  return ((struct __loadu_pd*)__p)->__v;
4426 }
4427 
4428 static __inline __m512 __DEFAULT_FN_ATTRS512
4429 _mm512_loadu_ps(void const *__p)
4430 {
4431  struct __loadu_ps {
4432  __m512 __v;
4433  } __attribute__((__packed__, __may_alias__));
4434  return ((struct __loadu_ps*)__p)->__v;
4435 }
4436 
4437 static __inline __m512 __DEFAULT_FN_ATTRS512
4438 _mm512_load_ps(void const *__p)
4439 {
4440  return *(__m512*)__p;
4441 }
4442 
4443 static __inline __m512 __DEFAULT_FN_ATTRS512
4444 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4445 {
4446  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4447  (__v16sf) __W,
4448  (__mmask16) __U);
4449 }
4450 
4451 static __inline __m512 __DEFAULT_FN_ATTRS512
4452 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4453 {
4454  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4455  (__v16sf)
4456  _mm512_setzero_ps (),
4457  (__mmask16) __U);
4458 }
4459 
4460 static __inline __m512d __DEFAULT_FN_ATTRS512
4461 _mm512_load_pd(void const *__p)
4462 {
4463  return *(__m512d*)__p;
4464 }
4465 
4466 static __inline __m512d __DEFAULT_FN_ATTRS512
4467 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4468 {
4469  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4470  (__v8df) __W,
4471  (__mmask8) __U);
4472 }
4473 
4474 static __inline __m512d __DEFAULT_FN_ATTRS512
4475 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4476 {
4477  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4478  (__v8df)
4479  _mm512_setzero_pd (),
4480  (__mmask8) __U);
4481 }
4482 
4483 static __inline __m512i __DEFAULT_FN_ATTRS512
4484 _mm512_load_si512 (void const *__P)
4485 {
4486  return *(__m512i *) __P;
4487 }
4488 
4489 static __inline __m512i __DEFAULT_FN_ATTRS512
4490 _mm512_load_epi32 (void const *__P)
4491 {
4492  return *(__m512i *) __P;
4493 }
4494 
4495 static __inline __m512i __DEFAULT_FN_ATTRS512
4496 _mm512_load_epi64 (void const *__P)
4497 {
4498  return *(__m512i *) __P;
4499 }
4500 
4501 /* SIMD store ops */
4502 
4503 static __inline void __DEFAULT_FN_ATTRS512
4504 _mm512_storeu_epi64 (void *__P, __m512i __A)
4505 {
4506  struct __storeu_epi64 {
4507  __m512i __v;
4508  } __attribute__((__packed__, __may_alias__));
4509  ((struct __storeu_epi64*)__P)->__v = __A;
4510 }
4511 
4512 static __inline void __DEFAULT_FN_ATTRS512
4513 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4514 {
4515  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4516  (__mmask8) __U);
4517 }
4518 
4519 static __inline void __DEFAULT_FN_ATTRS512
4520 _mm512_storeu_si512 (void *__P, __m512i __A)
4521 {
4522  struct __storeu_si512 {
4523  __m512i __v;
4524  } __attribute__((__packed__, __may_alias__));
4525  ((struct __storeu_si512*)__P)->__v = __A;
4526 }
4527 
4528 static __inline void __DEFAULT_FN_ATTRS512
4529 _mm512_storeu_epi32 (void *__P, __m512i __A)
4530 {
4531  struct __storeu_epi32 {
4532  __m512i __v;
4533  } __attribute__((__packed__, __may_alias__));
4534  ((struct __storeu_epi32*)__P)->__v = __A;
4535 }
4536 
4537 static __inline void __DEFAULT_FN_ATTRS512
4538 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
4539 {
4540  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4541  (__mmask16) __U);
4542 }
4543 
4544 static __inline void __DEFAULT_FN_ATTRS512
4545 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4546 {
4547  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4548 }
4549 
4550 static __inline void __DEFAULT_FN_ATTRS512
4551 _mm512_storeu_pd(void *__P, __m512d __A)
4552 {
4553  struct __storeu_pd {
4554  __m512d __v;
4555  } __attribute__((__packed__, __may_alias__));
4556  ((struct __storeu_pd*)__P)->__v = __A;
4557 }
4558 
4559 static __inline void __DEFAULT_FN_ATTRS512
4560 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4561 {
4562  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4563  (__mmask16) __U);
4564 }
4565 
4566 static __inline void __DEFAULT_FN_ATTRS512
4567 _mm512_storeu_ps(void *__P, __m512 __A)
4568 {
4569  struct __storeu_ps {
4570  __m512 __v;
4571  } __attribute__((__packed__, __may_alias__));
4572  ((struct __storeu_ps*)__P)->__v = __A;
4573 }
4574 
4575 static __inline void __DEFAULT_FN_ATTRS512
4576 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4577 {
4578  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4579 }
4580 
4581 static __inline void __DEFAULT_FN_ATTRS512
4582 _mm512_store_pd(void *__P, __m512d __A)
4583 {
4584  *(__m512d*)__P = __A;
4585 }
4586 
4587 static __inline void __DEFAULT_FN_ATTRS512
4588 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4589 {
4590  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4591  (__mmask16) __U);
4592 }
4593 
4594 static __inline void __DEFAULT_FN_ATTRS512
4595 _mm512_store_ps(void *__P, __m512 __A)
4596 {
4597  *(__m512*)__P = __A;
4598 }
4599 
4600 static __inline void __DEFAULT_FN_ATTRS512
4601 _mm512_store_si512 (void *__P, __m512i __A)
4602 {
4603  *(__m512i *) __P = __A;
4604 }
4605 
4606 static __inline void __DEFAULT_FN_ATTRS512
4607 _mm512_store_epi32 (void *__P, __m512i __A)
4608 {
4609  *(__m512i *) __P = __A;
4610 }
4611 
4612 static __inline void __DEFAULT_FN_ATTRS512
4613 _mm512_store_epi64 (void *__P, __m512i __A)
4614 {
4615  *(__m512i *) __P = __A;
4616 }
4617 
4618 /* Mask ops */
4619 
4620 static __inline __mmask16 __DEFAULT_FN_ATTRS
4621 _mm512_knot(__mmask16 __M)
4622 {
4623  return __builtin_ia32_knothi(__M);
4624 }
4625 
4626 /* Integer compare */
4627 
/* Named integer-compare convenience macros, expressed in terms of the
   generic _mm512[_mask]_cmp_ep{i,u}{32,64}_mask with an _MM_CMPINT_* code. */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4727 
4728 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4730 {
4731  /* This function always performs a signed extension, but __v16qi is a char
4732  which may be signed or unsigned, so use __v16qs. */
4733  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4734 }
4735 
4736 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4737 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4738 {
4739  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4740  (__v16si)_mm512_cvtepi8_epi32(__A),
4741  (__v16si)__W);
4742 }
4743 
4744 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4745 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
4746 {
4747  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4748  (__v16si)_mm512_cvtepi8_epi32(__A),
4749  (__v16si)_mm512_setzero_si512());
4750 }
4751 
4752 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4754 {
4755  /* This function always performs a signed extension, but __v16qi is a char
4756  which may be signed or unsigned, so use __v16qs. */
4757  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4758 }
4759 
4760 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4761 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4762 {
4763  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4764  (__v8di)_mm512_cvtepi8_epi64(__A),
4765  (__v8di)__W);
4766 }
4767 
4768 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4769 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4770 {
4771  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4772  (__v8di)_mm512_cvtepi8_epi64(__A),
4773  (__v8di)_mm512_setzero_si512 ());
4774 }
4775 
4776 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4778 {
4779  return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4780 }
4781 
4782 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4783 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4784 {
4785  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4786  (__v8di)_mm512_cvtepi32_epi64(__X),
4787  (__v8di)__W);
4788 }
4789 
4790 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4791 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
4792 {
4793  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4794  (__v8di)_mm512_cvtepi32_epi64(__X),
4795  (__v8di)_mm512_setzero_si512());
4796 }
4797 
4798 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4800 {
4801  return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4802 }
4803 
4804 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4805 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4806 {
4807  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4808  (__v16si)_mm512_cvtepi16_epi32(__A),
4809  (__v16si)__W);
4810 }
4811 
4812 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4813 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
4814 {
4815  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4816  (__v16si)_mm512_cvtepi16_epi32(__A),
4817  (__v16si)_mm512_setzero_si512 ());
4818 }
4819 
4820 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4822 {
4823  return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4824 }
4825 
4826 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4827 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4828 {
4829  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4830  (__v8di)_mm512_cvtepi16_epi64(__A),
4831  (__v8di)__W);
4832 }
4833 
4834 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4835 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4836 {
4837  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4838  (__v8di)_mm512_cvtepi16_epi64(__A),
4839  (__v8di)_mm512_setzero_si512());
4840 }
4841 
4842 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4844 {
4845  return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4846 }
4847 
4848 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4849 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4850 {
4851  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4852  (__v16si)_mm512_cvtepu8_epi32(__A),
4853  (__v16si)__W);
4854 }
4855 
4856 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4857 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
4858 {
4859  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4860  (__v16si)_mm512_cvtepu8_epi32(__A),
4861  (__v16si)_mm512_setzero_si512());
4862 }
4863 
4864 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4866 {
4867  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4868 }
4869 
4870 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4871 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4872 {
4873  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4874  (__v8di)_mm512_cvtepu8_epi64(__A),
4875  (__v8di)__W);
4876 }
4877 
4878 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4879 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4880 {
4881  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4882  (__v8di)_mm512_cvtepu8_epi64(__A),
4883  (__v8di)_mm512_setzero_si512());
4884 }
4885 
4886 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4888 {
4889  return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4890 }
4891 
4892 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4893 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4894 {
4895  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4896  (__v8di)_mm512_cvtepu32_epi64(__X),
4897  (__v8di)__W);
4898 }
4899 
4900 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4901 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
4902 {
4903  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4904  (__v8di)_mm512_cvtepu32_epi64(__X),
4905  (__v8di)_mm512_setzero_si512());
4906 }
4907 
4908 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4910 {
4911  return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4912 }
4913 
4914 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4915 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4916 {
4917  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4918  (__v16si)_mm512_cvtepu16_epi32(__A),
4919  (__v16si)__W);
4920 }
4921 
4922 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4923 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
4924 {
4925  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4926  (__v16si)_mm512_cvtepu16_epi32(__A),
4927  (__v16si)_mm512_setzero_si512());
4928 }
4929 
4930 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4932 {
4933  return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4934 }
4935 
4936 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4937 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4938 {
4939  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4940  (__v8di)_mm512_cvtepu16_epi64(__A),
4941  (__v8di)__W);
4942 }
4943 
4944 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4945 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4946 {
4947  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4948  (__v8di)_mm512_cvtepu16_epi64(__A),
4949  (__v8di)_mm512_setzero_si512());
4950 }
4951 
4952 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4953 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4954 {
4955  return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
4956 }
4957 
4958 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4959 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4960 {
4961  return (__m512i)__builtin_ia32_selectd_512(__U,
4962  (__v16si)_mm512_rorv_epi32(__A, __B),
4963  (__v16si)__W);
4964 }
4965 
4966 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4967 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4968 {
4969  return (__m512i)__builtin_ia32_selectd_512(__U,
4970  (__v16si)_mm512_rorv_epi32(__A, __B),
4971  (__v16si)_mm512_setzero_si512());
4972 }
4973 
4974 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4975 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4976 {
4977  return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
4978 }
4979 
4980 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4981 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4982 {
4983  return (__m512i)__builtin_ia32_selectq_512(__U,
4984  (__v8di)_mm512_rorv_epi64(__A, __B),
4985  (__v8di)__W);
4986 }
4987 
4988 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4989 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4990 {
4991  return (__m512i)__builtin_ia32_selectq_512(__U,
4992  (__v8di)_mm512_rorv_epi64(__A, __B),
4993  (__v8di)_mm512_setzero_si512());
4994 }
4995 
4996 
4997 
/* Generic signed/unsigned integer compares; p is an _MM_CMPINT_* predicate.
   The unmasked forms pass an all-ones mask; the masked forms AND with m. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)-1)

#define _mm512_cmp_epu32_mask(a, b, p) \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1)

#define _mm512_cmp_epi64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)-1)

#define _mm512_cmp_epu64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1)

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)(m))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)(m))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m))
5037 
/* Rotate-left by an immediate count; macros because b must be a compile-time
   constant for the underlying VPROLD/VPROLQ instructions. */
#define _mm512_rol_epi32(a, b) \
  (__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_rol_epi32((a), (b)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_rol_epi32(U, a, b) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_rol_epi32((a), (b)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_rol_epi64(a, b) \
  (__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_rol_epi64((a), (b)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_rol_epi64(U, a, b) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_rol_epi64((a), (b)), \
                                      (__v8di)_mm512_setzero_si512())
5063 
5064 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5065 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
5066 {
5067  return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
5068 }
5069 
5070 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5071 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5072 {
5073  return (__m512i)__builtin_ia32_selectd_512(__U,
5074  (__v16si)_mm512_rolv_epi32(__A, __B),
5075  (__v16si)__W);
5076 }
5077 
5078 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5079 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5080 {
5081  return (__m512i)__builtin_ia32_selectd_512(__U,
5082  (__v16si)_mm512_rolv_epi32(__A, __B),
5083  (__v16si)_mm512_setzero_si512());
5084 }
5085 
5086 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5087 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
5088 {
5089  return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
5090 }
5091 
5092 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5093 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5094 {
5095  return (__m512i)__builtin_ia32_selectq_512(__U,
5096  (__v8di)_mm512_rolv_epi64(__A, __B),
5097  (__v8di)__W);
5098 }
5099 
5100 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5101 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5102 {
5103  return (__m512i)__builtin_ia32_selectq_512(__U,
5104  (__v8di)_mm512_rolv_epi64(__A, __B),
5105  (__v8di)_mm512_setzero_si512());
5106 }
5107 
/* Rotate-right by an immediate count; macros because B must be a compile-time
   constant for the underlying VPRORD/VPRORQ instructions. */
#define _mm512_ror_epi32(A, B) \
  (__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_ror_epi32((A), (B)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_ror_epi32(U, A, B) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_ror_epi32((A), (B)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_ror_epi64(A, B) \
  (__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_ror_epi64((A), (B)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_ror_epi64(U, A, B) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_ror_epi64((A), (B)), \
                                      (__v8di)_mm512_setzero_si512())
5133 
5134 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5135 _mm512_slli_epi32(__m512i __A, int __B)
5136 {
5137  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5138 }
5139 
5140 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5141 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5142 {
5143  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5144  (__v16si)_mm512_slli_epi32(__A, __B),
5145  (__v16si)__W);
5146 }
5147 
5148 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5149 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
5150  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5151  (__v16si)_mm512_slli_epi32(__A, __B),
5152  (__v16si)_mm512_setzero_si512());
5153 }
5154 
5155 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5156 _mm512_slli_epi64(__m512i __A, int __B)
5157 {
5158  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5159 }
5160 
5161 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5162 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5163 {
5164  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5165  (__v8di)_mm512_slli_epi64(__A, __B),
5166  (__v8di)__W);
5167 }
5168 
5169 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5170 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
5171 {
5172  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5173  (__v8di)_mm512_slli_epi64(__A, __B),
5174  (__v8di)_mm512_setzero_si512());
5175 }
5176 
5177 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5178 _mm512_srli_epi32(__m512i __A, int __B)
5179 {
5180  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5181 }
5182 
5183 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5184 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5185 {
5186  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5187  (__v16si)_mm512_srli_epi32(__A, __B),
5188  (__v16si)__W);
5189 }
5190 
5191 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5192 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
5193  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5194  (__v16si)_mm512_srli_epi32(__A, __B),
5195  (__v16si)_mm512_setzero_si512());
5196 }
5197 
5198 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5199 _mm512_srli_epi64(__m512i __A, int __B)
5200 {
5201  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5202 }
5203 
5204 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5205 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5206 {
5207  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5208  (__v8di)_mm512_srli_epi64(__A, __B),
5209  (__v8di)__W);
5210 }
5211 
5212 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5213 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
5214 {
5215  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5216  (__v8di)_mm512_srli_epi64(__A, __B),
5217  (__v8di)_mm512_setzero_si512());
5218 }
5219 
5220 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5221 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5222 {
5223  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5224  (__v16si) __W,
5225  (__mmask16) __U);
5226 }
5227 
5228 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5229 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5230 {
5231  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5232  (__v16si)
5234  (__mmask16) __U);
5235 }
5236 
5237 static __inline__ void __DEFAULT_FN_ATTRS512
5238 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5239 {
5240  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5241  (__mmask16) __U);
5242 }
5243 
5244 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5245 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5246 {
5247  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5248  (__v16si) __A,
5249  (__v16si) __W);
5250 }
5251 
5252 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5253 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5254 {
5255  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5256  (__v16si) __A,
5257  (__v16si) _mm512_setzero_si512 ());
5258 }
5259 
5260 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5261 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5262 {
5263  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5264  (__v8di) __A,
5265  (__v8di) __W);
5266 }
5267 
5268 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5269 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5270 {
5271  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5272  (__v8di) __A,
5273  (__v8di) _mm512_setzero_si512 ());
5274 }
5275 
5276 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5277 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5278 {
5279  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5280  (__v8di) __W,
5281  (__mmask8) __U);
5282 }
5283 
5284 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5285 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5286 {
5287  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5288  (__v8di)
5290  (__mmask8) __U);
5291 }
5292 
5293 static __inline__ void __DEFAULT_FN_ATTRS512
5294 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5295 {
5296  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5297  (__mmask8) __U);
5298 }
5299 
5300 static __inline__ __m512d __DEFAULT_FN_ATTRS512
5301 _mm512_movedup_pd (__m512d __A)
5302 {
5303  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5304  0, 0, 2, 2, 4, 4, 6, 6);
5305 }
5306 
5307 static __inline__ __m512d __DEFAULT_FN_ATTRS512
5308 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5309 {
5310  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5311  (__v8df)_mm512_movedup_pd(__A),
5312  (__v8df)__W);
5313 }
5314 
5315 static __inline__ __m512d __DEFAULT_FN_ATTRS512
5316 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5317 {
5318  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5319  (__v8df)_mm512_movedup_pd(__A),
5320  (__v8df)_mm512_setzero_pd());
5321 }
5322 
/* VFIXUPIMMPD: fix up special double values per the table in C and imm.
   Macros because imm and R must be compile-time constants. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, (int)(R))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), (int)(R))

#define _mm512_fixupimm_pd(A, B, C, imm) \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              (int)(R))

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION)
5362 
/* VFIXUPIMMPS: fix up special float values per the table in C and imm. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, (int)(R))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), (int)(R))

#define _mm512_fixupimm_ps(A, B, C, imm) \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             (int)(R))

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION)
5402 
/* VFIXUPIMMSD: scalar double fix-up; upper element copied from A. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R))

#define _mm_fixupimm_sd(A, B, C, imm) \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION)
5441 
/* VFIXUPIMMSS: scalar float fix-up; upper elements copied from A. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)-1, (int)(R))

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)(U), (int)(R))

#define _mm_fixupimm_ss(A, B, C, imm) \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)-1, \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)(U), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R))

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION)
5480 
5481 #define _mm_getexp_round_sd(A, B, R) \
5482  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5483  (__v2df)(__m128d)(B), \
5484  (__v2df)_mm_setzero_pd(), \
5485  (__mmask8)-1, (int)(R))
5486 
5487 
5488 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5489 _mm_getexp_sd (__m128d __A, __m128d __B)
5490 {
5491  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5492  (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5493 }
5494 
5495 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5496 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5497 {
5498  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5499  (__v2df) __B,
5500  (__v2df) __W,
5501  (__mmask8) __U,
5503 }
5504 
5505 #define _mm_mask_getexp_round_sd(W, U, A, B, R) \
5506  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5507  (__v2df)(__m128d)(B), \
5508  (__v2df)(__m128d)(W), \
5509  (__mmask8)(U), (int)(R))
5510 
5511 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5512 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5513 {
5514  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5515  (__v2df) __B,
5516  (__v2df) _mm_setzero_pd (),
5517  (__mmask8) __U,
5519 }
5520 
5521 #define _mm_maskz_getexp_round_sd(U, A, B, R) \
5522  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5523  (__v2df)(__m128d)(B), \
5524  (__v2df)_mm_setzero_pd(), \
5525  (__mmask8)(U), (int)(R))
5526 
5527 #define _mm_getexp_round_ss(A, B, R) \
5528  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5529  (__v4sf)(__m128)(B), \
5530  (__v4sf)_mm_setzero_ps(), \
5531  (__mmask8)-1, (int)(R))
5532 
5533 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5534 _mm_getexp_ss (__m128 __A, __m128 __B)
5535 {
5536  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5537  (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5538 }
5539 
5540 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5541 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5542 {
5543  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5544  (__v4sf) __B,
5545  (__v4sf) __W,
5546  (__mmask8) __U,
5548 }
5549 
5550 #define _mm_mask_getexp_round_ss(W, U, A, B, R) \
5551  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5552  (__v4sf)(__m128)(B), \
5553  (__v4sf)(__m128)(W), \
5554  (__mmask8)(U), (int)(R))
5555 
5556 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5557 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5558 {
5559  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5560  (__v4sf) __B,
5561  (__v4sf) _mm_setzero_ps (),
5562  (__mmask8) __U,
5564 }
5565 
5566 #define _mm_maskz_getexp_round_ss(U, A, B, R) \
5567  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5568  (__v4sf)(__m128)(B), \
5569  (__v4sf)_mm_setzero_ps(), \
5570  (__mmask8)(U), (int)(R))
5571 
/* VGETMANTSD/VGETMANTSS: extract the normalized mantissa of the low element.
   C selects the interval, D the sign control; packed as (D<<2)|C immediate. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R))

#define _mm_getmant_sd(A, B, C, D) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R))

#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R))

#define _mm_getmant_round_ss(A, B, C, D, R) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R))

#define _mm_getmant_ss(A, B, C, D) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R))

#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R))
5661 
5662 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5663 _mm512_kmov (__mmask16 __A)
5664 {
5665  return __A;
5666 }
5667 
/* Ordered scalar compares with explicit predicate P and SAE control R
   (VCOMISD/VCOMISS); return an int flag result. */
#define _mm_comi_round_sd(A, B, P, R) \
  (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                              (int)(P), (int)(R))

#define _mm_comi_round_ss(A, B, P, R) \
  (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                              (int)(P), (int)(R))
5675 
#ifdef __x86_64__
/* Convert the low double of A to a signed 64-bit integer with rounding R
   (VCVTSD2SI); 64-bit targets only. */
#define _mm_cvt_roundsd_si64(A, R) \
  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))
#endif
5680 
5681 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5682 _mm512_sll_epi32(__m512i __A, __m128i __B)
5683 {
5684  return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5685 }
5686 
5687 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5688 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5689 {
5690  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5691  (__v16si)_mm512_sll_epi32(__A, __B),
5692  (__v16si)__W);
5693 }
5694 
5695 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5696 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5697 {
5698  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5699  (__v16si)_mm512_sll_epi32(__A, __B),
5700  (__v16si)_mm512_setzero_si512());
5701 }
5702 
5703 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5704 _mm512_sll_epi64(__m512i __A, __m128i __B)
5705 {
5706  return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5707 }
5708 
5709 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5710 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5711 {
5712  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5713  (__v8di)_mm512_sll_epi64(__A, __B),
5714  (__v8di)__W);
5715 }
5716 
5717 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5718 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5719 {
5720  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5721  (__v8di)_mm512_sll_epi64(__A, __B),
5722  (__v8di)_mm512_setzero_si512());
5723 }
5724 
5725 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5726 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
5727 {
5728  return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5729 }
5730 
5731 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5732 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5733 {
5734  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5735  (__v16si)_mm512_sllv_epi32(__X, __Y),
5736  (__v16si)__W);
5737 }
5738 
5739 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5740 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5741 {
5742  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5743  (__v16si)_mm512_sllv_epi32(__X, __Y),
5744  (__v16si)_mm512_setzero_si512());
5745 }
5746 
5747 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5748 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
5749 {
5750  return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5751 }
5752 
5753 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5754 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5755 {
5756  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5757  (__v8di)_mm512_sllv_epi64(__X, __Y),
5758  (__v8di)__W);
5759 }
5760 
5761 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5762 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5763 {
5764  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5765  (__v8di)_mm512_sllv_epi64(__X, __Y),
5766  (__v8di)_mm512_setzero_si512());
5767 }
5768 
5769 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5770 _mm512_sra_epi32(__m512i __A, __m128i __B)
5771 {
5772  return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5773 }
5774 
5775 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5776 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5777 {
5778  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5779  (__v16si)_mm512_sra_epi32(__A, __B),
5780  (__v16si)__W);
5781 }
5782 
5783 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5784 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5785 {
5786  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5787  (__v16si)_mm512_sra_epi32(__A, __B),
5788  (__v16si)_mm512_setzero_si512());
5789 }
5790 
5791 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5792 _mm512_sra_epi64(__m512i __A, __m128i __B)
5793 {
5794  return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5795 }
5796 
5797 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5798 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5799 {
5800  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5801  (__v8di)_mm512_sra_epi64(__A, __B),
5802  (__v8di)__W);
5803 }
5804 
5805 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5806 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5807 {
5808  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5809  (__v8di)_mm512_sra_epi64(__A, __B),
5810  (__v8di)_mm512_setzero_si512());
5811 }
5812 
5813 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5814 _mm512_srav_epi32(__m512i __X, __m512i __Y)
5815 {
5816  return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5817 }
5818 
5819 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5820 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5821 {
5822  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5823  (__v16si)_mm512_srav_epi32(__X, __Y),
5824  (__v16si)__W);
5825 }
5826 
5827 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5828 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5829 {
5830  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5831  (__v16si)_mm512_srav_epi32(__X, __Y),
5832  (__v16si)_mm512_setzero_si512());
5833 }
5834 
5835 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5836 _mm512_srav_epi64(__m512i __X, __m512i __Y)
5837 {
5838  return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5839 }
5840 
5841 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5842 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5843 {
5844  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5845  (__v8di)_mm512_srav_epi64(__X, __Y),
5846  (__v8di)__W);
5847 }
5848 
5849 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5850 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5851 {
5852  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5853  (__v8di)_mm512_srav_epi64(__X, __Y),
5854  (__v8di)_mm512_setzero_si512());
5855 }
5856 
5857 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5858 _mm512_srl_epi32(__m512i __A, __m128i __B)
5859 {
5860  return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5861 }
5862 
5863 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5864 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5865 {
5866  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5867  (__v16si)_mm512_srl_epi32(__A, __B),
5868  (__v16si)__W);
5869 }
5870 
5871 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5872 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5873 {
5874  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5875  (__v16si)_mm512_srl_epi32(__A, __B),
5876  (__v16si)_mm512_setzero_si512());
5877 }
5878 
5879 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5880 _mm512_srl_epi64(__m512i __A, __m128i __B)
5881 {
5882  return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5883 }
5884 
5885 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5886 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5887 {
5888  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5889  (__v8di)_mm512_srl_epi64(__A, __B),
5890  (__v8di)__W);
5891 }
5892 
5893 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5894 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5895 {
5896  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5897  (__v8di)_mm512_srl_epi64(__A, __B),
5898  (__v8di)_mm512_setzero_si512());
5899 }
5900 
5901 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5902 _mm512_srlv_epi32(__m512i __X, __m512i __Y)
5903 {
5904  return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5905 }
5906 
5907 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5908 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5909 {
5910  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5911  (__v16si)_mm512_srlv_epi32(__X, __Y),
5912  (__v16si)__W);
5913 }
5914 
5915 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5916 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5917 {
5918  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5919  (__v16si)_mm512_srlv_epi32(__X, __Y),
5920  (__v16si)_mm512_setzero_si512());
5921 }
5922 
5923 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5924 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
5925 {
5926  return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5927 }
5928 
5929 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5930 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5931 {
5932  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5933  (__v8di)_mm512_srlv_epi64(__X, __Y),
5934  (__v8di)__W);
5935 }
5936 
5937 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5938 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5939 {
5940  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5941  (__v8di)_mm512_srlv_epi64(__X, __Y),
5942  (__v8di)_mm512_setzero_si512());
5943 }
5944 
5945 #define _mm512_ternarylogic_epi32(A, B, C, imm) \
5946  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
5947  (__v16si)(__m512i)(B), \
5948  (__v16si)(__m512i)(C), (int)(imm), \
5949  (__mmask16)-1)
5950 
5951 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
5952  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
5953  (__v16si)(__m512i)(B), \
5954  (__v16si)(__m512i)(C), (int)(imm), \
5955  (__mmask16)(U))
5956 
5957 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
5958  (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
5959  (__v16si)(__m512i)(B), \
5960  (__v16si)(__m512i)(C), \
5961  (int)(imm), (__mmask16)(U))
5962 
5963 #define _mm512_ternarylogic_epi64(A, B, C, imm) \
5964  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
5965  (__v8di)(__m512i)(B), \
5966  (__v8di)(__m512i)(C), (int)(imm), \
5967  (__mmask8)-1)
5968 
5969 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
5970  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
5971  (__v8di)(__m512i)(B), \
5972  (__v8di)(__m512i)(C), (int)(imm), \
5973  (__mmask8)(U))
5974 
5975 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
5976  (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
5977  (__v8di)(__m512i)(B), \
5978  (__v8di)(__m512i)(C), (int)(imm), \
5979  (__mmask8)(U))
5980 
5981 #ifdef __x86_64__
5982 #define _mm_cvt_roundsd_i64(A, R) \
5983  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))
5984 #endif
5985 
5986 #define _mm_cvt_roundsd_si32(A, R) \
5987  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))
5988 
5989 #define _mm_cvt_roundsd_i32(A, R) \
5990  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))
5991 
5992 #define _mm_cvt_roundsd_u32(A, R) \
5993  (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R))
5994 
5995 static __inline__ unsigned __DEFAULT_FN_ATTRS128
5996 _mm_cvtsd_u32 (__m128d __A)
5997 {
5998  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
6000 }
6001 
6002 #ifdef __x86_64__
6003 #define _mm_cvt_roundsd_u64(A, R) \
6004  (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
6005  (int)(R))
6006 
6007 static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6008 _mm_cvtsd_u64 (__m128d __A)
6009 {
6010  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
6011  __A,
6013 }
6014 #endif
6015 
6016 #define _mm_cvt_roundss_si32(A, R) \
6017  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))
6018 
6019 #define _mm_cvt_roundss_i32(A, R) \
6020  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))
6021 
6022 #ifdef __x86_64__
6023 #define _mm_cvt_roundss_si64(A, R) \
6024  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))
6025 
6026 #define _mm_cvt_roundss_i64(A, R) \
6027  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))
6028 #endif
6029 
6030 #define _mm_cvt_roundss_u32(A, R) \
6031  (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R))
6032 
6033 static __inline__ unsigned __DEFAULT_FN_ATTRS128
6034 _mm_cvtss_u32 (__m128 __A)
6035 {
6036  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6038 }
6039 
6040 #ifdef __x86_64__
6041 #define _mm_cvt_roundss_u64(A, R) \
6042  (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
6043  (int)(R))
6044 
6045 static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6046 _mm_cvtss_u64 (__m128 __A)
6047 {
6048  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6049  __A,
6051 }
6052 #endif
6053 
6054 #define _mm_cvtt_roundsd_i32(A, R) \
6055  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))
6056 
6057 #define _mm_cvtt_roundsd_si32(A, R) \
6058  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))
6059 
6060 static __inline__ int __DEFAULT_FN_ATTRS128
6061 _mm_cvttsd_i32 (__m128d __A)
6062 {
6063  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6065 }
6066 
6067 #ifdef __x86_64__
6068 #define _mm_cvtt_roundsd_si64(A, R) \
6069  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
6070 
6071 #define _mm_cvtt_roundsd_i64(A, R) \
6072  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
6073 
6074 static __inline__ long long __DEFAULT_FN_ATTRS128
6075 _mm_cvttsd_i64 (__m128d __A)
6076 {
6077  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6079 }
6080 #endif
6081 
6082 #define _mm_cvtt_roundsd_u32(A, R) \
6083  (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R))
6084 
6085 static __inline__ unsigned __DEFAULT_FN_ATTRS128
6086 _mm_cvttsd_u32 (__m128d __A)
6087 {
6088  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6090 }
6091 
6092 #ifdef __x86_64__
6093 #define _mm_cvtt_roundsd_u64(A, R) \
6094  (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
6095  (int)(R))
6096 
6097 static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6098 _mm_cvttsd_u64 (__m128d __A)
6099 {
6100  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6101  __A,
6103 }
6104 #endif
6105 
6106 #define _mm_cvtt_roundss_i32(A, R) \
6107  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))
6108 
6109 #define _mm_cvtt_roundss_si32(A, R) \
6110  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))
6111 
6112 static __inline__ int __DEFAULT_FN_ATTRS128
6113 _mm_cvttss_i32 (__m128 __A)
6114 {
6115  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6117 }
6118 
6119 #ifdef __x86_64__
6120 #define _mm_cvtt_roundss_i64(A, R) \
6121  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
6122 
6123 #define _mm_cvtt_roundss_si64(A, R) \
6124  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
6125 
6126 static __inline__ long long __DEFAULT_FN_ATTRS128
6127 _mm_cvttss_i64 (__m128 __A)
6128 {
6129  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6131 }
6132 #endif
6133 
6134 #define _mm_cvtt_roundss_u32(A, R) \
6135  (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R))
6136 
6137 static __inline__ unsigned __DEFAULT_FN_ATTRS128
6138 _mm_cvttss_u32 (__m128 __A)
6139 {
6140  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6142 }
6143 
6144 #ifdef __x86_64__
6145 #define _mm_cvtt_roundss_u64(A, R) \
6146  (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
6147  (int)(R))
6148 
6149 static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6150 _mm_cvttss_u64 (__m128 __A)
6151 {
6152  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6153  __A,
6155 }
6156 #endif
6157 
6158 #define _mm512_permute_pd(X, C) \
6159  (__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C))
6160 
6161 #define _mm512_mask_permute_pd(W, U, X, C) \
6162  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6163  (__v8df)_mm512_permute_pd((X), (C)), \
6164  (__v8df)(__m512d)(W))
6165 
6166 #define _mm512_maskz_permute_pd(U, X, C) \
6167  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6168  (__v8df)_mm512_permute_pd((X), (C)), \
6169  (__v8df)_mm512_setzero_pd())
6170 
6171 #define _mm512_permute_ps(X, C) \
6172  (__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C))
6173 
6174 #define _mm512_mask_permute_ps(W, U, X, C) \
6175  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6176  (__v16sf)_mm512_permute_ps((X), (C)), \
6177  (__v16sf)(__m512)(W))
6178 
6179 #define _mm512_maskz_permute_ps(U, X, C) \
6180  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6181  (__v16sf)_mm512_permute_ps((X), (C)), \
6182  (__v16sf)_mm512_setzero_ps())
6183 
6184 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6185 _mm512_permutevar_pd(__m512d __A, __m512i __C)
6186 {
6187  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
6188 }
6189 
6190 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6191 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6192 {
6193  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6194  (__v8df)_mm512_permutevar_pd(__A, __C),
6195  (__v8df)__W);
6196 }
6197 
6198 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6199 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
6200 {
6201  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6202  (__v8df)_mm512_permutevar_pd(__A, __C),
6203  (__v8df)_mm512_setzero_pd());
6204 }
6205 
6206 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6207 _mm512_permutevar_ps(__m512 __A, __m512i __C)
6208 {
6209  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6210 }
6211 
6212 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6213 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6214 {
6215  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6216  (__v16sf)_mm512_permutevar_ps(__A, __C),
6217  (__v16sf)__W);
6218 }
6219 
6220 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6221 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
6222 {
6223  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6224  (__v16sf)_mm512_permutevar_ps(__A, __C),
6225  (__v16sf)_mm512_setzero_ps());
6226 }
6227 
6228 static __inline __m512d __DEFAULT_FN_ATTRS512
6229 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
6230 {
6231  return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
6232  (__v8df)__B);
6233 }
6234 
6235 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6236 _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
6237 {
6238  return (__m512d)__builtin_ia32_selectpd_512(__U,
6239  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6240  (__v8df)__A);
6241 }
6242 
6243 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6244 _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
6245  __m512d __B)
6246 {
6247  return (__m512d)__builtin_ia32_selectpd_512(__U,
6248  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6249  (__v8df)(__m512d)__I);
6250 }
6251 
6252 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6253 _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
6254  __m512d __B)
6255 {
6256  return (__m512d)__builtin_ia32_selectpd_512(__U,
6257  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6258  (__v8df)_mm512_setzero_pd());
6259 }
6260 
6261 static __inline __m512 __DEFAULT_FN_ATTRS512
6262 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
6263 {
6264  return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
6265  (__v16sf) __B);
6266 }
6267 
6268 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6269 _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6270 {
6271  return (__m512)__builtin_ia32_selectps_512(__U,
6272  (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6273  (__v16sf)__A);
6274 }
6275 
6276 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6277 _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
6278 {
6279  return (__m512)__builtin_ia32_selectps_512(__U,
6280  (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6281  (__v16sf)(__m512)__I);
6282 }
6283 
6284 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6285 _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
6286 {
6287  return (__m512)__builtin_ia32_selectps_512(__U,
6288  (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6289  (__v16sf)_mm512_setzero_ps());
6290 }
6291 
6292 
6293 #define _mm512_cvtt_roundpd_epu32(A, R) \
6294  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6295  (__v8si)_mm256_undefined_si256(), \
6296  (__mmask8)-1, (int)(R))
6297 
6298 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
6299  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6300  (__v8si)(__m256i)(W), \
6301  (__mmask8)(U), (int)(R))
6302 
6303 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
6304  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6305  (__v8si)_mm256_setzero_si256(), \
6306  (__mmask8)(U), (int)(R))
6307 
6308 static __inline__ __m256i __DEFAULT_FN_ATTRS512
6309 _mm512_cvttpd_epu32 (__m512d __A)
6310 {
6311  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6312  (__v8si)
6314  (__mmask8) -1,
6316 }
6317 
6318 static __inline__ __m256i __DEFAULT_FN_ATTRS512
6319 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6320 {
6321  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6322  (__v8si) __W,
6323  (__mmask8) __U,
6325 }
6326 
6327 static __inline__ __m256i __DEFAULT_FN_ATTRS512
6328 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
6329 {
6330  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6331  (__v8si)
6333  (__mmask8) __U,
6335 }
6336 
6337 #define _mm_roundscale_round_sd(A, B, imm, R) \
6338  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6339  (__v2df)(__m128d)(B), \
6340  (__v2df)_mm_setzero_pd(), \
6341  (__mmask8)-1, (int)(imm), \
6342  (int)(R))
6343 
6344 #define _mm_roundscale_sd(A, B, imm) \
6345  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6346  (__v2df)(__m128d)(B), \
6347  (__v2df)_mm_setzero_pd(), \
6348  (__mmask8)-1, (int)(imm), \
6349  _MM_FROUND_CUR_DIRECTION)
6350 
6351 #define _mm_mask_roundscale_sd(W, U, A, B, imm) \
6352  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6353  (__v2df)(__m128d)(B), \
6354  (__v2df)(__m128d)(W), \
6355  (__mmask8)(U), (int)(imm), \
6356  _MM_FROUND_CUR_DIRECTION)
6357 
6358 #define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
6359  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6360  (__v2df)(__m128d)(B), \
6361  (__v2df)(__m128d)(W), \
6362  (__mmask8)(U), (int)(I), \
6363  (int)(R))
6364 
6365 #define _mm_maskz_roundscale_sd(U, A, B, I) \
6366  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6367  (__v2df)(__m128d)(B), \
6368  (__v2df)_mm_setzero_pd(), \
6369  (__mmask8)(U), (int)(I), \
6370  _MM_FROUND_CUR_DIRECTION)
6371 
6372 #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
6373  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6374  (__v2df)(__m128d)(B), \
6375  (__v2df)_mm_setzero_pd(), \
6376  (__mmask8)(U), (int)(I), \
6377  (int)(R))
6378 
6379 #define _mm_roundscale_round_ss(A, B, imm, R) \
6380  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6381  (__v4sf)(__m128)(B), \
6382  (__v4sf)_mm_setzero_ps(), \
6383  (__mmask8)-1, (int)(imm), \
6384  (int)(R))
6385 
6386 #define _mm_roundscale_ss(A, B, imm) \
6387  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6388  (__v4sf)(__m128)(B), \
6389  (__v4sf)_mm_setzero_ps(), \
6390  (__mmask8)-1, (int)(imm), \
6391  _MM_FROUND_CUR_DIRECTION)
6392 
6393 #define _mm_mask_roundscale_ss(W, U, A, B, I) \
6394  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6395  (__v4sf)(__m128)(B), \
6396  (__v4sf)(__m128)(W), \
6397  (__mmask8)(U), (int)(I), \
6398  _MM_FROUND_CUR_DIRECTION)
6399 
6400 #define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
6401  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6402  (__v4sf)(__m128)(B), \
6403  (__v4sf)(__m128)(W), \
6404  (__mmask8)(U), (int)(I), \
6405  (int)(R))
6406 
6407 #define _mm_maskz_roundscale_ss(U, A, B, I) \
6408  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6409  (__v4sf)(__m128)(B), \
6410  (__v4sf)_mm_setzero_ps(), \
6411  (__mmask8)(U), (int)(I), \
6412  _MM_FROUND_CUR_DIRECTION)
6413 
6414 #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
6415  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6416  (__v4sf)(__m128)(B), \
6417  (__v4sf)_mm_setzero_ps(), \
6418  (__mmask8)(U), (int)(I), \
6419  (int)(R))
6420 
6421 #define _mm512_scalef_round_pd(A, B, R) \
6422  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6423  (__v8df)(__m512d)(B), \
6424  (__v8df)_mm512_undefined_pd(), \
6425  (__mmask8)-1, (int)(R))
6426 
6427 #define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
6428  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6429  (__v8df)(__m512d)(B), \
6430  (__v8df)(__m512d)(W), \
6431  (__mmask8)(U), (int)(R))
6432 
6433 #define _mm512_maskz_scalef_round_pd(U, A, B, R) \
6434  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6435  (__v8df)(__m512d)(B), \
6436  (__v8df)_mm512_setzero_pd(), \
6437  (__mmask8)(U), (int)(R))
6438 
6439 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6440 _mm512_scalef_pd (__m512d __A, __m512d __B)
6441 {
6442  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6443  (__v8df) __B,
6444  (__v8df)
6446  (__mmask8) -1,
6448 }
6449 
6450 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6451 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6452 {
6453  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6454  (__v8df) __B,
6455  (__v8df) __W,
6456  (__mmask8) __U,
6458 }
6459 
6460 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6461 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6462 {
6463  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6464  (__v8df) __B,
6465  (__v8df)
6466  _mm512_setzero_pd (),
6467  (__mmask8) __U,
6469 }
6470 
6471 #define _mm512_scalef_round_ps(A, B, R) \
6472  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6473  (__v16sf)(__m512)(B), \
6474  (__v16sf)_mm512_undefined_ps(), \
6475  (__mmask16)-1, (int)(R))
6476 
6477 #define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
6478  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6479  (__v16sf)(__m512)(B), \
6480  (__v16sf)(__m512)(W), \
6481  (__mmask16)(U), (int)(R))
6482 
6483 #define _mm512_maskz_scalef_round_ps(U, A, B, R) \
6484  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6485  (__v16sf)(__m512)(B), \
6486  (__v16sf)_mm512_setzero_ps(), \
6487  (__mmask16)(U), (int)(R))
6488 
6489 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6490 _mm512_scalef_ps (__m512 __A, __m512 __B)
6491 {
6492  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6493  (__v16sf) __B,
6494  (__v16sf)
6496  (__mmask16) -1,
6498 }
6499 
6500 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6501 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6502 {
6503  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6504  (__v16sf) __B,
6505  (__v16sf) __W,
6506  (__mmask16) __U,
6508 }
6509 
6510 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6511 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6512 {
6513  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6514  (__v16sf) __B,
6515  (__v16sf)
6516  _mm512_setzero_ps (),
6517  (__mmask16) __U,
6519 }
6520 
6521 #define _mm_scalef_round_sd(A, B, R) \
6522  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6523  (__v2df)(__m128d)(B), \
6524  (__v2df)_mm_setzero_pd(), \
6525  (__mmask8)-1, (int)(R))
6526 
6527 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6528 _mm_scalef_sd (__m128d __A, __m128d __B)
6529 {
6530  return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6531  (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6532  (__mmask8) -1,
6534 }
6535 
6536 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6537 _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6538 {
6539  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6540  (__v2df) __B,
6541  (__v2df) __W,
6542  (__mmask8) __U,
6544 }
6545 
6546 #define _mm_mask_scalef_round_sd(W, U, A, B, R) \
6547  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6548  (__v2df)(__m128d)(B), \
6549  (__v2df)(__m128d)(W), \
6550  (__mmask8)(U), (int)(R))
6551 
6552 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6553 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6554 {
6555  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6556  (__v2df) __B,
6557  (__v2df) _mm_setzero_pd (),
6558  (__mmask8) __U,
6560 }
6561 
6562 #define _mm_maskz_scalef_round_sd(U, A, B, R) \
6563  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6564  (__v2df)(__m128d)(B), \
6565  (__v2df)_mm_setzero_pd(), \
6566  (__mmask8)(U), (int)(R))
6567 
6568 #define _mm_scalef_round_ss(A, B, R) \
6569  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6570  (__v4sf)(__m128)(B), \
6571  (__v4sf)_mm_setzero_ps(), \
6572  (__mmask8)-1, (int)(R))
6573 
6574 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6575 _mm_scalef_ss (__m128 __A, __m128 __B)
6576 {
6577  return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6578  (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6579  (__mmask8) -1,
6581 }
6582 
6583 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6584 _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6585 {
6586  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6587  (__v4sf) __B,
6588  (__v4sf) __W,
6589  (__mmask8) __U,
6591 }
6592 
6593 #define _mm_mask_scalef_round_ss(W, U, A, B, R) \
6594  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6595  (__v4sf)(__m128)(B), \
6596  (__v4sf)(__m128)(W), \
6597  (__mmask8)(U), (int)(R))
6598 
6599 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6600 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6601 {
6602  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6603  (__v4sf) __B,
6604  (__v4sf) _mm_setzero_ps (),
6605  (__mmask8) __U,
6607 }
6608 
6609 #define _mm_maskz_scalef_round_ss(U, A, B, R) \
6610  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6611  (__v4sf)(__m128)(B), \
6612  (__v4sf)_mm_setzero_ps(), \
6613  (__mmask8)(U), \
6614  (int)(R))
6615 
6616 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6617 _mm512_srai_epi32(__m512i __A, int __B)
6618 {
6619  return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
6620 }
6621 
6622 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6623 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
6624 {
6625  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6626  (__v16si)_mm512_srai_epi32(__A, __B),
6627  (__v16si)__W);
6628 }
6629 
6630 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6631 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
6632  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6633  (__v16si)_mm512_srai_epi32(__A, __B),
6634  (__v16si)_mm512_setzero_si512());
6635 }
6636 
6637 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6638 _mm512_srai_epi64(__m512i __A, int __B)
6639 {
6640  return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
6641 }
6642 
6643 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6644 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
6645 {
6646  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6647  (__v8di)_mm512_srai_epi64(__A, __B),
6648  (__v8di)__W);
6649 }
6650 
6651 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6652 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
6653 {
6654  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6655  (__v8di)_mm512_srai_epi64(__A, __B),
6656  (__v8di)_mm512_setzero_si512());
6657 }
6658 
/* Shuffle 128-bit lanes of A and B selected by imm, at f32x4 granularity,
   with merge- and zero-masked forms.  */
#define _mm512_shuffle_f32x4(A, B, imm) \
  (__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(imm))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                      (__v16sf)_mm512_setzero_ps())

/* Same lane shuffle at f64x2 granularity.  */
#define _mm512_shuffle_f64x2(A, B, imm) \
  (__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(imm))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                       (__v8df)_mm512_setzero_pd())

/* Integer lane shuffles, i32x4 and i64x2 granularity.  */
#define _mm512_shuffle_i32x4(A, B, imm) \
  (__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(imm))

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_shuffle_i64x2(A, B, imm) \
  (__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(imm))

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                      (__v8di)_mm512_setzero_si512())

/* Per-element shufpd/shufps over 512-bit vectors, with masked forms.  */
#define _mm512_shuffle_pd(A, B, M) \
  (__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(M))

#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_shuffle_ps(A, B, M) \
  (__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(M))

#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)_mm512_setzero_ps())
6742 
6743 #define _mm_sqrt_round_sd(A, B, R) \
6744  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6745  (__v2df)(__m128d)(B), \
6746  (__v2df)_mm_setzero_pd(), \
6747  (__mmask8)-1, (int)(R))
6748 
6749 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6750 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6751 {
6752  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6753  (__v2df) __B,
6754  (__v2df) __W,
6755  (__mmask8) __U,
6757 }
6758 
6759 #define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
6760  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6761  (__v2df)(__m128d)(B), \
6762  (__v2df)(__m128d)(W), \
6763  (__mmask8)(U), (int)(R))
6764 
6765 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6766 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
6767 {
6768  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6769  (__v2df) __B,
6770  (__v2df) _mm_setzero_pd (),
6771  (__mmask8) __U,
6773 }
6774 
6775 #define _mm_maskz_sqrt_round_sd(U, A, B, R) \
6776  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6777  (__v2df)(__m128d)(B), \
6778  (__v2df)_mm_setzero_pd(), \
6779  (__mmask8)(U), (int)(R))
6780 
6781 #define _mm_sqrt_round_ss(A, B, R) \
6782  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6783  (__v4sf)(__m128)(B), \
6784  (__v4sf)_mm_setzero_ps(), \
6785  (__mmask8)-1, (int)(R))
6786 
6787 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6788 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6789 {
6790  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6791  (__v4sf) __B,
6792  (__v4sf) __W,
6793  (__mmask8) __U,
6795 }
6796 
6797 #define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
6798  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6799  (__v4sf)(__m128)(B), \
6800  (__v4sf)(__m128)(W), (__mmask8)(U), \
6801  (int)(R))
6802 
6803 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6804 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
6805 {
6806  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6807  (__v4sf) __B,
6808  (__v4sf) _mm_setzero_ps (),
6809  (__mmask8) __U,
6811 }
6812 
6813 #define _mm_maskz_sqrt_round_ss(U, A, B, R) \
6814  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6815  (__v4sf)(__m128)(B), \
6816  (__v4sf)_mm_setzero_ps(), \
6817  (__mmask8)(U), (int)(R))
6818 
6819 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6821 {
6822  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6823  0, 1, 2, 3, 0, 1, 2, 3,
6824  0, 1, 2, 3, 0, 1, 2, 3);
6825 }
6826 
6827 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6828 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
6829 {
6830  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6831  (__v16sf)_mm512_broadcast_f32x4(__A),
6832  (__v16sf)__O);
6833 }
6834 
6835 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6836 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
6837 {
6838  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6839  (__v16sf)_mm512_broadcast_f32x4(__A),
6840  (__v16sf)_mm512_setzero_ps());
6841 }
6842 
6843 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6845 {
6846  return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6847  0, 1, 2, 3, 0, 1, 2, 3);
6848 }
6849 
6850 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6851 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
6852 {
6853  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6854  (__v8df)_mm512_broadcast_f64x4(__A),
6855  (__v8df)__O);
6856 }
6857 
6858 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6859 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
6860 {
6861  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6862  (__v8df)_mm512_broadcast_f64x4(__A),
6863  (__v8df)_mm512_setzero_pd());
6864 }
6865 
6866 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6868 {
6869  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6870  0, 1, 2, 3, 0, 1, 2, 3,
6871  0, 1, 2, 3, 0, 1, 2, 3);
6872 }
6873 
6874 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6875 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
6876 {
6877  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6878  (__v16si)_mm512_broadcast_i32x4(__A),
6879  (__v16si)__O);
6880 }
6881 
6882 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6883 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
6884 {
6885  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6886  (__v16si)_mm512_broadcast_i32x4(__A),
6887  (__v16si)_mm512_setzero_si512());
6888 }
6889 
6890 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6892 {
6893  return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6894  0, 1, 2, 3, 0, 1, 2, 3);
6895 }
6896 
6897 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6898 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
6899 {
6900  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6901  (__v8di)_mm512_broadcast_i64x4(__A),
6902  (__v8di)__O);
6903 }
6904 
6905 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6906 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
6907 {
6908  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6909  (__v8di)_mm512_broadcast_i64x4(__A),
6910  (__v8di)_mm512_setzero_si512());
6911 }
6912 
6913 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6914 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
6915 {
6916  return (__m512d)__builtin_ia32_selectpd_512(__M,
6917  (__v8df) _mm512_broadcastsd_pd(__A),
6918  (__v8df) __O);
6919 }
6920 
6921 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6922 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
6923 {
6924  return (__m512d)__builtin_ia32_selectpd_512(__M,
6925  (__v8df) _mm512_broadcastsd_pd(__A),
6926  (__v8df) _mm512_setzero_pd());
6927 }
6928 
6929 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6930 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
6931 {
6932  return (__m512)__builtin_ia32_selectps_512(__M,
6933  (__v16sf) _mm512_broadcastss_ps(__A),
6934  (__v16sf) __O);
6935 }
6936 
6937 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6938 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
6939 {
6940  return (__m512)__builtin_ia32_selectps_512(__M,
6941  (__v16sf) _mm512_broadcastss_ps(__A),
6942  (__v16sf) _mm512_setzero_ps());
6943 }
6944 
6945 static __inline__ __m128i __DEFAULT_FN_ATTRS512
6947 {
6948  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6949  (__v16qi) _mm_undefined_si128 (),
6950  (__mmask16) -1);
6951 }
6952 
6953 static __inline__ __m128i __DEFAULT_FN_ATTRS512
6954 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6955 {
6956  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6957  (__v16qi) __O, __M);
6958 }
6959 
6960 static __inline__ __m128i __DEFAULT_FN_ATTRS512
6961 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
6962 {
6963  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6964  (__v16qi) _mm_setzero_si128 (),
6965  __M);
6966 }
6967 
6968 static __inline__ void __DEFAULT_FN_ATTRS512
6969 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
6970 {
6971  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6972 }
6973 
6974 static __inline__ __m256i __DEFAULT_FN_ATTRS512
6976 {
6977  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6978  (__v16hi) _mm256_undefined_si256 (),
6979  (__mmask16) -1);
6980 }
6981 
6982 static __inline__ __m256i __DEFAULT_FN_ATTRS512
6983 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6984 {
6985  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6986  (__v16hi) __O, __M);
6987 }
6988 
6989 static __inline__ __m256i __DEFAULT_FN_ATTRS512
6990 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
6991 {
6992  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6993  (__v16hi) _mm256_setzero_si256 (),
6994  __M);
6995 }
6996 
6997 static __inline__ void __DEFAULT_FN_ATTRS512
6998 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
6999 {
7000  __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7001 }
7002 
7003 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7005 {
7006  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7007  (__v16qi) _mm_undefined_si128 (),
7008  (__mmask8) -1);
7009 }
7010 
7011 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7012 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7013 {
7014  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7015  (__v16qi) __O, __M);
7016 }
7017 
7018 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7019 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
7020 {
7021  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7022  (__v16qi) _mm_setzero_si128 (),
7023  __M);
7024 }
7025 
7026 static __inline__ void __DEFAULT_FN_ATTRS512
7027 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7028 {
7029  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7030 }
7031 
7032 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7034 {
7035  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7036  (__v8si) _mm256_undefined_si256 (),
7037  (__mmask8) -1);
7038 }
7039 
7040 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7041 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7042 {
7043  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7044  (__v8si) __O, __M);
7045 }
7046 
7047 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7048 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
7049 {
7050  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7051  (__v8si) _mm256_setzero_si256 (),
7052  __M);
7053 }
7054 
7055 static __inline__ void __DEFAULT_FN_ATTRS512
7056 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
7057 {
7058  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7059 }
7060 
7061 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7063 {
7064  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7065  (__v8hi) _mm_undefined_si128 (),
7066  (__mmask8) -1);
7067 }
7068 
7069 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7070 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7071 {
7072  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7073  (__v8hi) __O, __M);
7074 }
7075 
7076 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7077 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
7078 {
7079  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7080  (__v8hi) _mm_setzero_si128 (),
7081  __M);
7082 }
7083 
7084 static __inline__ void __DEFAULT_FN_ATTRS512
7085 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
7086 {
7087  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7088 }
7089 
7090 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7092 {
7093  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7094  (__v16qi) _mm_undefined_si128 (),
7095  (__mmask16) -1);
7096 }
7097 
7098 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7099 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7100 {
7101  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7102  (__v16qi) __O,
7103  __M);
7104 }
7105 
7106 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7107 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
7108 {
7109  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7110  (__v16qi) _mm_setzero_si128 (),
7111  __M);
7112 }
7113 
7114 static __inline__ void __DEFAULT_FN_ATTRS512
7115 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7116 {
7117  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7118 }
7119 
7120 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7122 {
7123  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7124  (__v16hi) _mm256_undefined_si256 (),
7125  (__mmask16) -1);
7126 }
7127 
7128 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7129 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7130 {
7131  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7132  (__v16hi) __O,
7133  __M);
7134 }
7135 
7136 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7137 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
7138 {
7139  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7140  (__v16hi) _mm256_setzero_si256 (),
7141  __M);
7142 }
7143 
7144 static __inline__ void __DEFAULT_FN_ATTRS512
7145 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7146 {
7147  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7148 }
7149 
7150 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7152 {
7153  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7154  (__v16qi) _mm_undefined_si128 (),
7155  (__mmask8) -1);
7156 }
7157 
7158 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7159 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7160 {
7161  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7162  (__v16qi) __O,
7163  __M);
7164 }
7165 
7166 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7167 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
7168 {
7169  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7170  (__v16qi) _mm_setzero_si128 (),
7171  __M);
7172 }
7173 
7174 static __inline__ void __DEFAULT_FN_ATTRS512
7175 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7176 {
7177  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7178 }
7179 
7180 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7182 {
7183  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7184  (__v8si) _mm256_undefined_si256 (),
7185  (__mmask8) -1);
7186 }
7187 
7188 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7189 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7190 {
7191  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7192  (__v8si) __O, __M);
7193 }
7194 
7195 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7196 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
7197 {
7198  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7199  (__v8si) _mm256_setzero_si256 (),
7200  __M);
7201 }
7202 
7203 static __inline__ void __DEFAULT_FN_ATTRS512
7204 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7205 {
7206  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7207 }
7208 
7209 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7211 {
7212  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7213  (__v8hi) _mm_undefined_si128 (),
7214  (__mmask8) -1);
7215 }
7216 
7217 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7218 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7219 {
7220  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7221  (__v8hi) __O, __M);
7222 }
7223 
7224 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7225 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
7226 {
7227  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7228  (__v8hi) _mm_setzero_si128 (),
7229  __M);
7230 }
7231 
7232 static __inline__ void __DEFAULT_FN_ATTRS512
7233 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7234 {
7235  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7236 }
7237 
7238 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7240 {
7241  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7242  (__v16qi) _mm_undefined_si128 (),
7243  (__mmask16) -1);
7244 }
7245 
7246 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7247 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7248 {
7249  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7250  (__v16qi) __O, __M);
7251 }
7252 
7253 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7254 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
7255 {
7256  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7257  (__v16qi) _mm_setzero_si128 (),
7258  __M);
7259 }
7260 
7261 static __inline__ void __DEFAULT_FN_ATTRS512
7262 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7263 {
7264  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7265 }
7266 
7267 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7269 {
7270  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7271  (__v16hi) _mm256_undefined_si256 (),
7272  (__mmask16) -1);
7273 }
7274 
7275 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7276 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7277 {
7278  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7279  (__v16hi) __O, __M);
7280 }
7281 
7282 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7283 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
7284 {
7285  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7286  (__v16hi) _mm256_setzero_si256 (),
7287  __M);
7288 }
7289 
7290 static __inline__ void __DEFAULT_FN_ATTRS512
7291 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
7292 {
7293  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7294 }
7295 
7296 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7298 {
7299  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7300  (__v16qi) _mm_undefined_si128 (),
7301  (__mmask8) -1);
7302 }
7303 
7304 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7305 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7306 {
7307  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7308  (__v16qi) __O, __M);
7309 }
7310 
7311 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7312 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
7313 {
7314  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7315  (__v16qi) _mm_setzero_si128 (),
7316  __M);
7317 }
7318 
7319 static __inline__ void __DEFAULT_FN_ATTRS512
7320 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7321 {
7322  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7323 }
7324 
7325 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7327 {
7328  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7329  (__v8si) _mm256_undefined_si256 (),
7330  (__mmask8) -1);
7331 }
7332 
7333 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7334 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7335 {
7336  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7337  (__v8si) __O, __M);
7338 }
7339 
7340 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7341 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
7342 {
7343  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7344  (__v8si) _mm256_setzero_si256 (),
7345  __M);
7346 }
7347 
7348 static __inline__ void __DEFAULT_FN_ATTRS512
7349 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7350 {
7351  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7352 }
7353 
7354 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7356 {
7357  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7358  (__v8hi) _mm_undefined_si128 (),
7359  (__mmask8) -1);
7360 }
7361 
7362 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7363 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7364 {
7365  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7366  (__v8hi) __O, __M);
7367 }
7368 
7369 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7370 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
7371 {
7372  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7373  (__v8hi) _mm_setzero_si128 (),
7374  __M);
7375 }
7376 
7377 static __inline__ void __DEFAULT_FN_ATTRS512
7378 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7379 {
7380  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7381 }
7382 
/* Extract a selected 128-/256-bit lane from a 512-bit integer vector,
   with merge- and zero-masked forms.  */
#define _mm512_extracti32x4_epi32(A, imm) \
  (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v4si)_mm_undefined_si128(), \
                                            (__mmask8)-1)

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v4si)(__m128i)(W), \
                                            (__mmask8)(U))

#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v4si)_mm_setzero_si128(), \
                                            (__mmask8)(U))

#define _mm512_extracti64x4_epi64(A, imm) \
  (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                            (__v4di)_mm256_undefined_si256(), \
                                            (__mmask8)-1)

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                            (__v4di)(__m256i)(W), \
                                            (__mmask8)(U))

#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                            (__v4di)_mm256_setzero_si256(), \
                                            (__mmask8)(U))

/* Insert a 128-/256-bit vector into the lane of a 512-bit vector selected
   by imm, with masked forms built on the select builtins.  */
#define _mm512_insertf64x4(A, B, imm) \
  (__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                      (__v4df)(__m256d)(B), (int)(imm))

#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                   (__v8df)(__m512d)(W))

#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                   (__v8df)_mm512_setzero_pd())

#define _mm512_inserti64x4(A, B, imm) \
  (__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                      (__v4di)(__m256i)(B), (int)(imm))

#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                   (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                   (__v8di)(__m512i)(W))

#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                   (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                   (__v8di)_mm512_setzero_si512())

#define _mm512_insertf32x4(A, B, imm) \
  (__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                     (__v4sf)(__m128)(B), (int)(imm))

#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                  (__v16sf)(__m512)(W))

#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                  (__v16sf)_mm512_setzero_ps())

#define _mm512_inserti32x4(A, B, imm) \
  (__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                      (__v4si)(__m128i)(B), (int)(imm))

#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                  (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                  (__v16si)(__m512i)(W))

#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                  (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                  (__v16si)_mm512_setzero_si512())
7468 
/* getmant: extract the normalized mantissa of each element.  B selects the
   interval, C the sign control; the builtin packs them as (C<<2)|B.  The
   _round_ variants take an explicit rounding-mode argument; the plain ones
   use _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_getmant_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_getmant_pd(A, B, C) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, \
                                            _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_getmant_pd(U, A, B, C) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION)

#define _mm512_getmant_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R))

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_getmant_ps(A, B, C) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, \
                                           _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_getmant_ps(U, A, B, C) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION)
7546 
/* getexp: extract the biased-removed exponent of each double-precision
   element, with explicit rounding control R.  */
#define _mm512_getexp_round_pd(A, R) \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
7561 
7562 static __inline__ __m512d __DEFAULT_FN_ATTRS512
7563 _mm512_getexp_pd (__m512d __A)
7564 {
7565  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7566  (__v8df) _mm512_undefined_pd (),
7567  (__mmask8) -1,
7569 }
7570 
7571 static __inline__ __m512d __DEFAULT_FN_ATTRS512
7572 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7573 {
7574  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7575  (__v8df) __W,
7576  (__mmask8) __U,
7578 }
7579 
7580 static __inline__ __m512d __DEFAULT_FN_ATTRS512
7581 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
7582 {
7583  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7584  (__v8df) _mm512_setzero_pd (),
7585  (__mmask8) __U,
7587 }
7588 
/* getexp of single-precision elements, with explicit rounding control R.  */
#define _mm512_getexp_round_ps(A, R) \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R))
7603 
7604 static __inline__ __m512 __DEFAULT_FN_ATTRS512
7605 _mm512_getexp_ps (__m512 __A)
7606 {
7607  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7608  (__v16sf) _mm512_undefined_ps (),
7609  (__mmask16) -1,
7611 }
7612 
7613 static __inline__ __m512 __DEFAULT_FN_ATTRS512
7614 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7615 {
7616  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7617  (__v16sf) __W,
7618  (__mmask16) __U,
7620 }
7621 
7622 static __inline__ __m512 __DEFAULT_FN_ATTRS512
7623 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
7624 {
7625  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7626  (__v16sf) _mm512_setzero_ps (),
7627  (__mmask16) __U,
7629 }
7630 
/* Gather intrinsics.  i64-indexed gathers use 8 qword indices (so ps/epi32
   results are 256-bit); i32-indexed gathers of pd/epi64 use 8 dword indices
   from a __m256i.  "mask" forms merge gathered lanes into v1_old under the
   write-mask; unmasked forms gather all lanes.  scale must be 1, 2, 4 or 8. */
#define _mm512_i64gather_ps(index, addr, scale) \
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale))

#define _mm512_i64gather_epi32(index, addr, scale) \
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)-1, (int)(scale))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale))

#define _mm512_i64gather_pd(index, addr, scale) \
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale))

#define _mm512_i64gather_epi64(index, addr, scale) \
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale))

#define _mm512_i32gather_ps(index, addr, scale) \
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                       (void const *)(addr), \
                                       (__v16sf)(__m512)(index), \
                                       (__mmask16)-1, (int)(scale))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                       (void const *)(addr), \
                                       (__v16sf)(__m512)(index), \
                                       (__mmask16)(mask), (int)(scale))

#define _mm512_i32gather_epi32(index, addr, scale) \
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)-1, (int)(scale))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)(mask), (int)(scale))

#define _mm512_i32gather_pd(index, addr, scale) \
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                       (void const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                       (void const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale))

#define _mm512_i32gather_epi64(index, addr, scale) \
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (void const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                       (void const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale))
7726 
/* Scatter intrinsics (void result; store elements of v1 to addr+index*scale).
   i64-indexed scatters use 8 qword indices; i32-indexed scatters of pd/epi64
   use 8 dword indices from a __m256i.  "mask" forms store only lanes whose
   mask bit is set.  scale must be 1, 2, 4 or 8. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
7806 
7807 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7808 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7809 {
7810  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7811  (__v4sf)__A,
7812  (__v4sf)__B,
7813  (__mmask8)__U,
7815 }
7816 
7817 #define _mm_fmadd_round_ss(A, B, C, R) \
7818  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7819  (__v4sf)(__m128)(B), \
7820  (__v4sf)(__m128)(C), (__mmask8)-1, \
7821  (int)(R))
7822 
7823 #define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
7824  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7825  (__v4sf)(__m128)(A), \
7826  (__v4sf)(__m128)(B), (__mmask8)(U), \
7827  (int)(R))
7828 
7829 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7830 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7831 {
7832  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7833  (__v4sf)__B,
7834  (__v4sf)__C,
7835  (__mmask8)__U,
7837 }
7838 
7839 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
7840  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7841  (__v4sf)(__m128)(B), \
7842  (__v4sf)(__m128)(C), (__mmask8)(U), \
7843  (int)(R))
7844 
7845 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7846 _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7847 {
7848  return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7849  (__v4sf)__X,
7850  (__v4sf)__Y,
7851  (__mmask8)__U,
7853 }
7854 
7855 #define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
7856  (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7857  (__v4sf)(__m128)(X), \
7858  (__v4sf)(__m128)(Y), (__mmask8)(U), \
7859  (int)(R))
7860 
7861 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7862 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7863 {
7864  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7865  (__v4sf)__A,
7866  -(__v4sf)__B,
7867  (__mmask8)__U,
7869 }
7870 
7871 #define _mm_fmsub_round_ss(A, B, C, R) \
7872  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7873  (__v4sf)(__m128)(B), \
7874  -(__v4sf)(__m128)(C), (__mmask8)-1, \
7875  (int)(R))
7876 
7877 #define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
7878  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7879  (__v4sf)(__m128)(A), \
7880  -(__v4sf)(__m128)(B), (__mmask8)(U), \
7881  (int)(R))
7882 
7883 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7884 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7885 {
7886  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7887  (__v4sf)__B,
7888  -(__v4sf)__C,
7889  (__mmask8)__U,
7891 }
7892 
7893 #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7894  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7895  (__v4sf)(__m128)(B), \
7896  -(__v4sf)(__m128)(C), (__mmask8)(U), \
7897  (int)(R))
7898 
7899 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7900 _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7901 {
7902  return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7903  (__v4sf)__X,
7904  (__v4sf)__Y,
7905  (__mmask8)__U,
7907 }
7908 
7909 #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
7910  (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7911  (__v4sf)(__m128)(X), \
7912  (__v4sf)(__m128)(Y), (__mmask8)(U), \
7913  (int)(R))
7914 
7915 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7916 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7917 {
7918  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7919  -(__v4sf)__A,
7920  (__v4sf)__B,
7921  (__mmask8)__U,
7923 }
7924 
7925 #define _mm_fnmadd_round_ss(A, B, C, R) \
7926  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7927  -(__v4sf)(__m128)(B), \
7928  (__v4sf)(__m128)(C), (__mmask8)-1, \
7929  (int)(R))
7930 
7931 #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
7932  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7933  -(__v4sf)(__m128)(A), \
7934  (__v4sf)(__m128)(B), (__mmask8)(U), \
7935  (int)(R))
7936 
7937 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7938 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7939 {
7940  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7941  -(__v4sf)__B,
7942  (__v4sf)__C,
7943  (__mmask8)__U,
7945 }
7946 
7947 #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
7948  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7949  -(__v4sf)(__m128)(B), \
7950  (__v4sf)(__m128)(C), (__mmask8)(U), \
7951  (int)(R))
7952 
7953 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7954 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7955 {
7956  return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7957  -(__v4sf)__X,
7958  (__v4sf)__Y,
7959  (__mmask8)__U,
7961 }
7962 
7963 #define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
7964  (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7965  -(__v4sf)(__m128)(X), \
7966  (__v4sf)(__m128)(Y), (__mmask8)(U), \
7967  (int)(R))
7968 
7969 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7970 _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7971 {
7972  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7973  -(__v4sf)__A,
7974  -(__v4sf)__B,
7975  (__mmask8)__U,
7977 }
7978 
7979 #define _mm_fnmsub_round_ss(A, B, C, R) \
7980  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7981  -(__v4sf)(__m128)(B), \
7982  -(__v4sf)(__m128)(C), (__mmask8)-1, \
7983  (int)(R))
7984 
7985 #define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
7986  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7987  -(__v4sf)(__m128)(A), \
7988  -(__v4sf)(__m128)(B), (__mmask8)(U), \
7989  (int)(R))
7990 
7991 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7992 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7993 {
7994  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7995  -(__v4sf)__B,
7996  -(__v4sf)__C,
7997  (__mmask8)__U,
7999 }
8000 
8001 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
8002  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
8003  -(__v4sf)(__m128)(B), \
8004  -(__v4sf)(__m128)(C), (__mmask8)(U), \
8005  (int)(R))
8006 
8007 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8008 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8009 {
8010  return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
8011  -(__v4sf)__X,
8012  (__v4sf)__Y,
8013  (__mmask8)__U,
8015 }
8016 
8017 #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
8018  (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
8019  -(__v4sf)(__m128)(X), \
8020  (__v4sf)(__m128)(Y), (__mmask8)(U), \
8021  (int)(R))
8022 
8023 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8024 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8025 {
8026  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8027  (__v2df)__A,
8028  (__v2df)__B,
8029  (__mmask8)__U,
8031 }
8032 
8033 #define _mm_fmadd_round_sd(A, B, C, R) \
8034  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8035  (__v2df)(__m128d)(B), \
8036  (__v2df)(__m128d)(C), (__mmask8)-1, \
8037  (int)(R))
8038 
8039 #define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
8040  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8041  (__v2df)(__m128d)(A), \
8042  (__v2df)(__m128d)(B), (__mmask8)(U), \
8043  (int)(R))
8044 
8045 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8046 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8047 {
8048  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8049  (__v2df)__B,
8050  (__v2df)__C,
8051  (__mmask8)__U,
8053 }
8054 
8055 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
8056  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8057  (__v2df)(__m128d)(B), \
8058  (__v2df)(__m128d)(C), (__mmask8)(U), \
8059  (int)(R))
8060 
8061 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8062 _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8063 {
8064  return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8065  (__v2df)__X,
8066  (__v2df)__Y,
8067  (__mmask8)__U,
8069 }
8070 
8071 #define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
8072  (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
8073  (__v2df)(__m128d)(X), \
8074  (__v2df)(__m128d)(Y), (__mmask8)(U), \
8075  (int)(R))
8076 
8077 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8078 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8079 {
8080  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8081  (__v2df)__A,
8082  -(__v2df)__B,
8083  (__mmask8)__U,
8085 }
8086 
8087 #define _mm_fmsub_round_sd(A, B, C, R) \
8088  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8089  (__v2df)(__m128d)(B), \
8090  -(__v2df)(__m128d)(C), (__mmask8)-1, \
8091  (int)(R))
8092 
8093 #define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
8094  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8095  (__v2df)(__m128d)(A), \
8096  -(__v2df)(__m128d)(B), (__mmask8)(U), \
8097  (int)(R))
8098 
8099 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8100 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8101 {
8102  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8103  (__v2df)__B,
8104  -(__v2df)__C,
8105  (__mmask8)__U,
8107 }
8108 
8109 #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
8110  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8111  (__v2df)(__m128d)(B), \
8112  -(__v2df)(__m128d)(C), \
8113  (__mmask8)(U), (int)(R))
8114 
8115 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8116 _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8117 {
8118  return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8119  (__v2df)__X,
8120  (__v2df)__Y,
8121  (__mmask8)__U,
8123 }
8124 
8125 #define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
8126  (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
8127  (__v2df)(__m128d)(X), \
8128  (__v2df)(__m128d)(Y), \
8129  (__mmask8)(U), (int)(R))
8130 
8131 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8132 _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8133 {
8134  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8135  -(__v2df)__A,
8136  (__v2df)__B,
8137  (__mmask8)__U,
8139 }
8140 
8141 #define _mm_fnmadd_round_sd(A, B, C, R) \
8142  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8143  -(__v2df)(__m128d)(B), \
8144  (__v2df)(__m128d)(C), (__mmask8)-1, \
8145  (int)(R))
8146 
8147 #define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
8148  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8149  -(__v2df)(__m128d)(A), \
8150  (__v2df)(__m128d)(B), (__mmask8)(U), \
8151  (int)(R))
8152 
8153 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8154 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8155 {
8156  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8157  -(__v2df)__B,
8158  (__v2df)__C,
8159  (__mmask8)__U,
8161 }
8162 
8163 #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
8164  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8165  -(__v2df)(__m128d)(B), \
8166  (__v2df)(__m128d)(C), (__mmask8)(U), \
8167  (int)(R))
8168 
8169 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8170 _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8171 {
8172  return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8173  -(__v2df)__X,
8174  (__v2df)__Y,
8175  (__mmask8)__U,
8177 }
8178 
8179 #define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
8180  (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
8181  -(__v2df)(__m128d)(X), \
8182  (__v2df)(__m128d)(Y), (__mmask8)(U), \
8183  (int)(R))
8184 
8185 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8186 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8187 {
8188  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8189  -(__v2df)__A,
8190  -(__v2df)__B,
8191  (__mmask8)__U,
8193 }
8194 
8195 #define _mm_fnmsub_round_sd(A, B, C, R) \
8196  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8197  -(__v2df)(__m128d)(B), \
8198  -(__v2df)(__m128d)(C), (__mmask8)-1, \
8199  (int)(R))
8200 
8201 #define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
8202  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8203  -(__v2df)(__m128d)(A), \
8204  -(__v2df)(__m128d)(B), (__mmask8)(U), \
8205  (int)(R))
8206 
8207 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8208 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8209 {
8210  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8211  -(__v2df)__B,
8212  -(__v2df)__C,
8213  (__mmask8)__U,
8215 }
8216 
8217 #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
8218  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8219  -(__v2df)(__m128d)(B), \
8220  -(__v2df)(__m128d)(C), \
8221  (__mmask8)(U), \
8222  (int)(R))
8223 
8224 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8225 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8226 {
8227  return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8228  -(__v2df)__X,
8229  (__v2df)__Y,
8230  (__mmask8)__U,
8232 }
8233 
8234 #define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
8235  (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
8236  -(__v2df)(__m128d)(X), \
8237  (__v2df)(__m128d)(Y), \
8238  (__mmask8)(U), (int)(R))
8239 
/* Permute 64-bit lanes within each 256-bit half by immediate C; masked
   variants select between the permuted result and W (or zero). */
#define _mm512_permutex_pd(X, C) \
  (__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C))

#define _mm512_mask_permutex_pd(W, U, X, C) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_permutex_pd(U, X, C) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_permutex_epi64(X, C) \
  (__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C))

#define _mm512_mask_permutex_epi64(W, U, X, C) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_permutex_epi64(U, X, C) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)_mm512_setzero_si512())
8265 
8266 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8267 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
8268 {
8269  return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
8270 }
8271 
8272 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8273 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
8274 {
8275  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8276  (__v8df)_mm512_permutexvar_pd(__X, __Y),
8277  (__v8df)__W);
8278 }
8279 
8280 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8281 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
8282 {
8283  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8284  (__v8df)_mm512_permutexvar_pd(__X, __Y),
8285  (__v8df)_mm512_setzero_pd());
8286 }
8287 
8288 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8289 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
8290 {
8291  return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
8292 }
8293 
8294 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8295 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
8296 {
8297  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8298  (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8299  (__v8di)_mm512_setzero_si512());
8300 }
8301 
8302 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8303 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8304  __m512i __Y)
8305 {
8306  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8307  (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8308  (__v8di)__W);
8309 }
8310 
8311 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8312 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8313 {
8314  return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
8315 }
8316 
8317 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8318 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8319 {
8320  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8321  (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8322  (__v16sf)__W);
8323 }
8324 
8325 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8326 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
8327 {
8328  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8329  (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8330  (__v16sf)_mm512_setzero_ps());
8331 }
8332 
8333 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8334 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8335 {
8336  return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
8337 }
8338 
8339 #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8340 
8341 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8342 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
8343 {
8344  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8345  (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8346  (__v16si)_mm512_setzero_si512());
8347 }
8348 
8349 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8350 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8351  __m512i __Y)
8352 {
8353  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8354  (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8355  (__v16si)__W);
8356 }
8357 
8358 #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8359 
8360 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8361 _mm512_kand (__mmask16 __A, __mmask16 __B)
8362 {
8363  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8364 }
8365 
8366 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8367 _mm512_kandn (__mmask16 __A, __mmask16 __B)
8368 {
8369  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8370 }
8371 
8372 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8373 _mm512_kor (__mmask16 __A, __mmask16 __B)
8374 {
8375  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8376 }
8377 
8378 static __inline__ int __DEFAULT_FN_ATTRS
8379 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
8380 {
8381  return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8382 }
8383 
8384 static __inline__ int __DEFAULT_FN_ATTRS
8385 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
8386 {
8387  return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8388 }
8389 
8390 static __inline__ unsigned char __DEFAULT_FN_ATTRS
8391 _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
8392 {
8393  return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8394 }
8395 
8396 static __inline__ unsigned char __DEFAULT_FN_ATTRS
8397 _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
8398 {
8399  return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8400 }
8401 
8402 static __inline__ unsigned char __DEFAULT_FN_ATTRS
8403 _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
8404  *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8405  return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8406 }
8407 
8408 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8409 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
8410 {
8411  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8412 }
8413 
8414 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8415 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
8416 {
8417  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8418 }
8419 
8420 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8421 _mm512_kxor (__mmask16 __A, __mmask16 __B)
8422 {
8423  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8424 }
8425 
8426 #define _kand_mask16 _mm512_kand
8427 #define _kandn_mask16 _mm512_kandn
8428 #define _knot_mask16 _mm512_knot
8429 #define _kor_mask16 _mm512_kor
8430 #define _kxnor_mask16 _mm512_kxnor
8431 #define _kxor_mask16 _mm512_kxor
8432 
8433 #define _kshiftli_mask16(A, I) \
8434  (__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I))
8435 
8436 #define _kshiftri_mask16(A, I) \
8437  (__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I))
8438 
8439 static __inline__ unsigned int __DEFAULT_FN_ATTRS
8440 _cvtmask16_u32(__mmask16 __A) {
8441  return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8442 }
8443 
8444 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8445 _cvtu32_mask16(unsigned int __A) {
8446  return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8447 }
8448 
8449 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8450 _load_mask16(__mmask16 *__A) {
8451  return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
8452 }
8453 
8454 static __inline__ void __DEFAULT_FN_ATTRS
8455 _store_mask16(__mmask16 *__A, __mmask16 __B) {
8456  *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8457 }
8458 
8459 static __inline__ void __DEFAULT_FN_ATTRS512
8460 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8461 {
8462  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8463  __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8464 }
8465 
8466 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8467 _mm512_stream_load_si512 (void const *__P)
8468 {
8469  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8470  return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
8471 }
8472 
8473 static __inline__ void __DEFAULT_FN_ATTRS512
8474 _mm512_stream_pd (double *__P, __m512d __A)
8475 {
8476  typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8477  __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8478 }
8479 
8480 static __inline__ void __DEFAULT_FN_ATTRS512
8481 _mm512_stream_ps (float *__P, __m512 __A)
8482 {
8483  typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8484  __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8485 }
8486 
8487 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8488 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8489 {
8490  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8491  (__v8df) __W,
8492  (__mmask8) __U);
8493 }
8494 
8495 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8496 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
8497 {
8498  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8499  (__v8df)
8500  _mm512_setzero_pd (),
8501  (__mmask8) __U);
8502 }
8503 
8504 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8505 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8506 {
8507  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8508  (__v8di) __W,
8509  (__mmask8) __U);
8510 }
8511 
8512 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8513 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
8514 {
8515  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8516  (__v8di)
8518  (__mmask8) __U);
8519 }
8520 
8521 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8522 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8523 {
8524  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8525  (__v16sf) __W,
8526  (__mmask16) __U);
8527 }
8528 
8529 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8530 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
8531 {
8532  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8533  (__v16sf)
8534  _mm512_setzero_ps (),
8535  (__mmask16) __U);
8536 }
8537 
8538 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8539 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8540 {
8541  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8542  (__v16si) __W,
8543  (__mmask16) __U);
8544 }
8545 
8546 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8547 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
8548 {
8549  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8550  (__v16si)
8552  (__mmask16) __U);
8553 }
8554 
/* Scalar single/double compare-to-mask.  P selects the comparison predicate
   (_CMP_* constant); R selects the rounding/exception mode for the *_round_*
   forms, while the plain forms use _MM_FROUND_CUR_DIRECTION.  The masked
   forms AND the result with bit 0 of M. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R))

#define _mm_cmp_ss_mask(X, Y, P) \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), \
                                      _MM_FROUND_CUR_DIRECTION)

#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R))

#define _mm_cmp_sd_mask(X, Y, P) \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)(M), \
                                      _MM_FROUND_CUR_DIRECTION)
8598 
8599 /* Bit Test */
8600 
8601 static __inline __mmask16 __DEFAULT_FN_ATTRS512
8602 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
8603 {
8604  return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
8606 }
8607 
8608 static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8609 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8610 {
8611  return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8613 }
8614 
8615 static __inline __mmask8 __DEFAULT_FN_ATTRS512
8616 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
8617 {
8618  return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8620 }
8621 
8622 static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8623 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8624 {
8625  return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8627 }
8628 
8629 static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8630 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8631 {
8632  return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8634 }
8635 
8636 static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8637 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8638 {
8639  return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8641 }
8642 
8643 static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8644 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8645 {
8646  return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8648 }
8649 
8650 static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8651 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8652 {
8653  return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8655 }
8656 
8657 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8659 {
8660  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8661  1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8662 }
8663 
8664 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8665 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8666 {
8667  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8668  (__v16sf)_mm512_movehdup_ps(__A),
8669  (__v16sf)__W);
8670 }
8671 
8672 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8673 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
8674 {
8675  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8676  (__v16sf)_mm512_movehdup_ps(__A),
8677  (__v16sf)_mm512_setzero_ps());
8678 }
8679 
8680 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8682 {
8683  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8684  0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8685 }
8686 
8687 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8688 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8689 {
8690  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8691  (__v16sf)_mm512_moveldup_ps(__A),
8692  (__v16sf)__W);
8693 }
8694 
8695 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8696 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
8697 {
8698  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8699  (__v16sf)_mm512_moveldup_ps(__A),
8700  (__v16sf)_mm512_setzero_ps());
8701 }
8702 
8703 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8704 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8705 {
8706  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
8707 }
8708 
8709 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8710 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
8711 {
8712  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
8713  _mm_setzero_ps());
8714 }
8715 
8716 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8717 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8718 {
8719  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
8720 }
8721 
8722 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8723 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
8724 {
8725  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
8726  _mm_setzero_pd());
8727 }
8728 
8729 static __inline__ void __DEFAULT_FN_ATTRS128
8730 _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
8731 {
8732  __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8733 }
8734 
8735 static __inline__ void __DEFAULT_FN_ATTRS128
8736 _mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
8737 {
8738  __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8739 }
8740 
8741 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8742 _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
8743 {
8744  __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8745  (__v4sf)_mm_setzero_ps(),
8746  0, 4, 4, 4);
8747 
8748  return (__m128) __builtin_ia32_loadss128_mask ((__v4sf *) __A, src, __U & 1);
8749 }
8750 
8751 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8752 _mm_maskz_load_ss (__mmask8 __U, const float* __A)
8753 {
8754  return (__m128)__builtin_ia32_loadss128_mask ((__v4sf *) __A,
8755  (__v4sf) _mm_setzero_ps(),
8756  __U & 1);
8757 }
8758 
8759 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8760 _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
8761 {
8762  __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8763  (__v2df)_mm_setzero_pd(),
8764  0, 2);
8765 
8766  return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, src, __U & 1);
8767 }
8768 
8769 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8770 _mm_maskz_load_sd (__mmask8 __U, const double* __A)
8771 {
8772  return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A,
8773  (__v2df) _mm_setzero_pd(),
8774  __U & 1);
8775 }
8776 
/* Shuffle 32-bit elements within each 128-bit lane per immediate I
   (same encoding as SSE2 pshufd), with masked/zero-masked variants. */
#define _mm512_shuffle_epi32(A, I) \
  (__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I))

#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_shuffle_epi32(U, A, I) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)_mm512_setzero_si512())
8789 
8790 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8791 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8792 {
8793  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8794  (__v8df) __W,
8795  (__mmask8) __U);
8796 }
8797 
8798 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8799 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
8800 {
8801  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8802  (__v8df) _mm512_setzero_pd (),
8803  (__mmask8) __U);
8804 }
8805 
8806 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8807 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8808 {
8809  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8810  (__v8di) __W,
8811  (__mmask8) __U);
8812 }
8813 
8814 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8815 _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
8816 {
8817  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8818  (__v8di) _mm512_setzero_si512 (),
8819  (__mmask8) __U);
8820 }
8821 
8822 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8823 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8824 {
8825  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8826  (__v8df) __W,
8827  (__mmask8) __U);
8828 }
8829 
8830 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8831 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
8832 {
8833  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8834  (__v8df) _mm512_setzero_pd(),
8835  (__mmask8) __U);
8836 }
8837 
8838 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8839 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8840 {
8841  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8842  (__v8di) __W,
8843  (__mmask8) __U);
8844 }
8845 
8846 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8847 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
8848 {
8849  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8850  (__v8di) _mm512_setzero_si512(),
8851  (__mmask8) __U);
8852 }
8853 
8854 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8855 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8856 {
8857  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8858  (__v16sf) __W,
8859  (__mmask16) __U);
8860 }
8861 
8862 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8863 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
8864 {
8865  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8866  (__v16sf) _mm512_setzero_ps(),
8867  (__mmask16) __U);
8868 }
8869 
8870 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8871 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8872 {
8873  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8874  (__v16si) __W,
8875  (__mmask16) __U);
8876 }
8877 
8878 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8879 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
8880 {
8881  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8882  (__v16si) _mm512_setzero_si512(),
8883  (__mmask16) __U);
8884 }
8885 
8886 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8887 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8888 {
8889  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8890  (__v16sf) __W,
8891  (__mmask16) __U);
8892 }
8893 
8894 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8895 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
8896 {
8897  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8898  (__v16sf) _mm512_setzero_ps(),
8899  (__mmask16) __U);
8900 }
8901 
8902 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8903 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8904 {
8905  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8906  (__v16si) __W,
8907  (__mmask16) __U);
8908 }
8909 
8910 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8911 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
8912 {
8913  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8914  (__v16si) _mm512_setzero_si512(),
8915  (__mmask16) __U);
8916 }
8917 
/* Convert 8 packed floats to 8 doubles with explicit rounding mode R. */
#define _mm512_cvt_roundps_pd(A, R) \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
8932 
8933 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8934 _mm512_cvtps_pd (__m256 __A)
8935 {
8936  return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8937 }
8938 
8939 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8940 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
8941 {
8942  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8943  (__v8df)_mm512_cvtps_pd(__A),
8944  (__v8df)__W);
8945 }
8946 
8947 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8948 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
8949 {
8950  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8951  (__v8df)_mm512_cvtps_pd(__A),
8952  (__v8df)_mm512_setzero_pd());
8953 }
8954 
8955 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8956 _mm512_cvtpslo_pd (__m512 __A)
8957 {
8958  return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
8959 }
8960 
8961 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8962 _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
8963 {
8964  return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
8965 }
8966 
8967 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8968 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
8969 {
8970  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8971  (__v8df) __A,
8972  (__v8df) __W);
8973 }
8974 
8975 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8976 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
8977 {
8978  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8979  (__v8df) __A,
8980  (__v8df) _mm512_setzero_pd ());
8981 }
8982 
8983 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8984 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
8985 {
8986  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8987  (__v16sf) __A,
8988  (__v16sf) __W);
8989 }
8990 
8991 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8992 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
8993 {
8994  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8995  (__v16sf) __A,
8996  (__v16sf) _mm512_setzero_ps ());
8997 }
8998 
8999 static __inline__ void __DEFAULT_FN_ATTRS512
9000 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9001 {
9002  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9003  (__mmask8) __U);
9004 }
9005 
9006 static __inline__ void __DEFAULT_FN_ATTRS512
9007 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9008 {
9009  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9010  (__mmask8) __U);
9011 }
9012 
9013 static __inline__ void __DEFAULT_FN_ATTRS512
9014 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9015 {
9016  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9017  (__mmask16) __U);
9018 }
9019 
9020 static __inline__ void __DEFAULT_FN_ATTRS512
9021 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9022 {
9023  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9024  (__mmask16) __U);
9025 }
9026 
/* Convert the low double of B to a float in element 0 of the result with
   rounding mode R; upper elements come from A. */
#define _mm_cvt_roundsd_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_undefined_ps(), \
                                             (__mmask8)-1, (int)(R))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(R))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R))
9044 
9045 static __inline__ __m128 __DEFAULT_FN_ATTRS128
9046 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
9047 {
9048  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9049  (__v2df)__B,
9050  (__v4sf)__W,
9051  (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
9052 }
9053 
9054 static __inline__ __m128 __DEFAULT_FN_ATTRS128
9055 _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
9056 {
9057  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9058  (__v2df)__B,
9059  (__v4sf)_mm_setzero_ps(),
9060  (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
9061 }
9062 
/* Intel naming aliases (iNN <-> siNN) and round-controlled int->float
   scalar conversions; 64-bit forms exist only on x86-64. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif

#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R))
#endif

#define _mm_cvt_roundsi32_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))

#define _mm_cvt_roundi32_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))

#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R))

#define _mm_cvt_roundi64_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R))
#endif
9099 
/* Convert the low float of B to a double in element 0 with rounding/SAE
   control R; upper element comes from A. */
#define _mm_cvt_roundss_sd(A, B, R) \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_undefined_pd(), \
                                              (__mmask8)-1, (int)(R))

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(R))

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U), (int)(R))
9117 
9118 static __inline__ __m128d __DEFAULT_FN_ATTRS128
9119 _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
9120 {
9121  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9122  (__v4sf)__B,
9123  (__v2df)__W,
9124  (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
9125 }
9126 
9127 static __inline__ __m128d __DEFAULT_FN_ATTRS128
9128 _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
9129 {
9130  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9131  (__v4sf)__B,
9132  (__v2df)_mm_setzero_pd(),
9133  (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
9134 }
9135 
9136 static __inline__ __m128d __DEFAULT_FN_ATTRS128
9137 _mm_cvtu32_sd (__m128d __A, unsigned __B)
9138 {
9139  __A[0] = __B;
9140  return __A;
9141 }
9142 
9143 #ifdef __x86_64__
9144 #define _mm_cvt_roundu64_sd(A, B, R) \
9145  (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
9146  (unsigned long long)(B), (int)(R))
9147 
9148 static __inline__ __m128d __DEFAULT_FN_ATTRS128
9149 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9150 {
9151  __A[0] = __B;
9152  return __A;
9153 }
9154 #endif
9155 
9156 #define _mm_cvt_roundu32_ss(A, B, R) \
9157  (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
9158  (int)(R))
9159 
9160 static __inline__ __m128 __DEFAULT_FN_ATTRS128
9161 _mm_cvtu32_ss (__m128 __A, unsigned __B)
9162 {
9163  __A[0] = __B;
9164  return __A;
9165 }
9166 
9167 #ifdef __x86_64__
9168 #define _mm_cvt_roundu64_ss(A, B, R) \
9169  (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
9170  (unsigned long long)(B), (int)(R))
9171 
9172 static __inline__ __m128 __DEFAULT_FN_ATTRS128
9173 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9174 {
9175  __A[0] = __B;
9176  return __A;
9177 }
9178 #endif
9179 
9180 static __inline__ __m512i __DEFAULT_FN_ATTRS512
9181 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9182 {
9183  return (__m512i) __builtin_ia32_selectd_512(__M,
9184  (__v16si) _mm512_set1_epi32(__A),
9185  (__v16si) __O);
9186 }
9187 
9188 static __inline__ __m512i __DEFAULT_FN_ATTRS512
9189 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9190 {
9191  return (__m512i) __builtin_ia32_selectq_512(__M,
9192  (__v8di) _mm512_set1_epi64(__A),
9193  (__v8di) __O);
9194 }
9195 
9196 static __inline __m512i __DEFAULT_FN_ATTRS512
9197 _mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
9198  char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
9199  char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
9200  char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
9201  char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
9202  char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
9203  char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
9204  char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
9205  char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
9206  char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
9207  char __e4, char __e3, char __e2, char __e1, char __e0) {
9208 
9209  return __extension__ (__m512i)(__v64qi)
9210  {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9211  __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9212  __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9213  __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9214  __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9215  __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9216  __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9217  __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9218 }
9219 
9220 static __inline __m512i __DEFAULT_FN_ATTRS512
9221 _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
9222  short __e27, short __e26, short __e25, short __e24, short __e23,
9223  short __e22, short __e21, short __e20, short __e19, short __e18,
9224  short __e17, short __e16, short __e15, short __e14, short __e13,
9225  short __e12, short __e11, short __e10, short __e9, short __e8,
9226  short __e7, short __e6, short __e5, short __e4, short __e3,
9227  short __e2, short __e1, short __e0) {
9228  return __extension__ (__m512i)(__v32hi)
9229  {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9230  __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9231  __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9232  __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9233 }
9234 
9235 static __inline __m512i __DEFAULT_FN_ATTRS512
9236 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
9237  int __E, int __F, int __G, int __H,
9238  int __I, int __J, int __K, int __L,
9239  int __M, int __N, int __O, int __P)
9240 {
9241  return __extension__ (__m512i)(__v16si)
9242  { __P, __O, __N, __M, __L, __K, __J, __I,
9243  __H, __G, __F, __E, __D, __C, __B, __A };
9244 }
9245 
9246 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
9247  e8,e9,e10,e11,e12,e13,e14,e15) \
9248  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
9249  (e5),(e4),(e3),(e2),(e1),(e0))
9250 
9251 static __inline__ __m512i __DEFAULT_FN_ATTRS512
9252 _mm512_set_epi64 (long long __A, long long __B, long long __C,
9253  long long __D, long long __E, long long __F,
9254  long long __G, long long __H)
9255 {
9256  return __extension__ (__m512i) (__v8di)
9257  { __H, __G, __F, __E, __D, __C, __B, __A };
9258 }
9259 
9260 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
9261  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9262 
9263 static __inline__ __m512d __DEFAULT_FN_ATTRS512
9264 _mm512_set_pd (double __A, double __B, double __C, double __D,
9265  double __E, double __F, double __G, double __H)
9266 {
9267  return __extension__ (__m512d)
9268  { __H, __G, __F, __E, __D, __C, __B, __A };
9269 }
9270 
9271 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
9272  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9273 
9274 static __inline__ __m512 __DEFAULT_FN_ATTRS512
9275 _mm512_set_ps (float __A, float __B, float __C, float __D,
9276  float __E, float __F, float __G, float __H,
9277  float __I, float __J, float __K, float __L,
9278  float __M, float __N, float __O, float __P)
9279 {
9280  return __extension__ (__m512)
9281  { __P, __O, __N, __M, __L, __K, __J, __I,
9282  __H, __G, __F, __E, __D, __C, __B, __A };
9283 }
9284 
9285 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
9286  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
9287  (e4),(e3),(e2),(e1),(e0))
9288 
9289 static __inline__ __m512 __DEFAULT_FN_ATTRS512
9290 _mm512_abs_ps(__m512 __A)
9291 {
9292  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9293 }
9294 
9295 static __inline__ __m512 __DEFAULT_FN_ATTRS512
9296 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
9297 {
9298  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9299 }
9300 
9301 static __inline__ __m512d __DEFAULT_FN_ATTRS512
9302 _mm512_abs_pd(__m512d __A)
9303 {
9304  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
9305 }
9306 
9307 static __inline__ __m512d __DEFAULT_FN_ATTRS512
9308 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
9309 {
9310  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
9311 }
9312 
9313 /* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
9314  * outputs. This class of vector operation forms the basis of many scientific
9315  * computations. In vector-reduction arithmetic, the evaluation of the operation is
9316  * independent of the order of the input elements of V.
9317 
9318  * Used bisection method. At each step, we partition the vector with previous
9319  * step in half, and the operation is performed on its two halves.
9320  * This takes log2(n) steps where n is the number of elements in the vector.
9321  */
9322 
9323 #define _mm512_mask_reduce_operator(op) \
9324  __v4du __t1 = (__v4du)_mm512_extracti64x4_epi64(__W, 0); \
9325  __v4du __t2 = (__v4du)_mm512_extracti64x4_epi64(__W, 1); \
9326  __m256i __t3 = (__m256i)(__t1 op __t2); \
9327  __v2du __t4 = (__v2du)_mm256_extracti128_si256(__t3, 0); \
9328  __v2du __t5 = (__v2du)_mm256_extracti128_si256(__t3, 1); \
9329  __v2du __t6 = __t4 op __t5; \
9330  __v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
9331  __v2du __t8 = __t6 op __t7; \
9332  return __t8[0];
9333 
9334 static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
9336 }
9337 
9338 static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) {
9340 }
9341 
9342 static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) {
9344 }
9345 
9346 static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) {
9348 }
9349 
9350 static __inline__ long long __DEFAULT_FN_ATTRS512
9351 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
9352  __W = _mm512_maskz_mov_epi64(__M, __W);
9354 }
9355 
9356 static __inline__ long long __DEFAULT_FN_ATTRS512
9357 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
9358  __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
9360 }
9361 
9362 static __inline__ long long __DEFAULT_FN_ATTRS512
9363 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
9364  __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __W);
9366 }
9367 
9368 static __inline__ long long __DEFAULT_FN_ATTRS512
9369 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
9370  __W = _mm512_maskz_mov_epi64(__M, __W);
9372 }
9373 #undef _mm512_mask_reduce_operator
9374 
9375 #define _mm512_mask_reduce_operator(op) \
9376  __m256d __t1 = _mm512_extractf64x4_pd(__W, 0); \
9377  __m256d __t2 = _mm512_extractf64x4_pd(__W, 1); \
9378  __m256d __t3 = __t1 op __t2; \
9379  __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
9380  __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
9381  __m128d __t6 = __t4 op __t5; \
9382  __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
9383  __m128d __t8 = __t6 op __t7; \
9384  return __t8[0];
9385 
9386 static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
9388 }
9389 
9390 static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
9392 }
9393 
9394 static __inline__ double __DEFAULT_FN_ATTRS512
9395 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
9396  __W = _mm512_maskz_mov_pd(__M, __W);
9398 }
9399 
9400 static __inline__ double __DEFAULT_FN_ATTRS512
9401 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
9402  __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
9404 }
9405 #undef _mm512_mask_reduce_operator
9406 
9407 #define _mm512_mask_reduce_operator(op) \
9408  __v8su __t1 = (__v8su)_mm512_extracti64x4_epi64(__W, 0); \
9409  __v8su __t2 = (__v8su)_mm512_extracti64x4_epi64(__W, 1); \
9410  __m256i __t3 = (__m256i)(__t1 op __t2); \
9411  __v4su __t4 = (__v4su)_mm256_extracti128_si256(__t3, 0); \
9412  __v4su __t5 = (__v4su)_mm256_extracti128_si256(__t3, 1); \
9413  __v4su __t6 = __t4 op __t5; \
9414  __v4su __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
9415  __v4su __t8 = __t6 op __t7; \
9416  __v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
9417  __v4su __t10 = __t8 op __t9; \
9418  return __t10[0];
9419 
9420 static __inline__ int __DEFAULT_FN_ATTRS512
9423 }
9424 
9425 static __inline__ int __DEFAULT_FN_ATTRS512
9428 }
9429 
9430 static __inline__ int __DEFAULT_FN_ATTRS512
9433 }
9434 
9435 static __inline__ int __DEFAULT_FN_ATTRS512
9438 }
9439 
9440 static __inline__ int __DEFAULT_FN_ATTRS512
9441 _mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
9442  __W = _mm512_maskz_mov_epi32(__M, __W);
9444 }
9445 
9446 static __inline__ int __DEFAULT_FN_ATTRS512
9447 _mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
9448  __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
9450 }
9451 
9452 static __inline__ int __DEFAULT_FN_ATTRS512
9453 _mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
9454  __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __W);
9456 }
9457 
9458 static __inline__ int __DEFAULT_FN_ATTRS512
9459 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
9460  __W = _mm512_maskz_mov_epi32(__M, __W);
9462 }
9463 #undef _mm512_mask_reduce_operator
9464 
/* Helper for single-precision reductions of __W: the 512-bit input is
 * extracted half-at-a-time (via the pd extract, recast to float vectors),
 * combined with `op`, then narrowed 256 -> 128 -> scalar with
 * __builtin_shufflevector; lane 0 of the final vector is returned. */
#define _mm512_mask_reduce_operator(op) \
  __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 0); \
  __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 1); \
  __m256 __t3 = __t1 op __t2; \
  __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
  __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
  __m128 __t6 = __t4 op __t5; \
  __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
  __m128 __t8 = __t6 op __t7; \
  __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
  __m128 __t10 = __t8 op __t9; \
  return __t10[0];
9477 
9478 static __inline__ float __DEFAULT_FN_ATTRS512
9481 }
9482 
9483 static __inline__ float __DEFAULT_FN_ATTRS512
9486 }
9487 
9488 static __inline__ float __DEFAULT_FN_ATTRS512
9489 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
9490  __W = _mm512_maskz_mov_ps(__M, __W);
9492 }
9493 
9494 static __inline__ float __DEFAULT_FN_ATTRS512
9495 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
9496  __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
9498 }
9499 #undef _mm512_mask_reduce_operator
9500 
/* Helper for 64-bit integer min/max reductions of __V.  `op` is an
 * intrinsic-name suffix (e.g. max_epi64) pasted onto _mm512_; the vector
 * is folded against swapped copies of itself (halves, then quarters, then
 * adjacent lanes), so after three rounds every lane holds the result and
 * lane 0 is returned. */
#define _mm512_mask_reduce_operator(op) \
  __m512i __t1 = (__m512i)__builtin_shufflevector((__v8di)__V, (__v8di)__V, 4, 5, 6, 7, 0, 1, 2, 3); \
  __m512i __t2 = _mm512_##op(__V, __t1); \
  __m512i __t3 = (__m512i)__builtin_shufflevector((__v8di)__t2, (__v8di)__t2, 2, 3, 0, 1, 6, 7, 4, 5); \
  __m512i __t4 = _mm512_##op(__t2, __t3); \
  __m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \
  __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \
  return __t6[0];
9509 
9510 static __inline__ long long __DEFAULT_FN_ATTRS512
9512  _mm512_mask_reduce_operator(max_epi64);
9513 }
9514 
9515 static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9517  _mm512_mask_reduce_operator(max_epu64);
9518 }
9519 
9520 static __inline__ long long __DEFAULT_FN_ATTRS512
9522  _mm512_mask_reduce_operator(min_epi64);
9523 }
9524 
9525 static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9527  _mm512_mask_reduce_operator(min_epu64);
9528 }
9529 
9530 static __inline__ long long __DEFAULT_FN_ATTRS512
9531 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
9532  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
9533  _mm512_mask_reduce_operator(max_epi64);
9534 }
9535 
9536 static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9537 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
9538  __V = _mm512_maskz_mov_epi64(__M, __V);
9539  _mm512_mask_reduce_operator(max_epu64);
9540 }
9541 
9542 static __inline__ long long __DEFAULT_FN_ATTRS512
9543 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
9544  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
9545  _mm512_mask_reduce_operator(min_epi64);
9546 }
9547 
9548 static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9549 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
9550  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __V);
9551  _mm512_mask_reduce_operator(min_epu64);
9552 }
9553 #undef _mm512_mask_reduce_operator
9554 
/* Helper for 32-bit integer min/max reductions of __V.  `op` is an
 * intrinsic-name suffix (e.g. max_epi32) pasted onto the width-matching
 * prefix (_mm256_/_mm_): extract and combine the 256-bit halves, then the
 * 128-bit halves, then fold adjacent lanes twice; lane 0 holds the
 * result. */
#define _mm512_mask_reduce_operator(op) \
  __m256i __t1 = _mm512_extracti64x4_epi64(__V, 0); \
  __m256i __t2 = _mm512_extracti64x4_epi64(__V, 1); \
  __m256i __t3 = _mm256_##op(__t1, __t2); \
  __m128i __t4 = _mm256_extracti128_si256(__t3, 0); \
  __m128i __t5 = _mm256_extracti128_si256(__t3, 1); \
  __m128i __t6 = _mm_##op(__t4, __t5); \
  __m128i __t7 = (__m128i)__builtin_shufflevector((__v4si)__t6, (__v4si)__t6, 2, 3, 0, 1); \
  __m128i __t8 = _mm_##op(__t6, __t7); \
  __m128i __t9 = (__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \
  __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \
  return __t10[0];
9567 
9568 static __inline__ int __DEFAULT_FN_ATTRS512
9570  _mm512_mask_reduce_operator(max_epi32);
9571 }
9572 
9573 static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9575  _mm512_mask_reduce_operator(max_epu32);
9576 }
9577 
9578 static __inline__ int __DEFAULT_FN_ATTRS512
9580  _mm512_mask_reduce_operator(min_epi32);
9581 }
9582 
9583 static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9585  _mm512_mask_reduce_operator(min_epu32);
9586 }
9587 
9588 static __inline__ int __DEFAULT_FN_ATTRS512
9589 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
9590  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
9591  _mm512_mask_reduce_operator(max_epi32);
9592 }
9593 
9594 static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9595 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
9596  __V = _mm512_maskz_mov_epi32(__M, __V);
9597  _mm512_mask_reduce_operator(max_epu32);
9598 }
9599 
9600 static __inline__ int __DEFAULT_FN_ATTRS512
9601 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
9602  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
9603  _mm512_mask_reduce_operator(min_epi32);
9604 }
9605 
9606 static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9607 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
9608  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __V);
9609  _mm512_mask_reduce_operator(min_epu32);
9610 }
9611 #undef _mm512_mask_reduce_operator
9612 
/* Helper for double-precision min/max reductions of __V.  `op` is an
 * intrinsic-name suffix (e.g. max_pd) pasted onto the width-matching
 * prefix: combine the 256-bit halves, then the 128-bit halves, then the
 * two remaining lanes; lane 0 holds the result. */
#define _mm512_mask_reduce_operator(op) \
  __m256d __t1 = _mm512_extractf64x4_pd(__V, 0); \
  __m256d __t2 = _mm512_extractf64x4_pd(__V, 1); \
  __m256d __t3 = _mm256_##op(__t1, __t2); \
  __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
  __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
  __m128d __t6 = _mm_##op(__t4, __t5); \
  __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
  __m128d __t8 = _mm_##op(__t6, __t7); \
  return __t8[0];
9623 
9624 static __inline__ double __DEFAULT_FN_ATTRS512
9625 _mm512_reduce_max_pd(__m512d __V) {
9627 }
9628 
9629 static __inline__ double __DEFAULT_FN_ATTRS512
9630 _mm512_reduce_min_pd(__m512d __V) {
9632 }
9633 
9634 static __inline__ double __DEFAULT_FN_ATTRS512
9635 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
9636  __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
9638 }
9639 
9640 static __inline__ double __DEFAULT_FN_ATTRS512
9641 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
9642  __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
9644 }
9645 #undef _mm512_mask_reduce_operator
9646 
/* Helper for single-precision min/max reductions of __V.  `op` is an
 * intrinsic-name suffix (e.g. max_ps); the 512-bit input is extracted
 * half-at-a-time (via the pd extract, recast to float vectors), combined
 * with the width-matching intrinsic, and folded down to lane 0. */
#define _mm512_mask_reduce_operator(op) \
  __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 0); \
  __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 1); \
  __m256 __t3 = _mm256_##op(__t1, __t2); \
  __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
  __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
  __m128 __t6 = _mm_##op(__t4, __t5); \
  __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
  __m128 __t8 = _mm_##op(__t6, __t7); \
  __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
  __m128 __t10 = _mm_##op(__t8, __t9); \
  return __t10[0];
9659 
9660 static __inline__ float __DEFAULT_FN_ATTRS512
9663 }
9664 
9665 static __inline__ float __DEFAULT_FN_ATTRS512
9668 }
9669 
9670 static __inline__ float __DEFAULT_FN_ATTRS512
9671 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
9672  __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
9674 }
9675 
9676 static __inline__ float __DEFAULT_FN_ATTRS512
9677 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
9678  __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
9680 }
9681 #undef _mm512_mask_reduce_operator
9682 
9683 #undef __DEFAULT_FN_ATTRS512
9684 #undef __DEFAULT_FN_ATTRS128
9685 #undef __DEFAULT_FN_ATTRS
9686 
9687 #endif /* __AVX512FINTRIN_H */
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi32(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu32(__m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition: avxintrin.h:4278
static __inline __m256 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps256(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi32(__m512i __A, __m128i __B)
struct __storeu_i16 *__P __v
Definition: immintrin.h:330
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
#define __DEFAULT_FN_ATTRS512
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi64(__m512i __V)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding v...
Definition: xmmintrin.h:104
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutevar_ps(__m512 __A, __m512i __C)
unsigned char __mmask8
Definition: avx512fintrin.h:47
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_pd(__m256i __A)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi64(__m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
#define _MM_FROUND_CEIL
Definition: smmintrin.h:44
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_add_ps(__m512 __a, __m512 __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _cvtu32_mask16(unsigned int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_moveldup_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi64(__m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ void short __D
Definition: immintrin.h:326
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2...
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ void const void * __src
Definition: movdirintrin.h:59
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi32(__m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_setzero_ps(void)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps(__m512d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_setzero_pd(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtps_pd(__m256 __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128(__m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the r...
Definition: emmintrin.h:67
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_abs_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi64(__m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
Definition: xmmintrin.h:2672
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi32(__m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi32(__m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition: avxintrin.h:3604
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ void int __a
Definition: emmintrin.h:4204
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_setzero_si512(void)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
_MM_MANTISSA_NORM_ENUM
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
Definition: xmmintrin.h:147
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
#define _mm512_mask_reduce_operator(op)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline unsigned char unsigned int unsigned int unsigned int * __p
Definition: adxintrin.h:38
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_knot(__mmask16 __M)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
_MM_CMPINT_ENUM
Definition: avx512fintrin.h:58
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition: xmmintrin.h:1915
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition: emmintrin.h:1922
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
Definition: avxintrin.h:3591
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of ...
Definition: emmintrin.h:192
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask16_u32(__mmask16 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcast_f32x4(__m128 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps(__m512d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_movedup_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4...
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_abs_ps(__m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi32(__m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32lo_pd(__m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(__m512i *__P, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_int2mask(int __a)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi32(int __s)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ vector float vector float __b
Definition: altivec.h:534
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the sec...
Definition: xmmintrin.h:189
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi8(char __w)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i64x4(__m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32lo_pd(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(double *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bit...
Definition: emmintrin.h:150
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W)
_MM_PERM_ENUM
Definition: avx512fintrin.h:70
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision valu...
Definition: emmintrin.h:109
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
#define __DEFAULT_FN_ATTRS128
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_si512(__m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set1_pd(double __w)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask2int(__mmask16 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition: emmintrin.h:3996
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
#define _MM_FROUND_CUR_DIRECTION
Definition: avx512fintrin.h:55
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi32(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi32(__m512i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
_MM_MANTISSA_SIGN_ENUM
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov(__mmask16 __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastq_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si256(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi64(__m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition: emmintrin.h:3606
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4...
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
Definition: emmintrin.h:1943
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
#define __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi32(__m512i __W)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition: avxintrin.h:4291
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_add_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_ps(__m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi32(__m128i __A)
unsigned short __mmask16
Definition: avx512fintrin.h:48
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi32(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi16(short __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi32(__m512i __V)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi64(__m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition: avxintrin.h:4264
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_movehdup_ps(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi64(__m128i __A)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(float *__P, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
char __v64qi __attribute__((__vector_size__(64)))
Definition: avx512fintrin.h:30
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi64(long long __d)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
Definition: xmmintrin.h:62
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set1_ps(float __w)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
#define _MM_FROUND_FLOOR
Definition: smmintrin.h:43
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mul_ps(__m512 __a, __m512 __b)