clang  8.0.0
CGBuiltin.cpp
Go to the documentation of this file.
1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CGRecordLayout.h"
18 #include "CodeGenFunction.h"
19 #include "CodeGenModule.h"
20 #include "ConstantEmitter.h"
21 #include "TargetInfo.h"
22 #include "clang/AST/ASTContext.h"
23 #include "clang/AST/Decl.h"
24 #include "clang/AST/OSLog.h"
26 #include "clang/Basic/TargetInfo.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/IR/CallSite.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/InlineAsm.h"
33 #include "llvm/IR/Intrinsics.h"
34 #include "llvm/IR/MDBuilder.h"
35 #include "llvm/Support/ConvertUTF.h"
36 #include "llvm/Support/ScopedPrinter.h"
37 #include "llvm/Support/TargetParser.h"
38 #include <sstream>
39 
40 using namespace clang;
41 using namespace CodeGen;
42 using namespace llvm;
43 
44 static
45 int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
46  return std::min(High, std::max(Low, Value));
47 }
48 
49 /// getBuiltinLibFunction - Given a builtin id for a function like
50 /// "__builtin_fabsf", return a Function* for "fabsf".
52  unsigned BuiltinID) {
53  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
54 
55  // Get the name, skip over the __builtin_ prefix (if necessary).
56  StringRef Name;
57  GlobalDecl D(FD);
58 
59  // If the builtin has been declared explicitly with an assembler label,
60  // use the mangled name. This differs from the plain label on platforms
61  // that prefix labels.
62  if (FD->hasAttr<AsmLabelAttr>())
63  Name = getMangledName(D);
64  else
65  Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
66 
67  llvm::FunctionType *Ty =
68  cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
69 
70  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
71 }
72 
73 /// Emit the conversions required to turn the given value into an
74 /// integer of the given size.
76  QualType T, llvm::IntegerType *IntType) {
77  V = CGF.EmitToMemory(V, T);
78 
79  if (V->getType()->isPointerTy())
80  return CGF.Builder.CreatePtrToInt(V, IntType);
81 
82  assert(V->getType() == IntType);
83  return V;
84 }
85 
87  QualType T, llvm::Type *ResultType) {
88  V = CGF.EmitFromMemory(V, T);
89 
90  if (ResultType->isPointerTy())
91  return CGF.Builder.CreateIntToPtr(V, ResultType);
92 
93  assert(V->getType() == ResultType);
94  return V;
95 }
96 
97 /// Utility to insert an atomic instruction based on Intrinsic::ID
98 /// and the expression node.
100  CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
101  AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
102  QualType T = E->getType();
103  assert(E->getArg(0)->getType()->isPointerType());
104  assert(CGF.getContext().hasSameUnqualifiedType(T,
105  E->getArg(0)->getType()->getPointeeType()));
106  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
107 
108  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
109  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
110 
111  llvm::IntegerType *IntType =
112  llvm::IntegerType::get(CGF.getLLVMContext(),
113  CGF.getContext().getTypeSize(T));
114  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
115 
116  llvm::Value *Args[2];
117  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
118  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
119  llvm::Type *ValueType = Args[1]->getType();
120  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
121 
122  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
123  Kind, Args[0], Args[1], Ordering);
124  return EmitFromInt(CGF, Result, T, ValueType);
125 }
126 
128  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
129  Value *Address = CGF.EmitScalarExpr(E->getArg(1));
130 
131  // Convert the type of the pointer to a pointer to the stored type.
132  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
133  Value *BC = CGF.Builder.CreateBitCast(
134  Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
135  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
136  LV.setNontemporal(true);
137  CGF.EmitStoreOfScalar(Val, LV, false);
138  return nullptr;
139 }
140 
142  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
143 
144  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
145  LV.setNontemporal(true);
146  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
147 }
148 
150  llvm::AtomicRMWInst::BinOp Kind,
151  const CallExpr *E) {
152  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
153 }
154 
155 /// Utility to insert an atomic instruction based Intrinsic::ID and
156 /// the expression node, where the return value is the result of the
157 /// operation.
159  llvm::AtomicRMWInst::BinOp Kind,
160  const CallExpr *E,
161  Instruction::BinaryOps Op,
162  bool Invert = false) {
163  QualType T = E->getType();
164  assert(E->getArg(0)->getType()->isPointerType());
165  assert(CGF.getContext().hasSameUnqualifiedType(T,
166  E->getArg(0)->getType()->getPointeeType()));
167  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
168 
169  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
170  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
171 
172  llvm::IntegerType *IntType =
173  llvm::IntegerType::get(CGF.getLLVMContext(),
174  CGF.getContext().getTypeSize(T));
175  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
176 
177  llvm::Value *Args[2];
178  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
179  llvm::Type *ValueType = Args[1]->getType();
180  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
181  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
182 
183  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
184  Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
185  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
186  if (Invert)
187  Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
188  llvm::ConstantInt::get(IntType, -1));
189  Result = EmitFromInt(CGF, Result, T, ValueType);
190  return RValue::get(Result);
191 }
192 
193 /// Utility to insert an atomic cmpxchg instruction.
194 ///
195 /// @param CGF The current codegen function.
196 /// @param E Builtin call expression to convert to cmpxchg.
197 /// arg0 - address to operate on
198 /// arg1 - value to compare with
199 /// arg2 - new value
200 /// @param ReturnBool Specifies whether to return success flag of
201 /// cmpxchg result or the old value.
202 ///
203 /// @returns result of cmpxchg, according to ReturnBool
204 ///
205 /// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics
206 /// invoke the function EmitAtomicCmpXchgForMSIntrin.
208  bool ReturnBool) {
209  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
210  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
211  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
212 
213  llvm::IntegerType *IntType = llvm::IntegerType::get(
214  CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
215  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
216 
217  Value *Args[3];
218  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
219  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
220  llvm::Type *ValueType = Args[1]->getType();
221  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
222  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
223 
224  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
225  Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
226  llvm::AtomicOrdering::SequentiallyConsistent);
227  if (ReturnBool)
228  // Extract boolean success flag and zext it to int.
229  return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
230  CGF.ConvertType(E->getType()));
231  else
232  // Extract old value and emit it using the same type as compare value.
233  return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
234  ValueType);
235 }
236 
237 /// This function should be invoked to emit atomic cmpxchg for Microsoft's
238 /// _InterlockedCompareExchange* intrinsics which have the following signature:
239 /// T _InterlockedCompareExchange(T volatile *Destination,
240 /// T Exchange,
241 /// T Comparand);
242 ///
243 /// Whereas the llvm 'cmpxchg' instruction has the following syntax:
244 /// cmpxchg *Destination, Comparand, Exchange.
245 /// So we need to swap Comparand and Exchange when invoking
246 /// CreateAtomicCmpXchg. That is the reason we could not use the above utility
247 /// function MakeAtomicCmpXchgValue since it expects the arguments to be
248 /// already swapped.
249 
250 static
252  AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
253  assert(E->getArg(0)->getType()->isPointerType());
254  assert(CGF.getContext().hasSameUnqualifiedType(
255  E->getType(), E->getArg(0)->getType()->getPointeeType()));
256  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
257  E->getArg(1)->getType()));
258  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
259  E->getArg(2)->getType()));
260 
261  auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
262  auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
263  auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
264 
265  // For Release ordering, the failure ordering should be Monotonic.
266  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
267  AtomicOrdering::Monotonic :
268  SuccessOrdering;
269 
270  auto *Result = CGF.Builder.CreateAtomicCmpXchg(
271  Destination, Comparand, Exchange,
272  SuccessOrdering, FailureOrdering);
273  Result->setVolatile(true);
274  return CGF.Builder.CreateExtractValue(Result, 0);
275 }
276 
278  AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
279  assert(E->getArg(0)->getType()->isPointerType());
280 
281  auto *IntTy = CGF.ConvertType(E->getType());
282  auto *Result = CGF.Builder.CreateAtomicRMW(
283  AtomicRMWInst::Add,
284  CGF.EmitScalarExpr(E->getArg(0)),
285  ConstantInt::get(IntTy, 1),
286  Ordering);
287  return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
288 }
289 
291  AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
292  assert(E->getArg(0)->getType()->isPointerType());
293 
294  auto *IntTy = CGF.ConvertType(E->getType());
295  auto *Result = CGF.Builder.CreateAtomicRMW(
296  AtomicRMWInst::Sub,
297  CGF.EmitScalarExpr(E->getArg(0)),
298  ConstantInt::get(IntTy, 1),
299  Ordering);
300  return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
301 }
302 
303 // Emit a simple mangled intrinsic that has 1 argument and a return type
304 // matching the argument type.
306  const CallExpr *E,
307  unsigned IntrinsicID) {
308  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
309 
310  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
311  return CGF.Builder.CreateCall(F, Src0);
312 }
313 
314 // Emit an intrinsic that has 2 operands of the same type as its result.
316  const CallExpr *E,
317  unsigned IntrinsicID) {
318  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
319  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
320 
321  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
322  return CGF.Builder.CreateCall(F, { Src0, Src1 });
323 }
324 
325 // Emit an intrinsic that has 3 operands of the same type as its result.
327  const CallExpr *E,
328  unsigned IntrinsicID) {
329  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
330  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
331  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
332 
333  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
334  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
335 }
336 
337 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
339  const CallExpr *E,
340  unsigned IntrinsicID) {
341  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
342  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
343 
344  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
345  return CGF.Builder.CreateCall(F, {Src0, Src1});
346 }
347 
348 /// EmitFAbs - Emit a call to @llvm.fabs().
349 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
350  Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
351  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
352  Call->setDoesNotAccessMemory();
353  return Call;
354 }
355 
356 /// Emit the computation of the sign bit for a floating point value. Returns
357 /// the i1 sign bit value.
359  LLVMContext &C = CGF.CGM.getLLVMContext();
360 
361  llvm::Type *Ty = V->getType();
362  int Width = Ty->getPrimitiveSizeInBits();
363  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
364  V = CGF.Builder.CreateBitCast(V, IntTy);
365  if (Ty->isPPC_FP128Ty()) {
366  // We want the sign bit of the higher-order double. The bitcast we just
367  // did works as if the double-double was stored to memory and then
368  // read as an i128. The "store" will put the higher-order double in the
369  // lower address in both little- and big-Endian modes, but the "load"
370  // will treat those bits as a different part of the i128: the low bits in
371  // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
372  // we need to shift the high bits down to the low before truncating.
373  Width >>= 1;
374  if (CGF.getTarget().isBigEndian()) {
375  Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
376  V = CGF.Builder.CreateLShr(V, ShiftCst);
377  }
378  // We are truncating value in order to extract the higher-order
379  // double, which we will be using to extract the sign from.
380  IntTy = llvm::IntegerType::get(C, Width);
381  V = CGF.Builder.CreateTrunc(V, IntTy);
382  }
383  Value *Zero = llvm::Constant::getNullValue(IntTy);
384  return CGF.Builder.CreateICmpSLT(V, Zero);
385 }
386 
388  const CallExpr *E, llvm::Constant *calleeValue) {
389  CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
390  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
391 }
392 
393 /// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
394 /// depending on IntrinsicID.
395 ///
396 /// \arg CGF The current codegen function.
397 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
398 /// \arg X The first argument to the llvm.*.with.overflow.*.
399 /// \arg Y The second argument to the llvm.*.with.overflow.*.
400 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
401 /// \returns The result (i.e. sum/product) returned by the intrinsic.
403  const llvm::Intrinsic::ID IntrinsicID,
405  llvm::Value *&Carry) {
406  // Make sure we have integers of the same width.
407  assert(X->getType() == Y->getType() &&
408  "Arguments must be the same type. (Did you forget to make sure both "
409  "arguments have the same integer width?)");
410 
411  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
412  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
413  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
414  return CGF.Builder.CreateExtractValue(Tmp, 0);
415 }
416 
418  unsigned IntrinsicID,
419  int low, int high) {
420  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
421  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
422  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
423  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
424  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
425  return Call;
426 }
427 
428 namespace {
429  struct WidthAndSignedness {
430  unsigned Width;
431  bool Signed;
432  };
433 }
434 
435 static WidthAndSignedness
437  const clang::QualType Type) {
438  assert(Type->isIntegerType() && "Given type is not an integer.");
439  unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
440  bool Signed = Type->isSignedIntegerType();
441  return {Width, Signed};
442 }
443 
444 // Given one or more integer types, this function produces an integer type that
445 // encompasses them: any value in one of the given types could be expressed in
446 // the encompassing type.
447 static struct WidthAndSignedness
448 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
449  assert(Types.size() > 0 && "Empty list of types.");
450 
451  // If any of the given types is signed, we must return a signed type.
452  bool Signed = false;
453  for (const auto &Type : Types) {
454  Signed |= Type.Signed;
455  }
456 
457  // The encompassing type must have a width greater than or equal to the width
458  // of the specified types. Additionally, if the encompassing type is signed,
459  // its width must be strictly greater than the width of any unsigned types
460  // given.
461  unsigned Width = 0;
462  for (const auto &Type : Types) {
463  unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
464  if (Width < MinWidth) {
465  Width = MinWidth;
466  }
467  }
468 
469  return {Width, Signed};
470 }
471 
472 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
473  llvm::Type *DestType = Int8PtrTy;
474  if (ArgValue->getType() != DestType)
475  ArgValue =
476  Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
477 
478  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
479  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
480 }
481 
482 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
483 /// __builtin_object_size(p, @p To) is correct
484 static bool areBOSTypesCompatible(int From, int To) {
485  // Note: Our __builtin_object_size implementation currently treats Type=0 and
486  // Type=2 identically. Encoding this implementation detail here may make
487  // improving __builtin_object_size difficult in the future, so it's omitted.
488  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
489 }
490 
491 static llvm::Value *
492 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
493  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
494 }
495 
496 llvm::Value *
497 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
498  llvm::IntegerType *ResType,
499  llvm::Value *EmittedE) {
500  uint64_t ObjectSize;
501  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
502  return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
503  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
504 }
505 
506 /// Returns a Value corresponding to the size of the given expression.
507 /// This Value may be either of the following:
508 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
509 /// it)
510 /// - A call to the @llvm.objectsize intrinsic
511 ///
512 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
513 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
514 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
515 llvm::Value *
516 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
517  llvm::IntegerType *ResType,
518  llvm::Value *EmittedE) {
519  // We need to reference an argument if the pointer is a parameter with the
520  // pass_object_size attribute.
521  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
522  auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
523  auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
524  if (Param != nullptr && PS != nullptr &&
525  areBOSTypesCompatible(PS->getType(), Type)) {
526  auto Iter = SizeArguments.find(Param);
527  assert(Iter != SizeArguments.end());
528 
529  const ImplicitParamDecl *D = Iter->second;
530  auto DIter = LocalDeclMap.find(D);
531  assert(DIter != LocalDeclMap.end());
532 
533  return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
534  getContext().getSizeType(), E->getBeginLoc());
535  }
536  }
537 
538  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
539  // evaluate E for side-effects. In either case, we shouldn't lower to
540  // @llvm.objectsize.
541  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
542  return getDefaultBuiltinObjectSizeResult(Type, ResType);
543 
544  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
545  assert(Ptr->getType()->isPointerTy() &&
546  "Non-pointer passed to __builtin_object_size?");
547 
548  Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
549 
550  // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
551  Value *Min = Builder.getInt1((Type & 2) != 0);
552  // For GCC compatibility, __builtin_object_size treat NULL as unknown size.
553  Value *NullIsUnknown = Builder.getTrue();
554  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
555 }
556 
557 namespace {
558 /// A struct to generically describe a bit test intrinsic.
559 struct BitTest {
560  enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
561  enum InterlockingKind : uint8_t {
562  Unlocked,
563  Sequential,
564  Acquire,
565  Release,
566  NoFence
567  };
568 
569  ActionKind Action;
570  InterlockingKind Interlocking;
571  bool Is64Bit;
572 
573  static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
574 };
575 } // namespace
576 
577 BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
578  switch (BuiltinID) {
579  // Main portable variants.
580  case Builtin::BI_bittest:
581  return {TestOnly, Unlocked, false};
582  case Builtin::BI_bittestandcomplement:
583  return {Complement, Unlocked, false};
584  case Builtin::BI_bittestandreset:
585  return {Reset, Unlocked, false};
586  case Builtin::BI_bittestandset:
587  return {Set, Unlocked, false};
588  case Builtin::BI_interlockedbittestandreset:
589  return {Reset, Sequential, false};
590  case Builtin::BI_interlockedbittestandset:
591  return {Set, Sequential, false};
592 
593  // X86-specific 64-bit variants.
594  case Builtin::BI_bittest64:
595  return {TestOnly, Unlocked, true};
596  case Builtin::BI_bittestandcomplement64:
597  return {Complement, Unlocked, true};
598  case Builtin::BI_bittestandreset64:
599  return {Reset, Unlocked, true};
600  case Builtin::BI_bittestandset64:
601  return {Set, Unlocked, true};
602  case Builtin::BI_interlockedbittestandreset64:
603  return {Reset, Sequential, true};
604  case Builtin::BI_interlockedbittestandset64:
605  return {Set, Sequential, true};
606 
607  // ARM/AArch64-specific ordering variants.
608  case Builtin::BI_interlockedbittestandset_acq:
609  return {Set, Acquire, false};
610  case Builtin::BI_interlockedbittestandset_rel:
611  return {Set, Release, false};
612  case Builtin::BI_interlockedbittestandset_nf:
613  return {Set, NoFence, false};
614  case Builtin::BI_interlockedbittestandreset_acq:
615  return {Reset, Acquire, false};
616  case Builtin::BI_interlockedbittestandreset_rel:
617  return {Reset, Release, false};
618  case Builtin::BI_interlockedbittestandreset_nf:
619  return {Reset, NoFence, false};
620  }
621  llvm_unreachable("expected only bittest intrinsics");
622 }
623 
624 static char bitActionToX86BTCode(BitTest::ActionKind A) {
625  switch (A) {
626  case BitTest::TestOnly: return '\0';
627  case BitTest::Complement: return 'c';
628  case BitTest::Reset: return 'r';
629  case BitTest::Set: return 's';
630  }
631  llvm_unreachable("invalid action");
632 }
633 
635  BitTest BT,
636  const CallExpr *E, Value *BitBase,
637  Value *BitPos) {
638  char Action = bitActionToX86BTCode(BT.Action);
639  char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
640 
641  // Build the assembly.
642  SmallString<64> Asm;
643  raw_svector_ostream AsmOS(Asm);
644  if (BT.Interlocking != BitTest::Unlocked)
645  AsmOS << "lock ";
646  AsmOS << "bt";
647  if (Action)
648  AsmOS << Action;
649  AsmOS << SizeSuffix << " $2, ($1)\n\tsetc ${0:b}";
650 
651  // Build the constraints. FIXME: We should support immediates when possible.
652  std::string Constraints = "=r,r,r,~{cc},~{flags},~{fpsr}";
653  llvm::IntegerType *IntType = llvm::IntegerType::get(
654  CGF.getLLVMContext(),
655  CGF.getContext().getTypeSize(E->getArg(1)->getType()));
656  llvm::Type *IntPtrType = IntType->getPointerTo();
657  llvm::FunctionType *FTy =
658  llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false);
659 
660  llvm::InlineAsm *IA =
661  llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
662  return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
663 }
664 
665 static llvm::AtomicOrdering
666 getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
667  switch (I) {
668  case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
669  case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
670  case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
671  case BitTest::Release: return llvm::AtomicOrdering::Release;
672  case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
673  }
674  llvm_unreachable("invalid interlocking");
675 }
676 
677 /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
678 /// bits and a bit position and read and optionally modify the bit at that
679 /// position. The position index can be arbitrarily large, i.e. it can be larger
680 /// than 31 or 63, so we need an indexed load in the general case.
682  unsigned BuiltinID,
683  const CallExpr *E) {
684  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
685  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
686 
687  BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
688 
689  // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
690  // indexing operation internally. Use them if possible.
691  llvm::Triple::ArchType Arch = CGF.getTarget().getTriple().getArch();
692  if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64)
693  return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
694 
695  // Otherwise, use generic code to load one byte and test the bit. Use all but
696  // the bottom three bits as the array index, and the bottom three bits to form
697  // a mask.
698  // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
699  Value *ByteIndex = CGF.Builder.CreateAShr(
700  BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
701  Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
702  Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
703  ByteIndex, "bittest.byteaddr"),
704  CharUnits::One());
705  Value *PosLow =
706  CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
707  llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
708 
709  // The updating instructions will need a mask.
710  Value *Mask = nullptr;
711  if (BT.Action != BitTest::TestOnly) {
712  Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
713  "bittest.mask");
714  }
715 
716  // Check the action and ordering of the interlocked intrinsics.
717  llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
718 
719  Value *OldByte = nullptr;
720  if (Ordering != llvm::AtomicOrdering::NotAtomic) {
721  // Emit a combined atomicrmw load/store operation for the interlocked
722  // intrinsics.
723  llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
724  if (BT.Action == BitTest::Reset) {
725  Mask = CGF.Builder.CreateNot(Mask);
726  RMWOp = llvm::AtomicRMWInst::And;
727  }
728  OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
729  Ordering);
730  } else {
731  // Emit a plain load for the non-interlocked intrinsics.
732  OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
733  Value *NewByte = nullptr;
734  switch (BT.Action) {
735  case BitTest::TestOnly:
736  // Don't store anything.
737  break;
738  case BitTest::Complement:
739  NewByte = CGF.Builder.CreateXor(OldByte, Mask);
740  break;
741  case BitTest::Reset:
742  NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
743  break;
744  case BitTest::Set:
745  NewByte = CGF.Builder.CreateOr(OldByte, Mask);
746  break;
747  }
748  if (NewByte)
749  CGF.Builder.CreateStore(NewByte, ByteAddr);
750  }
751 
752  // However we loaded the old byte, either by plain load or atomicrmw, shift
753  // the bit into the low position and mask it to 0 or 1.
754  Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
755  return CGF.Builder.CreateAnd(
756  ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
757 }
758 
759 namespace {
760 enum class MSVCSetJmpKind {
761  _setjmpex,
762  _setjmp3,
763  _setjmp
764 };
765 }
766 
767 /// MSVC handles setjmp a bit differently on different platforms. On every
768 /// architecture except 32-bit x86, the frame address is passed. On x86, extra
769 /// parameters can be passed as variadic arguments, but we always pass none.
771  const CallExpr *E) {
772  llvm::Value *Arg1 = nullptr;
773  llvm::Type *Arg1Ty = nullptr;
774  StringRef Name;
775  bool IsVarArg = false;
776  if (SJKind == MSVCSetJmpKind::_setjmp3) {
777  Name = "_setjmp3";
778  Arg1Ty = CGF.Int32Ty;
779  Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
780  IsVarArg = true;
781  } else {
782  Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
783  Arg1Ty = CGF.Int8PtrTy;
784  if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
785  Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::sponentry));
786  } else
787  Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress),
788  llvm::ConstantInt::get(CGF.Int32Ty, 0));
789  }
790 
791  // Mark the call site and declaration with ReturnsTwice.
792  llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
793  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
794  CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
795  llvm::Attribute::ReturnsTwice);
796  llvm::Constant *SetJmpFn = CGF.CGM.CreateRuntimeFunction(
797  llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
798  ReturnsTwiceAttr, /*Local=*/true);
799 
800  llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
801  CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
802  llvm::Value *Args[] = {Buf, Arg1};
803  llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
804  CS.setAttributes(ReturnsTwiceAttr);
805  return RValue::get(CS.getInstruction());
806 }
807 
808 // Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code,
809 // we handle them here.
811  _BitScanForward,
812  _BitScanReverse,
813  _InterlockedAnd,
814  _InterlockedDecrement,
815  _InterlockedExchange,
816  _InterlockedExchangeAdd,
817  _InterlockedExchangeSub,
818  _InterlockedIncrement,
819  _InterlockedOr,
820  _InterlockedXor,
821  _InterlockedExchangeAdd_acq,
822  _InterlockedExchangeAdd_rel,
823  _InterlockedExchangeAdd_nf,
824  _InterlockedExchange_acq,
825  _InterlockedExchange_rel,
826  _InterlockedExchange_nf,
827  _InterlockedCompareExchange_acq,
828  _InterlockedCompareExchange_rel,
829  _InterlockedCompareExchange_nf,
830  _InterlockedOr_acq,
831  _InterlockedOr_rel,
832  _InterlockedOr_nf,
833  _InterlockedXor_acq,
834  _InterlockedXor_rel,
835  _InterlockedXor_nf,
836  _InterlockedAnd_acq,
837  _InterlockedAnd_rel,
838  _InterlockedAnd_nf,
839  _InterlockedIncrement_acq,
840  _InterlockedIncrement_rel,
841  _InterlockedIncrement_nf,
842  _InterlockedDecrement_acq,
843  _InterlockedDecrement_rel,
844  _InterlockedDecrement_nf,
845  __fastfail,
846 };
847 
849  const CallExpr *E) {
850  switch (BuiltinID) {
851  case MSVCIntrin::_BitScanForward:
852  case MSVCIntrin::_BitScanReverse: {
853  Value *ArgValue = EmitScalarExpr(E->getArg(1));
854 
855  llvm::Type *ArgType = ArgValue->getType();
856  llvm::Type *IndexType =
857  EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
858  llvm::Type *ResultType = ConvertType(E->getType());
859 
860  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
861  Value *ResZero = llvm::Constant::getNullValue(ResultType);
862  Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
863 
864  BasicBlock *Begin = Builder.GetInsertBlock();
865  BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
866  Builder.SetInsertPoint(End);
867  PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
868 
869  Builder.SetInsertPoint(Begin);
870  Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
871  BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
872  Builder.CreateCondBr(IsZero, End, NotZero);
873  Result->addIncoming(ResZero, Begin);
874 
875  Builder.SetInsertPoint(NotZero);
876  Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
877 
878  if (BuiltinID == MSVCIntrin::_BitScanForward) {
879  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
880  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
881  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
882  Builder.CreateStore(ZeroCount, IndexAddress, false);
883  } else {
884  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
885  Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
886 
887  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
888  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
889  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
890  Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
891  Builder.CreateStore(Index, IndexAddress, false);
892  }
893  Builder.CreateBr(End);
894  Result->addIncoming(ResOne, NotZero);
895 
896  Builder.SetInsertPoint(End);
897  return Result;
898  }
899  case MSVCIntrin::_InterlockedAnd:
900  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
901  case MSVCIntrin::_InterlockedExchange:
902  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
903  case MSVCIntrin::_InterlockedExchangeAdd:
904  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
905  case MSVCIntrin::_InterlockedExchangeSub:
906  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
907  case MSVCIntrin::_InterlockedOr:
908  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
909  case MSVCIntrin::_InterlockedXor:
910  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
911  case MSVCIntrin::_InterlockedExchangeAdd_acq:
912  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
913  AtomicOrdering::Acquire);
914  case MSVCIntrin::_InterlockedExchangeAdd_rel:
915  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
916  AtomicOrdering::Release);
917  case MSVCIntrin::_InterlockedExchangeAdd_nf:
918  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
919  AtomicOrdering::Monotonic);
920  case MSVCIntrin::_InterlockedExchange_acq:
921  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
922  AtomicOrdering::Acquire);
923  case MSVCIntrin::_InterlockedExchange_rel:
924  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
925  AtomicOrdering::Release);
926  case MSVCIntrin::_InterlockedExchange_nf:
927  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
928  AtomicOrdering::Monotonic);
929  case MSVCIntrin::_InterlockedCompareExchange_acq:
930  return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
931  case MSVCIntrin::_InterlockedCompareExchange_rel:
932  return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
933  case MSVCIntrin::_InterlockedCompareExchange_nf:
934  return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
935  case MSVCIntrin::_InterlockedOr_acq:
936  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
937  AtomicOrdering::Acquire);
938  case MSVCIntrin::_InterlockedOr_rel:
939  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
940  AtomicOrdering::Release);
941  case MSVCIntrin::_InterlockedOr_nf:
942  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
943  AtomicOrdering::Monotonic);
944  case MSVCIntrin::_InterlockedXor_acq:
945  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
946  AtomicOrdering::Acquire);
947  case MSVCIntrin::_InterlockedXor_rel:
948  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
949  AtomicOrdering::Release);
950  case MSVCIntrin::_InterlockedXor_nf:
951  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
952  AtomicOrdering::Monotonic);
953  case MSVCIntrin::_InterlockedAnd_acq:
954  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
955  AtomicOrdering::Acquire);
956  case MSVCIntrin::_InterlockedAnd_rel:
957  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
958  AtomicOrdering::Release);
959  case MSVCIntrin::_InterlockedAnd_nf:
960  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
961  AtomicOrdering::Monotonic);
962  case MSVCIntrin::_InterlockedIncrement_acq:
963  return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
964  case MSVCIntrin::_InterlockedIncrement_rel:
965  return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
966  case MSVCIntrin::_InterlockedIncrement_nf:
967  return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
968  case MSVCIntrin::_InterlockedDecrement_acq:
969  return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
970  case MSVCIntrin::_InterlockedDecrement_rel:
971  return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
972  case MSVCIntrin::_InterlockedDecrement_nf:
973  return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
974 
975  case MSVCIntrin::_InterlockedDecrement:
976  return EmitAtomicDecrementValue(*this, E);
977  case MSVCIntrin::_InterlockedIncrement:
978  return EmitAtomicIncrementValue(*this, E);
979 
980  case MSVCIntrin::__fastfail: {
981  // Request immediate process termination from the kernel. The instruction
982  // sequences to do this are documented on MSDN:
983  // https://msdn.microsoft.com/en-us/library/dn774154.aspx
984  llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
985  StringRef Asm, Constraints;
986  switch (ISA) {
987  default:
988  ErrorUnsupported(E, "__fastfail call for this architecture");
989  break;
990  case llvm::Triple::x86:
991  case llvm::Triple::x86_64:
992  Asm = "int $$0x29";
993  Constraints = "{cx}";
994  break;
995  case llvm::Triple::thumb:
996  Asm = "udf #251";
997  Constraints = "{r0}";
998  break;
999  }
1000  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
1001  llvm::InlineAsm *IA =
1002  llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
1003  llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
1004  getLLVMContext(), llvm::AttributeList::FunctionIndex,
1005  llvm::Attribute::NoReturn);
1006  CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
1007  CS.setAttributes(NoReturnAttr);
1008  return CS.getInstruction();
1009  }
1010  }
1011  llvm_unreachable("Incorrect MSVC intrinsic!");
1012 }
1013 
1014 namespace {
1015 // ARC cleanup for __builtin_os_log_format
1016 struct CallObjCArcUse final : EHScopeStack::Cleanup {
1017  CallObjCArcUse(llvm::Value *object) : object(object) {}
1018  llvm::Value *object;
1019 
1020  void Emit(CodeGenFunction &CGF, Flags flags) override {
1021  CGF.EmitARCIntrinsicUse(object);
1022  }
1023 };
1024 }
1025 
1028  assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
1029  && "Unsupported builtin check kind");
1030 
1031  Value *ArgValue = EmitScalarExpr(E);
1032  if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
1033  return ArgValue;
1034 
1035  SanitizerScope SanScope(this);
1036  Value *Cond = Builder.CreateICmpNE(
1037  ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
1038  EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
1039  SanitizerHandler::InvalidBuiltin,
1040  {EmitCheckSourceLocation(E->getExprLoc()),
1041  llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
1042  None);
1043  return ArgValue;
1044 }
1045 
1046 /// Get the argument type for arguments to os_log_helper.
1048  QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
1049  return C.getCanonicalType(UnsignedTy);
1050 }
1051 
1053  const analyze_os_log::OSLogBufferLayout &Layout,
1054  CharUnits BufferAlignment) {
1055  ASTContext &Ctx = getContext();
1056 
1057  llvm::SmallString<64> Name;
1058  {
1059  raw_svector_ostream OS(Name);
1060  OS << "__os_log_helper";
1061  OS << "_" << BufferAlignment.getQuantity();
1062  OS << "_" << int(Layout.getSummaryByte());
1063  OS << "_" << int(Layout.getNumArgsByte());
1064  for (const auto &Item : Layout.Items)
1065  OS << "_" << int(Item.getSizeByte()) << "_"
1066  << int(Item.getDescriptorByte());
1067  }
1068 
1069  if (llvm::Function *F = CGM.getModule().getFunction(Name))
1070  return F;
1071 
1074  Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
1076  ArgTys.emplace_back(Ctx.VoidPtrTy);
1077 
1078  for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
1079  char Size = Layout.Items[I].getSizeByte();
1080  if (!Size)
1081  continue;
1082 
1083  QualType ArgTy = getOSLogArgType(Ctx, Size);
1084  Params.emplace_back(
1085  Ctx, nullptr, SourceLocation(),
1086  &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
1088  ArgTys.emplace_back(ArgTy);
1089  }
1090 
1091  FunctionArgList Args;
1092  for (auto &P : Params)
1093  Args.push_back(&P);
1094 
1095  QualType ReturnTy = Ctx.VoidTy;
1096  QualType FuncionTy = Ctx.getFunctionType(ReturnTy, ArgTys, {});
1097 
1098  // The helper function has linkonce_odr linkage to enable the linker to merge
1099  // identical functions. To ensure the merging always happens, 'noinline' is
1100  // attached to the function when compiling with -Oz.
1101  const CGFunctionInfo &FI =
1102  CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
1103  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
1104  llvm::Function *Fn = llvm::Function::Create(
1105  FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
1106  Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
1107  CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn);
1108  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
1109 
1110  // Attach 'noinline' at -Oz.
1111  if (CGM.getCodeGenOpts().OptimizeSize == 2)
1112  Fn->addFnAttr(llvm::Attribute::NoInline);
1113 
1114  auto NL = ApplyDebugLocation::CreateEmpty(*this);
1115  IdentifierInfo *II = &Ctx.Idents.get(Name);
1118  FuncionTy, nullptr, SC_PrivateExtern, false, false);
1119 
1120  StartFunction(FD, ReturnTy, Fn, FI, Args);
1121 
1122  // Create a scope with an artificial location for the body of this function.
1123  auto AL = ApplyDebugLocation::CreateArtificial(*this);
1124 
1125  CharUnits Offset;
1126  Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"),
1127  BufferAlignment);
1128  Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
1129  Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
1130  Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
1131  Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
1132 
1133  unsigned I = 1;
1134  for (const auto &Item : Layout.Items) {
1135  Builder.CreateStore(
1136  Builder.getInt8(Item.getDescriptorByte()),
1137  Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
1138  Builder.CreateStore(
1139  Builder.getInt8(Item.getSizeByte()),
1140  Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
1141 
1142  CharUnits Size = Item.size();
1143  if (!Size.getQuantity())
1144  continue;
1145 
1146  Address Arg = GetAddrOfLocalVar(&Params[I]);
1147  Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
1148  Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
1149  "argDataCast");
1150  Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
1151  Offset += Size;
1152  ++I;
1153  }
1154 
1155  FinishFunction();
1156 
1157  return Fn;
1158 }
1159 
1161  assert(E.getNumArgs() >= 2 &&
1162  "__builtin_os_log_format takes at least 2 arguments");
1163  ASTContext &Ctx = getContext();
1166  Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
1167  llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
1168 
1169  // Ignore argument 1, the format string. It is not currently used.
1170  CallArgList Args;
1171  Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
1172 
1173  for (const auto &Item : Layout.Items) {
1174  int Size = Item.getSizeByte();
1175  if (!Size)
1176  continue;
1177 
1178  llvm::Value *ArgVal;
1179 
1180  if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
1181  uint64_t Val = 0;
1182  for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
1183  Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
1184  ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
1185  } else if (const Expr *TheExpr = Item.getExpr()) {
1186  ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
1187 
1188  // Check if this is a retainable type.
1189  if (TheExpr->getType()->isObjCRetainableType()) {
1190  assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
1191  "Only scalar can be a ObjC retainable type");
1192  // Check if the object is constant, if not, save it in
1193  // RetainableOperands.
1194  if (!isa<Constant>(ArgVal))
1195  RetainableOperands.push_back(ArgVal);
1196  }
1197  } else {
1198  ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
1199  }
1200 
1201  unsigned ArgValSize =
1202  CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
1203  llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
1204  ArgValSize);
1205  ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
1206  CanQualType ArgTy = getOSLogArgType(Ctx, Size);
1207  // If ArgVal has type x86_fp80, zero-extend ArgVal.
1208  ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
1209  Args.add(RValue::get(ArgVal), ArgTy);
1210  }
1211 
1212  const CGFunctionInfo &FI =
1213  CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
1214  llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
1215  Layout, BufAddr.getAlignment());
1216  EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
1217 
1218  // Push a clang.arc.use cleanup for each object in RetainableOperands. The
1219  // cleanup will cause the use to appear after the final log call, keeping
1220  // the object valid while it’s held in the log buffer. Note that if there’s
1221  // a release cleanup on the object, it will already be active; since
1222  // cleanups are emitted in reverse order, the use will occur before the
1223  // object is released.
1224  if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
1225  CGM.getCodeGenOpts().OptimizationLevel != 0)
1226  for (llvm::Value *Object : RetainableOperands)
1227  pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object);
1228 
1229  return RValue::get(BufAddr.getPointer());
1230 }
1231 
1232 /// Determine if a binop is a checked mixed-sign multiply we can specialize.
1233 static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
1234  WidthAndSignedness Op1Info,
1235  WidthAndSignedness Op2Info,
1236  WidthAndSignedness ResultInfo) {
1237  return BuiltinID == Builtin::BI__builtin_mul_overflow &&
1238  std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
1239  Op1Info.Signed != Op2Info.Signed;
1240 }
1241 
1242 /// Emit a checked mixed-sign multiply. This is a cheaper specialization of
1243 /// the generic checked-binop irgen.
1244 static RValue
1246  WidthAndSignedness Op1Info, const clang::Expr *Op2,
1247  WidthAndSignedness Op2Info,
1248  const clang::Expr *ResultArg, QualType ResultQTy,
1249  WidthAndSignedness ResultInfo) {
1250  assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
1251  Op2Info, ResultInfo) &&
1252  "Not a mixed-sign multipliction we can specialize");
1253 
1254  // Emit the signed and unsigned operands.
1255  const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
1256  const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
1257  llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
1258  llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
1259  unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
1260  unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
1261 
1262  // One of the operands may be smaller than the other. If so, [s|z]ext it.
1263  if (SignedOpWidth < UnsignedOpWidth)
1264  Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
1265  if (UnsignedOpWidth < SignedOpWidth)
1266  Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
1267 
1268  llvm::Type *OpTy = Signed->getType();
1269  llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
1270  Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
1271  llvm::Type *ResTy = ResultPtr.getElementType();
1272  unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
1273 
1274  // Take the absolute value of the signed operand.
1275  llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
1276  llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
1277  llvm::Value *AbsSigned =
1278  CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
1279 
1280  // Perform a checked unsigned multiplication.
1281  llvm::Value *UnsignedOverflow;
1282  llvm::Value *UnsignedResult =
1283  EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
1284  Unsigned, UnsignedOverflow);
1285 
1286  llvm::Value *Overflow, *Result;
1287  if (ResultInfo.Signed) {
1288  // Signed overflow occurs if the result is greater than INT_MAX or lesser
1289  // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative).
1290  auto IntMax =
1291  llvm::APInt::getSignedMaxValue(ResultInfo.Width).zextOrSelf(OpWidth);
1292  llvm::Value *MaxResult =
1293  CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
1294  CGF.Builder.CreateZExt(IsNegative, OpTy));
1295  llvm::Value *SignedOverflow =
1296  CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
1297  Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
1298 
1299  // Prepare the signed result (possibly by negating it).
1300  llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
1301  llvm::Value *SignedResult =
1302  CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
1303  Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
1304  } else {
1305  // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
1306  llvm::Value *Underflow = CGF.Builder.CreateAnd(
1307  IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
1308  Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
1309  if (ResultInfo.Width < OpWidth) {
1310  auto IntMax =
1311  llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
1312  llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
1313  UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
1314  Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
1315  }
1316 
1317  // Negate the product if it would be negative in infinite precision.
1318  Result = CGF.Builder.CreateSelect(
1319  IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
1320 
1321  Result = CGF.Builder.CreateTrunc(Result, ResTy);
1322  }
1323  assert(Overflow && Result && "Missing overflow or result");
1324 
1325  bool isVolatile =
1326  ResultArg->getType()->getPointeeType().isVolatileQualified();
1327  CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
1328  isVolatile);
1329  return RValue::get(Overflow);
1330 }
1331 
1333  Value *&RecordPtr, CharUnits Align, Value *Func,
1334  int Lvl) {
1335  const auto *RT = RType->getAs<RecordType>();
1336  ASTContext &Context = CGF.getContext();
1337  RecordDecl *RD = RT->getDecl()->getDefinition();
1338  ASTContext &Ctx = RD->getASTContext();
1339  const ASTRecordLayout &RL = Ctx.getASTRecordLayout(RD);
1340  std::string Pad = std::string(Lvl * 4, ' ');
1341 
1342  Value *GString =
1343  CGF.Builder.CreateGlobalStringPtr(RType.getAsString() + " {\n");
1344  Value *Res = CGF.Builder.CreateCall(Func, {GString});
1345 
1346  static llvm::DenseMap<QualType, const char *> Types;
1347  if (Types.empty()) {
1348  Types[Context.CharTy] = "%c";
1349  Types[Context.BoolTy] = "%d";
1350  Types[Context.SignedCharTy] = "%hhd";
1351  Types[Context.UnsignedCharTy] = "%hhu";
1352  Types[Context.IntTy] = "%d";
1353  Types[Context.UnsignedIntTy] = "%u";
1354  Types[Context.LongTy] = "%ld";
1355  Types[Context.UnsignedLongTy] = "%lu";
1356  Types[Context.LongLongTy] = "%lld";
1357  Types[Context.UnsignedLongLongTy] = "%llu";
1358  Types[Context.ShortTy] = "%hd";
1359  Types[Context.UnsignedShortTy] = "%hu";
1360  Types[Context.VoidPtrTy] = "%p";
1361  Types[Context.FloatTy] = "%f";
1362  Types[Context.DoubleTy] = "%f";
1363  Types[Context.LongDoubleTy] = "%Lf";
1364  Types[Context.getPointerType(Context.CharTy)] = "%s";
1365  Types[Context.getPointerType(Context.getConstType(Context.CharTy))] = "%s";
1366  }
1367 
1368  for (const auto *FD : RD->fields()) {
1369  uint64_t Off = RL.getFieldOffset(FD->getFieldIndex());
1370  Off = Ctx.toCharUnitsFromBits(Off).getQuantity();
1371 
1372  Value *FieldPtr = RecordPtr;
1373  if (RD->isUnion())
1374  FieldPtr = CGF.Builder.CreatePointerCast(
1375  FieldPtr, CGF.ConvertType(Context.getPointerType(FD->getType())));
1376  else
1377  FieldPtr = CGF.Builder.CreateStructGEP(CGF.ConvertType(RType), FieldPtr,
1378  FD->getFieldIndex());
1379 
1380  GString = CGF.Builder.CreateGlobalStringPtr(
1381  llvm::Twine(Pad)
1382  .concat(FD->getType().getAsString())
1383  .concat(llvm::Twine(' '))
1384  .concat(FD->getNameAsString())
1385  .concat(" : ")
1386  .str());
1387  Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
1388  Res = CGF.Builder.CreateAdd(Res, TmpRes);
1389 
1390  QualType CanonicalType =
1391  FD->getType().getUnqualifiedType().getCanonicalType();
1392 
1393  // We check whether we are in a recursive type
1394  if (CanonicalType->isRecordType()) {
1395  Value *TmpRes =
1396  dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1);
1397  Res = CGF.Builder.CreateAdd(TmpRes, Res);
1398  continue;
1399  }
1400 
1401  // We try to determine the best format to print the current field
1402  llvm::Twine Format = Types.find(CanonicalType) == Types.end()
1403  ? Types[Context.VoidPtrTy]
1404  : Types[CanonicalType];
1405 
1406  Address FieldAddress = Address(FieldPtr, Align);
1407  FieldPtr = CGF.Builder.CreateLoad(FieldAddress);
1408 
1409  // FIXME Need to handle bitfield here
1410  GString = CGF.Builder.CreateGlobalStringPtr(
1411  Format.concat(llvm::Twine('\n')).str());
1412  TmpRes = CGF.Builder.CreateCall(Func, {GString, FieldPtr});
1413  Res = CGF.Builder.CreateAdd(Res, TmpRes);
1414  }
1415 
1416  GString = CGF.Builder.CreateGlobalStringPtr(Pad + "}\n");
1417  Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
1418  Res = CGF.Builder.CreateAdd(Res, TmpRes);
1419  return Res;
1420 }
1421 
1422 static bool
1424  llvm::SmallPtrSetImpl<const Decl *> &Seen) {
1425  if (const auto *Arr = Ctx.getAsArrayType(Ty))
1426  Ty = Ctx.getBaseElementType(Arr);
1427 
1428  const auto *Record = Ty->getAsCXXRecordDecl();
1429  if (!Record)
1430  return false;
1431 
1432  // We've already checked this type, or are in the process of checking it.
1433  if (!Seen.insert(Record).second)
1434  return false;
1435 
1436  assert(Record->hasDefinition() &&
1437  "Incomplete types should already be diagnosed");
1438 
1439  if (Record->isDynamicClass())
1440  return true;
1441 
1442  for (FieldDecl *F : Record->fields()) {
1443  if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
1444  return true;
1445  }
1446  return false;
1447 }
1448 
1449 /// Determine if the specified type requires laundering by checking if it is a
1450 /// dynamic class type or contains a subobject which is a dynamic class type.
1452  if (!CGM.getCodeGenOpts().StrictVTablePointers)
1453  return false;
1454  llvm::SmallPtrSet<const Decl *, 16> Seen;
1455  return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
1456 }
1457 
1458 RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
1459  llvm::Value *Src = EmitScalarExpr(E->getArg(0));
1460  llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
1461 
1462  // The builtin's shift arg may have a different type than the source arg and
1463  // result, but the LLVM intrinsic uses the same type for all values.
1464  llvm::Type *Ty = Src->getType();
1465  ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
1466 
1467  // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
1468  unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1469  Value *F = CGM.getIntrinsic(IID, Ty);
1470  return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
1471 }
1472 
1474  const CallExpr *E,
1475  ReturnValueSlot ReturnValue) {
1476  const FunctionDecl *FD = GD.getDecl()->getAsFunction();
1477  // See if we can constant fold this builtin. If so, don't emit it at all.
1478  Expr::EvalResult Result;
1479  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
1480  !Result.hasSideEffects()) {
1481  if (Result.Val.isInt())
1482  return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
1483  Result.Val.getInt()));
1484  if (Result.Val.isFloat())
1485  return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
1486  Result.Val.getFloat()));
1487  }
1488 
1489  // There are LLVM math intrinsics/instructions corresponding to math library
1490  // functions except the LLVM op will never set errno while the math library
1491  // might. Also, math builtins have the same semantics as their math library
1492  // twins. Thus, we can transform math library and builtin calls to their
1493  // LLVM counterparts if the call is marked 'const' (known to never set errno).
1494  if (FD->hasAttr<ConstAttr>()) {
1495  switch (BuiltinID) {
1496  case Builtin::BIceil:
1497  case Builtin::BIceilf:
1498  case Builtin::BIceill:
1499  case Builtin::BI__builtin_ceil:
1500  case Builtin::BI__builtin_ceilf:
1501  case Builtin::BI__builtin_ceill:
1502  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
1503 
1504  case Builtin::BIcopysign:
1505  case Builtin::BIcopysignf:
1506  case Builtin::BIcopysignl:
1507  case Builtin::BI__builtin_copysign:
1508  case Builtin::BI__builtin_copysignf:
1509  case Builtin::BI__builtin_copysignl:
1510  case Builtin::BI__builtin_copysignf128:
1512 
1513  case Builtin::BIcos:
1514  case Builtin::BIcosf:
1515  case Builtin::BIcosl:
1516  case Builtin::BI__builtin_cos:
1517  case Builtin::BI__builtin_cosf:
1518  case Builtin::BI__builtin_cosl:
1519  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos));
1520 
1521  case Builtin::BIexp:
1522  case Builtin::BIexpf:
1523  case Builtin::BIexpl:
1524  case Builtin::BI__builtin_exp:
1525  case Builtin::BI__builtin_expf:
1526  case Builtin::BI__builtin_expl:
1527  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp));
1528 
1529  case Builtin::BIexp2:
1530  case Builtin::BIexp2f:
1531  case Builtin::BIexp2l:
1532  case Builtin::BI__builtin_exp2:
1533  case Builtin::BI__builtin_exp2f:
1534  case Builtin::BI__builtin_exp2l:
1535  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2));
1536 
1537  case Builtin::BIfabs:
1538  case Builtin::BIfabsf:
1539  case Builtin::BIfabsl:
1540  case Builtin::BI__builtin_fabs:
1541  case Builtin::BI__builtin_fabsf:
1542  case Builtin::BI__builtin_fabsl:
1543  case Builtin::BI__builtin_fabsf128:
1544  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
1545 
1546  case Builtin::BIfloor:
1547  case Builtin::BIfloorf:
1548  case Builtin::BIfloorl:
1549  case Builtin::BI__builtin_floor:
1550  case Builtin::BI__builtin_floorf:
1551  case Builtin::BI__builtin_floorl:
1552  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
1553 
1554  case Builtin::BIfma:
1555  case Builtin::BIfmaf:
1556  case Builtin::BIfmal:
1557  case Builtin::BI__builtin_fma:
1558  case Builtin::BI__builtin_fmaf:
1559  case Builtin::BI__builtin_fmal:
1560  return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma));
1561 
1562  case Builtin::BIfmax:
1563  case Builtin::BIfmaxf:
1564  case Builtin::BIfmaxl:
1565  case Builtin::BI__builtin_fmax:
1566  case Builtin::BI__builtin_fmaxf:
1567  case Builtin::BI__builtin_fmaxl:
1568  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
1569 
1570  case Builtin::BIfmin:
1571  case Builtin::BIfminf:
1572  case Builtin::BIfminl:
1573  case Builtin::BI__builtin_fmin:
1574  case Builtin::BI__builtin_fminf:
1575  case Builtin::BI__builtin_fminl:
1576  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
1577 
1578  // fmod() is a special-case. It maps to the frem instruction rather than an
1579  // LLVM intrinsic.
1580  case Builtin::BIfmod:
1581  case Builtin::BIfmodf:
1582  case Builtin::BIfmodl:
1583  case Builtin::BI__builtin_fmod:
1584  case Builtin::BI__builtin_fmodf:
1585  case Builtin::BI__builtin_fmodl: {
1586  Value *Arg1 = EmitScalarExpr(E->getArg(0));
1587  Value *Arg2 = EmitScalarExpr(E->getArg(1));
1588  return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
1589  }
1590 
1591  case Builtin::BIlog:
1592  case Builtin::BIlogf:
1593  case Builtin::BIlogl:
1594  case Builtin::BI__builtin_log:
1595  case Builtin::BI__builtin_logf:
1596  case Builtin::BI__builtin_logl:
1597  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log));
1598 
1599  case Builtin::BIlog10:
1600  case Builtin::BIlog10f:
1601  case Builtin::BIlog10l:
1602  case Builtin::BI__builtin_log10:
1603  case Builtin::BI__builtin_log10f:
1604  case Builtin::BI__builtin_log10l:
1605  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10));
1606 
1607  case Builtin::BIlog2:
1608  case Builtin::BIlog2f:
1609  case Builtin::BIlog2l:
1610  case Builtin::BI__builtin_log2:
1611  case Builtin::BI__builtin_log2f:
1612  case Builtin::BI__builtin_log2l:
1613  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2));
1614 
1615  case Builtin::BInearbyint:
1616  case Builtin::BInearbyintf:
1617  case Builtin::BInearbyintl:
1618  case Builtin::BI__builtin_nearbyint:
1619  case Builtin::BI__builtin_nearbyintf:
1620  case Builtin::BI__builtin_nearbyintl:
1622 
1623  case Builtin::BIpow:
1624  case Builtin::BIpowf:
1625  case Builtin::BIpowl:
1626  case Builtin::BI__builtin_pow:
1627  case Builtin::BI__builtin_powf:
1628  case Builtin::BI__builtin_powl:
1629  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow));
1630 
1631  case Builtin::BIrint:
1632  case Builtin::BIrintf:
1633  case Builtin::BIrintl:
1634  case Builtin::BI__builtin_rint:
1635  case Builtin::BI__builtin_rintf:
1636  case Builtin::BI__builtin_rintl:
1637  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
1638 
1639  case Builtin::BIround:
1640  case Builtin::BIroundf:
1641  case Builtin::BIroundl:
1642  case Builtin::BI__builtin_round:
1643  case Builtin::BI__builtin_roundf:
1644  case Builtin::BI__builtin_roundl:
1645  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
1646 
1647  case Builtin::BIsin:
1648  case Builtin::BIsinf:
1649  case Builtin::BIsinl:
1650  case Builtin::BI__builtin_sin:
1651  case Builtin::BI__builtin_sinf:
1652  case Builtin::BI__builtin_sinl:
1653  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin));
1654 
1655  case Builtin::BIsqrt:
1656  case Builtin::BIsqrtf:
1657  case Builtin::BIsqrtl:
1658  case Builtin::BI__builtin_sqrt:
1659  case Builtin::BI__builtin_sqrtf:
1660  case Builtin::BI__builtin_sqrtl:
1661  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt));
1662 
1663  case Builtin::BItrunc:
1664  case Builtin::BItruncf:
1665  case Builtin::BItruncl:
1666  case Builtin::BI__builtin_trunc:
1667  case Builtin::BI__builtin_truncf:
1668  case Builtin::BI__builtin_truncl:
1669  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
1670 
1671  default:
1672  break;
1673  }
1674  }
1675 
1676  switch (BuiltinID) {
1677  default: break;
1678  case Builtin::BI__builtin___CFStringMakeConstantString:
1679  case Builtin::BI__builtin___NSStringMakeConstantString:
1680  return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
1681  case Builtin::BI__builtin_stdarg_start:
1682  case Builtin::BI__builtin_va_start:
1683  case Builtin::BI__va_start:
1684  case Builtin::BI__builtin_va_end:
1685  return RValue::get(
1686  EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
1687  ? EmitScalarExpr(E->getArg(0))
1688  : EmitVAListRef(E->getArg(0)).getPointer(),
1689  BuiltinID != Builtin::BI__builtin_va_end));
1690  case Builtin::BI__builtin_va_copy: {
1691  Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
1692  Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
1693 
1694  llvm::Type *Type = Int8PtrTy;
1695 
1696  DstPtr = Builder.CreateBitCast(DstPtr, Type);
1697  SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
1698  return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
1699  {DstPtr, SrcPtr}));
1700  }
1701  case Builtin::BI__builtin_abs:
1702  case Builtin::BI__builtin_labs:
1703  case Builtin::BI__builtin_llabs: {
1704  // X < 0 ? -X : X
1705  // The negation has 'nsw' because abs of INT_MIN is undefined.
1706  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1707  Value *NegOp = Builder.CreateNSWNeg(ArgValue, "neg");
1708  Constant *Zero = llvm::Constant::getNullValue(ArgValue->getType());
1709  Value *CmpResult = Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
1710  Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs");
1711  return RValue::get(Result);
1712  }
1713  case Builtin::BI__builtin_conj:
1714  case Builtin::BI__builtin_conjf:
1715  case Builtin::BI__builtin_conjl: {
1716  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1717  Value *Real = ComplexVal.first;
1718  Value *Imag = ComplexVal.second;
1719  Value *Zero =
1720  Imag->getType()->isFPOrFPVectorTy()
1721  ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
1722  : llvm::Constant::getNullValue(Imag->getType());
1723 
1724  Imag = Builder.CreateFSub(Zero, Imag, "sub");
1725  return RValue::getComplex(std::make_pair(Real, Imag));
1726  }
1727  case Builtin::BI__builtin_creal:
1728  case Builtin::BI__builtin_crealf:
1729  case Builtin::BI__builtin_creall:
1730  case Builtin::BIcreal:
1731  case Builtin::BIcrealf:
1732  case Builtin::BIcreall: {
1733  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1734  return RValue::get(ComplexVal.first);
1735  }
1736 
1737  case Builtin::BI__builtin_dump_struct: {
1738  Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts());
1739  CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment();
1740 
1741  const Expr *Arg0 = E->getArg(0)->IgnoreImpCasts();
1742  QualType Arg0Type = Arg0->getType()->getPointeeType();
1743 
1744  Value *RecordPtr = EmitScalarExpr(Arg0);
1745  Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align, Func, 0);
1746  return RValue::get(Res);
1747  }
1748 
1749  case Builtin::BI__builtin_cimag:
1750  case Builtin::BI__builtin_cimagf:
1751  case Builtin::BI__builtin_cimagl:
1752  case Builtin::BIcimag:
1753  case Builtin::BIcimagf:
1754  case Builtin::BIcimagl: {
1755  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1756  return RValue::get(ComplexVal.second);
1757  }
1758 
1759  case Builtin::BI__builtin_clrsb:
1760  case Builtin::BI__builtin_clrsbl:
1761  case Builtin::BI__builtin_clrsbll: {
1762  // clrsb(x) -> clz(x < 0 ? ~x : x) - 1 or
1763  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1764 
1765  llvm::Type *ArgType = ArgValue->getType();
1766  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1767 
1768  llvm::Type *ResultType = ConvertType(E->getType());
1769  Value *Zero = llvm::Constant::getNullValue(ArgType);
1770  Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
1771  Value *Inverse = Builder.CreateNot(ArgValue, "not");
1772  Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
1773  Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
1774  Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
1775  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1776  "cast");
1777  return RValue::get(Result);
1778  }
1779  case Builtin::BI__builtin_ctzs:
1780  case Builtin::BI__builtin_ctz:
1781  case Builtin::BI__builtin_ctzl:
1782  case Builtin::BI__builtin_ctzll: {
1783  Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
1784 
1785  llvm::Type *ArgType = ArgValue->getType();
1786  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1787 
1788  llvm::Type *ResultType = ConvertType(E->getType());
1789  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1790  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1791  if (Result->getType() != ResultType)
1792  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1793  "cast");
1794  return RValue::get(Result);
1795  }
1796  case Builtin::BI__builtin_clzs:
1797  case Builtin::BI__builtin_clz:
1798  case Builtin::BI__builtin_clzl:
1799  case Builtin::BI__builtin_clzll: {
1800  Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
1801 
1802  llvm::Type *ArgType = ArgValue->getType();
1803  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1804 
1805  llvm::Type *ResultType = ConvertType(E->getType());
1806  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1807  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1808  if (Result->getType() != ResultType)
1809  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1810  "cast");
1811  return RValue::get(Result);
1812  }
1813  case Builtin::BI__builtin_ffs:
1814  case Builtin::BI__builtin_ffsl:
1815  case Builtin::BI__builtin_ffsll: {
1816  // ffs(x) -> x ? cttz(x) + 1 : 0
1817  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1818 
1819  llvm::Type *ArgType = ArgValue->getType();
1820  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1821 
1822  llvm::Type *ResultType = ConvertType(E->getType());
1823  Value *Tmp =
1824  Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
1825  llvm::ConstantInt::get(ArgType, 1));
1826  Value *Zero = llvm::Constant::getNullValue(ArgType);
1827  Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
1828  Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
1829  if (Result->getType() != ResultType)
1830  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1831  "cast");
1832  return RValue::get(Result);
1833  }
1834  case Builtin::BI__builtin_parity:
1835  case Builtin::BI__builtin_parityl:
1836  case Builtin::BI__builtin_parityll: {
1837  // parity(x) -> ctpop(x) & 1
1838  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1839 
1840  llvm::Type *ArgType = ArgValue->getType();
1841  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1842 
1843  llvm::Type *ResultType = ConvertType(E->getType());
1844  Value *Tmp = Builder.CreateCall(F, ArgValue);
1845  Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
1846  if (Result->getType() != ResultType)
1847  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1848  "cast");
1849  return RValue::get(Result);
1850  }
1851  case Builtin::BI__lzcnt16:
1852  case Builtin::BI__lzcnt:
1853  case Builtin::BI__lzcnt64: {
1854  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1855 
1856  llvm::Type *ArgType = ArgValue->getType();
1857  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1858 
1859  llvm::Type *ResultType = ConvertType(E->getType());
1860  Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
1861  if (Result->getType() != ResultType)
1862  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1863  "cast");
1864  return RValue::get(Result);
1865  }
1866  case Builtin::BI__popcnt16:
1867  case Builtin::BI__popcnt:
1868  case Builtin::BI__popcnt64:
1869  case Builtin::BI__builtin_popcount:
1870  case Builtin::BI__builtin_popcountl:
1871  case Builtin::BI__builtin_popcountll: {
1872  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1873 
1874  llvm::Type *ArgType = ArgValue->getType();
1875  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1876 
1877  llvm::Type *ResultType = ConvertType(E->getType());
1878  Value *Result = Builder.CreateCall(F, ArgValue);
1879  if (Result->getType() != ResultType)
1880  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1881  "cast");
1882  return RValue::get(Result);
1883  }
1884  case Builtin::BI__builtin_unpredictable: {
1885  // Always return the argument of __builtin_unpredictable. LLVM does not
1886  // handle this builtin. Metadata for this builtin should be added directly
1887  // to instructions such as branches or switches that use it.
1888  return RValue::get(EmitScalarExpr(E->getArg(0)));
1889  }
1890  case Builtin::BI__builtin_expect: {
1891  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1892  llvm::Type *ArgType = ArgValue->getType();
1893 
1894  Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
1895  // Don't generate llvm.expect on -O0 as the backend won't use it for
1896  // anything.
1897  // Note, we still IRGen ExpectedValue because it could have side-effects.
1898  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
1899  return RValue::get(ArgValue);
1900 
1901  Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
1902  Value *Result =
1903  Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
1904  return RValue::get(Result);
1905  }
1906  case Builtin::BI__builtin_assume_aligned: {
1907  const Expr *Ptr = E->getArg(0);
1908  Value *PtrValue = EmitScalarExpr(Ptr);
1909  Value *OffsetValue =
1910  (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
1911 
1912  Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
1913  ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
1914  unsigned Alignment = (unsigned)AlignmentCI->getZExtValue();
1915 
1916  EmitAlignmentAssumption(PtrValue, Ptr, /*The expr loc is sufficient.*/ SourceLocation(),
1917  Alignment, OffsetValue);
1918  return RValue::get(PtrValue);
1919  }
1920  case Builtin::BI__assume:
1921  case Builtin::BI__builtin_assume: {
1922  if (E->getArg(0)->HasSideEffects(getContext()))
1923  return RValue::get(nullptr);
1924 
1925  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1926  Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
1927  return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
1928  }
1929  case Builtin::BI__builtin_bswap16:
1930  case Builtin::BI__builtin_bswap32:
1931  case Builtin::BI__builtin_bswap64: {
1932  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
1933  }
1934  case Builtin::BI__builtin_bitreverse8:
1935  case Builtin::BI__builtin_bitreverse16:
1936  case Builtin::BI__builtin_bitreverse32:
1937  case Builtin::BI__builtin_bitreverse64: {
1938  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
1939  }
1940  case Builtin::BI__builtin_rotateleft8:
1941  case Builtin::BI__builtin_rotateleft16:
1942  case Builtin::BI__builtin_rotateleft32:
1943  case Builtin::BI__builtin_rotateleft64:
1944  case Builtin::BI_rotl8: // Microsoft variants of rotate left
1945  case Builtin::BI_rotl16:
1946  case Builtin::BI_rotl:
1947  case Builtin::BI_lrotl:
1948  case Builtin::BI_rotl64:
1949  return emitRotate(E, false);
1950 
1951  case Builtin::BI__builtin_rotateright8:
1952  case Builtin::BI__builtin_rotateright16:
1953  case Builtin::BI__builtin_rotateright32:
1954  case Builtin::BI__builtin_rotateright64:
1955  case Builtin::BI_rotr8: // Microsoft variants of rotate right
1956  case Builtin::BI_rotr16:
1957  case Builtin::BI_rotr:
1958  case Builtin::BI_lrotr:
1959  case Builtin::BI_rotr64:
1960  return emitRotate(E, true);
1961 
1962  case Builtin::BI__builtin_constant_p: {
1963  llvm::Type *ResultType = ConvertType(E->getType());
1964  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
1965  // At -O0, we don't perform inlining, so we don't need to delay the
1966  // processing.
1967  return RValue::get(ConstantInt::get(ResultType, 0));
1968 
1969  const Expr *Arg = E->getArg(0);
1970  QualType ArgType = Arg->getType();
1971  if (!hasScalarEvaluationKind(ArgType) || ArgType->isFunctionType())
1972  // We can only reason about scalar types.
1973  return RValue::get(ConstantInt::get(ResultType, 0));
1974 
1975  Value *ArgValue = EmitScalarExpr(Arg);
1976  Value *F = CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
1977  Value *Result = Builder.CreateCall(F, ArgValue);
1978  if (Result->getType() != ResultType)
1979  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
1980  return RValue::get(Result);
1981  }
1982  case Builtin::BI__builtin_object_size: {
1983  unsigned Type =
1984  E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
1985  auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
1986 
1987  // We pass this builtin onto the optimizer so that it can figure out the
1988  // object size in more complex cases.
1989  return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
1990  /*EmittedE=*/nullptr));
1991  }
1992  case Builtin::BI__builtin_prefetch: {
1993  Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
1994  // FIXME: Technically these constants should of type 'int', yes?
1995  RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1996  llvm::ConstantInt::get(Int32Ty, 0);
1997  Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1998  llvm::ConstantInt::get(Int32Ty, 3);
1999  Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
2000  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
2001  return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
2002  }
2003  case Builtin::BI__builtin_readcyclecounter: {
2004  Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
2005  return RValue::get(Builder.CreateCall(F));
2006  }
2007  case Builtin::BI__builtin___clear_cache: {
2008  Value *Begin = EmitScalarExpr(E->getArg(0));
2009  Value *End = EmitScalarExpr(E->getArg(1));
2010  Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
2011  return RValue::get(Builder.CreateCall(F, {Begin, End}));
2012  }
2013  case Builtin::BI__builtin_trap:
2014  return RValue::get(EmitTrapCall(Intrinsic::trap));
2015  case Builtin::BI__debugbreak:
2016  return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
2017  case Builtin::BI__builtin_unreachable: {
2018  EmitUnreachable(E->getExprLoc());
2019 
2020  // We do need to preserve an insertion point.
2021  EmitBlock(createBasicBlock("unreachable.cont"));
2022 
2023  return RValue::get(nullptr);
2024  }
2025 
2026  case Builtin::BI__builtin_powi:
2027  case Builtin::BI__builtin_powif:
2028  case Builtin::BI__builtin_powil: {
2029  Value *Base = EmitScalarExpr(E->getArg(0));
2030  Value *Exponent = EmitScalarExpr(E->getArg(1));
2031  llvm::Type *ArgType = Base->getType();
2032  Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
2033  return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
2034  }
2035 
2036  case Builtin::BI__builtin_isgreater:
2037  case Builtin::BI__builtin_isgreaterequal:
2038  case Builtin::BI__builtin_isless:
2039  case Builtin::BI__builtin_islessequal:
2040  case Builtin::BI__builtin_islessgreater:
2041  case Builtin::BI__builtin_isunordered: {
2042  // Ordered comparisons: we know the arguments to these are matching scalar
2043  // floating point values.
2044  Value *LHS = EmitScalarExpr(E->getArg(0));
2045  Value *RHS = EmitScalarExpr(E->getArg(1));
2046 
2047  switch (BuiltinID) {
2048  default: llvm_unreachable("Unknown ordered comparison");
2049  case Builtin::BI__builtin_isgreater:
2050  LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
2051  break;
2052  case Builtin::BI__builtin_isgreaterequal:
2053  LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
2054  break;
2055  case Builtin::BI__builtin_isless:
2056  LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
2057  break;
2058  case Builtin::BI__builtin_islessequal:
2059  LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
2060  break;
2061  case Builtin::BI__builtin_islessgreater:
2062  LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
2063  break;
2064  case Builtin::BI__builtin_isunordered:
2065  LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
2066  break;
2067  }
2068  // ZExt bool to int type.
2069  return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
2070  }
2071  case Builtin::BI__builtin_isnan: {
2072  Value *V = EmitScalarExpr(E->getArg(0));
2073  V = Builder.CreateFCmpUNO(V, V, "cmp");
2074  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
2075  }
2076 
2077  case Builtin::BIfinite:
2078  case Builtin::BI__finite:
2079  case Builtin::BIfinitef:
2080  case Builtin::BI__finitef:
2081  case Builtin::BIfinitel:
2082  case Builtin::BI__finitel:
2083  case Builtin::BI__builtin_isinf:
2084  case Builtin::BI__builtin_isfinite: {
2085  // isinf(x) --> fabs(x) == infinity
2086  // isfinite(x) --> fabs(x) != infinity
2087  // x != NaN via the ordered compare in either case.
2088  Value *V = EmitScalarExpr(E->getArg(0));
2089  Value *Fabs = EmitFAbs(*this, V);
2090  Constant *Infinity = ConstantFP::getInfinity(V->getType());
2091  CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
2092  ? CmpInst::FCMP_OEQ
2093  : CmpInst::FCMP_ONE;
2094  Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
2095  return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
2096  }
2097 
2098  case Builtin::BI__builtin_isinf_sign: {
2099  // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
2100  Value *Arg = EmitScalarExpr(E->getArg(0));
2101  Value *AbsArg = EmitFAbs(*this, Arg);
2102  Value *IsInf = Builder.CreateFCmpOEQ(
2103  AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
2104  Value *IsNeg = EmitSignBit(*this, Arg);
2105 
2106  llvm::Type *IntTy = ConvertType(E->getType());
2107  Value *Zero = Constant::getNullValue(IntTy);
2108  Value *One = ConstantInt::get(IntTy, 1);
2109  Value *NegativeOne = ConstantInt::get(IntTy, -1);
2110  Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
2111  Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
2112  return RValue::get(Result);
2113  }
2114 
2115  case Builtin::BI__builtin_isnormal: {
2116  // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
2117  Value *V = EmitScalarExpr(E->getArg(0));
2118  Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
2119 
2120  Value *Abs = EmitFAbs(*this, V);
2121  Value *IsLessThanInf =
2122  Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
2123  APFloat Smallest = APFloat::getSmallestNormalized(
2124  getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
2125  Value *IsNormal =
2126  Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
2127  "isnormal");
2128  V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
2129  V = Builder.CreateAnd(V, IsNormal, "and");
2130  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
2131  }
2132 
2133  case Builtin::BI__builtin_fpclassify: {
2134  Value *V = EmitScalarExpr(E->getArg(5));
2135  llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
2136 
2137  // Create Result
2138  BasicBlock *Begin = Builder.GetInsertBlock();
2139  BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
2140  Builder.SetInsertPoint(End);
2141  PHINode *Result =
2142  Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
2143  "fpclassify_result");
2144 
2145  // if (V==0) return FP_ZERO
2146  Builder.SetInsertPoint(Begin);
2147  Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
2148  "iszero");
2149  Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
2150  BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
2151  Builder.CreateCondBr(IsZero, End, NotZero);
2152  Result->addIncoming(ZeroLiteral, Begin);
2153 
2154  // if (V != V) return FP_NAN
2155  Builder.SetInsertPoint(NotZero);
2156  Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
2157  Value *NanLiteral = EmitScalarExpr(E->getArg(0));
2158  BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
2159  Builder.CreateCondBr(IsNan, End, NotNan);
2160  Result->addIncoming(NanLiteral, NotZero);
2161 
2162  // if (fabs(V) == infinity) return FP_INFINITY
2163  Builder.SetInsertPoint(NotNan);
2164  Value *VAbs = EmitFAbs(*this, V);
2165  Value *IsInf =
2166  Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
2167  "isinf");
2168  Value *InfLiteral = EmitScalarExpr(E->getArg(1));
2169  BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
2170  Builder.CreateCondBr(IsInf, End, NotInf);
2171  Result->addIncoming(InfLiteral, NotNan);
2172 
2173  // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
2174  Builder.SetInsertPoint(NotInf);
2175  APFloat Smallest = APFloat::getSmallestNormalized(
2176  getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
2177  Value *IsNormal =
2178  Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
2179  "isnormal");
2180  Value *NormalResult =
2181  Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
2182  EmitScalarExpr(E->getArg(3)));
2183  Builder.CreateBr(End);
2184  Result->addIncoming(NormalResult, NotInf);
2185 
2186  // return Result
2187  Builder.SetInsertPoint(End);
2188  return RValue::get(Result);
2189  }
2190 
2191  case Builtin::BIalloca:
2192  case Builtin::BI_alloca:
2193  case Builtin::BI__builtin_alloca: {
2194  Value *Size = EmitScalarExpr(E->getArg(0));
2195  const TargetInfo &TI = getContext().getTargetInfo();
2196  // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
2197  unsigned SuitableAlignmentInBytes =
2198  CGM.getContext()
2199  .toCharUnitsFromBits(TI.getSuitableAlign())
2200  .getQuantity();
2201  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
2202  AI->setAlignment(SuitableAlignmentInBytes);
2203  return RValue::get(AI);
2204  }
2205 
2206  case Builtin::BI__builtin_alloca_with_align: {
2207  Value *Size = EmitScalarExpr(E->getArg(0));
2208  Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
2209  auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
2210  unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
2211  unsigned AlignmentInBytes =
2212  CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
2213  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
2214  AI->setAlignment(AlignmentInBytes);
2215  return RValue::get(AI);
2216  }
2217 
2218  case Builtin::BIbzero:
2219  case Builtin::BI__builtin_bzero: {
2220  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2221  Value *SizeVal = EmitScalarExpr(E->getArg(1));
2222  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
2223  E->getArg(0)->getExprLoc(), FD, 0);
2224  Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
2225  return RValue::get(nullptr);
2226  }
2227  case Builtin::BImemcpy:
2228  case Builtin::BI__builtin_memcpy: {
2229  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2230  Address Src = EmitPointerWithAlignment(E->getArg(1));
2231  Value *SizeVal = EmitScalarExpr(E->getArg(2));
2232  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
2233  E->getArg(0)->getExprLoc(), FD, 0);
2234  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
2235  E->getArg(1)->getExprLoc(), FD, 1);
2236  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
2237  return RValue::get(Dest.getPointer());
2238  }
2239 
2240  case Builtin::BI__builtin_char_memchr:
2241  BuiltinID = Builtin::BI__builtin_memchr;
2242  break;
2243 
2244  case Builtin::BI__builtin___memcpy_chk: {
2245  // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
2246  Expr::EvalResult SizeResult, DstSizeResult;
2247  if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
2248  !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
2249  break;
2250  llvm::APSInt Size = SizeResult.Val.getInt();
2251  llvm::APSInt DstSize = DstSizeResult.Val.getInt();
2252  if (Size.ugt(DstSize))
2253  break;
2254  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2255  Address Src = EmitPointerWithAlignment(E->getArg(1));
2256  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
2257  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
2258  return RValue::get(Dest.getPointer());
2259  }
2260 
2261  case Builtin::BI__builtin_objc_memmove_collectable: {
2262  Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
2263  Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
2264  Value *SizeVal = EmitScalarExpr(E->getArg(2));
2265  CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
2266  DestAddr, SrcAddr, SizeVal);
2267  return RValue::get(DestAddr.getPointer());
2268  }
2269 
2270  case Builtin::BI__builtin___memmove_chk: {
2271  // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
2272  Expr::EvalResult SizeResult, DstSizeResult;
2273  if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
2274  !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
2275  break;
2276  llvm::APSInt Size = SizeResult.Val.getInt();
2277  llvm::APSInt DstSize = DstSizeResult.Val.getInt();
2278  if (Size.ugt(DstSize))
2279  break;
2280  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2281  Address Src = EmitPointerWithAlignment(E->getArg(1));
2282  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
2283  Builder.CreateMemMove(Dest, Src, SizeVal, false);
2284  return RValue::get(Dest.getPointer());
2285  }
2286 
2287  case Builtin::BImemmove:
2288  case Builtin::BI__builtin_memmove: {
2289  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2290  Address Src = EmitPointerWithAlignment(E->getArg(1));
2291  Value *SizeVal = EmitScalarExpr(E->getArg(2));
2292  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
2293  E->getArg(0)->getExprLoc(), FD, 0);
2294  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
2295  E->getArg(1)->getExprLoc(), FD, 1);
2296  Builder.CreateMemMove(Dest, Src, SizeVal, false);
2297  return RValue::get(Dest.getPointer());
2298  }
2299  case Builtin::BImemset:
2300  case Builtin::BI__builtin_memset: {
2301  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2302  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
2303  Builder.getInt8Ty());
2304  Value *SizeVal = EmitScalarExpr(E->getArg(2));
2305  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
2306  E->getArg(0)->getExprLoc(), FD, 0);
2307  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
2308  return RValue::get(Dest.getPointer());
2309  }
2310  case Builtin::BI__builtin___memset_chk: {
2311  // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
2312  Expr::EvalResult SizeResult, DstSizeResult;
2313  if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
2314  !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
2315  break;
2316  llvm::APSInt Size = SizeResult.Val.getInt();
2317  llvm::APSInt DstSize = DstSizeResult.Val.getInt();
2318  if (Size.ugt(DstSize))
2319  break;
2320  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2321  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
2322  Builder.getInt8Ty());
2323  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
2324  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
2325  return RValue::get(Dest.getPointer());
2326  }
2327  case Builtin::BI__builtin_wmemcmp: {
2328  // The MSVC runtime library does not provide a definition of wmemcmp, so we
2329  // need an inline implementation.
2330  if (!getTarget().getTriple().isOSMSVCRT())
2331  break;
2332 
2333  llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
2334 
2335  Value *Dst = EmitScalarExpr(E->getArg(0));
2336  Value *Src = EmitScalarExpr(E->getArg(1));
2337  Value *Size = EmitScalarExpr(E->getArg(2));
2338 
2339  BasicBlock *Entry = Builder.GetInsertBlock();
2340  BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
2341  BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
2342  BasicBlock *Next = createBasicBlock("wmemcmp.next");
2343  BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
2344  Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
2345  Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
2346 
2347  EmitBlock(CmpGT);
2348  PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
2349  DstPhi->addIncoming(Dst, Entry);
2350  PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
2351  SrcPhi->addIncoming(Src, Entry);
2352  PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
2353  SizePhi->addIncoming(Size, Entry);
2354  CharUnits WCharAlign =
2355  getContext().getTypeAlignInChars(getContext().WCharTy);
2356  Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
2357  Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
2358  Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
2359  Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
2360 
2361  EmitBlock(CmpLT);
2362  Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
2363  Builder.CreateCondBr(DstLtSrc, Exit, Next);
2364 
2365  EmitBlock(Next);
2366  Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
2367  Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
2368  Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
2369  Value *NextSizeEq0 =
2370  Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
2371  Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
2372  DstPhi->addIncoming(NextDst, Next);
2373  SrcPhi->addIncoming(NextSrc, Next);
2374  SizePhi->addIncoming(NextSize, Next);
2375 
2376  EmitBlock(Exit);
2377  PHINode *Ret = Builder.CreatePHI(IntTy, 4);
2378  Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
2379  Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
2380  Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
2381  Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
2382  return RValue::get(Ret);
2383  }
2384  case Builtin::BI__builtin_dwarf_cfa: {
2385  // The offset in bytes from the first argument to the CFA.
2386  //
2387  // Why on earth is this in the frontend? Is there any reason at
2388  // all that the backend can't reasonably determine this while
2389  // lowering llvm.eh.dwarf.cfa()?
2390  //
2391  // TODO: If there's a satisfactory reason, add a target hook for
2392  // this instead of hard-coding 0, which is correct for most targets.
2393  int32_t Offset = 0;
2394 
2395  Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
2396  return RValue::get(Builder.CreateCall(F,
2397  llvm::ConstantInt::get(Int32Ty, Offset)));
2398  }
2399  case Builtin::BI__builtin_return_address: {
2400  Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
2401  getContext().UnsignedIntTy);
2402  Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
2403  return RValue::get(Builder.CreateCall(F, Depth));
2404  }
2405  case Builtin::BI_ReturnAddress: {
2406  Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
2407  return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
2408  }
2409  case Builtin::BI__builtin_frame_address: {
2410  Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
2411  getContext().UnsignedIntTy);
2412  Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
2413  return RValue::get(Builder.CreateCall(F, Depth));
2414  }
2415  case Builtin::BI__builtin_extract_return_addr: {
2416  Value *Address = EmitScalarExpr(E->getArg(0));
2417  Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
2418  return RValue::get(Result);
2419  }
2420  case Builtin::BI__builtin_frob_return_addr: {
2421  Value *Address = EmitScalarExpr(E->getArg(0));
2422  Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
2423  return RValue::get(Result);
2424  }
2425  case Builtin::BI__builtin_dwarf_sp_column: {
2426  llvm::IntegerType *Ty
2427  = cast<llvm::IntegerType>(ConvertType(E->getType()));
2428  int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
2429  if (Column == -1) {
2430  CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
2431  return RValue::get(llvm::UndefValue::get(Ty));
2432  }
2433  return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
2434  }
2435  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
2436  Value *Address = EmitScalarExpr(E->getArg(0));
2437  if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
2438  CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
2439  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
2440  }
2441  case Builtin::BI__builtin_eh_return: {
2442  Value *Int = EmitScalarExpr(E->getArg(0));
2443  Value *Ptr = EmitScalarExpr(E->getArg(1));
2444 
2445  llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
2446  assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
2447  "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
2448  Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
2449  ? Intrinsic::eh_return_i32
2450  : Intrinsic::eh_return_i64);
2451  Builder.CreateCall(F, {Int, Ptr});
2452  Builder.CreateUnreachable();
2453 
2454  // We do need to preserve an insertion point.
2455  EmitBlock(createBasicBlock("builtin_eh_return.cont"));
2456 
2457  return RValue::get(nullptr);
2458  }
2459  case Builtin::BI__builtin_unwind_init: {
2460  Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
2461  return RValue::get(Builder.CreateCall(F));
2462  }
2463  case Builtin::BI__builtin_extend_pointer: {
2464  // Extends a pointer to the size of an _Unwind_Word, which is
2465  // uint64_t on all platforms. Generally this gets poked into a
2466  // register and eventually used as an address, so if the
2467  // addressing registers are wider than pointers and the platform
2468  // doesn't implicitly ignore high-order bits when doing
2469  // addressing, we need to make sure we zext / sext based on
2470  // the platform's expectations.
2471  //
2472  // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
2473 
2474  // Cast the pointer to intptr_t.
2475  Value *Ptr = EmitScalarExpr(E->getArg(0));
2476  Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
2477 
2478  // If that's 64 bits, we're done.
2479  if (IntPtrTy->getBitWidth() == 64)
2480  return RValue::get(Result);
2481 
2482  // Otherwise, ask the codegen data what to do.
2483  if (getTargetHooks().extendPointerWithSExt())
2484  return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
2485  else
2486  return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
2487  }
2488  case Builtin::BI__builtin_setjmp: {
2489  // Buffer is a void**.
2490  Address Buf = EmitPointerWithAlignment(E->getArg(0));
2491 
2492  // Store the frame pointer to the setjmp buffer.
2493  Value *FrameAddr =
2494  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2495  ConstantInt::get(Int32Ty, 0));
2496  Builder.CreateStore(FrameAddr, Buf);
2497 
2498  // Store the stack pointer to the setjmp buffer.
2499  Value *StackAddr =
2500  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
2501  Address StackSaveSlot =
2502  Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
2503  Builder.CreateStore(StackAddr, StackSaveSlot);
2504 
2505  // Call LLVM's EH setjmp, which is lightweight.
2506  Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
2507  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
2508  return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
2509  }
2510  case Builtin::BI__builtin_longjmp: {
2511  Value *Buf = EmitScalarExpr(E->getArg(0));
2512  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
2513 
2514  // Call LLVM's EH longjmp, which is lightweight.
2515  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
2516 
2517  // longjmp doesn't return; mark this as unreachable.
2518  Builder.CreateUnreachable();
2519 
2520  // We do need to preserve an insertion point.
2521  EmitBlock(createBasicBlock("longjmp.cont"));
2522 
2523  return RValue::get(nullptr);
2524  }
2525  case Builtin::BI__builtin_launder: {
2526  const Expr *Arg = E->getArg(0);
2527  QualType ArgTy = Arg->getType()->getPointeeType();
2528  Value *Ptr = EmitScalarExpr(Arg);
2529  if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
2530  Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
2531 
2532  return RValue::get(Ptr);
2533  }
2534  case Builtin::BI__sync_fetch_and_add:
2535  case Builtin::BI__sync_fetch_and_sub:
2536  case Builtin::BI__sync_fetch_and_or:
2537  case Builtin::BI__sync_fetch_and_and:
2538  case Builtin::BI__sync_fetch_and_xor:
2539  case Builtin::BI__sync_fetch_and_nand:
2540  case Builtin::BI__sync_add_and_fetch:
2541  case Builtin::BI__sync_sub_and_fetch:
2542  case Builtin::BI__sync_and_and_fetch:
2543  case Builtin::BI__sync_or_and_fetch:
2544  case Builtin::BI__sync_xor_and_fetch:
2545  case Builtin::BI__sync_nand_and_fetch:
2546  case Builtin::BI__sync_val_compare_and_swap:
2547  case Builtin::BI__sync_bool_compare_and_swap:
2548  case Builtin::BI__sync_lock_test_and_set:
2549  case Builtin::BI__sync_lock_release:
2550  case Builtin::BI__sync_swap:
2551  llvm_unreachable("Shouldn't make it through sema");
2552  case Builtin::BI__sync_fetch_and_add_1:
2553  case Builtin::BI__sync_fetch_and_add_2:
2554  case Builtin::BI__sync_fetch_and_add_4:
2555  case Builtin::BI__sync_fetch_and_add_8:
2556  case Builtin::BI__sync_fetch_and_add_16:
2557  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
2558  case Builtin::BI__sync_fetch_and_sub_1:
2559  case Builtin::BI__sync_fetch_and_sub_2:
2560  case Builtin::BI__sync_fetch_and_sub_4:
2561  case Builtin::BI__sync_fetch_and_sub_8:
2562  case Builtin::BI__sync_fetch_and_sub_16:
2563  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
2564  case Builtin::BI__sync_fetch_and_or_1:
2565  case Builtin::BI__sync_fetch_and_or_2:
2566  case Builtin::BI__sync_fetch_and_or_4:
2567  case Builtin::BI__sync_fetch_and_or_8:
2568  case Builtin::BI__sync_fetch_and_or_16:
2569  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
2570  case Builtin::BI__sync_fetch_and_and_1:
2571  case Builtin::BI__sync_fetch_and_and_2:
2572  case Builtin::BI__sync_fetch_and_and_4:
2573  case Builtin::BI__sync_fetch_and_and_8:
2574  case Builtin::BI__sync_fetch_and_and_16:
2575  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
2576  case Builtin::BI__sync_fetch_and_xor_1:
2577  case Builtin::BI__sync_fetch_and_xor_2:
2578  case Builtin::BI__sync_fetch_and_xor_4:
2579  case Builtin::BI__sync_fetch_and_xor_8:
2580  case Builtin::BI__sync_fetch_and_xor_16:
2581  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
2582  case Builtin::BI__sync_fetch_and_nand_1:
2583  case Builtin::BI__sync_fetch_and_nand_2:
2584  case Builtin::BI__sync_fetch_and_nand_4:
2585  case Builtin::BI__sync_fetch_and_nand_8:
2586  case Builtin::BI__sync_fetch_and_nand_16:
2587  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
2588 
2589  // Clang extensions: not overloaded yet.
2590  case Builtin::BI__sync_fetch_and_min:
2591  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
2592  case Builtin::BI__sync_fetch_and_max:
2593  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
2594  case Builtin::BI__sync_fetch_and_umin:
2595  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
2596  case Builtin::BI__sync_fetch_and_umax:
2597  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
2598 
2599  case Builtin::BI__sync_add_and_fetch_1:
2600  case Builtin::BI__sync_add_and_fetch_2:
2601  case Builtin::BI__sync_add_and_fetch_4:
2602  case Builtin::BI__sync_add_and_fetch_8:
2603  case Builtin::BI__sync_add_and_fetch_16:
2604  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
2605  llvm::Instruction::Add);
2606  case Builtin::BI__sync_sub_and_fetch_1:
2607  case Builtin::BI__sync_sub_and_fetch_2:
2608  case Builtin::BI__sync_sub_and_fetch_4:
2609  case Builtin::BI__sync_sub_and_fetch_8:
2610  case Builtin::BI__sync_sub_and_fetch_16:
2611  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
2612  llvm::Instruction::Sub);
2613  case Builtin::BI__sync_and_and_fetch_1:
2614  case Builtin::BI__sync_and_and_fetch_2:
2615  case Builtin::BI__sync_and_and_fetch_4:
2616  case Builtin::BI__sync_and_and_fetch_8:
2617  case Builtin::BI__sync_and_and_fetch_16:
2620  case Builtin::BI__sync_or_and_fetch_1:
2621  case Builtin::BI__sync_or_and_fetch_2:
2622  case Builtin::BI__sync_or_and_fetch_4:
2623  case Builtin::BI__sync_or_and_fetch_8:
2624  case Builtin::BI__sync_or_and_fetch_16:
2625  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
2626  llvm::Instruction::Or);
2627  case Builtin::BI__sync_xor_and_fetch_1:
2628  case Builtin::BI__sync_xor_and_fetch_2:
2629  case Builtin::BI__sync_xor_and_fetch_4:
2630  case Builtin::BI__sync_xor_and_fetch_8:
2631  case Builtin::BI__sync_xor_and_fetch_16:
2632  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
2633  llvm::Instruction::Xor);
2634  case Builtin::BI__sync_nand_and_fetch_1:
2635  case Builtin::BI__sync_nand_and_fetch_2:
2636  case Builtin::BI__sync_nand_and_fetch_4:
2637  case Builtin::BI__sync_nand_and_fetch_8:
2638  case Builtin::BI__sync_nand_and_fetch_16:
2639  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
2640  llvm::Instruction::And, true);
2641 
2642  case Builtin::BI__sync_val_compare_and_swap_1:
2643  case Builtin::BI__sync_val_compare_and_swap_2:
2644  case Builtin::BI__sync_val_compare_and_swap_4:
2645  case Builtin::BI__sync_val_compare_and_swap_8:
2646  case Builtin::BI__sync_val_compare_and_swap_16:
2647  return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
2648 
2649  case Builtin::BI__sync_bool_compare_and_swap_1:
2650  case Builtin::BI__sync_bool_compare_and_swap_2:
2651  case Builtin::BI__sync_bool_compare_and_swap_4:
2652  case Builtin::BI__sync_bool_compare_and_swap_8:
2653  case Builtin::BI__sync_bool_compare_and_swap_16:
2654  return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
2655 
2656  case Builtin::BI__sync_swap_1:
2657  case Builtin::BI__sync_swap_2:
2658  case Builtin::BI__sync_swap_4:
2659  case Builtin::BI__sync_swap_8:
2660  case Builtin::BI__sync_swap_16:
2661  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
2662 
2663  case Builtin::BI__sync_lock_test_and_set_1:
2664  case Builtin::BI__sync_lock_test_and_set_2:
2665  case Builtin::BI__sync_lock_test_and_set_4:
2666  case Builtin::BI__sync_lock_test_and_set_8:
2667  case Builtin::BI__sync_lock_test_and_set_16:
2668  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
2669 
2670  case Builtin::BI__sync_lock_release_1:
2671  case Builtin::BI__sync_lock_release_2:
2672  case Builtin::BI__sync_lock_release_4:
2673  case Builtin::BI__sync_lock_release_8:
2674  case Builtin::BI__sync_lock_release_16: {
2675  Value *Ptr = EmitScalarExpr(E->getArg(0));
2676  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
2677  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
2678  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
2679  StoreSize.getQuantity() * 8);
2680  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
2681  llvm::StoreInst *Store =
2682  Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
2683  StoreSize);
2684  Store->setAtomic(llvm::AtomicOrdering::Release);
2685  return RValue::get(nullptr);
2686  }
2687 
2688  case Builtin::BI__sync_synchronize: {
2689  // We assume this is supposed to correspond to a C++0x-style
2690  // sequentially-consistent fence (i.e. this is only usable for
2691  // synchronization, not device I/O or anything like that). This intrinsic
2692  // is really badly designed in the sense that in theory, there isn't
2693  // any way to safely use it... but in practice, it mostly works
2694  // to use it with non-atomic loads and stores to get acquire/release
2695  // semantics.
2696  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
2697  return RValue::get(nullptr);
2698  }
2699 
2700  case Builtin::BI__builtin_nontemporal_load:
2701  return RValue::get(EmitNontemporalLoad(*this, E));
2702  case Builtin::BI__builtin_nontemporal_store:
2703  return RValue::get(EmitNontemporalStore(*this, E));
2704  case Builtin::BI__c11_atomic_is_lock_free:
2705  case Builtin::BI__atomic_is_lock_free: {
2706  // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
2707  // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
2708  // _Atomic(T) is always properly-aligned.
2709  const char *LibCallName = "__atomic_is_lock_free";
2710  CallArgList Args;
2711  Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
2712  getContext().getSizeType());
2713  if (BuiltinID == Builtin::BI__atomic_is_lock_free)
2714  Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
2715  getContext().VoidPtrTy);
2716  else
2717  Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
2718  getContext().VoidPtrTy);
2719  const CGFunctionInfo &FuncInfo =
2720  CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
2721  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
2722  llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
2723  return EmitCall(FuncInfo, CGCallee::forDirect(Func),
2724  ReturnValueSlot(), Args);
2725  }
2726 
2727  case Builtin::BI__atomic_test_and_set: {
2728  // Look at the argument type to determine whether this is a volatile
2729  // operation. The parameter type is always volatile.
2730  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
2731  bool Volatile =
2732  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
2733 
2734  Value *Ptr = EmitScalarExpr(E->getArg(0));
2735  unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
2736  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2737  Value *NewVal = Builder.getInt8(1);
2738  Value *Order = EmitScalarExpr(E->getArg(1));
2739  if (isa<llvm::ConstantInt>(Order)) {
2740  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2741  AtomicRMWInst *Result = nullptr;
2742  switch (ord) {
2743  case 0: // memory_order_relaxed
2744  default: // invalid order
2745  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2746  llvm::AtomicOrdering::Monotonic);
2747  break;
2748  case 1: // memory_order_consume
2749  case 2: // memory_order_acquire
2750  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2751  llvm::AtomicOrdering::Acquire);
2752  break;
2753  case 3: // memory_order_release
2754  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2755  llvm::AtomicOrdering::Release);
2756  break;
2757  case 4: // memory_order_acq_rel
2758 
2759  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2760  llvm::AtomicOrdering::AcquireRelease);
2761  break;
2762  case 5: // memory_order_seq_cst
2763  Result = Builder.CreateAtomicRMW(
2764  llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2765  llvm::AtomicOrdering::SequentiallyConsistent);
2766  break;
2767  }
2768  Result->setVolatile(Volatile);
2769  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2770  }
2771 
2772  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2773 
2774  llvm::BasicBlock *BBs[5] = {
2775  createBasicBlock("monotonic", CurFn),
2776  createBasicBlock("acquire", CurFn),
2777  createBasicBlock("release", CurFn),
2778  createBasicBlock("acqrel", CurFn),
2779  createBasicBlock("seqcst", CurFn)
2780  };
2781  llvm::AtomicOrdering Orders[5] = {
2782  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
2783  llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
2784  llvm::AtomicOrdering::SequentiallyConsistent};
2785 
2786  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2787  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2788 
2789  Builder.SetInsertPoint(ContBB);
2790  PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
2791 
2792  for (unsigned i = 0; i < 5; ++i) {
2793  Builder.SetInsertPoint(BBs[i]);
2794  AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
2795  Ptr, NewVal, Orders[i]);
2796  RMW->setVolatile(Volatile);
2797  Result->addIncoming(RMW, BBs[i]);
2798  Builder.CreateBr(ContBB);
2799  }
2800 
2801  SI->addCase(Builder.getInt32(0), BBs[0]);
2802  SI->addCase(Builder.getInt32(1), BBs[1]);
2803  SI->addCase(Builder.getInt32(2), BBs[1]);
2804  SI->addCase(Builder.getInt32(3), BBs[2]);
2805  SI->addCase(Builder.getInt32(4), BBs[3]);
2806  SI->addCase(Builder.getInt32(5), BBs[4]);
2807 
2808  Builder.SetInsertPoint(ContBB);
2809  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2810  }
2811 
2812  case Builtin::BI__atomic_clear: {
2813  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
2814  bool Volatile =
2815  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
2816 
2817  Address Ptr = EmitPointerWithAlignment(E->getArg(0));
2818  unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
2819  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2820  Value *NewVal = Builder.getInt8(0);
2821  Value *Order = EmitScalarExpr(E->getArg(1));
2822  if (isa<llvm::ConstantInt>(Order)) {
2823  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2824  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
2825  switch (ord) {
2826  case 0: // memory_order_relaxed
2827  default: // invalid order
2828  Store->setOrdering(llvm::AtomicOrdering::Monotonic);
2829  break;
2830  case 3: // memory_order_release
2831  Store->setOrdering(llvm::AtomicOrdering::Release);
2832  break;
2833  case 5: // memory_order_seq_cst
2834  Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
2835  break;
2836  }
2837  return RValue::get(nullptr);
2838  }
2839 
2840  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2841 
2842  llvm::BasicBlock *BBs[3] = {
2843  createBasicBlock("monotonic", CurFn),
2844  createBasicBlock("release", CurFn),
2845  createBasicBlock("seqcst", CurFn)
2846  };
2847  llvm::AtomicOrdering Orders[3] = {
2848  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
2849  llvm::AtomicOrdering::SequentiallyConsistent};
2850 
2851  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2852  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2853 
2854  for (unsigned i = 0; i < 3; ++i) {
2855  Builder.SetInsertPoint(BBs[i]);
2856  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
2857  Store->setOrdering(Orders[i]);
2858  Builder.CreateBr(ContBB);
2859  }
2860 
2861  SI->addCase(Builder.getInt32(0), BBs[0]);
2862  SI->addCase(Builder.getInt32(3), BBs[1]);
2863  SI->addCase(Builder.getInt32(5), BBs[2]);
2864 
2865  Builder.SetInsertPoint(ContBB);
2866  return RValue::get(nullptr);
2867  }
2868 
2869  case Builtin::BI__atomic_thread_fence:
2870  case Builtin::BI__atomic_signal_fence:
2871  case Builtin::BI__c11_atomic_thread_fence:
2872  case Builtin::BI__c11_atomic_signal_fence: {
2873  llvm::SyncScope::ID SSID;
2874  if (BuiltinID == Builtin::BI__atomic_signal_fence ||
2875  BuiltinID == Builtin::BI__c11_atomic_signal_fence)
2876  SSID = llvm::SyncScope::SingleThread;
2877  else
2878  SSID = llvm::SyncScope::System;
2879  Value *Order = EmitScalarExpr(E->getArg(0));
2880  if (isa<llvm::ConstantInt>(Order)) {
2881  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2882  switch (ord) {
2883  case 0: // memory_order_relaxed
2884  default: // invalid order
2885  break;
2886  case 1: // memory_order_consume
2887  case 2: // memory_order_acquire
2888  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2889  break;
2890  case 3: // memory_order_release
2891  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2892  break;
2893  case 4: // memory_order_acq_rel
2894  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2895  break;
2896  case 5: // memory_order_seq_cst
2897  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2898  break;
2899  }
2900  return RValue::get(nullptr);
2901  }
2902 
2903  llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
2904  AcquireBB = createBasicBlock("acquire", CurFn);
2905  ReleaseBB = createBasicBlock("release", CurFn);
2906  AcqRelBB = createBasicBlock("acqrel", CurFn);
2907  SeqCstBB = createBasicBlock("seqcst", CurFn);
2908  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2909 
2910  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2911  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
2912 
2913  Builder.SetInsertPoint(AcquireBB);
2914  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2915  Builder.CreateBr(ContBB);
2916  SI->addCase(Builder.getInt32(1), AcquireBB);
2917  SI->addCase(Builder.getInt32(2), AcquireBB);
2918 
2919  Builder.SetInsertPoint(ReleaseBB);
2920  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2921  Builder.CreateBr(ContBB);
2922  SI->addCase(Builder.getInt32(3), ReleaseBB);
2923 
2924  Builder.SetInsertPoint(AcqRelBB);
2925  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2926  Builder.CreateBr(ContBB);
2927  SI->addCase(Builder.getInt32(4), AcqRelBB);
2928 
2929  Builder.SetInsertPoint(SeqCstBB);
2930  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2931  Builder.CreateBr(ContBB);
2932  SI->addCase(Builder.getInt32(5), SeqCstBB);
2933 
2934  Builder.SetInsertPoint(ContBB);
2935  return RValue::get(nullptr);
2936  }
2937 
2938  case Builtin::BI__builtin_signbit:
2939  case Builtin::BI__builtin_signbitf:
2940  case Builtin::BI__builtin_signbitl: {
2941  return RValue::get(
2942  Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
2943  ConvertType(E->getType())));
2944  }
2945  case Builtin::BI__annotation: {
2946  // Re-encode each wide string to UTF8 and make an MDString.
2948  for (const Expr *Arg : E->arguments()) {
2949  const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
2950  assert(Str->getCharByteWidth() == 2);
2951  StringRef WideBytes = Str->getBytes();
2952  std::string StrUtf8;
2953  if (!convertUTF16ToUTF8String(
2954  makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
2955  CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
2956  continue;
2957  }
2958  Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
2959  }
2960 
2961  // Build and MDTuple of MDStrings and emit the intrinsic call.
2962  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
2963  MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
2964  Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
2965  return RValue::getIgnored();
2966  }
2967  case Builtin::BI__builtin_annotation: {
2968  llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
2969  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
2970  AnnVal->getType());
2971 
2972  // Get the annotation string, go through casts. Sema requires this to be a
2973  // non-wide string literal, potentially casted, so the cast<> is safe.
2974  const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
2975  StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
2976  return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
2977  }
2978  case Builtin::BI__builtin_addcb:
2979  case Builtin::BI__builtin_addcs:
2980  case Builtin::BI__builtin_addc:
2981  case Builtin::BI__builtin_addcl:
2982  case Builtin::BI__builtin_addcll:
2983  case Builtin::BI__builtin_subcb:
2984  case Builtin::BI__builtin_subcs:
2985  case Builtin::BI__builtin_subc:
2986  case Builtin::BI__builtin_subcl:
2987  case Builtin::BI__builtin_subcll: {
2988 
2989  // We translate all of these builtins from expressions of the form:
2990  // int x = ..., y = ..., carryin = ..., carryout, result;
2991  // result = __builtin_addc(x, y, carryin, &carryout);
2992  //
2993  // to LLVM IR of the form:
2994  //
2995  // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
2996  // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
2997  // %carry1 = extractvalue {i32, i1} %tmp1, 1
2998  // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
2999  // i32 %carryin)
3000  // %result = extractvalue {i32, i1} %tmp2, 0
3001  // %carry2 = extractvalue {i32, i1} %tmp2, 1
3002  // %tmp3 = or i1 %carry1, %carry2
3003  // %tmp4 = zext i1 %tmp3 to i32
3004  // store i32 %tmp4, i32* %carryout
3005 
3006  // Scalarize our inputs.
3007  llvm::Value *X = EmitScalarExpr(E->getArg(0));
3008  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
3009  llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
3010  Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
3011 
3012  // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
3013  llvm::Intrinsic::ID IntrinsicId;
3014  switch (BuiltinID) {
3015  default: llvm_unreachable("Unknown multiprecision builtin id.");
3016  case Builtin::BI__builtin_addcb:
3017  case Builtin::BI__builtin_addcs:
3018  case Builtin::BI__builtin_addc:
3019  case Builtin::BI__builtin_addcl:
3020  case Builtin::BI__builtin_addcll:
3021  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
3022  break;
3023  case Builtin::BI__builtin_subcb:
3024  case Builtin::BI__builtin_subcs:
3025  case Builtin::BI__builtin_subc:
3026  case Builtin::BI__builtin_subcl:
3027  case Builtin::BI__builtin_subcll:
3028  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
3029  break;
3030  }
3031 
3032  // Construct our resulting LLVM IR expression.
3033  llvm::Value *Carry1;
3034  llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
3035  X, Y, Carry1);
3036  llvm::Value *Carry2;
3037  llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
3038  Sum1, Carryin, Carry2);
3039  llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
3040  X->getType());
3041  Builder.CreateStore(CarryOut, CarryOutPtr);
3042  return RValue::get(Sum2);
3043  }
3044 
3045  case Builtin::BI__builtin_add_overflow:
3046  case Builtin::BI__builtin_sub_overflow:
3047  case Builtin::BI__builtin_mul_overflow: {
3048  const clang::Expr *LeftArg = E->getArg(0);
3049  const clang::Expr *RightArg = E->getArg(1);
3050  const clang::Expr *ResultArg = E->getArg(2);
3051 
3052  clang::QualType ResultQTy =
3053  ResultArg->getType()->castAs<PointerType>()->getPointeeType();
3054 
3055  WidthAndSignedness LeftInfo =
3056  getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
3057  WidthAndSignedness RightInfo =
3058  getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
3059  WidthAndSignedness ResultInfo =
3060  getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
3061 
3062  // Handle mixed-sign multiplication as a special case, because adding
3063  // runtime or backend support for our generic irgen would be too expensive.
3064  if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
3065  return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
3066  RightInfo, ResultArg, ResultQTy,
3067  ResultInfo);
3068 
3069  WidthAndSignedness EncompassingInfo =
3070  EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
3071 
3072  llvm::Type *EncompassingLLVMTy =
3073  llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
3074 
3075  llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
3076 
3077  llvm::Intrinsic::ID IntrinsicId;
3078  switch (BuiltinID) {
3079  default:
3080  llvm_unreachable("Unknown overflow builtin id.");
3081  case Builtin::BI__builtin_add_overflow:
3082  IntrinsicId = EncompassingInfo.Signed
3083  ? llvm::Intrinsic::sadd_with_overflow
3084  : llvm::Intrinsic::uadd_with_overflow;
3085  break;
3086  case Builtin::BI__builtin_sub_overflow:
3087  IntrinsicId = EncompassingInfo.Signed
3088  ? llvm::Intrinsic::ssub_with_overflow
3089  : llvm::Intrinsic::usub_with_overflow;
3090  break;
3091  case Builtin::BI__builtin_mul_overflow:
3092  IntrinsicId = EncompassingInfo.Signed
3093  ? llvm::Intrinsic::smul_with_overflow
3094  : llvm::Intrinsic::umul_with_overflow;
3095  break;
3096  }
3097 
3098  llvm::Value *Left = EmitScalarExpr(LeftArg);
3099  llvm::Value *Right = EmitScalarExpr(RightArg);
3100  Address ResultPtr = EmitPointerWithAlignment(ResultArg);
3101 
3102  // Extend each operand to the encompassing type.
3103  Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
3104  Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
3105 
3106  // Perform the operation on the extended values.
3107  llvm::Value *Overflow, *Result;
3108  Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
3109 
3110  if (EncompassingInfo.Width > ResultInfo.Width) {
3111  // The encompassing type is wider than the result type, so we need to
3112  // truncate it.
3113  llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
3114 
3115  // To see if the truncation caused an overflow, we will extend
3116  // the result and then compare it to the original result.
3117  llvm::Value *ResultTruncExt = Builder.CreateIntCast(
3118  ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
3119  llvm::Value *TruncationOverflow =
3120  Builder.CreateICmpNE(Result, ResultTruncExt);
3121 
3122  Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
3123  Result = ResultTrunc;
3124  }
3125 
3126  // Finally, store the result using the pointer.
3127  bool isVolatile =
3128  ResultArg->getType()->getPointeeType().isVolatileQualified();
3129  Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
3130 
3131  return RValue::get(Overflow);
3132  }
3133 
3134  case Builtin::BI__builtin_uadd_overflow:
3135  case Builtin::BI__builtin_uaddl_overflow:
3136  case Builtin::BI__builtin_uaddll_overflow:
3137  case Builtin::BI__builtin_usub_overflow:
3138  case Builtin::BI__builtin_usubl_overflow:
3139  case Builtin::BI__builtin_usubll_overflow:
3140  case Builtin::BI__builtin_umul_overflow:
3141  case Builtin::BI__builtin_umull_overflow:
3142  case Builtin::BI__builtin_umulll_overflow:
3143  case Builtin::BI__builtin_sadd_overflow:
3144  case Builtin::BI__builtin_saddl_overflow:
3145  case Builtin::BI__builtin_saddll_overflow:
3146  case Builtin::BI__builtin_ssub_overflow:
3147  case Builtin::BI__builtin_ssubl_overflow:
3148  case Builtin::BI__builtin_ssubll_overflow:
3149  case Builtin::BI__builtin_smul_overflow:
3150  case Builtin::BI__builtin_smull_overflow:
3151  case Builtin::BI__builtin_smulll_overflow: {
3152 
3153  // We translate all of these builtins directly to the relevant llvm IR node.
3154 
3155  // Scalarize our inputs.
3156  llvm::Value *X = EmitScalarExpr(E->getArg(0));
3157  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
3158  Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
3159 
3160  // Decide which of the overflow intrinsics we are lowering to:
3161  llvm::Intrinsic::ID IntrinsicId;
3162  switch (BuiltinID) {
3163  default: llvm_unreachable("Unknown overflow builtin id.");
3164  case Builtin::BI__builtin_uadd_overflow:
3165  case Builtin::BI__builtin_uaddl_overflow:
3166  case Builtin::BI__builtin_uaddll_overflow:
3167  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
3168  break;
3169  case Builtin::BI__builtin_usub_overflow:
3170  case Builtin::BI__builtin_usubl_overflow:
3171  case Builtin::BI__builtin_usubll_overflow:
3172  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
3173  break;
3174  case Builtin::BI__builtin_umul_overflow:
3175  case Builtin::BI__builtin_umull_overflow:
3176  case Builtin::BI__builtin_umulll_overflow:
3177  IntrinsicId = llvm::Intrinsic::umul_with_overflow;
3178  break;
3179  case Builtin::BI__builtin_sadd_overflow:
3180  case Builtin::BI__builtin_saddl_overflow:
3181  case Builtin::BI__builtin_saddll_overflow:
3182  IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
3183  break;
3184  case Builtin::BI__builtin_ssub_overflow:
3185  case Builtin::BI__builtin_ssubl_overflow:
3186  case Builtin::BI__builtin_ssubll_overflow:
3187  IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
3188  break;
3189  case Builtin::BI__builtin_smul_overflow:
3190  case Builtin::BI__builtin_smull_overflow:
3191  case Builtin::BI__builtin_smulll_overflow:
3192  IntrinsicId = llvm::Intrinsic::smul_with_overflow;
3193  break;
3194  }
3195 
3196 
3197  llvm::Value *Carry;
3198  llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
3199  Builder.CreateStore(Sum, SumOutPtr);
3200 
3201  return RValue::get(Carry);
3202  }
3203  case Builtin::BI__builtin_addressof:
3204  return RValue::get(EmitLValue(E->getArg(0)).getPointer());
3205  case Builtin::BI__builtin_operator_new:
3206  return EmitBuiltinNewDeleteCall(
3207  E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
3208  case Builtin::BI__builtin_operator_delete:
3209  return EmitBuiltinNewDeleteCall(
3210  E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
3211 
3212  case Builtin::BI__noop:
3213  // __noop always evaluates to an integer literal zero.
3214  return RValue::get(ConstantInt::get(IntTy, 0));
3215  case Builtin::BI__builtin_call_with_static_chain: {
3216  const CallExpr *Call = cast<CallExpr>(E->getArg(0));
3217  const Expr *Chain = E->getArg(1);
3218  return EmitCall(Call->getCallee()->getType(),
3219  EmitCallee(Call->getCallee()), Call, ReturnValue,
3220  EmitScalarExpr(Chain));
3221  }
3222  case Builtin::BI_InterlockedExchange8:
3223  case Builtin::BI_InterlockedExchange16:
3224  case Builtin::BI_InterlockedExchange:
3225  case Builtin::BI_InterlockedExchangePointer:
3226  return RValue::get(
3227  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
3228  case Builtin::BI_InterlockedCompareExchangePointer:
3229  case Builtin::BI_InterlockedCompareExchangePointer_nf: {
3230  llvm::Type *RTy;
3231  llvm::IntegerType *IntType =
3232  IntegerType::get(getLLVMContext(),
3233  getContext().getTypeSize(E->getType()));
3234  llvm::Type *IntPtrType = IntType->getPointerTo();
3235 
3236  llvm::Value *Destination =
3237  Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
3238 
3239  llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
3240  RTy = Exchange->getType();
3241  Exchange = Builder.CreatePtrToInt(Exchange, IntType);
3242 
3243  llvm::Value *Comparand =
3244  Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
3245 
3246  auto Ordering =
3247  BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
3248  AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
3249 
3250  auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
3251  Ordering, Ordering);
3252  Result->setVolatile(true);
3253 
3254  return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
3255  0),
3256  RTy));
3257  }
3258  case Builtin::BI_InterlockedCompareExchange8:
3259  case Builtin::BI_InterlockedCompareExchange16:
3260  case Builtin::BI_InterlockedCompareExchange:
3261  case Builtin::BI_InterlockedCompareExchange64:
3262  return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
3263  case Builtin::BI_InterlockedIncrement16:
3264  case Builtin::BI_InterlockedIncrement:
3265  return RValue::get(
3266  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
3267  case Builtin::BI_InterlockedDecrement16:
3268  case Builtin::BI_InterlockedDecrement:
3269  return RValue::get(
3270  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
3271  case Builtin::BI_InterlockedAnd8:
3272  case Builtin::BI_InterlockedAnd16:
3273  case Builtin::BI_InterlockedAnd:
3274  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
3275  case Builtin::BI_InterlockedExchangeAdd8:
3276  case Builtin::BI_InterlockedExchangeAdd16:
3277  case Builtin::BI_InterlockedExchangeAdd:
3278  return RValue::get(
3279  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
3280  case Builtin::BI_InterlockedExchangeSub8:
3281  case Builtin::BI_InterlockedExchangeSub16:
3282  case Builtin::BI_InterlockedExchangeSub:
3283  return RValue::get(
3284  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
3285  case Builtin::BI_InterlockedOr8:
3286  case Builtin::BI_InterlockedOr16:
3287  case Builtin::BI_InterlockedOr:
3288  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
3289  case Builtin::BI_InterlockedXor8:
3290  case Builtin::BI_InterlockedXor16:
3291  case Builtin::BI_InterlockedXor:
3292  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
3293 
3294  case Builtin::BI_bittest64:
3295  case Builtin::BI_bittest:
3296  case Builtin::BI_bittestandcomplement64:
3297  case Builtin::BI_bittestandcomplement:
3298  case Builtin::BI_bittestandreset64:
3299  case Builtin::BI_bittestandreset:
3300  case Builtin::BI_bittestandset64:
3301  case Builtin::BI_bittestandset:
3302  case Builtin::BI_interlockedbittestandreset:
3303  case Builtin::BI_interlockedbittestandreset64:
3304  case Builtin::BI_interlockedbittestandset64:
3305  case Builtin::BI_interlockedbittestandset:
3306  case Builtin::BI_interlockedbittestandset_acq:
3307  case Builtin::BI_interlockedbittestandset_rel:
3308  case Builtin::BI_interlockedbittestandset_nf:
3309  case Builtin::BI_interlockedbittestandreset_acq:
3310  case Builtin::BI_interlockedbittestandreset_rel:
3311  case Builtin::BI_interlockedbittestandreset_nf:
3312  return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
3313 
3314  case Builtin::BI__exception_code:
3315  case Builtin::BI_exception_code:
3316  return RValue::get(EmitSEHExceptionCode());
3317  case Builtin::BI__exception_info:
3318  case Builtin::BI_exception_info:
3319  return RValue::get(EmitSEHExceptionInfo());
3320  case Builtin::BI__abnormal_termination:
3321  case Builtin::BI_abnormal_termination:
3322  return RValue::get(EmitSEHAbnormalTermination());
3323  case Builtin::BI_setjmpex:
3324  if (getTarget().getTriple().isOSMSVCRT())
3325  return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
3326  break;
3327  case Builtin::BI_setjmp:
3328  if (getTarget().getTriple().isOSMSVCRT()) {
3329  if (getTarget().getTriple().getArch() == llvm::Triple::x86)
3330  return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
3331  else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
3332  return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
3333  return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
3334  }
3335  break;
3336 
3337  case Builtin::BI__GetExceptionInfo: {
3338  if (llvm::GlobalVariable *GV =
3339  CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
3340  return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
3341  break;
3342  }
3343 
3344  case Builtin::BI__fastfail:
3345  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
3346 
3347  case Builtin::BI__builtin_coro_size: {
3348  auto & Context = getContext();
3349  auto SizeTy = Context.getSizeType();
3350  auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
3351  Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
3352  return RValue::get(Builder.CreateCall(F));
3353  }
3354 
3355  case Builtin::BI__builtin_coro_id:
3356  return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
3357  case Builtin::BI__builtin_coro_promise:
3358  return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
3359  case Builtin::BI__builtin_coro_resume:
3360  return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
3361  case Builtin::BI__builtin_coro_frame:
3362  return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
3363  case Builtin::BI__builtin_coro_noop:
3364  return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
3365  case Builtin::BI__builtin_coro_free:
3366  return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
3367  case Builtin::BI__builtin_coro_destroy:
3368  return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
3369  case Builtin::BI__builtin_coro_done:
3370  return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
3371  case Builtin::BI__builtin_coro_alloc:
3372  return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
3373  case Builtin::BI__builtin_coro_begin:
3374  return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
3375  case Builtin::BI__builtin_coro_end:
3376  return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
3377  case Builtin::BI__builtin_coro_suspend:
3378  return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
3379  case Builtin::BI__builtin_coro_param:
3380  return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
3381 
3382  // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
3383  case Builtin::BIread_pipe:
3384  case Builtin::BIwrite_pipe: {
3385  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
3386  *Arg1 = EmitScalarExpr(E->getArg(1));
3387  CGOpenCLRuntime OpenCLRT(CGM);
3388  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
3389  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
3390 
3391  // Type of the generic packet parameter.
3392  unsigned GenericAS =
3393  getContext().getTargetAddressSpace(LangAS::opencl_generic);
3394  llvm::Type *I8PTy = llvm::PointerType::get(
3395  llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
3396 
3397  // Testing which overloaded version we should generate the call for.
3398  if (2U == E->getNumArgs()) {
3399  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
3400  : "__write_pipe_2";
3401  // Creating a generic function type to be able to call with any builtin or
3402  // user defined type.
3403  llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
3404  llvm::FunctionType *FTy = llvm::FunctionType::get(
3405  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3406  Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
3407  return RValue::get(
3408  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3409  {Arg0, BCast, PacketSize, PacketAlign}));
3410  } else {
3411  assert(4 == E->getNumArgs() &&
3412  "Illegal number of parameters to pipe function");
3413  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
3414  : "__write_pipe_4";
3415 
3416  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
3417  Int32Ty, Int32Ty};
3418  Value *Arg2 = EmitScalarExpr(E->getArg(2)),
3419  *Arg3 = EmitScalarExpr(E->getArg(3));
3420  llvm::FunctionType *FTy = llvm::FunctionType::get(
3421  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3422  Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
3423  // We know the third argument is an integer type, but we may need to cast
3424  // it to i32.
3425  if (Arg2->getType() != Int32Ty)
3426  Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
3427  return RValue::get(Builder.CreateCall(
3428  CGM.CreateRuntimeFunction(FTy, Name),
3429  {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
3430  }
3431  }
3432  // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
3433  // functions
3434  case Builtin::BIreserve_read_pipe:
3435  case Builtin::BIreserve_write_pipe:
3436  case Builtin::BIwork_group_reserve_read_pipe:
3437  case Builtin::BIwork_group_reserve_write_pipe:
3438  case Builtin::BIsub_group_reserve_read_pipe:
3439  case Builtin::BIsub_group_reserve_write_pipe: {
3440  // Composing the mangled name for the function.
3441  const char *Name;
3442  if (BuiltinID == Builtin::BIreserve_read_pipe)
3443  Name = "__reserve_read_pipe";
3444  else if (BuiltinID == Builtin::BIreserve_write_pipe)
3445  Name = "__reserve_write_pipe";
3446  else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
3447  Name = "__work_group_reserve_read_pipe";
3448  else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
3449  Name = "__work_group_reserve_write_pipe";
3450  else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
3451  Name = "__sub_group_reserve_read_pipe";
3452  else
3453  Name = "__sub_group_reserve_write_pipe";
3454 
3455  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
3456  *Arg1 = EmitScalarExpr(E->getArg(1));
3457  llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
3458  CGOpenCLRuntime OpenCLRT(CGM);
3459  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
3460  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
3461 
3462  // Building the generic function prototype.
3463  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
3464  llvm::FunctionType *FTy = llvm::FunctionType::get(
3465  ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3466  // We know the second argument is an integer type, but we may need to cast
3467  // it to i32.
3468  if (Arg1->getType() != Int32Ty)
3469  Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
3470  return RValue::get(
3471  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3472  {Arg0, Arg1, PacketSize, PacketAlign}));
3473  }
3474  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
3475  // functions
3476  case Builtin::BIcommit_read_pipe:
3477  case Builtin::BIcommit_write_pipe:
3478  case Builtin::BIwork_group_commit_read_pipe:
3479  case Builtin::BIwork_group_commit_write_pipe:
3480  case Builtin::BIsub_group_commit_read_pipe:
3481  case Builtin::BIsub_group_commit_write_pipe: {
3482  const char *Name;
3483  if (BuiltinID == Builtin::BIcommit_read_pipe)
3484  Name = "__commit_read_pipe";
3485  else if (BuiltinID == Builtin::BIcommit_write_pipe)
3486  Name = "__commit_write_pipe";
3487  else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
3488  Name = "__work_group_commit_read_pipe";
3489  else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
3490  Name = "__work_group_commit_write_pipe";
3491  else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
3492  Name = "__sub_group_commit_read_pipe";
3493  else
3494  Name = "__sub_group_commit_write_pipe";
3495 
3496  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
3497  *Arg1 = EmitScalarExpr(E->getArg(1));
3498  CGOpenCLRuntime OpenCLRT(CGM);
3499  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
3500  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
3501 
3502  // Building the generic function prototype.
3503  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
3504  llvm::FunctionType *FTy =
3505  llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
3506  llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3507 
3508  return RValue::get(
3509  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3510  {Arg0, Arg1, PacketSize, PacketAlign}));
3511  }
3512  // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
3513  case Builtin::BIget_pipe_num_packets:
3514  case Builtin::BIget_pipe_max_packets: {
3515  const char *BaseName;
3516  const PipeType *PipeTy = E->getArg(0)->getType()->getAs<PipeType>();
3517  if (BuiltinID == Builtin::BIget_pipe_num_packets)
3518  BaseName = "__get_pipe_num_packets";
3519  else
3520  BaseName = "__get_pipe_max_packets";
3521  auto Name = std::string(BaseName) +
3522  std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
3523 
3524  // Building the generic function prototype.
3525  Value *Arg0 = EmitScalarExpr(E->getArg(0));
3526  CGOpenCLRuntime OpenCLRT(CGM);
3527  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
3528  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
3529  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
3530  llvm::FunctionType *FTy = llvm::FunctionType::get(
3531  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3532 
3533  return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3534  {Arg0, PacketSize, PacketAlign}));
3535  }
3536 
3537  // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
3538  case Builtin::BIto_global:
3539  case Builtin::BIto_local:
3540  case Builtin::BIto_private: {
3541  auto Arg0 = EmitScalarExpr(E->getArg(0));
3542  auto NewArgT = llvm::PointerType::get(Int8Ty,
3543  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
3544  auto NewRetT = llvm::PointerType::get(Int8Ty,
3545  CGM.getContext().getTargetAddressSpace(
3547  auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
3548  llvm::Value *NewArg;
3549  if (Arg0->getType()->getPointerAddressSpace() !=
3550  NewArgT->getPointerAddressSpace())
3551  NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
3552  else
3553  NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
3554  auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
3555  auto NewCall =
3556  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
3557  return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
3558  ConvertType(E->getType())));
3559  }
3560 
3561  // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
3562  // It contains four different overload formats specified in Table 6.13.17.1.
3563  case Builtin::BIenqueue_kernel: {
3564  StringRef Name; // Generated function call name
3565  unsigned NumArgs = E->getNumArgs();
3566 
3567  llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
3568  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3569  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3570 
3571  llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
3572  llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
3573  LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
3574  llvm::Value *Range = NDRangeL.getAddress().getPointer();
3575  llvm::Type *RangeTy = NDRangeL.getAddress().getType();
3576 
3577  if (NumArgs == 4) {
3578  // The most basic form of the call with parameters:
3579  // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
3580  Name = "__enqueue_kernel_basic";
3581  llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
3582  GenericVoidPtrTy};
3583  llvm::FunctionType *FTy = llvm::FunctionType::get(
3584  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3585 
3586  auto Info =
3587  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
3588  llvm::Value *Kernel =
3589  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3590  llvm::Value *Block =
3591  Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3592 
3593  AttrBuilder B;
3594  B.addAttribute(Attribute::ByVal);
3595  llvm::AttributeList ByValAttrSet =
3596  llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
3597 
3598  auto RTCall =
3599  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
3600  {Queue, Flags, Range, Kernel, Block});
3601  RTCall->setAttributes(ByValAttrSet);
3602  return RValue::get(RTCall);
3603  }
3604  assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
3605 
3606  // Create a temporary array to hold the sizes of local pointer arguments
3607  // for the block. \p First is the position of the first size argument.
3608  auto CreateArrayForSizeVar = [=](unsigned First)
3609  -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
3610  llvm::APInt ArraySize(32, NumArgs - First);
3611  QualType SizeArrayTy = getContext().getConstantArrayType(
3612  getContext().getSizeType(), ArraySize, ArrayType::Normal,
3613  /*IndexTypeQuals=*/0);
3614  auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
3615  llvm::Value *TmpPtr = Tmp.getPointer();
3616  llvm::Value *TmpSize = EmitLifetimeStart(
3617  CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
3618  llvm::Value *ElemPtr;
3619  // Each of the following arguments specifies the size of the corresponding
3620  // argument passed to the enqueued block.
3621  auto *Zero = llvm::ConstantInt::get(IntTy, 0);
3622  for (unsigned I = First; I < NumArgs; ++I) {
3623  auto *Index = llvm::ConstantInt::get(IntTy, I - First);
3624  auto *GEP = Builder.CreateGEP(TmpPtr, {Zero, Index});
3625  if (I == First)
3626  ElemPtr = GEP;
3627  auto *V =
3628  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
3629  Builder.CreateAlignedStore(
3630  V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
3631  }
3632  return std::tie(ElemPtr, TmpSize, TmpPtr);
3633  };
3634 
3635  // Could have events and/or varargs.
3636  if (E->getArg(3)->getType()->isBlockPointerType()) {
3637  // No events passed, but has variadic arguments.
3638  Name = "__enqueue_kernel_varargs";
3639  auto Info =
3640  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
3641  llvm::Value *Kernel =
3642  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3643  auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3644  llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
3645  std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
3646 
3647  // Create a vector of the arguments, as well as a constant value to
3648  // express to the runtime the number of variadic arguments.
3649  std::vector<llvm::Value *> Args = {
3650  Queue, Flags, Range,
3651  Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
3652  ElemPtr};
3653  std::vector<llvm::Type *> ArgTys = {
3654  QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
3655  GenericVoidPtrTy, IntTy, ElemPtr->getType()};
3656 
3657  llvm::FunctionType *FTy = llvm::FunctionType::get(
3658  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3659  auto Call =
3660  RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3662  if (TmpSize)
3663  EmitLifetimeEnd(TmpSize, TmpPtr);
3664  return Call;
3665  }
3666  // Any calls now have event arguments passed.
3667  if (NumArgs >= 7) {
3668  llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
3669  llvm::Type *EventPtrTy = EventTy->getPointerTo(
3670  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
3671 
3672  llvm::Value *NumEvents =
3673  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
3674  llvm::Value *EventList =
3675  E->getArg(4)->getType()->isArrayType()
3676  ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
3677  : EmitScalarExpr(E->getArg(4));
3678  llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
3679  // Convert to generic address space.
3680  EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
3681  ClkEvent = ClkEvent->getType()->isIntegerTy()
3682  ? Builder.CreateBitOrPointerCast(ClkEvent, EventPtrTy)
3683  : Builder.CreatePointerCast(ClkEvent, EventPtrTy);
3684  auto Info =
3685  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
3686  llvm::Value *Kernel =
3687  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3688  llvm::Value *Block =
3689  Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3690 
3691  std::vector<llvm::Type *> ArgTys = {
3692  QueueTy, Int32Ty, RangeTy, Int32Ty,
3693  EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
3694 
3695  std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
3696  EventList, ClkEvent, Kernel, Block};
3697 
3698  if (NumArgs == 7) {
3699  // Has events but no variadics.
3700  Name = "__enqueue_kernel_basic_events";
3701  llvm::FunctionType *FTy = llvm::FunctionType::get(
3702  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3703  return RValue::get(
3704  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3706  }
3707  // Has event info and variadics
3708  // Pass the number of variadics to the runtime function too.
3709  Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
3710  ArgTys.push_back(Int32Ty);
3711  Name = "__enqueue_kernel_events_varargs";
3712 
3713  llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
3714  std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
3715  Args.push_back(ElemPtr);
3716  ArgTys.push_back(ElemPtr->getType());
3717 
3718  llvm::FunctionType *FTy = llvm::FunctionType::get(
3719  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3720  auto Call =
3721  RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3723  if (TmpSize)
3724  EmitLifetimeEnd(TmpSize, TmpPtr);
3725  return Call;
3726  }
3727  LLVM_FALLTHROUGH;
3728  }
3729  // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
3730  // parameter.
3731  case Builtin::BIget_kernel_work_group_size: {
3732  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3733  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3734  auto Info =
3735  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3736  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3737  Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3738  return RValue::get(Builder.CreateCall(
3739  CGM.CreateRuntimeFunction(
3740  llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3741  false),
3742  "__get_kernel_work_group_size_impl"),
3743  {Kernel, Arg}));
3744  }
3745  case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
3746  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3747  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3748  auto Info =
3749  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3750  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3751  Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3752  return RValue::get(Builder.CreateCall(
3753  CGM.CreateRuntimeFunction(
3754  llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3755  false),
3756  "__get_kernel_preferred_work_group_size_multiple_impl"),
3757  {Kernel, Arg}));
3758  }
3759  case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
3760  case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
3761  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3762  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3763  LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
3764  llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
3765  auto Info =
3766  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
3767  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3768  Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3769  const char *Name =
3770  BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
3771  ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
3772  : "__get_kernel_sub_group_count_for_ndrange_impl";
3773  return RValue::get(Builder.CreateCall(
3774  CGM.CreateRuntimeFunction(
3775  llvm::FunctionType::get(
3776  IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
3777  false),
3778  Name),
3779  {NDRange, Kernel, Block}));
3780  }
3781 
3782  case Builtin::BI__builtin_store_half:
3783  case Builtin::BI__builtin_store_halff: {
3784  Value *Val = EmitScalarExpr(E->getArg(0));
3785  Address Address = EmitPointerWithAlignment(E->getArg(1));
3786  Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
3787  return RValue::get(Builder.CreateStore(HalfVal, Address));
3788  }
3789  case Builtin::BI__builtin_load_half: {
3790  Address Address = EmitPointerWithAlignment(E->getArg(0));
3791  Value *HalfVal = Builder.CreateLoad(Address);
3792  return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
3793  }
3794  case Builtin::BI__builtin_load_halff: {
3795  Address Address = EmitPointerWithAlignment(E->getArg(0));
3796  Value *HalfVal = Builder.CreateLoad(Address);
3797  return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
3798  }
3799  case Builtin::BIprintf:
3800  if (getTarget().getTriple().isNVPTX())
3801  return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
3802  break;
3803  case Builtin::BI__builtin_canonicalize:
3804  case Builtin::BI__builtin_canonicalizef:
3805  case Builtin::BI__builtin_canonicalizel:
3806  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
3807 
3808  case Builtin::BI__builtin_thread_pointer: {
3809  if (!getContext().getTargetInfo().isTLSSupported())
3810  CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
3811  // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
3812  break;
3813  }
3814  case Builtin::BI__builtin_os_log_format:
3815  return emitBuiltinOSLogFormat(*E);
3816 
3817  case Builtin::BI__xray_customevent: {
3818  if (!ShouldXRayInstrumentFunction())
3819  return RValue::getIgnored();
3820 
3821  if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
3823  return RValue::getIgnored();
3824 
3825  if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
3826  if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
3827  return RValue::getIgnored();
3828 
3829  Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
3830  auto FTy = F->getFunctionType();
3831  auto Arg0 = E->getArg(0);
3832  auto Arg0Val = EmitScalarExpr(Arg0);
3833  auto Arg0Ty = Arg0->getType();
3834  auto PTy0 = FTy->getParamType(0);
3835  if (PTy0 != Arg0Val->getType()) {
3836  if (Arg0Ty->isArrayType())
3837  Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
3838  else
3839  Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
3840  }
3841  auto Arg1 = EmitScalarExpr(E->getArg(1));
3842  auto PTy1 = FTy->getParamType(1);
3843  if (PTy1 != Arg1->getType())
3844  Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
3845  return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
3846  }
3847 
3848  case Builtin::BI__xray_typedevent: {
3849  // TODO: There should be a way to always emit events even if the current
3850  // function is not instrumented. Losing events in a stream can cripple
3851  // a trace.
3852  if (!ShouldXRayInstrumentFunction())
3853  return RValue::getIgnored();
3854 
3855  if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
3857  return RValue::getIgnored();
3858 
3859  if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
3860  if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
3861  return RValue::getIgnored();
3862 
3863  Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
3864  auto FTy = F->getFunctionType();
3865  auto Arg0 = EmitScalarExpr(E->getArg(0));
3866  auto PTy0 = FTy->getParamType(0);
3867  if (PTy0 != Arg0->getType())
3868  Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
3869  auto Arg1 = E->getArg(1);
3870  auto Arg1Val = EmitScalarExpr(Arg1);
3871  auto Arg1Ty = Arg1->getType();
3872  auto PTy1 = FTy->getParamType(1);
3873  if (PTy1 != Arg1Val->getType()) {
3874  if (Arg1Ty->isArrayType())
3875  Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer();
3876  else
3877  Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
3878  }
3879  auto Arg2 = EmitScalarExpr(E->getArg(2));
3880  auto PTy2 = FTy->getParamType(2);
3881  if (PTy2 != Arg2->getType())
3882  Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
3883  return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
3884  }
3885 
3886  case Builtin::BI__builtin_ms_va_start:
3887  case Builtin::BI__builtin_ms_va_end:
3888  return RValue::get(
3889  EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
3890  BuiltinID == Builtin::BI__builtin_ms_va_start));
3891 
3892  case Builtin::BI__builtin_ms_va_copy: {
3893  // Lower this manually. We can't reliably determine whether or not any
3894  // given va_copy() is for a Win64 va_list from the calling convention
3895  // alone, because it's legal to do this from a System V ABI function.
3896  // With opaque pointer types, we won't have enough information in LLVM
3897  // IR to determine this from the argument types, either. Best to do it
3898  // now, while we have enough information.
3899  Address DestAddr = EmitMSVAListRef(E->getArg(0));
3900  Address SrcAddr = EmitMSVAListRef(E->getArg(1));
3901 
3902  llvm::Type *BPP = Int8PtrPtrTy;
3903 
3904  DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
3905  DestAddr.getAlignment());
3906  SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
3907  SrcAddr.getAlignment());
3908 
3909  Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
3910  return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
3911  }
3912  }
3913 
3914  // If this is an alias for a lib function (e.g. __builtin_sin), emit
3915  // the call using the normal call path, but using the unmangled
3916  // version of the function name.
3917  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
3918  return emitLibraryCall(*this, FD, E,
3919  CGM.getBuiltinLibFunction(FD, BuiltinID));
3920 
3921  // If this is a predefined lib function (e.g. malloc), emit the call
3922  // using exactly the normal call path.
3923  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
3924  return emitLibraryCall(*this, FD, E,
3925  cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
3926 
3927  // Check that a call to a target specific builtin has the correct target
3928  // features.
3929  // This is down here to avoid non-target specific builtins, however, if
3930  // generic builtins start to require generic target features then we
3931  // can move this up to the beginning of the function.
3932  checkTargetFeatures(E, FD);
3933 
3934  if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
3935  LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
3936 
3937  // See if we have a target specific intrinsic.
3938  const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
3939  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
3940  StringRef Prefix =
3941  llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
3942  if (!Prefix.empty()) {
3943  IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
3944  // NOTE we don't need to perform a compatibility flag check here since the
3945  // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
3946  // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
3947  if (IntrinsicID == Intrinsic::not_intrinsic)
3948  IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
3949  }
3950 
3951  if (IntrinsicID != Intrinsic::not_intrinsic) {
3953 
3954  // Find out if any arguments are required to be integer constant
3955  // expressions.
3956  unsigned ICEArguments = 0;
3958  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3959  assert(Error == ASTContext::GE_None && "Should not codegen an error");
3960 
3961  Function *F = CGM.getIntrinsic(IntrinsicID);
3962  llvm::FunctionType *FTy = F->getFunctionType();
3963 
3964  for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
3965  Value *ArgValue;
3966  // If this is a normal argument, just emit it as a scalar.
3967  if ((ICEArguments & (1 << i)) == 0) {
3968  ArgValue = EmitScalarExpr(E->getArg(i));
3969  } else {
3970  // If this is required to be a constant, constant fold it so that we
3971  // know that the generated intrinsic gets a ConstantInt.
3972  llvm::APSInt Result;
3973  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
3974  assert(IsConst && "Constant arg isn't actually constant?");
3975  (void)IsConst;
3976  ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
3977  }
3978 
3979  // If the intrinsic arg type is different from the builtin arg type
3980  // we need to do a bit cast.
3981  llvm::Type *PTy = FTy->getParamType(i);
3982  if (PTy != ArgValue->getType()) {
3983  // XXX - vector of pointers?
3984  if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
3985  if (PtrTy->getAddressSpace() !=
3986  ArgValue->getType()->getPointerAddressSpace()) {
3987  ArgValue = Builder.CreateAddrSpaceCast(
3988  ArgValue,
3989  ArgValue->getType()->getPointerTo(PtrTy->getAddressSpace()));
3990  }
3991  }
3992 
3993  assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
3994  "Must be able to losslessly bit cast to param");
3995  ArgValue = Builder.CreateBitCast(ArgValue, PTy);
3996  }
3997 
3998  Args.push_back(ArgValue);
3999  }
4000 
4001  Value *V = Builder.CreateCall(F, Args);
4002  QualType BuiltinRetType = E->getType();
4003 
4004  llvm::Type *RetTy = VoidTy;
4005  if (!BuiltinRetType->isVoidType())
4006  RetTy = ConvertType(BuiltinRetType);
4007 
4008  if (RetTy != V->getType()) {
4009  // XXX - vector of pointers?
4010  if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
4011  if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
4012  V = Builder.CreateAddrSpaceCast(
4013  V, V->getType()->getPointerTo(PtrTy->getAddressSpace()));
4014  }
4015  }
4016 
4017  assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
4018  "Must be able to losslessly bit cast result type");
4019  V = Builder.CreateBitCast(V, RetTy);
4020  }
4021 
4022  return RValue::get(V);
4023  }
4024 
4025  // See if we have a target specific builtin that needs to be lowered.
4026  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
4027  return RValue::get(V);
4028 
4029  ErrorUnsupported(E, "builtin function");
4030 
4031  // Unknown builtin, for now just dump it out and return undef.
4032  return GetUndefRValue(E->getType());
4033 }
4034 
4036  unsigned BuiltinID, const CallExpr *E,
4037  llvm::Triple::ArchType Arch) {
4038  switch (Arch) {
4039  case llvm::Triple::arm:
4040  case llvm::Triple::armeb:
4041  case llvm::Triple::thumb:
4042  case llvm::Triple::thumbeb:
4043  return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch);
4044  case llvm::Triple::aarch64:
4045  case llvm::Triple::aarch64_be:
4046  return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
4047  case llvm::Triple::x86:
4048  case llvm::Triple::x86_64:
4049  return CGF->EmitX86BuiltinExpr(BuiltinID, E);
4050  case llvm::Triple::ppc:
4051  case llvm::Triple::ppc64:
4052  case llvm::Triple::ppc64le:
4053  return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
4054  case llvm::Triple::r600:
4055  case llvm::Triple::amdgcn:
4056  return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
4057  case llvm::Triple::systemz:
4058  return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
4059  case llvm::Triple::nvptx:
4060  case llvm::Triple::nvptx64:
4061  return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
4062  case llvm::Triple::wasm32:
4063  case llvm::Triple::wasm64:
4064  return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
4065  case llvm::Triple::hexagon:
4066  return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
4067  default:
4068  return nullptr;
4069  }
4070 }
4071 
4073  const CallExpr *E) {
4074  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
4075  assert(getContext().getAuxTargetInfo() && "Missing aux target info");
4077  this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
4078  getContext().getAuxTargetInfo()->getTriple().getArch());
4079  }
4080 
4081  return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
4082  getTarget().getTriple().getArch());
4083 }
4084 
4085 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
4086  NeonTypeFlags TypeFlags,
4087  bool HasLegalHalfType=true,
4088  bool V1Ty=false) {
4089  int IsQuad = TypeFlags.isQuad();
4090  switch (TypeFlags.getEltType()) {
4091  case NeonTypeFlags::Int8:
4092  case NeonTypeFlags::Poly8:
4093  return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
4094  case NeonTypeFlags::Int16:
4095  case NeonTypeFlags::Poly16:
4096  return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
4098  if (HasLegalHalfType)
4099  return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
4100  else
4101  return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
4102  case NeonTypeFlags::Int32:
4103  return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
4104  case NeonTypeFlags::Int64:
4105  case NeonTypeFlags::Poly64:
4106  return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
4108  // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
4109  // There is a lot of i128 and f128 API missing.
4110  // so we use v16i8 to represent poly128 and get pattern matched.
4111  return llvm::VectorType::get(CGF->Int8Ty, 16);
4113  return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
4115  return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
4116  }
4117  llvm_unreachable("Unknown vector element type!");
4118 }
4119 
4120 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
4121  NeonTypeFlags IntTypeFlags) {
4122  int IsQuad = IntTypeFlags.isQuad();
4123  switch (IntTypeFlags.getEltType()) {
4124  case NeonTypeFlags::Int16:
4125  return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad));
4126  case NeonTypeFlags::Int32:
4127  return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
4128  case NeonTypeFlags::Int64:
4129  return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
4130  default:
4131  llvm_unreachable("Type can't be converted to floating-point!");
4132  }
4133 }
4134 
4136  unsigned nElts = V->getType()->getVectorNumElements();
4137  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
4138  return Builder.CreateShuffleVector(V, V, SV, "lane");
4139 }
4140 
4142  const char *name,
4143  unsigned shift, bool rightshift) {
4144  unsigned j = 0;
4145  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
4146  ai != ae; ++ai, ++j)
4147  if (shift > 0 && shift == j)
4148  Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
4149  else
4150  Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
4151 
4152  return Builder.CreateCall(F, Ops, name);
4153 }
4154 
4156  bool neg) {
4157  int SV = cast<ConstantInt>(V)->getSExtValue();
4158  return ConstantInt::get(Ty, neg ? -SV : SV);
4159 }
4160 
4161 // Right-shift a vector by a constant.
4163  llvm::Type *Ty, bool usgn,
4164  const char *name) {
4165  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
4166 
4167  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
4168  int EltSize = VTy->getScalarSizeInBits();
4169 
4170  Vec = Builder.CreateBitCast(Vec, Ty);
4171 
4172  // lshr/ashr are undefined when the shift amount is equal to the vector
4173  // element size.
4174  if (ShiftAmt == EltSize) {
4175  if (usgn) {
4176  // Right-shifting an unsigned value by its size yields 0.
4177  return llvm::ConstantAggregateZero::get(VTy);
4178  } else {
4179  // Right-shifting a signed value by its size is equivalent
4180  // to a shift of size-1.
4181  --ShiftAmt;
4182  Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
4183  }
4184  }
4185 
4186  Shift = EmitNeonShiftVector(Shift, Ty, false);
4187  if (usgn)
4188  return Builder.CreateLShr(Vec, Shift, name);
4189  else
4190  return Builder.CreateAShr(Vec, Shift, name);
4191 }
4192 
4193 enum {
4194  AddRetType = (1 << 0),
4195  Add1ArgType = (1 << 1),
4196  Add2ArgTypes = (1 << 2),
4197 
4198  VectorizeRetType = (1 << 3),
4199  VectorizeArgTypes = (1 << 4),
4200 
4201  InventFloatType = (1 << 5),
4202  UnsignedAlts = (1 << 6),
4203 
4204  Use64BitVectors = (1 << 7),
4205  Use128BitVectors = (1 << 8),
4206 
4213 };
4214 
4215 namespace {
4216 struct NeonIntrinsicInfo {
4217  const char *NameHint;
4218  unsigned BuiltinID;
4219  unsigned LLVMIntrinsic;
4220  unsigned AltLLVMIntrinsic;
4221  unsigned TypeModifier;
4222 
4223  bool operator<(unsigned RHSBuiltinID) const {
4224  return BuiltinID < RHSBuiltinID;
4225  }
4226  bool operator<(const NeonIntrinsicInfo &TE) const {
4227  return BuiltinID < TE.BuiltinID;
4228  }
4229 };
4230 } // end anonymous namespace
4231 
4232 #define NEONMAP0(NameBase) \
4233  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
4234 
4235 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
4236  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
4237  Intrinsic::LLVMIntrinsic, 0, TypeModifier }
4238 
4239 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
4240  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
4241  Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
4242  TypeModifier }
4243 
4244 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
4245  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
4246  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
4247  NEONMAP1(vabs_v, arm_neon_vabs, 0),
4248  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
4249  NEONMAP0(vaddhn_v),
4250  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
4251  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
4252  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
4253  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
4254  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
4255  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
4256  NEONMAP1(vcage_v, arm_neon_vacge, 0),
4257  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
4258  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
4259  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
4260  NEONMAP1(vcale_v, arm_neon_vacge, 0),
4261  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
4262  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
4263  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
4264  NEONMAP0(vceqz_v),
4265  NEONMAP0(vceqzq_v),
4266  NEONMAP0(vcgez_v),
4267  NEONMAP0(vcgezq_v),
4268  NEONMAP0(vcgtz_v),
4269  NEONMAP0(vcgtzq_v),
4270  NEONMAP0(vclez_v),
4271  NEONMAP0(vclezq_v),
4272  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
4273  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
4274  NEONMAP0(vcltz_v),
4275  NEONMAP0(vcltzq_v),
4276  NEONMAP1(vclz_v, ctlz, Add1ArgType),
4277  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
4278  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
4279  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
4280  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
4281  NEONMAP0(vcvt_f16_v),
4282  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
4283  NEONMAP0(vcvt_f32_v),
4284  NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
4285  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
4286  NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
4287  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
4288  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
4289  NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
4290  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
4291  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
4292  NEONMAP0(vcvt_s16_v),
4293  NEONMAP0(vcvt_s32_v),
4294  NEONMAP0(vcvt_s64_v),
4295  NEONMAP0(vcvt_u16_v),
4296  NEONMAP0(vcvt_u32_v),
4297  NEONMAP0(vcvt_u64_v),
4298  NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
4299  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
4300  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
4301  NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0),
4302  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
4303  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
4304  NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
4305  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
4306  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
4307  NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
4308  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
4309  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
4310  NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
4311  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
4312  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
4313  NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
4314  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
4315  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
4316  NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
4317  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
4318  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
4319  NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
4320  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
4321  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
4322  NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
4323  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
4324  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
4325  NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
4326  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
4327  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
4328  NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
4329  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
4330  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
4331  NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
4332  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
4333  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
4334  NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
4335  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
4336  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
4337  NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
4338  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
4339  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
4340  NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
4341  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
4342  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
4343  NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
4344  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
4345  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
4346  NEONMAP0(vcvtq_f16_v),
4347  NEONMAP0(vcvtq_f32_v),
4348  NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
4349  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
4350  NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
4351  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
4352  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
4353  NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
4354  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
4355  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
4356  NEONMAP0(vcvtq_s16_v),
4357  NEONMAP0(vcvtq_s32_v),
4358  NEONMAP0(vcvtq_s64_v),
4359  NEONMAP0(vcvtq_u16_v),
4360  NEONMAP0(vcvtq_u32_v),
4361  NEONMAP0(vcvtq_u64_v),
4362  NEONMAP2(vdot_v, arm_neon_udot, arm_neon_sdot, 0),
4363  NEONMAP2(vdotq_v, arm_neon_udot, arm_neon_sdot, 0),
4364  NEONMAP0(vext_v),
4365  NEONMAP0(vextq_v),
4366  NEONMAP0(vfma_v),
4367  NEONMAP0(vfmaq_v),
4368  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
4369  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
4370  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
4371  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
4372  NEONMAP0(vld1_dup_v),
4373  NEONMAP1(vld1_v, arm_neon_vld1, 0),
4374  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
4375  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
4376  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
4377  NEONMAP0(vld1q_dup_v),
4378  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
4379  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
4380  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
4381  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
4382  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
4383  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
4384  NEONMAP1(vld2_v, arm_neon_vld2, 0),
4385  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
4386  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
4387  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
4388  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
4389  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
4390  NEONMAP1(vld3_v, arm_neon_vld3, 0),
4391  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
4392  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
4393  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
4394  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
4395  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
4396  NEONMAP1(vld4_v, arm_neon_vld4, 0),
4397  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
4398  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
4399  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
4400  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
4401  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
4402  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
4403  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
4404  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
4405  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
4406  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
4407  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
4408  NEONMAP0(vmovl_v),
4409  NEONMAP0(vmovn_v),
4410  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
4411  NEONMAP0(vmull_v),
4412  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
4413  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
4414  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
4415  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
4416  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
4417  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
4418  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
4419  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
4420  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
4421  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
4422  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
4423  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
4424  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
4425  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
4426  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
4427  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
4428  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
4429  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
4430  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
4431  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
4432  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
4433  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
4434  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
4435  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
4436  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
4437  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
4438  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
4439  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
4440  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
4441  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
4442  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
4443  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
4444  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
4445  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
4446  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
4447  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
4448  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
4449  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
4450  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
4451  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
4452  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
4453  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
4454  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
4455  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
4456  NEONMAP0(vrndi_v),
4457  NEONMAP0(vrndiq_v),
4458  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
4459  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
4460  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
4461  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
4462  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
4463  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
4464  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
4465  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
4466  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
4467  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
4468  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
4469  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
4470  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
4471  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
4472  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
4473  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
4474  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
4475  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
4476  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
4477  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
4478  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
4479  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
4480  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
4481  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
4482  NEONMAP0(vshl_n_v),
4483  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
4484  NEONMAP0(vshll_n_v),
4485  NEONMAP0(vshlq_n_v),
4486  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
4487  NEONMAP0(vshr_n_v),
4488  NEONMAP0(vshrn_n_v),
4489  NEONMAP0(vshrq_n_v),
4490  NEONMAP1(vst1_v, arm_neon_vst1, 0),
4491  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
4492  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
4493  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
4494  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
4495  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
4496  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
4497  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
4498  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
4499  NEONMAP1(vst2_v, arm_neon_vst2, 0),
4500  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
4501  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
4502  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
4503  NEONMAP1(vst3_v, arm_neon_vst3, 0),
4504  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
4505  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
4506  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
4507  NEONMAP1(vst4_v, arm_neon_vst4, 0),
4508  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
4509  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
4510  NEONMAP0(vsubhn_v),
4511  NEONMAP0(vtrn_v),
4512  NEONMAP0(vtrnq_v),
4513  NEONMAP0(vtst_v),
4514  NEONMAP0(vtstq_v),
4515  NEONMAP0(vuzp_v),
4516  NEONMAP0(vuzpq_v),
4517  NEONMAP0(vzip_v),
4518  NEONMAP0(vzipq_v)
4519 };
4520 
4521 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
4522  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
4523  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
4524  NEONMAP0(vaddhn_v),
4525  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
4526  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
4527  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
4528  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
4529  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
4530  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
4531  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
4532  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
4533  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
4534  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
4535  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
4536  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
4537  NEONMAP0(vceqz_v),
4538  NEONMAP0(vceqzq_v),
4539  NEONMAP0(vcgez_v),
4540  NEONMAP0(vcgezq_v),
4541  NEONMAP0(vcgtz_v),
4542  NEONMAP0(vcgtzq_v),
4543  NEONMAP0(vclez_v),
4544  NEONMAP0(vclezq_v),
4545  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
4546  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
4547  NEONMAP0(vcltz_v),
4548  NEONMAP0(vcltzq_v),
4549  NEONMAP1(vclz_v, ctlz, Add1ArgType),
4550  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
4551  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
4552  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
4553  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
4554  NEONMAP0(vcvt_f16_v),
4555  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
4556  NEONMAP0(vcvt_f32_v),
4557  NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4558  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4559  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4560  NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
4561  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
4562  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
4563  NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
4564  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
4565  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
4566  NEONMAP0(vcvtq_f16_v),
4567  NEONMAP0(vcvtq_f32_v),
4568  NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4569  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4570  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4571  NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
4572  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
4573  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
4574  NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
4575  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
4576  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
4577  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
4578  NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
4579  NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
4580  NEONMAP0(vext_v),
4581  NEONMAP0(vextq_v),
4582  NEONMAP0(vfma_v),
4583  NEONMAP0(vfmaq_v),
4584  NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0),
4585  NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0),
4586  NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0),
4587  NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0),
4588  NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0),
4589  NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0),
4590  NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0),
4591  NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0),
4592  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
4593  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
4594  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
4595  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
4596  NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
4597  NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
4598  NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
4599  NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
4600  NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
4601  NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
4602  NEONMAP0(vmovl_v),
4603  NEONMAP0(vmovn_v),
4604  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
4605  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
4606  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
4607  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
4608  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
4609  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
4610  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
4611  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
4612  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
4613  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
4614  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
4615  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
4616  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
4617  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
4618  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
4619  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
4620  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
4621  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
4622  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
4623  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
4624  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
4625  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
4626  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
4627  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
4628  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
4629  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
4630  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
4631  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
4632  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
4633  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
4634  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
4635  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
4636  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
4637  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
4638  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
4639  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
4640  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
4641  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
4642  NEONMAP0(vrndi_v),
4643  NEONMAP0(vrndiq_v),
4644  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
4645  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
4646  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
4647  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
4648  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
4649  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
4650  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
4651  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
4652  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
4653  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
4654  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
4655  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
4656  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
4657  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
4658  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
4659  NEONMAP0(vshl_n_v),
4660  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
4661  NEONMAP0(vshll_n_v),
4662  NEONMAP0(vshlq_n_v),
4663  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
4664  NEONMAP0(vshr_n_v),
4665  NEONMAP0(vshrn_n_v),
4666  NEONMAP0(vshrq_n_v),
4667  NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
4668  NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
4669  NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
4670  NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
4671  NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
4672  NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
4673  NEONMAP0(vsubhn_v),
4674  NEONMAP0(vtst_v),
4675  NEONMAP0(vtstq_v),
4676 };
4677 
4678 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
4679  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
4680  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
4681  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
4682  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
4683  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
4684  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
4685  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
4686  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
4687  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
4688  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4689  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
4690  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
4691  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
4692  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
4693  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4694  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4695  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
4696  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
4697  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
4698  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
4699  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
4700  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
4701  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
4702  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
4703  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
4704  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
4705  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
4706  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
4707  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4708  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4709  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4710  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4711  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4712  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4713  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4714  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4715  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4716  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4717  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4718  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4719  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4720  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4721  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4722  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4723  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4724  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4725  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4726  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4727  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
4728  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4729  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4730  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4731  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4732  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
4733  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
4734  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4735  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4736  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
4737  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
4738  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4739  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4740  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4741  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
4742  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
4743  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
4744  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
4745  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
4746  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
4747  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
4748  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
4749  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
4750  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
4751  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4752  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4753  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4754  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4755  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4756  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4757  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4758  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4759  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
4760  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
4761  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
4762  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
4763  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
4764  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
4765  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
4766  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
4767  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
4768  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
4769  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
4770  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
4771  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
4772  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
4773  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
4774  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
4775  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
4776  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
4777  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
4778  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
4779  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
4780  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
4781  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
4782  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
4783  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
4784  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
4785  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
4786  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
4787  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
4788  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
4789  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
4790  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
4791  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
4792  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
4793  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
4794  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
4795  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
4796  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
4797  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
4798  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
4799  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
4800  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
4801  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
4802  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
4803  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
4804  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
4805  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
4806  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
4807  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
4808  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
4809  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4810  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4811  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4812  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4813  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
4814  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
4815  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4816  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4817  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4818  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4819  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
4820  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
4821  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
4822  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
4823  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
4824  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
4825  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
4826  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
4827  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
4828  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
4829  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
4830  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
4831  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
4832  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
4833  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
4834  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
4835  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
4836  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
4837  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
4838  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
4839  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
4840  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
4841  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
4842  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
4843  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
4844  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
4845  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
4846  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
4847  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
4848  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
4849  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
4850  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
4851  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
4852  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
4853  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
4854  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
4855  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
4856  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
4857  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
4858  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
4859  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
4860  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
4861  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
4862  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
4863  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
4864  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
4865  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
4866  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
4867  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
4868  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
4869  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
4870  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
4871  // FP16 scalar intrinisics go here.
4872  NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
4873  NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
4874  NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
4875  NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
4876  NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
4877  NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4878  NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4879  NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4880  NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4881  NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4882  NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4883  NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4884  NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4885  NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4886  NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4887  NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4888  NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4889  NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4890  NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4891  NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4892  NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4893  NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4894  NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4895  NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4896  NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4897  NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
4898  NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
4899  NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
4900  NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
4901  NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
4902 };
4903 
4904 #undef NEONMAP0
4905 #undef NEONMAP1
4906 #undef NEONMAP2
4907 
4909 
4912 
4913 
4914 static const NeonIntrinsicInfo *
4916  unsigned BuiltinID, bool &MapProvenSorted) {
4917 
4918 #ifndef NDEBUG
4919  if (!MapProvenSorted) {
4920  assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
4921  MapProvenSorted = true;
4922  }
4923 #endif
4924 
4925  const NeonIntrinsicInfo *Builtin =
4926  std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
4927 
4928  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
4929  return Builtin;
4930 
4931  return nullptr;
4932 }
4933 
4934 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
4935  unsigned Modifier,
4936  llvm::Type *ArgType,
4937  const CallExpr *E) {
4938  int VectorSize = 0;
4939  if (Modifier & Use64BitVectors)
4940  VectorSize = 64;
4941  else if (Modifier & Use128BitVectors)
4942  VectorSize = 128;
4943 
4944  // Return type.
4946  if (Modifier & AddRetType) {
4947  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
4948  if (Modifier & VectorizeRetType)
4949  Ty = llvm::VectorType::get(
4950  Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
4951 
4952  Tys.push_back(Ty);
4953  }
4954 
4955  // Arguments.
4956  if (Modifier & VectorizeArgTypes) {
4957  int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
4958  ArgType = llvm::VectorType::get(ArgType, Elts);
4959  }
4960 
4961  if (Modifier & (Add1ArgType | Add2ArgTypes))
4962  Tys.push_back(ArgType);
4963 
4964  if (Modifier & Add2ArgTypes)
4965  Tys.push_back(ArgType);
4966 
4967  if (Modifier & InventFloatType)
4968  Tys.push_back(FloatTy);
4969 
4970  return CGM.getIntrinsic(IntrinsicID, Tys);
4971 }
4972 
4974  const NeonIntrinsicInfo &SISDInfo,
4976  const CallExpr *E) {
4977  unsigned BuiltinID = SISDInfo.BuiltinID;
4978  unsigned int Int = SISDInfo.LLVMIntrinsic;
4979  unsigned Modifier = SISDInfo.TypeModifier;
4980  const char *s = SISDInfo.NameHint;
4981 
4982  switch (BuiltinID) {
4983  case NEON::BI__builtin_neon_vcled_s64:
4984  case NEON::BI__builtin_neon_vcled_u64:
4985  case NEON::BI__builtin_neon_vcles_f32:
4986  case NEON::BI__builtin_neon_vcled_f64:
4987  case NEON::BI__builtin_neon_vcltd_s64:
4988  case NEON::BI__builtin_neon_vcltd_u64:
4989  case NEON::BI__builtin_neon_vclts_f32:
4990  case NEON::BI__builtin_neon_vcltd_f64:
4991  case NEON::BI__builtin_neon_vcales_f32:
4992  case NEON::BI__builtin_neon_vcaled_f64:
4993  case NEON::BI__builtin_neon_vcalts_f32:
4994  case NEON::BI__builtin_neon_vcaltd_f64:
4995  // Only one direction of comparisons actually exist, cmle is actually a cmge
4996  // with swapped operands. The table gives us the right intrinsic but we
4997  // still need to do the swap.
4998  std::swap(Ops[0], Ops[1]);
4999  break;
5000  }
5001 
5002  assert(Int && "Generic code assumes a valid intrinsic");
5003 
5004  // Determine the type(s) of this overloaded AArch64 intrinsic.
5005  const Expr *Arg = E->getArg(0);
5006  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
5007  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
5008 
5009  int j = 0;
5010  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
5011  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
5012  ai != ae; ++ai, ++j) {
5013  llvm::Type *ArgTy = ai->getType();
5014  if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
5015  ArgTy->getPrimitiveSizeInBits())
5016  continue;
5017 
5018  assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
5019  // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
5020  // it before inserting.
5021  Ops[j] =
5022  CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
5023  Ops[j] =
5024  CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
5025  }
5026 
5027  Value *Result = CGF.EmitNeonCall(F, Ops, s);
5028  llvm::Type *ResultType = CGF.ConvertType(E->getType());
5029  if (ResultType->getPrimitiveSizeInBits() <
5030  Result->getType()->getPrimitiveSizeInBits())
5031  return CGF.Builder.CreateExtractElement(Result, C0);
5032 
5033  return CGF.Builder.CreateBitCast(Result, ResultType, s);
5034 }
5035 
5037  unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
5038  const char *NameHint, unsigned Modifier, const CallExpr *E,
5039  SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
5040  llvm::Triple::ArchType Arch) {
5041  // Get the last argument, which specifies the vector type.
5042  llvm::APSInt NeonTypeConst;
5043  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5044  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
5045  return nullptr;
5046 
5047  // Determine the type of this overloaded NEON intrinsic.
5048  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
5049  bool Usgn = Type.isUnsigned();
5050  bool Quad = Type.isQuad();
5051  const bool HasLegalHalfType = getTarget().hasLegalHalfType();
5052 
5053  llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType);
5054  llvm::Type *Ty = VTy;
5055  if (!Ty)
5056  return nullptr;
5057 
5058  auto getAlignmentValue32 = [&](Address addr) -> Value* {
5059  return Builder.getInt32(addr.getAlignment().getQuantity());
5060  };
5061 
5062  unsigned Int = LLVMIntrinsic;
5063  if ((Modifier & UnsignedAlts) && !Usgn)
5064  Int = AltLLVMIntrinsic;
5065 
5066  switch (BuiltinID) {
5067  default: break;
5068  case NEON::BI__builtin_neon_vabs_v:
5069  case NEON::BI__builtin_neon_vabsq_v:
5070  if (VTy->getElementType()->isFloatingPointTy())
5071  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
5072  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
5073  case NEON::BI__builtin_neon_vaddhn_v: {
5074  llvm::VectorType *SrcTy =
5075  llvm::VectorType::getExtendedElementVectorType(VTy);
5076 
5077  // %sum = add <4 x i32> %lhs, %rhs
5078  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
5079  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
5080  Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
5081 
5082  // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
5083  Constant *ShiftAmt =
5084  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
5085  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
5086 
5087  // %res = trunc <4 x i32> %high to <4 x i16>
5088  return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
5089  }
5090  case NEON::BI__builtin_neon_vcale_v:
5091  case NEON::BI__builtin_neon_vcaleq_v:
5092  case NEON::BI__builtin_neon_vcalt_v:
5093  case NEON::BI__builtin_neon_vcaltq_v:
5094  std::swap(Ops[0], Ops[1]);
5095  LLVM_FALLTHROUGH;
5096  case NEON::BI__builtin_neon_vcage_v:
5097  case NEON::BI__builtin_neon_vcageq_v:
5098  case NEON::BI__builtin_neon_vcagt_v:
5099  case NEON::BI__builtin_neon_vcagtq_v: {
5100  llvm::Type *Ty;
5101  switch (VTy->getScalarSizeInBits()) {
5102  default: llvm_unreachable("unexpected type");
5103  case 32:
5104  Ty = FloatTy;
5105  break;
5106  case 64:
5107  Ty = DoubleTy;
5108  break;
5109  case 16:
5110  Ty = HalfTy;
5111  break;
5112  }
5113  llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements());
5114  llvm::Type *Tys[] = { VTy, VecFlt };
5115  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5116  return EmitNeonCall(F, Ops, NameHint);
5117  }
5118  case NEON::BI__builtin_neon_vceqz_v:
5119  case NEON::BI__builtin_neon_vceqzq_v:
5120  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
5121  ICmpInst::ICMP_EQ, "vceqz");
5122  case NEON::BI__builtin_neon_vcgez_v:
5123  case NEON::BI__builtin_neon_vcgezq_v:
5124  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
5125  ICmpInst::ICMP_SGE, "vcgez");
5126  case NEON::BI__builtin_neon_vclez_v:
5127  case NEON::BI__builtin_neon_vclezq_v:
5128  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
5129  ICmpInst::ICMP_SLE, "vclez");
5130  case NEON::BI__builtin_neon_vcgtz_v:
5131  case NEON::BI__builtin_neon_vcgtzq_v:
5132  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
5133  ICmpInst::ICMP_SGT, "vcgtz");
5134  case NEON::BI__builtin_neon_vcltz_v:
5135  case NEON::BI__builtin_neon_vcltzq_v:
5136  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
5137  ICmpInst::ICMP_SLT, "vcltz");
5138  case NEON::BI__builtin_neon_vclz_v:
5139  case NEON::BI__builtin_neon_vclzq_v:
5140  // We generate target-independent intrinsic, which needs a second argument
5141  // for whether or not clz of zero is undefined; on ARM it isn't.
5142  Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
5143  break;
5144  case NEON::BI__builtin_neon_vcvt_f32_v:
5145  case NEON::BI__builtin_neon_vcvtq_f32_v:
5146  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5147  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
5148  HasLegalHalfType);
5149  return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5150  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5151  case NEON::BI__builtin_neon_vcvt_f16_v:
5152  case NEON::BI__builtin_neon_vcvtq_f16_v:
5153  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5154  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
5155  HasLegalHalfType);
5156  return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5157  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5158  case NEON::BI__builtin_neon_vcvt_n_f16_v:
5159  case NEON::BI__builtin_neon_vcvt_n_f32_v:
5160  case NEON::BI__builtin_neon_vcvt_n_f64_v:
5161  case NEON::BI__builtin_neon_vcvtq_n_f16_v:
5162  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
5163  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
5164  llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
5165  Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
5166  Function *F = CGM.getIntrinsic(Int, Tys);
5167  return EmitNeonCall(F, Ops, "vcvt_n");
5168  }
5169  case NEON::BI__builtin_neon_vcvt_n_s16_v:
5170  case NEON::BI__builtin_neon_vcvt_n_s32_v:
5171  case NEON::BI__builtin_neon_vcvt_n_u16_v:
5172  case NEON::BI__builtin_neon_vcvt_n_u32_v:
5173  case NEON::BI__builtin_neon_vcvt_n_s64_v:
5174  case NEON::BI__builtin_neon_vcvt_n_u64_v:
5175  case NEON::BI__builtin_neon_vcvtq_n_s16_v:
5176  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
5177  case NEON::BI__builtin_neon_vcvtq_n_u16_v:
5178  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
5179  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
5180  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
5181  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5182  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5183  return EmitNeonCall(F, Ops, "vcvt_n");
5184  }
5185  case NEON::BI__builtin_neon_vcvt_s32_v:
5186  case NEON::BI__builtin_neon_vcvt_u32_v:
5187  case NEON::BI__builtin_neon_vcvt_s64_v:
5188  case NEON::BI__builtin_neon_vcvt_u64_v:
5189  case NEON::BI__builtin_neon_vcvt_s16_v:
5190  case NEON::BI__builtin_neon_vcvt_u16_v:
5191  case NEON::BI__builtin_neon_vcvtq_s32_v:
5192  case NEON::BI__builtin_neon_vcvtq_u32_v:
5193  case NEON::BI__builtin_neon_vcvtq_s64_v:
5194  case NEON::BI__builtin_neon_vcvtq_u64_v:
5195  case NEON::BI__builtin_neon_vcvtq_s16_v:
5196  case NEON::BI__builtin_neon_vcvtq_u16_v: {
5197  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
5198  return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
5199  : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
5200  }
5201  case NEON::BI__builtin_neon_vcvta_s16_v:
5202  case NEON::BI__builtin_neon_vcvta_s32_v:
5203  case NEON::BI__builtin_neon_vcvta_s64_v:
5204  case NEON::BI__builtin_neon_vcvta_u16_v:
5205  case NEON::BI__builtin_neon_vcvta_u32_v:
5206  case NEON::BI__builtin_neon_vcvta_u64_v:
5207  case NEON::BI__builtin_neon_vcvtaq_s16_v:
5208  case NEON::BI__builtin_neon_vcvtaq_s32_v:
5209  case NEON::BI__builtin_neon_vcvtaq_s64_v:
5210  case NEON::BI__builtin_neon_vcvtaq_u16_v:
5211  case NEON::BI__builtin_neon_vcvtaq_u32_v:
5212  case NEON::BI__builtin_neon_vcvtaq_u64_v:
5213  case NEON::BI__builtin_neon_vcvtn_s16_v:
5214  case NEON::BI__builtin_neon_vcvtn_s32_v:
5215  case NEON::BI__builtin_neon_vcvtn_s64_v:
5216  case NEON::BI__builtin_neon_vcvtn_u16_v:
5217  case NEON::BI__builtin_neon_vcvtn_u32_v:
5218  case NEON::BI__builtin_neon_vcvtn_u64_v:
5219  case NEON::BI__builtin_neon_vcvtnq_s16_v:
5220  case NEON::BI__builtin_neon_vcvtnq_s32_v:
5221  case NEON::BI__builtin_neon_vcvtnq_s64_v:
5222  case NEON::BI__builtin_neon_vcvtnq_u16_v:
5223  case NEON::BI__builtin_neon_vcvtnq_u32_v:
5224  case NEON::BI__builtin_neon_vcvtnq_u64_v:
5225  case NEON::BI__builtin_neon_vcvtp_s16_v:
5226  case NEON::BI__builtin_neon_vcvtp_s32_v:
5227  case NEON::BI__builtin_neon_vcvtp_s64_v:
5228  case NEON::BI__builtin_neon_vcvtp_u16_v:
5229  case NEON::BI__builtin_neon_vcvtp_u32_v:
5230  case NEON::BI__builtin_neon_vcvtp_u64_v:
5231  case NEON::BI__builtin_neon_vcvtpq_s16_v:
5232  case NEON::BI__builtin_neon_vcvtpq_s32_v:
5233  case NEON::BI__builtin_neon_vcvtpq_s64_v:
5234  case NEON::BI__builtin_neon_vcvtpq_u16_v:
5235  case NEON::BI__builtin_neon_vcvtpq_u32_v:
5236  case NEON::BI__builtin_neon_vcvtpq_u64_v:
5237  case NEON::BI__builtin_neon_vcvtm_s16_v:
5238  case NEON::BI__builtin_neon_vcvtm_s32_v:
5239  case NEON::BI__builtin_neon_vcvtm_s64_v:
5240  case NEON::BI__builtin_neon_vcvtm_u16_v:
5241  case NEON::BI__builtin_neon_vcvtm_u32_v:
5242  case NEON::BI__builtin_neon_vcvtm_u64_v:
5243  case NEON::BI__builtin_neon_vcvtmq_s16_v:
5244  case NEON::BI__builtin_neon_vcvtmq_s32_v:
5245  case NEON::BI__builtin_neon_vcvtmq_s64_v:
5246  case NEON::BI__builtin_neon_vcvtmq_u16_v:
5247  case NEON::BI__builtin_neon_vcvtmq_u32_v:
5248  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
5249  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5250  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
5251  }
5252  case NEON::BI__builtin_neon_vext_v:
5253  case NEON::BI__builtin_neon_vextq_v: {
5254  int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
5255  SmallVector<uint32_t, 16> Indices;
5256  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
5257  Indices.push_back(i+CV);
5258 
5259  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5260  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5261  return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
5262  }
5263  case NEON::BI__builtin_neon_vfma_v:
5264  case NEON::BI__builtin_neon_vfmaq_v: {
5265  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5266  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5267  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5268  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5269 
5270  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
5271  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5272  }
5273  case NEON::BI__builtin_neon_vld1_v:
5274  case NEON::BI__builtin_neon_vld1q_v: {
5275  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5276  Ops.push_back(getAlignmentValue32(PtrOp0));
5277  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
5278  }
5279  case NEON::BI__builtin_neon_vld1_x2_v:
5280  case NEON::BI__builtin_neon_vld1q_x2_v:
5281  case NEON::BI__builtin_neon_vld1_x3_v:
5282  case NEON::BI__builtin_neon_vld1q_x3_v:
5283  case NEON::BI__builtin_neon_vld1_x4_v:
5284  case NEON::BI__builtin_neon_vld1q_x4_v: {
5285  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5286  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5287  llvm::Type *Tys[2] = { VTy, PTy };
5288  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5289  Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
5290  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5291  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5292  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5293  }
5294  case NEON::BI__builtin_neon_vld2_v:
5295  case NEON::BI__builtin_neon_vld2q_v:
5296  case NEON::BI__builtin_neon_vld3_v:
5297  case NEON::BI__builtin_neon_vld3q_v:
5298  case NEON::BI__builtin_neon_vld4_v:
5299  case NEON::BI__builtin_neon_vld4q_v:
5300  case NEON::BI__builtin_neon_vld2_dup_v:
5301  case NEON::BI__builtin_neon_vld2q_dup_v:
5302  case NEON::BI__builtin_neon_vld3_dup_v:
5303  case NEON::BI__builtin_neon_vld3q_dup_v:
5304  case NEON::BI__builtin_neon_vld4_dup_v:
5305  case NEON::BI__builtin_neon_vld4q_dup_v: {
5306  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5307  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5308  Value *Align = getAlignmentValue32(PtrOp1);
5309  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
5310  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5311  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5312  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5313  }
5314  case NEON::BI__builtin_neon_vld1_dup_v:
5315  case NEON::BI__builtin_neon_vld1q_dup_v: {
5316  Value *V = UndefValue::get(Ty);
5317  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
5318  PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
5319  LoadInst *Ld = Builder.CreateLoad(PtrOp0);
5320  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5321  Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
5322  return EmitNeonSplat(Ops[0], CI);
5323  }
5324  case NEON::BI__builtin_neon_vld2_lane_v:
5325  case NEON::BI__builtin_neon_vld2q_lane_v:
5326  case NEON::BI__builtin_neon_vld3_lane_v:
5327  case NEON::BI__builtin_neon_vld3q_lane_v:
5328  case NEON::BI__builtin_neon_vld4_lane_v:
5329  case NEON::BI__builtin_neon_vld4q_lane_v: {
5330  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5331  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5332  for (unsigned I = 2; I < Ops.size() - 1; ++I)
5333  Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
5334  Ops.push_back(getAlignmentValue32(PtrOp1));
5335  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
5336  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5337  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5338  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5339  }
5340  case NEON::BI__builtin_neon_vmovl_v: {
5341  llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
5342  Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
5343  if (Usgn)
5344  return Builder.CreateZExt(Ops[0], Ty, "vmovl");
5345  return Builder.CreateSExt(Ops[0], Ty, "vmovl");
5346  }
5347  case NEON::BI__builtin_neon_vmovn_v: {
5348  llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
5349  Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
5350  return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
5351  }
5352  case NEON::BI__builtin_neon_vmull_v:
5353  // FIXME: the integer vmull operations could be emitted in terms of pure
5354  // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
5355  // hoisting the exts outside loops. Until global ISel comes along that can
5356  // see through such movement this leads to bad CodeGen. So we need an
5357  // intrinsic for now.
5358  Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
5359  Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
5360  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
5361  case NEON::BI__builtin_neon_vpadal_v:
5362  case NEON::BI__builtin_neon_vpadalq_v: {
5363  // The source operand type has twice as many elements of half the size.
5364  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
5365  llvm::Type *EltTy =
5366  llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
5367  llvm::Type *NarrowTy =
5368  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
5369  llvm::Type *Tys[2] = { Ty, NarrowTy };
5370  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
5371  }
5372  case NEON::BI__builtin_neon_vpaddl_v:
5373  case NEON::BI__builtin_neon_vpaddlq_v: {
5374  // The source operand type has twice as many elements of half the size.
5375  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
5376  llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
5377  llvm::Type *NarrowTy =
5378  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
5379  llvm::Type *Tys[2] = { Ty, NarrowTy };
5380  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
5381  }
5382  case NEON::BI__builtin_neon_vqdmlal_v:
5383  case NEON::BI__builtin_neon_vqdmlsl_v: {
5384  SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
5385  Ops[1] =
5386  EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
5387  Ops.resize(2);
5388  return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
5389  }
5390  case NEON::BI__builtin_neon_vqshl_n_v:
5391  case NEON::BI__builtin_neon_vqshlq_n_v:
5392  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
5393  1, false);
5394  case NEON::BI__builtin_neon_vqshlu_n_v:
5395  case NEON::BI__builtin_neon_vqshluq_n_v:
5396  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
5397  1, false);
5398  case NEON::BI__builtin_neon_vrecpe_v:
5399  case NEON::BI__builtin_neon_vrecpeq_v:
5400  case NEON::BI__builtin_neon_vrsqrte_v:
5401  case NEON::BI__builtin_neon_vrsqrteq_v:
5402  Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
5403  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
5404  case NEON::BI__builtin_neon_vrndi_v:
5405  case NEON::BI__builtin_neon_vrndiq_v:
5406  Int = Intrinsic::nearbyint;
5407  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
5408  case NEON::BI__builtin_neon_vrshr_n_v:
5409  case NEON::BI__builtin_neon_vrshrq_n_v:
5410  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
5411  1, true);
5412  case NEON::BI__builtin_neon_vshl_n_v:
5413  case NEON::BI__builtin_neon_vshlq_n_v:
5414  Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
5415  return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
5416  "vshl_n");
5417  case NEON::BI__builtin_neon_vshll_n_v: {
5418  llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
5419  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
5420  if (Usgn)
5421  Ops[0] = Builder.CreateZExt(Ops[0], VTy);
5422  else
5423  Ops[0] = Builder.CreateSExt(Ops[0], VTy);
5424  Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
5425  return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
5426  }
5427  case NEON::BI__builtin_neon_vshrn_n_v: {
5428  llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
5429  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
5430  Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
5431  if (Usgn)
5432  Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
5433  else
5434  Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
5435  return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
5436  }
5437  case NEON::BI__builtin_neon_vshr_n_v:
5438  case NEON::BI__builtin_neon_vshrq_n_v:
5439  return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
5440  case NEON::BI__builtin_neon_vst1_v:
5441  case NEON::BI__builtin_neon_vst1q_v:
5442  case NEON::BI__builtin_neon_vst2_v:
5443  case NEON::BI__builtin_neon_vst2q_v:
5444  case NEON::BI__builtin_neon_vst3_v:
5445  case NEON::BI__builtin_neon_vst3q_v:
5446  case NEON::BI__builtin_neon_vst4_v:
5447  case NEON::BI__builtin_neon_vst4q_v:
5448  case NEON::BI__builtin_neon_vst2_lane_v:
5449  case NEON::BI__builtin_neon_vst2q_lane_v:
5450  case NEON::BI__builtin_neon_vst3_lane_v:
5451  case NEON::BI__builtin_neon_vst3q_lane_v:
5452  case NEON::BI__builtin_neon_vst4_lane_v:
5453  case NEON::BI__builtin_neon_vst4q_lane_v: {
5454  llvm::Type *Tys[] = {Int8PtrTy, Ty};
5455  Ops.push_back(getAlignmentValue32(PtrOp0));
5456  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
5457  }
5458  case NEON::BI__builtin_neon_vst1_x2_v:
5459  case NEON::BI__builtin_neon_vst1q_x2_v:
5460  case NEON::BI__builtin_neon_vst1_x3_v:
5461  case NEON::BI__builtin_neon_vst1q_x3_v:
5462  case NEON::BI__builtin_neon_vst1_x4_v:
5463  case NEON::BI__builtin_neon_vst1q_x4_v: {
5464  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5465  // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
5466  // in AArch64 it comes last. We may want to stick to one or another.
5467  if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be) {
5468  llvm::Type *Tys[2] = { VTy, PTy };
5469  std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
5470  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
5471  }
5472  llvm::Type *Tys[2] = { PTy, VTy };
5473  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
5474  }
5475  case NEON::BI__builtin_neon_vsubhn_v: {
5476  llvm::VectorType *SrcTy =
5477  llvm::VectorType::getExtendedElementVectorType(VTy);
5478 
5479  // %sum = add <4 x i32> %lhs, %rhs
5480  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
5481  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
5482  Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
5483 
5484  // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
5485  Constant *ShiftAmt =
5486  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
5487  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
5488 
5489  // %res = trunc <4 x i32> %high to <4 x i16>
5490  return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
5491  }
5492  case NEON::BI__builtin_neon_vtrn_v:
5493  case NEON::BI__builtin_neon_vtrnq_v: {
5494  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5495  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5496  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5497  Value *SV = nullptr;
5498 
5499  for (unsigned vi = 0; vi != 2; ++vi) {
5500  SmallVector<uint32_t, 16> Indices;
5501  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
5502  Indices.push_back(i+vi);
5503  Indices.push_back(i+e+vi);
5504  }
5505  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5506  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
5507  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5508  }
5509  return SV;
5510  }
5511  case NEON::BI__builtin_neon_vtst_v:
5512  case NEON::BI__builtin_neon_vtstq_v: {
5513  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5514  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5515  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5516  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5517  ConstantAggregateZero::get(Ty));
5518  return Builder.CreateSExt(Ops[0], Ty, "vtst");
5519  }
5520  case NEON::BI__builtin_neon_vuzp_v:
5521  case NEON::BI__builtin_neon_vuzpq_v: {
5522  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5523  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5524  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5525  Value *SV = nullptr;
5526 
5527  for (unsigned vi = 0; vi != 2; ++vi) {
5528  SmallVector<uint32_t, 16> Indices;
5529  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
5530  Indices.push_back(2*i+vi);
5531 
5532  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5533  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
5534  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5535  }
5536  return SV;
5537  }
5538  case NEON::BI__builtin_neon_vzip_v:
5539  case NEON::BI__builtin_neon_vzipq_v: {
5540  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5541  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5542  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5543  Value *SV = nullptr;
5544 
5545  for (unsigned vi = 0; vi != 2; ++vi) {
5546  SmallVector<uint32_t, 16> Indices;
5547  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
5548  Indices.push_back((i + vi*e) >> 1);
5549  Indices.push_back(((i + vi*e) >> 1)+e);
5550  }
5551  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5552  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
5553  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5554  }
5555  return SV;
5556  }
5557  case NEON::BI__builtin_neon_vdot_v:
5558  case NEON::BI__builtin_neon_vdotq_v: {
5559  llvm::Type *InputTy =
5560  llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
5561  llvm::Type *Tys[2] = { Ty, InputTy };
5562  Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
5563  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
5564  }
5565  case NEON::BI__builtin_neon_vfmlal_low_v:
5566  case NEON::BI__builtin_neon_vfmlalq_low_v: {
5567  llvm::Type *InputTy =
5568  llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
5569  llvm::Type *Tys[2] = { Ty, InputTy };
5570  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
5571  }
5572  case NEON::BI__builtin_neon_vfmlsl_low_v:
5573  case NEON::BI__builtin_neon_vfmlslq_low_v: {
5574  llvm::Type *InputTy =
5575  llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
5576  llvm::Type *Tys[2] = { Ty, InputTy };
5577  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
5578  }
5579  case NEON::BI__builtin_neon_vfmlal_high_v:
5580  case NEON::BI__builtin_neon_vfmlalq_high_v: {
5581  llvm::Type *InputTy =
5582  llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
5583  llvm::Type *Tys[2] = { Ty, InputTy };
5584  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
5585  }
5586  case NEON::BI__builtin_neon_vfmlsl_high_v:
5587  case NEON::BI__builtin_neon_vfmlslq_high_v: {
5588  llvm::Type *InputTy =
5589  llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
5590  llvm::Type *Tys[2] = { Ty, InputTy };
5591  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
5592  }
5593  }
5594 
5595  assert(Int && "Expected valid intrinsic number");
5596 
5597  // Determine the type(s) of this overloaded AArch64 intrinsic.
5598  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
5599 
5600  Value *Result = EmitNeonCall(F, Ops, NameHint);
5601  llvm::Type *ResultType = ConvertType(E->getType());
5602  // AArch64 intrinsic one-element vector type cast to
5603  // scalar type expected by the builtin
5604  return Builder.CreateBitCast(Result, ResultType, NameHint);
5605 }
5606 
5608  Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
5609  const CmpInst::Predicate Ip, const Twine &Name) {
5610  llvm::Type *OTy = Op->getType();
5611 
5612  // FIXME: this is utterly horrific. We should not be looking at previous
5613  // codegen context to find out what needs doing. Unfortunately TableGen
5614  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
5615  // (etc).
5616  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
5617  OTy = BI->getOperand(0)->getType();
5618 
5619  Op = Builder.CreateBitCast(Op, OTy);
5620  if (OTy->getScalarType()->isFloatingPointTy()) {
5621  Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
5622  } else {
5623  Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
5624  }
5625  return Builder.CreateSExt(Op, Ty, Name);
5626 }
5627 
5629  Value *ExtOp, Value *IndexOp,
5630  llvm::Type *ResTy, unsigned IntID,
5631  const char *Name) {
5632  SmallVector<Value *, 2> TblOps;
5633  if (ExtOp)
5634  TblOps.push_back(ExtOp);
5635 
5636  // Build a vector containing sequential number like (0, 1, 2, ..., 15)
5637  SmallVector<uint32_t, 16> Indices;
5638  llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
5639  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
5640  Indices.push_back(2*i);
5641  Indices.push_back(2*i+1);
5642  }
5643 
5644  int PairPos = 0, End = Ops.size() - 1;
5645  while (PairPos < End) {
5646  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
5647  Ops[PairPos+1], Indices,
5648  Name));
5649  PairPos += 2;
5650  }
5651 
5652  // If there's an odd number of 64-bit lookup table, fill the high 64-bit
5653  // of the 128-bit lookup table with zero.
5654  if (PairPos == End) {
5655  Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
5656  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
5657  ZeroTbl, Indices, Name));
5658  }
5659 
5660  Function *TblF;
5661  TblOps.push_back(IndexOp);
5662  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
5663 
5664  return CGF.EmitNeonCall(TblF, TblOps, Name);
5665 }
5666 
5667 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
5668  unsigned Value;
5669  switch (BuiltinID) {
5670  default:
5671  return nullptr;
5672  case ARM::BI__builtin_arm_nop:
5673  Value = 0;
5674  break;
5675  case ARM::BI__builtin_arm_yield:
5676  case ARM::BI__yield:
5677  Value = 1;
5678  break;
5679  case ARM::BI__builtin_arm_wfe:
5680  case ARM::BI__wfe:
5681  Value = 2;
5682  break;
5683  case ARM::BI__builtin_arm_wfi:
5684  case ARM::BI__wfi:
5685  Value = 3;
5686  break;
5687  case ARM::BI__builtin_arm_sev:
5688  case ARM::BI__sev:
5689  Value = 4;
5690  break;
5691  case ARM::BI__builtin_arm_sevl:
5692  case ARM::BI__sevl:
5693  Value = 5;
5694  break;
5695  }
5696 
5697  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
5698  llvm::ConstantInt::get(Int32Ty, Value));
5699 }
5700 
5701 // Generates the IR for the read/write special register builtin,
5702 // ValueType is the type of the value that is to be written or read,
5703 // RegisterType is the type of the register being written to or read from.
5705  const CallExpr *E,
5706  llvm::Type *RegisterType,
5707  llvm::Type *ValueType,
5708  bool IsRead,
5709  StringRef SysReg = "") {
5710  // write and register intrinsics only support 32 and 64 bit operations.
5711  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
5712  && "Unsupported size for register.");
5713 
5714  CodeGen::CGBuilderTy &Builder = CGF.Builder;
5715  CodeGen::CodeGenModule &CGM = CGF.CGM;
5716  LLVMContext &Context = CGM.getLLVMContext();
5717 
5718  if (SysReg.empty()) {
5719  const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
5720  SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
5721  }
5722 
5723  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
5724  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5725  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5726 
5727  llvm::Type *Types[] = { RegisterType };
5728 
5729  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
5730  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
5731  && "Can't fit 64-bit value in 32-bit register");
5732 
5733  if (IsRead) {
5734  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
5735  llvm::Value *Call = Builder.CreateCall(F, Metadata);
5736 
5737  if (MixedTypes)
5738  // Read into 64 bit register and then truncate result to 32 bit.
5739  return Builder.CreateTrunc(Call, ValueType);
5740 
5741  if (ValueType->isPointerTy())
5742  // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
5743  return Builder.CreateIntToPtr(Call, ValueType);
5744 
5745  return Call;
5746  }
5747 
5748  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
5749  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
5750  if (MixedTypes) {
5751  // Extend 32 bit write value to 64 bit to pass to write.
5752  ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
5753  return Builder.CreateCall(F, { Metadata, ArgValue });
5754  }
5755 
5756  if (ValueType->isPointerTy()) {
5757  // Have VoidPtrTy ArgValue but want to return an i32/i64.
5758  ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
5759  return Builder.CreateCall(F, { Metadata, ArgValue });
5760  }
5761 
5762  return Builder.CreateCall(F, { Metadata, ArgValue });
5763 }
5764 
5765 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
5766 /// argument that specifies the vector type.
5767 static bool HasExtraNeonArgument(unsigned BuiltinID) {
5768  switch (BuiltinID) {
5769  default: break;
5770  case NEON::BI__builtin_neon_vget_lane_i8:
5771  case NEON::BI__builtin_neon_vget_lane_i16:
5772  case NEON::BI__builtin_neon_vget_lane_i32:
5773  case NEON::BI__builtin_neon_vget_lane_i64:
5774  case NEON::BI__builtin_neon_vget_lane_f32:
5775  case NEON::BI__builtin_neon_vgetq_lane_i8:
5776  case NEON::BI__builtin_neon_vgetq_lane_i16:
5777  case NEON::BI__builtin_neon_vgetq_lane_i32:
5778  case NEON::BI__builtin_neon_vgetq_lane_i64:
5779  case NEON::BI__builtin_neon_vgetq_lane_f32:
5780  case NEON::BI__builtin_neon_vset_lane_i8:
5781  case NEON::BI__builtin_neon_vset_lane_i16:
5782  case NEON::BI__builtin_neon_vset_lane_i32:
5783  case NEON::BI__builtin_neon_vset_lane_i64:
5784  case NEON::BI__builtin_neon_vset_lane_f32:
5785  case NEON::BI__builtin_neon_vsetq_lane_i8:
5786  case NEON::BI__builtin_neon_vsetq_lane_i16:
5787  case NEON::BI__builtin_neon_vsetq_lane_i32:
5788  case NEON::BI__builtin_neon_vsetq_lane_i64:
5789  case NEON::BI__builtin_neon_vsetq_lane_f32:
5790  case NEON::BI__builtin_neon_vsha1h_u32:
5791  case NEON::BI__builtin_neon_vsha1cq_u32:
5792  case NEON::BI__builtin_neon_vsha1pq_u32:
5793  case NEON::BI__builtin_neon_vsha1mq_u32:
5794  case clang::ARM::BI_MoveToCoprocessor:
5795  case clang::ARM::BI_MoveToCoprocessor2:
5796  return false;
5797  }
5798  return true;
5799 }
5800 
5802  Value *Ptr = EmitScalarExpr(E->getArg(0));
5803  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5804  CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
5805  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5806  LoadSize.getQuantity() * 8);
5807  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
5808  llvm::LoadInst *Load =
5809  Builder.CreateAlignedLoad(Ptr, LoadSize);
5810  Load->setVolatile(true);
5811  return Load;
5812 }
5813 
5815  Value *Ptr = EmitScalarExpr(E->getArg(0));
5816  Value *Value = EmitScalarExpr(E->getArg(1));
5817  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5818  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
5819  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5820  StoreSize.getQuantity() * 8);
5821  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
5822  llvm::StoreInst *Store =
5823  Builder.CreateAlignedStore(Value, Ptr,
5824  StoreSize);
5825  Store->setVolatile(true);
5826  return Store;
5827 }
5828 
5830  const CallExpr *E,
5831  llvm::Triple::ArchType Arch) {
5832  if (auto Hint = GetValueForARMHint(BuiltinID))
5833  return Hint;
5834 
5835  if (BuiltinID == ARM::BI__emit) {
5836  bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
5837  llvm::FunctionType *FTy =
5838  llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
5839 
5840  Expr::EvalResult Result;
5841  if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5842  llvm_unreachable("Sema will ensure that the parameter is constant");
5843 
5844  llvm::APSInt Value = Result.Val.getInt();
5845  uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
5846 
5847  llvm::InlineAsm *Emit =
5848  IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
5849  /*SideEffects=*/true)
5850  : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
5851  /*SideEffects=*/true);
5852 
5853  return Builder.CreateCall(Emit);
5854  }
5855 
5856  if (BuiltinID == ARM::BI__builtin_arm_dbg) {
5857  Value *Option = EmitScalarExpr(E->getArg(0));
5858  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
5859  }
5860 
5861  if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
5862  Value *Address = EmitScalarExpr(E->getArg(0));
5863  Value *RW = EmitScalarExpr(E->getArg(1));
5864  Value *IsData = EmitScalarExpr(E->getArg(2));
5865 
5866  // Locality is not supported on ARM target
5867  Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
5868 
5869  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5870  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5871  }
5872 
5873  if (BuiltinID == ARM::BI__builtin_arm_rbit) {
5874  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5875  return Builder.CreateCall(
5876  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5877  }
5878 
5879  if (BuiltinID == ARM::BI__clear_cache) {
5880  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5881  const FunctionDecl *FD = E->getDirectCallee();
5882  Value *Ops[2];
5883  for (unsigned i = 0; i < 2; i++)
5884  Ops[i] = EmitScalarExpr(E->getArg(i));
5885  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5886  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5887  StringRef Name = FD->getName();
5888  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5889  }
5890 
5891  if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
5892  BuiltinID == ARM::BI__builtin_arm_mcrr2) {
5893  Function *F;
5894 
5895  switch (BuiltinID) {
5896  default: llvm_unreachable("unexpected builtin");
5897  case ARM::BI__builtin_arm_mcrr:
5898  F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
5899  break;
5900  case ARM::BI__builtin_arm_mcrr2:
5901  F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
5902  break;
5903  }
5904 
5905  // MCRR{2} instruction has 5 operands but
5906  // the intrinsic has 4 because Rt and Rt2
5907  // are represented as a single unsigned 64
5908  // bit integer in the intrinsic definition
5909  // but internally it's represented as 2 32
5910  // bit integers.
5911 
5912  Value *Coproc = EmitScalarExpr(E->getArg(0));
5913  Value *Opc1 = EmitScalarExpr(E->getArg(1));
5914  Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
5915  Value *CRm = EmitScalarExpr(E->getArg(3));
5916 
5917  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
5918  Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
5919  Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
5920  Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
5921 
5922  return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
5923  }
5924 
5925  if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
5926  BuiltinID == ARM::BI__builtin_arm_mrrc2) {
5927  Function *F;
5928 
5929  switch (BuiltinID) {
5930  default: llvm_unreachable("unexpected builtin");
5931  case ARM::BI__builtin_arm_mrrc:
5932  F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
5933  break;
5934  case ARM::BI__builtin_arm_mrrc2:
5935  F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
5936  break;
5937  }
5938 
5939  Value *Coproc = EmitScalarExpr(E->getArg(0));
5940  Value *Opc1 = EmitScalarExpr(E->getArg(1));
5941  Value *CRm = EmitScalarExpr(E->getArg(2));
5942  Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
5943 
5944  // Returns an unsigned 64 bit integer, represented
5945  // as two 32 bit integers.
5946 
5947  Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
5948  Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
5949  Rt = Builder.CreateZExt(Rt, Int64Ty);
5950  Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
5951 
5952  Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
5953  RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
5954  RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
5955 
5956  return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
5957  }
5958 
5959  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
5960  ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
5961  BuiltinID == ARM::BI__builtin_arm_ldaex) &&
5962  getContext().getTypeSize(E->getType()) == 64) ||
5963  BuiltinID == ARM::BI__ldrexd) {
5964  Function *F;
5965 
5966  switch (BuiltinID) {
5967  default: llvm_unreachable("unexpected builtin");
5968  case ARM::BI__builtin_arm_ldaex:
5969  F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
5970  break;
5971  case ARM::BI__builtin_arm_ldrexd:
5972  case ARM::BI__builtin_arm_ldrex:
5973  case ARM::BI__ldrexd:
5974  F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
5975  break;
5976  }
5977 
5978  Value *LdPtr = EmitScalarExpr(E->getArg(0));
5979  Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5980  "ldrexd");
5981 
5982  Value *Val0 = Builder.CreateExtractValue(Val, 1);
5983  Value *Val1 = Builder.CreateExtractValue(Val, 0);
5984  Val0 = Builder.CreateZExt(Val0, Int64Ty);
5985  Val1 = Builder.CreateZExt(Val1, Int64Ty);
5986 
5987  Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
5988  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5989  Val = Builder.CreateOr(Val, Val1);
5990  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5991  }
5992 
5993  if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
5994  BuiltinID == ARM::BI__builtin_arm_ldaex) {
5995  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5996 
5997  QualType Ty = E->getType();
5998  llvm::Type *RealResTy = ConvertType(Ty);
5999  llvm::Type *PtrTy = llvm::IntegerType::get(
6000  getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
6001  LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
6002 
6003  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
6004  ? Intrinsic::arm_ldaex
6005  : Intrinsic::arm_ldrex,
6006  PtrTy);
6007  Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
6008 
6009  if (RealResTy->isPointerTy())
6010  return Builder.CreateIntToPtr(Val, RealResTy);
6011  else {
6012  llvm::Type *IntResTy = llvm::IntegerType::get(
6013  getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
6014  Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
6015  return Builder.CreateBitCast(Val, RealResTy);
6016  }
6017  }
6018 
6019  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
6020  ((BuiltinID == ARM::BI__builtin_arm_stlex ||
6021  BuiltinID == ARM::BI__builtin_arm_strex) &&
6022  getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
6023  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
6024  ? Intrinsic::arm_stlexd
6025  : Intrinsic::arm_strexd);
6026  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
6027 
6028  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6029  Value *Val = EmitScalarExpr(E->getArg(0));
6030  Builder.CreateStore(Val, Tmp);
6031 
6032  Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
6033  Val = Builder.CreateLoad(LdPtr);
6034 
6035  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
6036  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
6037  Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
6038  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
6039  }
6040 
6041  if (BuiltinID == ARM::BI__builtin_arm_strex ||
6042  BuiltinID == ARM::BI__builtin_arm_stlex) {
6043  Value *StoreVal = EmitScalarExpr(E->getArg(0));
6044  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
6045 
6046  QualType Ty = E->getArg(0)->getType();
6047  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
6048  getContext().getTypeSize(Ty));
6049  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
6050 
6051  if (StoreVal->getType()->isPointerTy())
6052  StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
6053  else {
6054  llvm::Type *IntTy = llvm::IntegerType::get(
6055  getLLVMContext(),
6056  CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
6057  StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
6058  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
6059  }
6060 
6061  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
6062  ? Intrinsic::arm_stlex
6063  : Intrinsic::arm_strex,
6064  StoreAddr->getType());
6065  return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
6066  }
6067 
6068  switch (BuiltinID) {
6069  case ARM::BI__iso_volatile_load8:
6070  case ARM::BI__iso_volatile_load16:
6071  case ARM::BI__iso_volatile_load32:
6072  case ARM::BI__iso_volatile_load64:
6073  return EmitISOVolatileLoad(E);
6074  case ARM::BI__iso_volatile_store8:
6075  case ARM::BI__iso_volatile_store16:
6076  case ARM::BI__iso_volatile_store32:
6077  case ARM::BI__iso_volatile_store64:
6078  return EmitISOVolatileStore(E);
6079  }
6080 
6081  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
6082  Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
6083  return Builder.CreateCall(F);
6084  }
6085 
6086  // CRC32
6087  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
6088  switch (BuiltinID) {
6089  case ARM::BI__builtin_arm_crc32b:
6090  CRCIntrinsicID = Intrinsic::arm_crc32b; break;
6091  case ARM::BI__builtin_arm_crc32cb:
6092  CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
6093  case ARM::BI__builtin_arm_crc32h:
6094  CRCIntrinsicID = Intrinsic::arm_crc32h; break;
6095  case ARM::BI__builtin_arm_crc32ch:
6096  CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
6097  case ARM::BI__builtin_arm_crc32w:
6098  case ARM::BI__builtin_arm_crc32d:
6099  CRCIntrinsicID = Intrinsic::arm_crc32w; break;
6100  case ARM::BI__builtin_arm_crc32cw:
6101  case ARM::BI__builtin_arm_crc32cd:
6102  CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
6103  }
6104 
6105  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
6106  Value *Arg0 = EmitScalarExpr(E->getArg(0));
6107  Value *Arg1 = EmitScalarExpr(E->getArg(1));
6108 
6109  // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w
6110  // intrinsics, hence we need different codegen for these cases.
6111  if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
6112  BuiltinID == ARM::BI__builtin_arm_crc32cd) {
6113  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
6114  Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
6115  Value *Arg1b = Builder.CreateLShr(Arg1, C1);
6116  Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
6117 
6118  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
6119  Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
6120  return Builder.CreateCall(F, {Res, Arg1b});
6121  } else {
6122  Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
6123 
6124  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
6125  return Builder.CreateCall(F, {Arg0, Arg1});
6126  }
6127  }
6128 
6129  if (BuiltinID == ARM::BI__builtin_arm_rsr ||
6130  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
6131  BuiltinID == ARM::BI__builtin_arm_rsrp ||
6132  BuiltinID == ARM::BI__builtin_arm_wsr ||
6133  BuiltinID == ARM::BI__builtin_arm_wsr64 ||
6134  BuiltinID == ARM::BI__builtin_arm_wsrp) {
6135 
6136  bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
6137  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
6138  BuiltinID == ARM::BI__builtin_arm_rsrp;
6139 
6140  bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
6141  BuiltinID == ARM::BI__builtin_arm_wsrp;
6142 
6143  bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
6144  BuiltinID == ARM::BI__builtin_arm_wsr64;
6145 
6146  llvm::Type *ValueType;
6147  llvm::Type *RegisterType;
6148  if (IsPointerBuiltin) {
6149  ValueType = VoidPtrTy;
6150  RegisterType = Int32Ty;
6151  } else if (Is64Bit) {
6152  ValueType = RegisterType = Int64Ty;
6153  } else {
6154  ValueType = RegisterType = Int32Ty;
6155  }
6156 
6157  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
6158  }
6159 
6160  // Find out if any arguments are required to be integer constant
6161  // expressions.
6162  unsigned ICEArguments = 0;
6164  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6165  assert(Error == ASTContext::GE_None && "Should not codegen an error");
6166 
6167  auto getAlignmentValue32 = [&](Address addr) -> Value* {
6168  return Builder.getInt32(addr.getAlignment().getQuantity());
6169  };
6170 
6171  Address PtrOp0 = Address::invalid();
6172  Address PtrOp1 = Address::invalid();
6174  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
6175  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
6176  for (unsigned i = 0, e = NumArgs; i != e; i++) {
6177  if (i == 0) {
6178  switch (BuiltinID) {
6179  case NEON::BI__builtin_neon_vld1_v:
6180  case NEON::BI__builtin_neon_vld1q_v:
6181  case NEON::BI__builtin_neon_vld1q_lane_v:
6182  case NEON::BI__builtin_neon_vld1_lane_v:
6183  case NEON::BI__builtin_neon_vld1_dup_v:
6184  case NEON::BI__builtin_neon_vld1q_dup_v:
6185  case NEON::BI__builtin_neon_vst1_v:
6186  case NEON::BI__builtin_neon_vst1q_v:
6187  case NEON::BI__builtin_neon_vst1q_lane_v:
6188  case NEON::BI__builtin_neon_vst1_lane_v:
6189  case NEON::BI__builtin_neon_vst2_v:
6190  case NEON::BI__builtin_neon_vst2q_v:
6191  case NEON::BI__builtin_neon_vst2_lane_v:
6192  case NEON::BI__builtin_neon_vst2q_lane_v:
6193  case NEON::BI__builtin_neon_vst3_v:
6194  case NEON::BI__builtin_neon_vst3q_v:
6195  case NEON::BI__builtin_neon_vst3_lane_v:
6196  case NEON::BI__builtin_neon_vst3q_lane_v:
6197  case NEON::BI__builtin_neon_vst4_v:
6198  case NEON::BI__builtin_neon_vst4q_v:
6199  case NEON::BI__builtin_neon_vst4_lane_v:
6200  case NEON::BI__builtin_neon_vst4q_lane_v:
6201  // Get the alignment for the argument in addition to the value;
6202  // we'll use it later.
6203  PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
6204  Ops.push_back(PtrOp0.getPointer());
6205  continue;
6206  }
6207  }
6208  if (i == 1) {
6209  switch (BuiltinID) {
6210  case NEON::BI__builtin_neon_vld2_v:
6211  case NEON::BI__builtin_neon_vld2q_v:
6212  case NEON::BI__builtin_neon_vld3_v:
6213  case NEON::BI__builtin_neon_vld3q_v:
6214  case NEON::BI__builtin_neon_vld4_v:
6215  case NEON::BI__builtin_neon_vld4q_v:
6216  case NEON::BI__builtin_neon_vld2_lane_v:
6217  case NEON::BI__builtin_neon_vld2q_lane_v:
6218  case NEON::BI__builtin_neon_vld3_lane_v:
6219  case NEON::BI__builtin_neon_vld3q_lane_v:
6220  case NEON::BI__builtin_neon_vld4_lane_v:
6221  case NEON::BI__builtin_neon_vld4q_lane_v:
6222  case NEON::BI__builtin_neon_vld2_dup_v:
6223  case NEON::BI__builtin_neon_vld2q_dup_v:
6224  case NEON::BI__builtin_neon_vld3_dup_v:
6225  case NEON::BI__builtin_neon_vld3q_dup_v:
6226  case NEON::BI__builtin_neon_vld4_dup_v:
6227  case NEON::BI__builtin_neon_vld4q_dup_v:
6228  // Get the alignment for the argument in addition to the value;
6229  // we'll use it later.
6230  PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
6231  Ops.push_back(PtrOp1.getPointer());
6232  continue;
6233  }
6234  }
6235 
6236  if ((ICEArguments & (1 << i)) == 0) {
6237  Ops.push_back(EmitScalarExpr(E->getArg(i)));
6238  } else {
6239  // If this is required to be a constant, constant fold it so that we know
6240  // that the generated intrinsic gets a ConstantInt.
6241  llvm::APSInt Result;
6242  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
6243  assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
6244  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
6245  }
6246  }
6247 
6248  switch (BuiltinID) {
6249  default: break;
6250 
6251  case NEON::BI__builtin_neon_vget_lane_i8:
6252  case NEON::BI__builtin_neon_vget_lane_i16:
6253  case NEON::BI__builtin_neon_vget_lane_i32:
6254  case NEON::BI__builtin_neon_vget_lane_i64:
6255  case NEON::BI__builtin_neon_vget_lane_f32:
6256  case NEON::BI__builtin_neon_vgetq_lane_i8:
6257  case NEON::BI__builtin_neon_vgetq_lane_i16:
6258  case NEON::BI__builtin_neon_vgetq_lane_i32:
6259  case NEON::BI__builtin_neon_vgetq_lane_i64:
6260  case NEON::BI__builtin_neon_vgetq_lane_f32:
6261  return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
6262 
6263  case NEON::BI__builtin_neon_vrndns_f32: {
6264  Value *Arg = EmitScalarExpr(E->getArg(0));
6265  llvm::Type *Tys[] = {Arg->getType()};
6266  Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
6267  return Builder.CreateCall(F, {Arg}, "vrndn"); }
6268 
6269  case NEON::BI__builtin_neon_vset_lane_i8:
6270  case NEON::BI__builtin_neon_vset_lane_i16:
6271  case NEON::BI__builtin_neon_vset_lane_i32:
6272  case NEON::BI__builtin_neon_vset_lane_i64:
6273  case NEON::BI__builtin_neon_vset_lane_f32:
6274  case NEON::BI__builtin_neon_vsetq_lane_i8:
6275  case NEON::BI__builtin_neon_vsetq_lane_i16:
6276  case NEON::BI__builtin_neon_vsetq_lane_i32:
6277  case NEON::BI__builtin_neon_vsetq_lane_i64:
6278  case NEON::BI__builtin_neon_vsetq_lane_f32:
6279  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6280 
6281  case NEON::BI__builtin_neon_vsha1h_u32:
6282  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
6283  "vsha1h");
6284  case NEON::BI__builtin_neon_vsha1cq_u32:
6285  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
6286  "vsha1h");
6287  case NEON::BI__builtin_neon_vsha1pq_u32:
6288  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
6289  "vsha1h");
6290  case NEON::BI__builtin_neon_vsha1mq_u32:
6291  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
6292  "vsha1h");
6293 
6294  // The ARM _MoveToCoprocessor builtins put the input register value as
6295  // the first argument, but the LLVM intrinsic expects it as the third one.
6296  case ARM::BI_MoveToCoprocessor:
6297  case ARM::BI_MoveToCoprocessor2: {
6298  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
6299  Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
6300  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
6301  Ops[3], Ops[4], Ops[5]});
6302  }
6303  case ARM::BI_BitScanForward:
6304  case ARM::BI_BitScanForward64:
6305  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
6306  case ARM::BI_BitScanReverse:
6307  case ARM::BI_BitScanReverse64:
6308  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
6309 
6310  case ARM::BI_InterlockedAnd64:
6311  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
6312  case ARM::BI_InterlockedExchange64:
6313  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
6314  case ARM::BI_InterlockedExchangeAdd64:
6315  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
6316  case ARM::BI_InterlockedExchangeSub64:
6317  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
6318  case ARM::BI_InterlockedOr64:
6319  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
6320  case ARM::BI_InterlockedXor64:
6321  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
6322  case ARM::BI_InterlockedDecrement64:
6323  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
6324  case ARM::BI_InterlockedIncrement64:
6325  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
6326  case ARM::BI_InterlockedExchangeAdd8_acq:
6327  case ARM::BI_InterlockedExchangeAdd16_acq:
6328  case ARM::BI_InterlockedExchangeAdd_acq:
6329  case ARM::BI_InterlockedExchangeAdd64_acq:
6330  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
6331  case ARM::BI_InterlockedExchangeAdd8_rel:
6332  case ARM::BI_InterlockedExchangeAdd16_rel:
6333  case ARM::BI_InterlockedExchangeAdd_rel:
6334  case ARM::BI_InterlockedExchangeAdd64_rel:
6335  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
6336  case ARM::BI_InterlockedExchangeAdd8_nf:
6337  case ARM::BI_InterlockedExchangeAdd16_nf:
6338  case ARM::BI_InterlockedExchangeAdd_nf:
6339  case ARM::BI_InterlockedExchangeAdd64_nf:
6340  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
6341  case ARM::BI_InterlockedExchange8_acq:
6342  case ARM::BI_InterlockedExchange16_acq:
6343  case ARM::BI_InterlockedExchange_acq:
6344  case ARM::BI_InterlockedExchange64_acq:
6345  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
6346  case ARM::BI_InterlockedExchange8_rel:
6347  case ARM::BI_InterlockedExchange16_rel:
6348  case ARM::BI_InterlockedExchange_rel:
6349  case ARM::BI_InterlockedExchange64_rel:
6350  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
6351  case ARM::BI_InterlockedExchange8_nf:
6352  case ARM::BI_InterlockedExchange16_nf:
6353  case ARM::BI_InterlockedExchange_nf:
6354  case ARM::BI_InterlockedExchange64_nf:
6355  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
6356  case ARM::BI_InterlockedCompareExchange8_acq:
6357  case ARM::BI_InterlockedCompareExchange16_acq:
6358  case ARM::BI_InterlockedCompareExchange_acq:
6359  case ARM::BI_InterlockedCompareExchange64_acq:
6360  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
6361  case ARM::BI_InterlockedCompareExchange8_rel:
6362  case ARM::BI_InterlockedCompareExchange16_rel:
6363  case ARM::BI_InterlockedCompareExchange_rel:
6364  case ARM::BI_InterlockedCompareExchange64_rel:
6365  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
6366  case ARM::BI_InterlockedCompareExchange8_nf:
6367  case ARM::BI_InterlockedCompareExchange16_nf:
6368  case ARM::BI_InterlockedCompareExchange_nf:
6369  case ARM::BI_InterlockedCompareExchange64_nf:
6370  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
6371  case ARM::BI_InterlockedOr8_acq:
6372  case ARM::BI_InterlockedOr16_acq:
6373  case ARM::BI_InterlockedOr_acq:
6374  case ARM::BI_InterlockedOr64_acq:
6375  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
6376  case ARM::BI_InterlockedOr8_rel:
6377  case ARM::BI_InterlockedOr16_rel:
6378  case ARM::BI_InterlockedOr_rel:
6379  case ARM::BI_InterlockedOr64_rel:
6380  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
6381  case ARM::BI_InterlockedOr8_nf:
6382  case ARM::BI_InterlockedOr16_nf:
6383  case ARM::BI_InterlockedOr_nf:
6384  case ARM::BI_InterlockedOr64_nf:
6385  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
6386  case ARM::BI_InterlockedXor8_acq:
6387  case ARM::BI_InterlockedXor16_acq:
6388  case ARM::BI_InterlockedXor_acq:
6389  case ARM::BI_InterlockedXor64_acq:
6390  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
6391  case ARM::BI_InterlockedXor8_rel:
6392  case ARM::BI_InterlockedXor16_rel:
6393  case ARM::BI_InterlockedXor_rel:
6394  case ARM::BI_InterlockedXor64_rel:
6395  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
6396  case ARM::BI_InterlockedXor8_nf:
6397  case ARM::BI_InterlockedXor16_nf:
6398  case ARM::BI_InterlockedXor_nf:
6399  case ARM::BI_InterlockedXor64_nf:
6400  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
6401  case ARM::BI_InterlockedAnd8_acq:
6402  case ARM::BI_InterlockedAnd16_acq:
6403  case ARM::BI_InterlockedAnd_acq:
6404  case ARM::BI_InterlockedAnd64_acq:
6405  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
6406  case ARM::BI_InterlockedAnd8_rel:
6407  case ARM::BI_InterlockedAnd16_rel:
6408  case ARM::BI_InterlockedAnd_rel:
6409  case ARM::BI_InterlockedAnd64_rel:
6410  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
6411  case ARM::BI_InterlockedAnd8_nf:
6412  case ARM::BI_InterlockedAnd16_nf:
6413  case ARM::BI_InterlockedAnd_nf:
6414  case ARM::BI_InterlockedAnd64_nf:
6415  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
6416  case ARM::BI_InterlockedIncrement16_acq:
6417  case ARM::BI_InterlockedIncrement_acq:
6418  case ARM::BI_InterlockedIncrement64_acq:
6419  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
6420  case ARM::BI_InterlockedIncrement16_rel:
6421  case ARM::BI_InterlockedIncrement_rel:
6422  case ARM::BI_InterlockedIncrement64_rel:
6423  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
6424  case ARM::BI_InterlockedIncrement16_nf:
6425  case ARM::BI_InterlockedIncrement_nf:
6426  case ARM::BI_InterlockedIncrement64_nf:
6427  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
6428  case ARM::BI_InterlockedDecrement16_acq:
6429  case ARM::BI_InterlockedDecrement_acq:
6430  case ARM::BI_InterlockedDecrement64_acq:
6431  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
6432  case ARM::BI_InterlockedDecrement16_rel:
6433  case ARM::BI_InterlockedDecrement_rel:
6434  case ARM::BI_InterlockedDecrement64_rel:
6435  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
6436  case ARM::BI_InterlockedDecrement16_nf:
6437  case ARM::BI_InterlockedDecrement_nf:
6438  case ARM::BI_InterlockedDecrement64_nf:
6439  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
6440  }
6441 
6442  // Get the last argument, which specifies the vector type.
6443  assert(HasExtraArg);
6444  llvm::APSInt Result;
6445  const Expr *Arg = E->getArg(E->getNumArgs()-1);
6446  if (!Arg->isIntegerConstantExpr(Result, getContext()))
6447  return nullptr;
6448 
6449  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
6450  BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
6451  // Determine the overloaded type of this builtin.
6452  llvm::Type *Ty;
6453  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
6454  Ty = FloatTy;
6455  else
6456  Ty = DoubleTy;
6457 
6458  // Determine whether this is an unsigned conversion or not.
6459  bool usgn = Result.getZExtValue() == 1;
6460  unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
6461 
6462  // Call the appropriate intrinsic.
6463  Function *F = CGM.getIntrinsic(Int, Ty);
6464  return Builder.CreateCall(F, Ops, "vcvtr");
6465  }
6466 
6467  // Determine the type of this overloaded NEON intrinsic.
6468  NeonTypeFlags Type(Result.getZExtValue());
6469  bool usgn = Type.isUnsigned();
6470  bool rightShift = false;
6471 
6472  llvm::VectorType *VTy = GetNeonType(this, Type,
6473  getTarget().hasLegalHalfType());
6474  llvm::Type *Ty = VTy;
6475  if (!Ty)
6476  return nullptr;
6477 
6478  // Many NEON builtins have identical semantics and uses in ARM and
6479  // AArch64. Emit these in a single function.
6480  auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
6481  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
6482  IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
6483  if (Builtin)
6484  return EmitCommonNeonBuiltinExpr(
6485  Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6486  Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
6487 
6488  unsigned Int;
6489  switch (BuiltinID) {
6490  default: return nullptr;
6491  case NEON::BI__builtin_neon_vld1q_lane_v:
6492  // Handle 64-bit integer elements as a special case. Use shuffles of
6493  // one-element vectors to avoid poor code for i64 in the backend.
6494  if (VTy->getElementType()->isIntegerTy(64)) {
6495  // Extract the other lane.
6496  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6497  uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
6498  Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
6499  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
6500  // Load the value as a one-element vector.
6501  Ty = llvm::VectorType::get(VTy->getElementType(), 1);
6502  llvm::Type *Tys[] = {Ty, Int8PtrTy};
6503  Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
6504  Value *Align = getAlignmentValue32(PtrOp0);
6505  Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
6506  // Combine them.
6507  uint32_t Indices[] = {1 - Lane, Lane};
6508  SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
6509  return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
6510  }
6511  LLVM_FALLTHROUGH;
6512  case NEON::BI__builtin_neon_vld1_lane_v: {
6513  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6514  PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
6515  Value *Ld = Builder.CreateLoad(PtrOp0);
6516  return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
6517  }
6518  case NEON::BI__builtin_neon_vqrshrn_n_v:
6519  Int =
6520  usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
6521  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
6522  1, true);
6523  case NEON::BI__builtin_neon_vqrshrun_n_v:
6524  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
6525  Ops, "vqrshrun_n", 1, true);
6526  case NEON::BI__builtin_neon_vqshrn_n_v:
6527  Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
6528  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
6529  1, true);
6530  case NEON::BI__builtin_neon_vqshrun_n_v:
6531  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
6532  Ops, "vqshrun_n", 1, true);
6533  case NEON::BI__builtin_neon_vrecpe_v:
6534  case NEON::BI__builtin_neon_vrecpeq_v:
6535  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
6536  Ops, "vrecpe");
6537  case NEON::BI__builtin_neon_vrshrn_n_v:
6538  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
6539  Ops, "vrshrn_n", 1, true);
6540  case NEON::BI__builtin_neon_vrsra_n_v:
6541  case NEON::BI__builtin_neon_vrsraq_n_v:
6542  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6543  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6544  Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
6545  Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
6546  Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
6547  return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
6548  case NEON::BI__builtin_neon_vsri_n_v:
6549  case NEON::BI__builtin_neon_vsriq_n_v:
6550  rightShift = true;
6551  LLVM_FALLTHROUGH;
6552  case NEON::BI__builtin_neon_vsli_n_v:
6553  case NEON::BI__builtin_neon_vsliq_n_v:
6554  Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
6555  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
6556  Ops, "vsli_n");
6557  case NEON::BI__builtin_neon_vsra_n_v:
6558  case NEON::BI__builtin_neon_vsraq_n_v:
6559  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6560  Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6561  return Builder.CreateAdd(Ops[0], Ops[1]);
6562  case NEON::BI__builtin_neon_vst1q_lane_v:
6563  // Handle 64-bit integer elements as a special case. Use a shuffle to get
6564  // a one-element vector and avoid poor code for i64 in the backend.
6565  if (VTy->getElementType()->isIntegerTy(64)) {
6566  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6567  Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
6568  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
6569  Ops[2] = getAlignmentValue32(PtrOp0);
6570  llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
6571  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
6572  Tys), Ops);
6573  }
6574  LLVM_FALLTHROUGH;
6575  case NEON::BI__builtin_neon_vst1_lane_v: {
6576  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6577  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6578  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6579  auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
6580  return St;
6581  }
6582  case NEON::BI__builtin_neon_vtbl1_v:
6583  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
6584  Ops, "vtbl1");
6585  case NEON::BI__builtin_neon_vtbl2_v:
6586  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
6587  Ops, "vtbl2");
6588  case NEON::BI__builtin_neon_vtbl3_v:
6589  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
6590  Ops, "vtbl3");
6591  case NEON::BI__builtin_neon_vtbl4_v:
6592  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
6593  Ops, "vtbl4");
6594  case NEON::BI__builtin_neon_vtbx1_v:
6595  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
6596  Ops, "vtbx1");
6597  case NEON::BI__builtin_neon_vtbx2_v:
6598  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
6599  Ops, "vtbx2");
6600  case NEON::BI__builtin_neon_vtbx3_v:
6601  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
6602  Ops, "vtbx3");
6603  case NEON::BI__builtin_neon_vtbx4_v:
6604  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
6605  Ops, "vtbx4");
6606  }
6607 }
6608 
6609 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
6610  const CallExpr *E,
6612  llvm::Triple::ArchType Arch) {
6613  unsigned int Int = 0;
6614  const char *s = nullptr;
6615 
6616  switch (BuiltinID) {
6617  default:
6618  return nullptr;
6619  case NEON::BI__builtin_neon_vtbl1_v:
6620  case NEON::BI__builtin_neon_vqtbl1_v:
6621  case NEON::BI__builtin_neon_vqtbl1q_v:
6622  case NEON::BI__builtin_neon_vtbl2_v:
6623  case NEON::BI__builtin_neon_vqtbl2_v:
6624  case NEON::BI__builtin_neon_vqtbl2q_v:
6625  case NEON::BI__builtin_neon_vtbl3_v:
6626  case NEON::BI__builtin_neon_vqtbl3_v:
6627  case NEON::BI__builtin_neon_vqtbl3q_v:
6628  case NEON::BI__builtin_neon_vtbl4_v:
6629  case NEON::BI__builtin_neon_vqtbl4_v:
6630  case NEON::BI__builtin_neon_vqtbl4q_v:
6631  break;
6632  case NEON::BI__builtin_neon_vtbx1_v:
6633  case NEON::BI__builtin_neon_vqtbx1_v:
6634  case NEON::BI__builtin_neon_vqtbx1q_v:
6635  case NEON::BI__builtin_neon_vtbx2_v:
6636  case NEON::BI__builtin_neon_vqtbx2_v:
6637  case NEON::BI__builtin_neon_vqtbx2q_v:
6638  case NEON::BI__builtin_neon_vtbx3_v:
6639  case NEON::BI__builtin_neon_vqtbx3_v:
6640  case NEON::BI__builtin_neon_vqtbx3q_v:
6641  case NEON::BI__builtin_neon_vtbx4_v:
6642  case NEON::BI__builtin_neon_vqtbx4_v:
6643  case NEON::BI__builtin_neon_vqtbx4q_v:
6644  break;
6645  }
6646 
6647  assert(E->getNumArgs() >= 3);
6648 
6649  // Get the last argument, which specifies the vector type.
6650  llvm::APSInt Result;
6651  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
6652  if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
6653  return nullptr;
6654 
6655  // Determine the type of this overloaded NEON intrinsic.
6656  NeonTypeFlags Type(Result.getZExtValue());
6657  llvm::VectorType *Ty = GetNeonType(&CGF, Type);
6658  if (!Ty)
6659  return nullptr;
6660 
6661  CodeGen::CGBuilderTy &Builder = CGF.Builder;
6662 
6663  // AArch64 scalar builtins are not overloaded, they do not have an extra
6664  // argument that specifies the vector type, need to handle each case.
6665  switch (BuiltinID) {
6666  case NEON::BI__builtin_neon_vtbl1_v: {
6667  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
6668  Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
6669  "vtbl1");
6670  }
6671  case NEON::BI__builtin_neon_vtbl2_v: {
6672  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
6673  Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
6674  "vtbl1");
6675  }
6676  case NEON::BI__builtin_neon_vtbl3_v: {
6677  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
6678  Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
6679  "vtbl2");
6680  }
6681  case NEON::BI__builtin_neon_vtbl4_v: {
6682  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
6683  Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
6684  "vtbl2");
6685  }
6686  case NEON::BI__builtin_neon_vtbx1_v: {
6687  Value *TblRes =
6688  packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
6689  Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
6690 
6691  llvm::Constant *EightV = ConstantInt::get(Ty, 8);
6692  Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
6693  CmpRes = Builder.CreateSExt(CmpRes, Ty);
6694 
6695  Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
6696  Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
6697  return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
6698  }
6699  case NEON::BI__builtin_neon_vtbx2_v: {
6700  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
6701  Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
6702  "vtbx1");
6703  }
6704  case NEON::BI__builtin_neon_vtbx3_v: {
6705  Value *TblRes =
6706  packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
6707  Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
6708 
6709  llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
6710  Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
6711  TwentyFourV);
6712  CmpRes = Builder.CreateSExt(CmpRes, Ty);
6713 
6714  Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
6715  Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
6716  return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
6717  }
6718  case NEON::BI__builtin_neon_vtbx4_v: {
6719  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
6720  Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
6721  "vtbx2");
6722  }
6723  case NEON::BI__builtin_neon_vqtbl1_v:
6724  case NEON::BI__builtin_neon_vqtbl1q_v:
6725  Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
6726  case NEON::BI__builtin_neon_vqtbl2_v:
6727  case NEON::BI__builtin_neon_vqtbl2q_v: {
6728  Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
6729  case NEON::BI__builtin_neon_vqtbl3_v:
6730  case NEON::BI__builtin_neon_vqtbl3q_v:
6731  Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
6732  case NEON::BI__builtin_neon_vqtbl4_v:
6733  case NEON::BI__builtin_neon_vqtbl4q_v:
6734  Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
6735  case NEON::BI__builtin_neon_vqtbx1_v:
6736  case NEON::BI__builtin_neon_vqtbx1q_v:
6737  Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
6738  case NEON::BI__builtin_neon_vqtbx2_v:
6739  case NEON::BI__builtin_neon_vqtbx2q_v:
6740  Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
6741  case NEON::BI__builtin_neon_vqtbx3_v:
6742  case NEON::BI__builtin_neon_vqtbx3q_v:
6743  Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
6744  case NEON::BI__builtin_neon_vqtbx4_v:
6745  case NEON::BI__builtin_neon_vqtbx4q_v:
6746  Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
6747  }
6748  }
6749 
6750  if (!Int)
6751  return nullptr;
6752 
6753  Function *F = CGF.CGM.getIntrinsic(Int, Ty);
6754  return CGF.EmitNeonCall(F, Ops, s);
6755 }
6756 
6758  llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
6759  Op = Builder.CreateBitCast(Op, Int16Ty);
6760  Value *V = UndefValue::get(VTy);
6761  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
6762  Op = Builder.CreateInsertElement(V, Op, CI);
6763  return Op;
6764 }
6765 
6767  const CallExpr *E,
6768  llvm::Triple::ArchType Arch) {
6769  unsigned HintID = static_cast<unsigned>(-1);
6770  switch (BuiltinID) {
6771  default: break;
6772  case AArch64::BI__builtin_arm_nop:
6773  HintID = 0;
6774  break;
6775  case AArch64::BI__builtin_arm_yield:
6776  case AArch64::BI__yield:
6777  HintID = 1;
6778  break;
6779  case AArch64::BI__builtin_arm_wfe:
6780  case AArch64::BI__wfe:
6781  HintID = 2;
6782  break;
6783  case AArch64::BI__builtin_arm_wfi:
6784  case AArch64::BI__wfi:
6785  HintID = 3;
6786  break;
6787  case AArch64::BI__builtin_arm_sev:
6788  case AArch64::BI__sev:
6789  HintID = 4;
6790  break;
6791  case AArch64::BI__builtin_arm_sevl:
6792  case AArch64::BI__sevl:
6793  HintID = 5;
6794  break;
6795  }
6796 
6797  if (HintID != static_cast<unsigned>(-1)) {
6798  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
6799  return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
6800  }
6801 
6802  if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
6803  Value *Address = EmitScalarExpr(E->getArg(0));
6804  Value *RW = EmitScalarExpr(E->getArg(1));
6805  Value *CacheLevel = EmitScalarExpr(E->getArg(2));
6806  Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
6807  Value *IsData = EmitScalarExpr(E->getArg(4));
6808 
6809  Value *Locality = nullptr;
6810  if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
6811  // Temporal fetch, needs to convert cache level to locality.
6812  Locality = llvm::ConstantInt::get(Int32Ty,
6813  -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
6814  } else {
6815  // Streaming fetch.
6816  Locality = llvm::ConstantInt::get(Int32Ty, 0);
6817  }
6818 
6819  // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
6820  // PLDL3STRM or PLDL2STRM.
6821  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
6822  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
6823  }
6824 
6825  if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
6826  assert((getContext().getTypeSize(E->getType()) == 32) &&
6827  "rbit of unusual size!");
6828  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
6829  return Builder.CreateCall(
6830  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
6831  }
6832  if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
6833  assert((getContext().getTypeSize(E->getType()) == 64) &&
6834  "rbit of unusual size!");
6835  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
6836  return Builder.CreateCall(
6837  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
6838  }
6839 
6840  if (BuiltinID == AArch64::BI__clear_cache) {
6841  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
6842  const FunctionDecl *FD = E->getDirectCallee();
6843  Value *Ops[2];
6844  for (unsigned i = 0; i < 2; i++)
6845  Ops[i] = EmitScalarExpr(E->getArg(i));
6846  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
6847  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
6848  StringRef Name = FD->getName();
6849  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
6850  }
6851 
6852  if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
6853  BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
6854  getContext().getTypeSize(E->getType()) == 128) {
6855  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
6856  ? Intrinsic::aarch64_ldaxp
6857  : Intrinsic::aarch64_ldxp);
6858 
6859  Value *LdPtr = EmitScalarExpr(E->getArg(0));
6860  Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
6861  "ldxp");
6862 
6863  Value *Val0 = Builder.CreateExtractValue(Val, 1);
6864  Value *Val1 = Builder.CreateExtractValue(Val, 0);
6865  llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
6866  Val0 = Builder.CreateZExt(Val0, Int128Ty);
6867  Val1 = Builder.CreateZExt(Val1, Int128Ty);
6868 
6869  Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
6870  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
6871  Val = Builder.CreateOr(Val, Val1);
6872  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
6873  } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
6874  BuiltinID == AArch64::BI__builtin_arm_ldaex) {
6875  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
6876 
6877  QualType Ty = E->getType();
6878  llvm::Type *RealResTy = ConvertType(Ty);
6879  llvm::Type *PtrTy = llvm::IntegerType::get(
6880  getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
6881  LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
6882 
6883  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
6884  ? Intrinsic::aarch64_ldaxr
6885  : Intrinsic::aarch64_ldxr,
6886  PtrTy);
6887  Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
6888 
6889  if (RealResTy->isPointerTy())
6890  return Builder.CreateIntToPtr(Val, RealResTy);
6891 
6892  llvm::Type *IntResTy = llvm::IntegerType::get(
6893  getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
6894  Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
6895  return Builder.CreateBitCast(Val, RealResTy);
6896  }
6897 
6898  if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
6899  BuiltinID == AArch64::BI__builtin_arm_stlex) &&
6900  getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
6901  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
6902  ? Intrinsic::aarch64_stlxp
6903  : Intrinsic::aarch64_stxp);
6904  llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
6905 
6906  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6907  EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
6908 
6909  Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
6910  llvm::Value *Val = Builder.CreateLoad(Tmp);
6911 
6912  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
6913  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
6914  Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
6915  Int8PtrTy);
6916  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
6917  }
6918 
6919  if (BuiltinID == AArch64::BI__builtin_arm_strex ||
6920  BuiltinID == AArch64::BI__builtin_arm_stlex) {
6921  Value *StoreVal = EmitScalarExpr(E->getArg(0));
6922  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
6923 
6924  QualType Ty = E->getArg(0)->getType();
6925  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
6926  getContext().getTypeSize(Ty));
6927  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
6928 
6929  if (StoreVal->getType()->isPointerTy())
6930  StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
6931  else {
6932  llvm::Type *IntTy = llvm::IntegerType::get(
6933  getLLVMContext(),
6934  CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
6935  StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
6936  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
6937  }
6938 
6939  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
6940  ? Intrinsic::aarch64_stlxr
6941  : Intrinsic::aarch64_stxr,
6942  StoreAddr->getType());
6943  return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
6944  }
6945 
6946  if (BuiltinID == AArch64::BI__getReg) {
6947  Expr::EvalResult Result;
6948  if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
6949  llvm_unreachable("Sema will ensure that the parameter is constant");
6950 
6951  llvm::APSInt Value = Result.Val.getInt();
6952  LLVMContext &Context = CGM.getLLVMContext();
6953  std::string Reg = Value == 31 ? "sp" : "x" + Value.toString(10);
6954 
6955  llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
6956  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
6957  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
6958 
6959  llvm::Value *F =
6960  CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
6961  return Builder.CreateCall(F, Metadata);
6962  }
6963 
6964  if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
6965  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
6966  return Builder.CreateCall(F);
6967  }
6968 
6969  if (BuiltinID == AArch64::BI_ReadWriteBarrier)
6970  return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
6971  llvm::SyncScope::SingleThread);
6972 
6973  // CRC32
6974  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
6975  switch (BuiltinID) {
6976  case AArch64::BI__builtin_arm_crc32b:
6977  CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
6978  case AArch64::BI__builtin_arm_crc32cb:
6979  CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
6980  case AArch64::BI__builtin_arm_crc32h:
6981  CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
6982  case AArch64::BI__builtin_arm_crc32ch:
6983  CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
6984  case AArch64::BI__builtin_arm_crc32w:
6985  CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
6986  case AArch64::BI__builtin_arm_crc32cw:
6987  CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
6988  case AArch64::BI__builtin_arm_crc32d:
6989  CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
6990  case AArch64::BI__builtin_arm_crc32cd:
6991  CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
6992  }
6993 
6994  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
6995  Value *Arg0 = EmitScalarExpr(E->getArg(0));
6996  Value *Arg1 = EmitScalarExpr(E->getArg(1));
6997  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
6998 
6999  llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
7000  Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
7001 
7002  return Builder.CreateCall(F, {Arg0, Arg1});
7003  }
7004 
7005  if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
7006  BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
7007  BuiltinID == AArch64::BI__builtin_arm_rsrp ||
7008  BuiltinID == AArch64::BI__builtin_arm_wsr ||
7009  BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
7010  BuiltinID == AArch64::BI__builtin_arm_wsrp) {
7011 
7012  bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
7013  BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
7014  BuiltinID == AArch64::BI__builtin_arm_rsrp;
7015 
7016  bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
7017  BuiltinID == AArch64::BI__builtin_arm_wsrp;
7018 
7019  bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
7020  BuiltinID != AArch64::BI__builtin_arm_wsr;
7021 
7022  llvm::Type *ValueType;
7023  llvm::Type *RegisterType = Int64Ty;
7024  if (IsPointerBuiltin) {
7025  ValueType = VoidPtrTy;
7026  } else if (Is64Bit) {
7027  ValueType = Int64Ty;
7028  } else {
7029  ValueType = Int32Ty;
7030  }
7031 
7032  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
7033  }
7034 
7035  if (BuiltinID == AArch64::BI_ReadStatusReg ||
7036  BuiltinID == AArch64::BI_WriteStatusReg) {
7037  LLVMContext &Context = CGM.getLLVMContext();
7038 
7039  unsigned SysReg =
7040  E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
7041 
7042  std::string SysRegStr;
7043  llvm::raw_string_ostream(SysRegStr) <<
7044  ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
7045  ((SysReg >> 11) & 7) << ":" <<
7046  ((SysReg >> 7) & 15) << ":" <<
7047  ((SysReg >> 3) & 15) << ":" <<
7048  ( SysReg & 7);
7049 
7050  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
7051  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
7052  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
7053 
7054  llvm::Type *RegisterType = Int64Ty;
7055  llvm::Type *Types[] = { RegisterType };
7056 
7057  if (BuiltinID == AArch64::BI_ReadStatusReg) {
7058  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
7059 
7060  return Builder.CreateCall(F, Metadata);
7061  }
7062 
7063  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
7064  llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
7065 
7066  return Builder.CreateCall(F, { Metadata, ArgValue });
7067  }
7068 
7069  if (BuiltinID == AArch64::BI_AddressOfReturnAddress) {
7070  llvm::Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
7071  return Builder.CreateCall(F);
7072  }
7073 
7074  // Find out if any arguments are required to be integer constant
7075  // expressions.
7076  unsigned ICEArguments = 0;
7078  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7079  assert(Error == ASTContext::GE_None && "Should not codegen an error");
7080 
7082  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
7083  if ((ICEArguments & (1 << i)) == 0) {
7084  Ops.push_back(EmitScalarExpr(E->getArg(i)));
7085  } else {
7086  // If this is required to be a constant, constant fold it so that we know
7087  // that the generated intrinsic gets a ConstantInt.
7088  llvm::APSInt Result;
7089  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7090  assert(IsConst && "Constant arg isn't actually constant?");
7091  (void)IsConst;
7092  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7093  }
7094  }
7095 
7096  auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
7097  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
7098  SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
7099 
7100  if (Builtin) {
7101  Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
7102  Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
7103  assert(Result && "SISD intrinsic should have been handled");
7104  return Result;
7105  }
7106 
7107  llvm::APSInt Result;
7108  const Expr *Arg = E->getArg(E->getNumArgs()-1);
7109  NeonTypeFlags Type(0);
7110  if (Arg->isIntegerConstantExpr(Result, getContext()))
7111  // Determine the type of this overloaded NEON intrinsic.
7112  Type = NeonTypeFlags(Result.getZExtValue());
7113 
7114  bool usgn = Type.isUnsigned();
7115  bool quad = Type.isQuad();
7116 
7117  // Handle non-overloaded intrinsics first.
7118  switch (BuiltinID) {
7119  default: break;
7120  case NEON::BI__builtin_neon_vabsh_f16:
7121  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7122  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
7123  case NEON::BI__builtin_neon_vldrq_p128: {
7124  llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
7125  llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
7126  Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
7127  return Builder.CreateAlignedLoad(Int128Ty, Ptr,
7129  }
7130  case NEON::BI__builtin_neon_vstrq_p128: {
7131  llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
7132  Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
7133  return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
7134  }
7135  case NEON::BI__builtin_neon_vcvts_u32_f32:
7136  case NEON::BI__builtin_neon_vcvtd_u64_f64:
7137  usgn = true;
7138  LLVM_FALLTHROUGH;
7139  case NEON::BI__builtin_neon_vcvts_s32_f32:
7140  case NEON::BI__builtin_neon_vcvtd_s64_f64: {
7141  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7142  bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
7143  llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
7144  llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
7145  Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
7146  if (usgn)
7147  return Builder.CreateFPToUI(Ops[0], InTy);
7148  return Builder.CreateFPToSI(Ops[0], InTy);
7149  }
7150  case NEON::BI__builtin_neon_vcvts_f32_u32:
7151  case NEON::BI__builtin_neon_vcvtd_f64_u64:
7152  usgn = true;
7153  LLVM_FALLTHROUGH;
7154  case NEON::BI__builtin_neon_vcvts_f32_s32:
7155  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
7156  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7157  bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
7158  llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
7159  llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
7160  Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
7161  if (usgn)
7162  return Builder.CreateUIToFP(Ops[0], FTy);
7163  return Builder.CreateSIToFP(Ops[0], FTy);
7164  }
7165  case NEON::BI__builtin_neon_vcvth_f16_u16:
7166  case NEON::BI__builtin_neon_vcvth_f16_u32:
7167  case NEON::BI__builtin_neon_vcvth_f16_u64:
7168  usgn = true;
7169  LLVM_FALLTHROUGH;
7170  case NEON::BI__builtin_neon_vcvth_f16_s16:
7171  case NEON::BI__builtin_neon_vcvth_f16_s32:
7172  case NEON::BI__builtin_neon_vcvth_f16_s64: {
7173  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7174  llvm::Type *FTy = HalfTy;
7175  llvm::Type *InTy;
7176  if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
7177  InTy = Int64Ty;
7178  else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
7179  InTy = Int32Ty;
7180  else
7181  InTy = Int16Ty;
7182  Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
7183  if (usgn)
7184  return Builder.CreateUIToFP(Ops[0], FTy);
7185  return Builder.CreateSIToFP(Ops[0], FTy);
7186  }
7187  case NEON::BI__builtin_neon_vcvth_u16_f16:
7188  usgn = true;
7189  LLVM_FALLTHROUGH;
7190  case NEON::BI__builtin_neon_vcvth_s16_f16: {
7191  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7192  Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
7193  if (usgn)
7194  return Builder.CreateFPToUI(Ops[0], Int16Ty);
7195  return Builder.CreateFPToSI(Ops[0], Int16Ty);
7196  }
7197  case NEON::BI__builtin_neon_vcvth_u32_f16:
7198  usgn = true;
7199  LLVM_FALLTHROUGH;
7200  case NEON::BI__builtin_neon_vcvth_s32_f16: {
7201  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7202  Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
7203  if (usgn)
7204  return Builder.CreateFPToUI(Ops[0], Int32Ty);
7205  return Builder.CreateFPToSI(Ops[0], Int32Ty);
7206  }
7207  case NEON::BI__builtin_neon_vcvth_u64_f16:
7208  usgn = true;
7209  LLVM_FALLTHROUGH;
7210  case NEON::BI__builtin_neon_vcvth_s64_f16: {
7211  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7212  Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
7213  if (usgn)
7214  return Builder.CreateFPToUI(Ops[0], Int64Ty);
7215  return Builder.CreateFPToSI(Ops[0], Int64Ty);
7216  }
7217  case NEON::BI__builtin_neon_vcvtah_u16_f16:
7218  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
7219  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
7220  case NEON::BI__builtin_neon_vcvtph_u16_f16:
7221  case NEON::BI__builtin_neon_vcvtah_s16_f16:
7222  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
7223  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
7224  case NEON::BI__builtin_neon_vcvtph_s16_f16: {
7225  unsigned Int;
7226  llvm::Type* InTy = Int32Ty;
7227  llvm::Type* FTy = HalfTy;
7228  llvm::Type *Tys[2] = {InTy, FTy};
7229  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7230  switch (BuiltinID) {
7231  default: llvm_unreachable("missing builtin ID in switch!");
7232  case NEON::BI__builtin_neon_vcvtah_u16_f16:
7233  Int = Intrinsic::aarch64_neon_fcvtau; break;
7234  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
7235  Int = Intrinsic::aarch64_neon_fcvtmu; break;
7236  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
7237  Int = Intrinsic::aarch64_neon_fcvtnu; break;
7238  case NEON::BI__builtin_neon_vcvtph_u16_f16:
7239  Int = Intrinsic::aarch64_neon_fcvtpu; break;
7240  case NEON::BI__builtin_neon_vcvtah_s16_f16:
7241  Int = Intrinsic::aarch64_neon_fcvtas; break;
7242  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
7243  Int = Intrinsic::aarch64_neon_fcvtms; break;
7244  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
7245  Int = Intrinsic::aarch64_neon_fcvtns; break;
7246  case NEON::BI__builtin_neon_vcvtph_s16_f16:
7247  Int = Intrinsic::aarch64_neon_fcvtps; break;
7248  }
7249  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
7250  return Builder.CreateTrunc(Ops[0], Int16Ty);
7251  }
7252  case NEON::BI__builtin_neon_vcaleh_f16:
7253  case NEON::BI__builtin_neon_vcalth_f16:
7254  case NEON::BI__builtin_neon_vcageh_f16:
7255  case NEON::BI__builtin_neon_vcagth_f16: {
7256  unsigned Int;
7257  llvm::Type* InTy = Int32Ty;
7258  llvm::Type* FTy = HalfTy;
7259  llvm::Type *Tys[2] = {InTy, FTy};
7260  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7261  switch (BuiltinID) {
7262  default: llvm_unreachable("missing builtin ID in switch!");
7263  case NEON::BI__builtin_neon_vcageh_f16:
7264  Int = Intrinsic::aarch64_neon_facge; break;
7265  case NEON::BI__builtin_neon_vcagth_f16:
7266  Int = Intrinsic::aarch64_neon_facgt; break;
7267  case NEON::BI__builtin_neon_vcaleh_f16:
7268  Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
7269  case NEON::BI__builtin_neon_vcalth_f16:
7270  Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
7271  }
7272  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
7273  return Builder.CreateTrunc(Ops[0], Int16Ty);
7274  }
7275  case NEON::BI__builtin_neon_vcvth_n_s16_f16:
7276  case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
7277  unsigned Int;
7278  llvm::Type* InTy = Int32Ty;
7279  llvm::Type* FTy = HalfTy;
7280  llvm::Type *Tys[2] = {InTy, FTy};
7281  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7282  switch (BuiltinID) {
7283  default: llvm_unreachable("missing builtin ID in switch!");
7284  case NEON::BI__builtin_neon_vcvth_n_s16_f16:
7285  Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
7286  case NEON::BI__builtin_neon_vcvth_n_u16_f16:
7287  Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
7288  }
7289  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
7290  return Builder.CreateTrunc(Ops[0], Int16Ty);
7291  }
7292  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
7293  case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
7294  unsigned Int;
7295  llvm::Type* FTy = HalfTy;
7296  llvm::Type* InTy = Int32Ty;
7297  llvm::Type *Tys[2] = {FTy, InTy};
7298  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7299  switch (BuiltinID) {
7300  default: llvm_unreachable("missing builtin ID in switch!");
7301  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
7302  Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
7303  Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
7304  break;
7305  case NEON::BI__builtin_neon_vcvth_n_f16_u16:
7306  Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
7307  Ops[0] = Builder.CreateZExt(Ops[0], InTy);
7308  break;
7309  }
7310  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
7311  }
7312  case NEON::BI__builtin_neon_vpaddd_s64: {
7313  llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
7314  Value *Vec = EmitScalarExpr(E->getArg(0));
7315  // The vector is v2f64, so make sure it's bitcast to that.
7316  Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
7317  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
7318  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
7319  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
7320  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
7321  // Pairwise addition of a v2f64 into a scalar f64.
7322  return Builder.CreateAdd(Op0, Op1, "vpaddd");
7323  }
7324  case NEON::BI__builtin_neon_vpaddd_f64: {
7325  llvm::Type *Ty =
7326  llvm::VectorType::get(DoubleTy, 2);
7327  Value *Vec = EmitScalarExpr(E->getArg(0));
7328  // The vector is v2f64, so make sure it's bitcast to that.
7329  Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
7330  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
7331  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
7332  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
7333  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
7334  // Pairwise addition of a v2f64 into a scalar f64.
7335  return Builder.CreateFAdd(Op0, Op1, "vpaddd");
7336  }
7337  case NEON::BI__builtin_neon_vpadds_f32: {
7338  llvm::Type *Ty =
7339  llvm::VectorType::get(FloatTy, 2);
7340  Value *Vec = EmitScalarExpr(E->getArg(0));
7341  // The vector is v2f32, so make sure it's bitcast to that.
7342  Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
7343  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
7344  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
7345  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
7346  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
7347  // Pairwise addition of a v2f32 into a scalar f32.
7348  return Builder.CreateFAdd(Op0, Op1, "vpaddd");
7349  }
7350  case NEON::BI__builtin_neon_vceqzd_s64:
7351  case NEON::BI__builtin_neon_vceqzd_f64:
7352  case NEON::BI__builtin_neon_vceqzs_f32:
7353  case NEON::BI__builtin_neon_vceqzh_f16:
7354  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7355  return EmitAArch64CompareBuiltinExpr(
7356  Ops[0], ConvertType(E->getCallReturnType(getContext())),
7357  ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
7358  case NEON::BI__builtin_neon_vcgezd_s64:
7359  case NEON::BI__builtin_neon_vcgezd_f64:
7360  case NEON::BI__builtin_neon_vcgezs_f32:
7361  case NEON::BI__builtin_neon_vcgezh_f16:
7362  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7363  return EmitAArch64CompareBuiltinExpr(
7364  Ops[0], ConvertType(E->getCallReturnType(getContext())),
7365  ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
7366  case NEON::BI__builtin_neon_vclezd_s64:
7367  case NEON::BI__builtin_neon_vclezd_f64:
7368  case NEON::BI__builtin_neon_vclezs_f32:
7369  case NEON::BI__builtin_neon_vclezh_f16:
7370  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7371  return EmitAArch64CompareBuiltinExpr(
7372  Ops[0], ConvertType(E->getCallReturnType(getContext())),
7373  ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
7374  case NEON::BI__builtin_neon_vcgtzd_s64:
7375  case NEON::BI__builtin_neon_vcgtzd_f64:
7376  case NEON::BI__builtin_neon_vcgtzs_f32:
7377  case NEON::BI__builtin_neon_vcgtzh_f16:
7378  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7379  return EmitAArch64CompareBuiltinExpr(
7380  Ops[0], ConvertType(E->getCallReturnType(getContext())),
7381  ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
7382  case NEON::BI__builtin_neon_vcltzd_s64:
7383  case NEON::BI__builtin_neon_vcltzd_f64:
7384  case NEON::BI__builtin_neon_vcltzs_f32:
7385  case NEON::BI__builtin_neon_vcltzh_f16:
7386  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7387  return EmitAArch64CompareBuiltinExpr(
7388  Ops[0], ConvertType(E->getCallReturnType(getContext())),
7389  ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
7390 
7391  case NEON::BI__builtin_neon_vceqzd_u64: {
7392  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7393  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
7394  Ops[0] =
7395  Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
7396  return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
7397  }
7398  case NEON::BI__builtin_neon_vceqd_f64:
7399  case NEON::BI__builtin_neon_vcled_f64:
7400  case NEON::BI__builtin_neon_vcltd_f64:
7401  case NEON::BI__builtin_neon_vcged_f64:
7402  case NEON::BI__builtin_neon_vcgtd_f64: {
7403  llvm::CmpInst::Predicate P;
7404  switch (BuiltinID) {
7405  default: llvm_unreachable("missing builtin ID in switch!");
7406  case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
7407  case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
7408  case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
7409  case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
7410  case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
7411  }
7412  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7413  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7414  Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
7415  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
7416  return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
7417  }
7418  case NEON::BI__builtin_neon_vceqs_f32:
7419  case NEON::BI__builtin_neon_vcles_f32:
7420  case NEON::BI__builtin_neon_vclts_f32:
7421  case NEON::BI__builtin_neon_vcges_f32:
7422  case NEON::BI__builtin_neon_vcgts_f32: {
7423  llvm::CmpInst::Predicate P;
7424  switch (BuiltinID) {
7425  default: llvm_unreachable("missing builtin ID in switch!");
7426  case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
7427  case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
7428  case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
7429  case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
7430  case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
7431  }
7432  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7433  Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
7434  Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
7435  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
7436  return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
7437  }
7438  case NEON::BI__builtin_neon_vceqh_f16:
7439  case NEON::BI__builtin_neon_vcleh_f16:
7440  case NEON::BI__builtin_neon_vclth_f16:
7441  case NEON::BI__builtin_neon_vcgeh_f16:
7442  case NEON::BI__builtin_neon_vcgth_f16: {
7443  llvm::CmpInst::Predicate P;
7444  switch (BuiltinID) {
7445  default: llvm_unreachable("missing builtin ID in switch!");
7446  case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
7447  case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
7448  case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
7449  case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
7450  case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
7451  }
7452  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7453  Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
7454  Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
7455  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
7456  return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
7457  }
7458  case NEON::BI__builtin_neon_vceqd_s64:
7459  case NEON::BI__builtin_neon_vceqd_u64:
7460  case NEON::BI__builtin_neon_vcgtd_s64:
7461  case NEON::BI__builtin_neon_vcgtd_u64:
7462  case NEON::BI__builtin_neon_vcltd_s64:
7463  case NEON::BI__builtin_neon_vcltd_u64:
7464  case NEON::BI__builtin_neon_vcged_u64:
7465  case NEON::BI__builtin_neon_vcged_s64:
7466  case NEON::BI__builtin_neon_vcled_u64:
7467  case NEON::BI__builtin_neon_vcled_s64: {
7468  llvm::CmpInst::Predicate P;
7469  switch (BuiltinID) {
7470  default: llvm_unreachable("missing builtin ID in switch!");
7471  case NEON::BI__builtin_neon_vceqd_s64:
7472  case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
7473  case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
7474  case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
7475  case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
7476  case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
7477  case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
7478  case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
7479  case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
7480  case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
7481  }
7482  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7483  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
7484  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
7485  Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
7486  return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
7487  }
7488  case NEON::BI__builtin_neon_vtstd_s64:
7489  case NEON::BI__builtin_neon_vtstd_u64: {
7490  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7491  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
7492  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
7493  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
7494  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
7495  llvm::Constant::getNullValue(Int64Ty));
7496  return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
7497  }
7498  case NEON::BI__builtin_neon_vset_lane_i8:
7499  case NEON::BI__builtin_neon_vset_lane_i16:
7500  case NEON::BI__builtin_neon_vset_lane_i32:
7501  case NEON::BI__builtin_neon_vset_lane_i64:
7502  case NEON::BI__builtin_neon_vset_lane_f32:
7503  case NEON::BI__builtin_neon_vsetq_lane_i8:
7504  case NEON::BI__builtin_neon_vsetq_lane_i16:
7505  case NEON::BI__builtin_neon_vsetq_lane_i32:
7506  case NEON::BI__builtin_neon_vsetq_lane_i64:
7507  case NEON::BI__builtin_neon_vsetq_lane_f32:
7508  Ops.push_back(EmitScalarExpr(E->getArg(2)));
7509  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
7510  case NEON::BI__builtin_neon_vset_lane_f64:
7511  // The vector type needs a cast for the v1f64 variant.
7512  Ops[1] = Builder.CreateBitCast(Ops[1],
7513  llvm::VectorType::get(DoubleTy, 1));
7514  Ops.push_back(EmitScalarExpr(E->getArg(2)));
7515  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
7516  case NEON::BI__builtin_neon_vsetq_lane_f64:
7517  // The vector type needs a cast for the v2f64 variant.
7518  Ops[1] = Builder.CreateBitCast(Ops[1],
7519  llvm::VectorType::get(DoubleTy, 2));
7520  Ops.push_back(EmitScalarExpr(E->getArg(2)));
7521  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
7522 
7523  case NEON::BI__builtin_neon_vget_lane_i8:
7524  case NEON::BI__builtin_neon_vdupb_lane_i8:
7525  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
7526  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7527  "vget_lane");
7528  case NEON::BI__builtin_neon_vgetq_lane_i8:
7529  case NEON::BI__builtin_neon_vdupb_laneq_i8:
7530  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
7531  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7532  "vgetq_lane");
7533  case NEON::BI__builtin_neon_vget_lane_i16:
7534  case NEON::BI__builtin_neon_vduph_lane_i16:
7535  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
7536  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7537  "vget_lane");
7538  case NEON::BI__builtin_neon_vgetq_lane_i16:
7539  case NEON::BI__builtin_neon_vduph_laneq_i16:
7540  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
7541  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7542  "vgetq_lane");
7543  case NEON::BI__builtin_neon_vget_lane_i32:
7544  case NEON::BI__builtin_neon_vdups_lane_i32:
7545  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
7546  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7547  "vget_lane");
7548  case NEON::BI__builtin_neon_vdups_lane_f32:
7549  Ops[0] = Builder.CreateBitCast(Ops[0],
7550  llvm::VectorType::get(FloatTy, 2));
7551  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7552  "vdups_lane");
7553  case NEON::BI__builtin_neon_vgetq_lane_i32:
7554  case NEON::BI__builtin_neon_vdups_laneq_i32:
7555  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
7556  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7557  "vgetq_lane");
7558  case NEON::BI__builtin_neon_vget_lane_i64:
7559  case NEON::BI__builtin_neon_vdupd_lane_i64:
7560  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
7561  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7562  "vget_lane");
7563  case NEON::BI__builtin_neon_vdupd_lane_f64:
7564  Ops[0] = Builder.CreateBitCast(Ops[0],
7565  llvm::VectorType::get(DoubleTy, 1));
7566  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7567  "vdupd_lane");
7568  case NEON::BI__builtin_neon_vgetq_lane_i64:
7569  case NEON::BI__builtin_neon_vdupd_laneq_i64:
7570  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
7571  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7572  "vgetq_lane");
7573  case NEON::BI__builtin_neon_vget_lane_f32:
7574  Ops[0] = Builder.CreateBitCast(Ops[0],
7575  llvm::VectorType::get(FloatTy, 2));
7576  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7577  "vget_lane");
7578  case NEON::BI__builtin_neon_vget_lane_f64:
7579  Ops[0] = Builder.CreateBitCast(Ops[0],
7580  llvm::VectorType::get(DoubleTy, 1));
7581  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7582  "vget_lane");
7583  case NEON::BI__builtin_neon_vgetq_lane_f32:
7584  case NEON::BI__builtin_neon_vdups_laneq_f32:
7585  Ops[0] = Builder.CreateBitCast(Ops[0],
7586  llvm::VectorType::get(FloatTy, 4));
7587  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7588  "vgetq_lane");
7589  case NEON::BI__builtin_neon_vgetq_lane_f64:
7590  case NEON::BI__builtin_neon_vdupd_laneq_f64:
7591  Ops[0] = Builder.CreateBitCast(Ops[0],
7592  llvm::VectorType::get(DoubleTy, 2));
7593  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7594  "vgetq_lane");
7595  case NEON::BI__builtin_neon_vaddh_f16:
7596  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7597  return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
7598  case NEON::BI__builtin_neon_vsubh_f16:
7599  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7600  return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
7601  case NEON::BI__builtin_neon_vmulh_f16:
7602  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7603  return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
7604  case NEON::BI__builtin_neon_vdivh_f16:
7605  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7606  return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
7607  case NEON::BI__builtin_neon_vfmah_f16: {
7608  Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
7609  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7610  return Builder.CreateCall(F,
7611  {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
7612  }
7613  case NEON::BI__builtin_neon_vfmsh_f16: {
7614  Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
7615  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
7616  Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
7617  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7618  return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
7619  }
7620  case NEON::BI__builtin_neon_vaddd_s64:
7621  case NEON::BI__builtin_neon_vaddd_u64:
7622  return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
7623  case NEON::BI__builtin_neon_vsubd_s64:
7624  case NEON::BI__builtin_neon_vsubd_u64:
7625  return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
7626  case NEON::BI__builtin_neon_vqdmlalh_s16:
7627  case NEON::BI__builtin_neon_vqdmlslh_s16: {
7628  SmallVector<Value *, 2> ProductOps;
7629  ProductOps.push_back(vectorWrapScalar16(Ops[1]));
7630  ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
7631  llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
7632  Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
7633  ProductOps, "vqdmlXl");
7634  Constant *CI = ConstantInt::get(SizeTy, 0);
7635  Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
7636 
7637  unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
7638  ? Intrinsic::aarch64_neon_sqadd
7639  : Intrinsic::aarch64_neon_sqsub;
7640  return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
7641  }
7642  case NEON::BI__builtin_neon_vqshlud_n_s64: {
7643  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7644  Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
7645  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
7646  Ops, "vqshlu_n");
7647  }
7648  case NEON::BI__builtin_neon_vqshld_n_u64:
7649  case NEON::BI__builtin_neon_vqshld_n_s64: {
7650  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
7651  ? Intrinsic::aarch64_neon_uqshl
7652  : Intrinsic::aarch64_neon_sqshl;
7653  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7654  Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
7655  return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
7656  }
7657  case NEON::BI__builtin_neon_vrshrd_n_u64:
7658  case NEON::BI__builtin_neon_vrshrd_n_s64: {
7659  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
7660  ? Intrinsic::aarch64_neon_urshl
7661  : Intrinsic::aarch64_neon_srshl;
7662  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7663  int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
7664  Ops[1] = ConstantInt::get(Int64Ty, -SV);
7665  return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
7666  }
7667  case NEON::BI__builtin_neon_vrsrad_n_u64:
7668  case NEON::BI__builtin_neon_vrsrad_n_s64: {
7669  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
7670  ? Intrinsic::aarch64_neon_urshl
7671  : Intrinsic::aarch64_neon_srshl;
7672  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
7673  Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
7674  Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
7675  {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
7676  return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
7677  }
7678  case NEON::BI__builtin_neon_vshld_n_s64:
7679  case NEON::BI__builtin_neon_vshld_n_u64: {
7680  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7681  return Builder.CreateShl(
7682  Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
7683  }
7684  case NEON::BI__builtin_neon_vshrd_n_s64: {
7685  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7686  return Builder.CreateAShr(
7687  Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
7688  Amt->getZExtValue())),
7689  "shrd_n");
7690  }
7691  case NEON::BI__builtin_neon_vshrd_n_u64: {
7692  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7693  uint64_t ShiftAmt = Amt->getZExtValue();
7694  // Right-shifting an unsigned value by its size yields 0.
7695  if (ShiftAmt == 64)
7696  return ConstantInt::get(Int64Ty, 0);
7697  return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
7698  "shrd_n");
7699  }
7700  case NEON::BI__builtin_neon_vsrad_n_s64: {
7701  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
7702  Ops[1] = Builder.CreateAShr(
7703  Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
7704  Amt->getZExtValue())),
7705  "shrd_n");
7706  return Builder.CreateAdd(Ops[0], Ops[1]);
7707  }
7708  case NEON::BI__builtin_neon_vsrad_n_u64: {
7709  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
7710  uint64_t ShiftAmt = Amt->getZExtValue();
7711  // Right-shifting an unsigned value by its size yields 0.
7712  // As Op + 0 = Op, return Ops[0] directly.
7713  if (ShiftAmt == 64)
7714  return Ops[0];
7715  Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
7716  "shrd_n");
7717  return Builder.CreateAdd(Ops[0], Ops[1]);
7718  }
7719  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
7720  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
7721  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
7722  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
7723  Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
7724  "lane");
7725  SmallVector<Value *, 2> ProductOps;
7726  ProductOps.push_back(vectorWrapScalar16(Ops[1]));
7727  ProductOps.push_back(vectorWrapScalar16(Ops[2]));
7728  llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
7729  Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
7730  ProductOps, "vqdmlXl");
7731  Constant *CI = ConstantInt::get(SizeTy, 0);
7732  Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
7733  Ops.pop_back();
7734 
7735  unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
7736  BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
7737  ? Intrinsic::aarch64_neon_sqadd
7738  : Intrinsic::aarch64_neon_sqsub;
7739  return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
7740  }
7741  case NEON::BI__builtin_neon_vqdmlals_s32:
7742  case NEON::BI__builtin_neon_vqdmlsls_s32: {
7743  SmallVector<Value *, 2> ProductOps;
7744  ProductOps.push_back(Ops[1]);
7745  ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
7746  Ops[1] =
7747  EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
7748  ProductOps, "vqdmlXl");
7749 
7750  unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
7751  ? Intrinsic::aarch64_neon_sqadd
7752  : Intrinsic::aarch64_neon_sqsub;
7753  return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
7754  }
7755  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
7756  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
7757  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
7758  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
7759  Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
7760  "lane");
7761  SmallVector<Value *, 2> ProductOps;
7762  ProductOps.push_back(Ops[1]);
7763  ProductOps.push_back(Ops[2]);
7764  Ops[1] =
7765  EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
7766  ProductOps, "vqdmlXl");
7767  Ops.pop_back();
7768 
7769  unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
7770  BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
7771  ? Intrinsic::aarch64_neon_sqadd
7772  : Intrinsic::aarch64_neon_sqsub;
7773  return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
7774  }
7775  }
7776 
7777  llvm::VectorType *VTy = GetNeonType(this, Type);
7778  llvm::Type *Ty = VTy;
7779  if (!Ty)
7780  return nullptr;
7781 
7782  // Not all intrinsics handled by the common case work for AArch64 yet, so only
7783  // defer to common code if it's been added to our special map.
7784  Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
7785  AArch64SIMDIntrinsicsProvenSorted);
7786 
7787  if (Builtin)
7788  return EmitCommonNeonBuiltinExpr(
7789  Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
7790  Builtin->NameHint, Builtin->TypeModifier, E, Ops,
7791  /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
7792 
7793  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
7794  return V;
7795 
7796  unsigned Int;
7797  switch (BuiltinID) {
7798  default: return nullptr;
7799  case NEON::BI__builtin_neon_vbsl_v:
7800  case NEON::BI__builtin_neon_vbslq_v: {
7801  llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
7802  Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
7803  Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
7804  Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
7805 
7806  Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
7807  Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
7808  Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
7809  return Builder.CreateBitCast(Ops[0], Ty);
7810  }
7811  case NEON::BI__builtin_neon_vfma_lane_v:
7812  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
7813  // The ARM builtins (and instructions) have the addend as the first
7814  // operand, but the 'fma' intrinsics have it last. Swap it around here.
7815  Value *Addend = Ops[0];
7816  Value *Multiplicand = Ops[1];
7817  Value *LaneSource = Ops[2];
7818  Ops[0] = Multiplicand;
7819  Ops[1] = LaneSource;
7820  Ops[2] = Addend;
7821 
7822  // Now adjust things to handle the lane access.
7823  llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
7824  llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
7825  VTy;
7826  llvm::Constant *cst = cast<Constant>(Ops[3]);
7827  Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
7828  Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
7829  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
7830 
7831  Ops.pop_back();
7832  Int = Intrinsic::fma;
7833  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
7834  }
7835  case NEON::BI__builtin_neon_vfma_laneq_v: {
7836  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
7837  // v1f64 fma should be mapped to Neon scalar f64 fma
7838  if (VTy && VTy->getElementType() == DoubleTy) {
7839  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7840  Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
7841  llvm::Type *VTy = GetNeonType(this,
7842  NeonTypeFlags(NeonTypeFlags::Float64, false, true));
7843  Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
7844  Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
7845  Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
7846  Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
7847  return Builder.CreateBitCast(Result, Ty);
7848  }
7849  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
7850  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7851  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7852 
7853  llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
7854  VTy->getNumElements() * 2);
7855  Ops[2] = Builder.CreateBitCast(Ops[2], STy);
7856  Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
7857  cast<ConstantInt>(Ops[3]));
7858  Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
7859 
7860  return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
7861  }
7862  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
7863  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
7864  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7865  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7866 
7867  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7868  Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
7869  return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
7870  }
7871  case NEON::BI__builtin_neon_vfmah_lane_f16:
7872  case NEON::BI__builtin_neon_vfmas_lane_f32:
7873  case NEON::BI__builtin_neon_vfmah_laneq_f16:
7874  case NEON::BI__builtin_neon_vfmas_laneq_f32:
7875  case NEON::BI__builtin_neon_vfmad_lane_f64:
7876  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
7877  Ops.push_back(EmitScalarExpr(E->getArg(3)));
7878  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
7879  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
7880  Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
7881  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
7882  }
7883  case NEON::BI__builtin_neon_vmull_v:
7884  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7885  Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
7886  if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
7887  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
7888  case NEON::BI__builtin_neon_vmax_v:
7889  case NEON::BI__builtin_neon_vmaxq_v:
7890  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7891  Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
7892  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
7893  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
7894  case NEON::BI__builtin_neon_vmaxh_f16: {
7895  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7896  Int = Intrinsic::aarch64_neon_fmax;
7897  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
7898  }
7899  case NEON::BI__builtin_neon_vmin_v:
7900  case NEON::BI__builtin_neon_vminq_v:
7901  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7902  Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
7903  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
7904  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
7905  case NEON::BI__builtin_neon_vminh_f16: {
7906  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7907  Int = Intrinsic::aarch64_neon_fmin;
7908  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
7909  }
7910  case NEON::BI__builtin_neon_vabd_v:
7911  case NEON::BI__builtin_neon_vabdq_v:
7912  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7913  Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
7914  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
7915  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
7916  case NEON::BI__builtin_neon_vpadal_v:
7917  case NEON::BI__builtin_neon_vpadalq_v: {
7918  unsigned ArgElts = VTy->getNumElements();
7919  llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
7920  unsigned BitWidth = EltTy->getBitWidth();
7921  llvm::Type *ArgTy = llvm::VectorType::get(
7922  llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
7923  llvm::Type* Tys[2] = { VTy, ArgTy };
7924  Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
7926  TmpOps.push_back(Ops[1]);
7927  Function *F = CGM.getIntrinsic(Int, Tys);
7928  llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
7929  llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
7930  return Builder.CreateAdd(tmp, addend);
7931  }
7932  case NEON::BI__builtin_neon_vpmin_v:
7933  case NEON::BI__builtin_neon_vpminq_v:
7934  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7935  Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
7936  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
7937  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
7938  case NEON::BI__builtin_neon_vpmax_v:
7939  case NEON::BI__builtin_neon_vpmaxq_v:
7940  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7941  Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
7942  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
7943  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
7944  case NEON::BI__builtin_neon_vminnm_v:
7945  case NEON::BI__builtin_neon_vminnmq_v:
7946  Int = Intrinsic::aarch64_neon_fminnm;
7947  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
7948  case NEON::BI__builtin_neon_vminnmh_f16:
7949  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7950  Int = Intrinsic::aarch64_neon_fminnm;
7951  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
7952  case NEON::BI__builtin_neon_vmaxnm_v:
7953  case NEON::BI__builtin_neon_vmaxnmq_v:
7954  Int = Intrinsic::aarch64_neon_fmaxnm;
7955  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
7956  case NEON::BI__builtin_neon_vmaxnmh_f16:
7957  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7958  Int = Intrinsic::aarch64_neon_fmaxnm;
7959  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
7960  case NEON::BI__builtin_neon_vrecpss_f32: {
7961  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7962  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
7963  Ops, "vrecps");
7964  }
7965  case NEON::BI__builtin_neon_vrecpsd_f64:
7966  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7967  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
7968  Ops, "vrecps");
7969  case NEON::BI__builtin_neon_vrecpsh_f16:
7970  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7971  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
7972  Ops, "vrecps");
7973  case NEON::BI__builtin_neon_vqshrun_n_v:
7974  Int = Intrinsic::aarch64_neon_sqshrun;
7975  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
7976  case NEON::BI__builtin_neon_vqrshrun_n_v:
7977  Int = Intrinsic::aarch64_neon_sqrshrun;
7978  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
7979  case NEON::BI__builtin_neon_vqshrn_n_v:
7980  Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
7981  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
7982  case NEON::BI__builtin_neon_vrshrn_n_v:
7983  Int = Intrinsic::aarch64_neon_rshrn;
7984  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
7985  case NEON::BI__builtin_neon_vqrshrn_n_v:
7986  Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
7987  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
7988  case NEON::BI__builtin_neon_vrndah_f16: {
7989  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7990  Int = Intrinsic::round;
7991  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
7992  }
7993  case NEON::BI__builtin_neon_vrnda_v:
7994  case NEON::BI__builtin_neon_vrndaq_v: {
7995  Int = Intrinsic::round;
7996  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
7997  }
7998  case NEON::BI__builtin_neon_vrndih_f16: {
7999  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8000  Int = Intrinsic::nearbyint;
8001  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
8002  }
8003  case NEON::BI__builtin_neon_vrndmh_f16: {
8004  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8005  Int = Intrinsic::floor;
8006  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
8007  }
8008  case NEON::BI__builtin_neon_vrndm_v:
8009  case NEON::BI__builtin_neon_vrndmq_v: {
8010  Int = Intrinsic::floor;
8011  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
8012  }
8013  case NEON::BI__builtin_neon_vrndnh_f16: {
8014  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8015  Int = Intrinsic::aarch64_neon_frintn;
8016  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
8017  }
8018  case NEON::BI__builtin_neon_vrndn_v:
8019  case NEON::BI__builtin_neon_vrndnq_v: {
8020  Int = Intrinsic::aarch64_neon_frintn;
8021  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
8022  }
8023  case NEON::BI__builtin_neon_vrndns_f32: {
8024  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8025  Int = Intrinsic::aarch64_neon_frintn;
8026  return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
8027  }
8028  case NEON::BI__builtin_neon_vrndph_f16: {
8029  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8030  Int = Intrinsic::ceil;
8031  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
8032  }
8033  case NEON::BI__builtin_neon_vrndp_v:
8034  case NEON::BI__builtin_neon_vrndpq_v: {
8035  Int = Intrinsic::ceil;
8036  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
8037  }
8038  case NEON::BI__builtin_neon_vrndxh_f16: {
8039  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8040  Int = Intrinsic::rint;
8041  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
8042  }
8043  case NEON::BI__builtin_neon_vrndx_v:
8044  case NEON::BI__builtin_neon_vrndxq_v: {
8045  Int = Intrinsic::rint;
8046  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
8047  }
8048  case NEON::BI__builtin_neon_vrndh_f16: {
8049  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8050  Int = Intrinsic::trunc;
8051  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
8052  }
8053  case NEON::BI__builtin_neon_vrnd_v:
8054  case NEON::BI__builtin_neon_vrndq_v: {
8055  Int = Intrinsic::trunc;
8056  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
8057  }
8058  case NEON::BI__builtin_neon_vcvt_f64_v:
8059  case NEON::BI__builtin_neon_vcvtq_f64_v:
8060  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8061  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
8062  return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8063  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8064  case NEON::BI__builtin_neon_vcvt_f64_f32: {
8065  assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
8066  "unexpected vcvt_f64_f32 builtin");
8067  NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
8068  Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
8069 
8070  return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
8071  }
8072  case NEON::BI__builtin_neon_vcvt_f32_f64: {
8073  assert(Type.getEltType() == NeonTypeFlags::Float32 &&
8074  "unexpected vcvt_f32_f64 builtin");
8075  NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
8076  Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
8077 
8078  return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
8079  }
8080  case NEON::BI__builtin_neon_vcvt_s32_v:
8081  case NEON::BI__builtin_neon_vcvt_u32_v:
8082  case NEON::BI__builtin_neon_vcvt_s64_v:
8083  case NEON::BI__builtin_neon_vcvt_u64_v:
8084  case NEON::BI__builtin_neon_vcvt_s16_v:
8085  case NEON::BI__builtin_neon_vcvt_u16_v:
8086  case NEON::BI__builtin_neon_vcvtq_s32_v:
8087  case NEON::BI__builtin_neon_vcvtq_u32_v:
8088  case NEON::BI__builtin_neon_vcvtq_s64_v:
8089  case NEON::BI__builtin_neon_vcvtq_u64_v:
8090  case NEON::BI__builtin_neon_vcvtq_s16_v:
8091  case NEON::BI__builtin_neon_vcvtq_u16_v: {
8092  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
8093  if (usgn)
8094  return Builder.CreateFPToUI(Ops[0], Ty);
8095  return Builder.CreateFPToSI(Ops[0], Ty);
8096  }
8097  case NEON::BI__builtin_neon_vcvta_s16_v:
8098  case NEON::BI__builtin_neon_vcvta_u16_v:
8099  case NEON::BI__builtin_neon_vcvta_s32_v:
8100  case NEON::BI__builtin_neon_vcvtaq_s16_v:
8101  case NEON::BI__builtin_neon_vcvtaq_s32_v:
8102  case NEON::BI__builtin_neon_vcvta_u32_v:
8103  case NEON::BI__builtin_neon_vcvtaq_u16_v:
8104  case NEON::BI__builtin_neon_vcvtaq_u32_v:
8105  case NEON::BI__builtin_neon_vcvta_s64_v:
8106  case NEON::BI__builtin_neon_vcvtaq_s64_v:
8107  case NEON::BI__builtin_neon_vcvta_u64_v:
8108  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
8109  Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
8110  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8111  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
8112  }
8113  case NEON::BI__builtin_neon_vcvtm_s16_v:
8114  case NEON::BI__builtin_neon_vcvtm_s32_v:
8115  case NEON::BI__builtin_neon_vcvtmq_s16_v:
8116  case NEON::BI__builtin_neon_vcvtmq_s32_v:
8117  case NEON::BI__builtin_neon_vcvtm_u16_v:
8118  case NEON::BI__builtin_neon_vcvtm_u32_v:
8119  case NEON::BI__builtin_neon_vcvtmq_u16_v:
8120  case NEON::BI__builtin_neon_vcvtmq_u32_v:
8121  case NEON::BI__builtin_neon_vcvtm_s64_v:
8122  case NEON::BI__builtin_neon_vcvtmq_s64_v:
8123  case NEON::BI__builtin_neon_vcvtm_u64_v:
8124  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
8125  Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
8126  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8127  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
8128  }
8129  case NEON::BI__builtin_neon_vcvtn_s16_v:
8130  case NEON::BI__builtin_neon_vcvtn_s32_v:
8131  case NEON::BI__builtin_neon_vcvtnq_s16_v:
8132  case NEON::BI__builtin_neon_vcvtnq_s32_v:
8133  case NEON::BI__builtin_neon_vcvtn_u16_v:
8134  case NEON::BI__builtin_neon_vcvtn_u32_v:
8135  case NEON::BI__builtin_neon_vcvtnq_u16_v:
8136  case NEON::BI__builtin_neon_vcvtnq_u32_v:
8137  case NEON::BI__builtin_neon_vcvtn_s64_v:
8138  case NEON::BI__builtin_neon_vcvtnq_s64_v:
8139  case NEON::BI__builtin_neon_vcvtn_u64_v:
8140  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
8141  Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
8142  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8143  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
8144  }
8145  case NEON::BI__builtin_neon_vcvtp_s16_v:
8146  case NEON::BI__builtin_neon_vcvtp_s32_v:
8147  case NEON::BI__builtin_neon_vcvtpq_s16_v:
8148  case NEON::BI__builtin_neon_vcvtpq_s32_v:
8149  case NEON::BI__builtin_neon_vcvtp_u16_v:
8150  case NEON::BI__builtin_neon_vcvtp_u32_v:
8151  case NEON::BI__builtin_neon_vcvtpq_u16_v:
8152  case NEON::BI__builtin_neon_vcvtpq_u32_v:
8153  case NEON::BI__builtin_neon_vcvtp_s64_v:
8154  case NEON::BI__builtin_neon_vcvtpq_s64_v:
8155  case NEON::BI__builtin_neon_vcvtp_u64_v:
8156  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
8157  Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
8158  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8159  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
8160  }
8161  case NEON::BI__builtin_neon_vmulx_v:
8162  case NEON::BI__builtin_neon_vmulxq_v: {
8163  Int = Intrinsic::aarch64_neon_fmulx;
8164  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
8165  }
8166  case NEON::BI__builtin_neon_vmulxh_lane_f16:
8167  case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
8168  // vmulx_lane should be mapped to Neon scalar mulx after
8169  // extracting the scalar element
8170  Ops.push_back(EmitScalarExpr(E->getArg(2)));
8171  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
8172  Ops.pop_back();
8173  Int = Intrinsic::aarch64_neon_fmulx;
8174  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
8175  }
8176  case NEON::BI__builtin_neon_vmul_lane_v:
8177  case NEON::BI__builtin_neon_vmul_laneq_v: {
8178  // v1f64 vmul_lane should be mapped to Neon scalar mul lane
8179  bool Quad = false;
8180  if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
8181  Quad = true;
8182  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
8183  llvm::Type *VTy = GetNeonType(this,
8184  NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
8185  Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
8186  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
8187  Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
8188  return Builder.CreateBitCast(Result, Ty);
8189  }
8190  case NEON::BI__builtin_neon_vnegd_s64:
8191  return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
8192  case NEON::BI__builtin_neon_vnegh_f16:
8193  return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
8194  case NEON::BI__builtin_neon_vpmaxnm_v:
8195  case NEON::BI__builtin_neon_vpmaxnmq_v: {
8196  Int = Intrinsic::aarch64_neon_fmaxnmp;
8197  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
8198  }
8199  case NEON::BI__builtin_neon_vpminnm_v:
8200  case NEON::BI__builtin_neon_vpminnmq_v: {
8201  Int = Intrinsic::aarch64_neon_fminnmp;
8202  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
8203  }
8204  case NEON::BI__builtin_neon_vsqrth_f16: {
8205  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8206  Int = Intrinsic::sqrt;
8207  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
8208  }
8209  case NEON::BI__builtin_neon_vsqrt_v:
8210  case NEON::BI__builtin_neon_vsqrtq_v: {
8211  Int = Intrinsic::sqrt;
8212  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8213  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
8214  }
8215  case NEON::BI__builtin_neon_vrbit_v:
8216  case NEON::BI__builtin_neon_vrbitq_v: {
8217  Int = Intrinsic::aarch64_neon_rbit;
8218  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
8219  }
8220  case NEON::BI__builtin_neon_vaddv_u8:
8221  // FIXME: These are handled by the AArch64 scalar code.
8222  usgn = true;
8223  LLVM_FALLTHROUGH;
8224  case NEON::BI__builtin_neon_vaddv_s8: {
8225  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
8226  Ty = Int32Ty;
8227  VTy = llvm::VectorType::get(Int8Ty, 8);
8228  llvm::Type *Tys[2] = { Ty, VTy };
8229  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8230  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
8231  return Builder.CreateTrunc(Ops[0], Int8Ty);
8232  }
8233  case NEON::BI__builtin_neon_vaddv_u16:
8234  usgn = true;
8235  LLVM_FALLTHROUGH;
8236  case NEON::BI__builtin_neon_vaddv_s16: {
8237  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
8238  Ty = Int32Ty;
8239  VTy = llvm::VectorType::get(Int16Ty, 4);
8240  llvm::Type *Tys[2] = { Ty, VTy };
8241  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8242  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
8243  return Builder.CreateTrunc(Ops[0], Int16Ty);
8244  }
8245  case NEON::BI__builtin_neon_vaddvq_u8:
8246  usgn = true;
8247  LLVM_FALLTHROUGH;
8248  case NEON::BI__builtin_neon_vaddvq_s8: {
8249  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
8250  Ty = Int32Ty;
8251  VTy = llvm::VectorType::get(Int8Ty, 16);
8252  llvm::Type *Tys[2] = { Ty, VTy };
8253  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8254  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
8255  return Builder.CreateTrunc(Ops[0], Int8Ty);
8256  }
8257  case NEON::BI__builtin_neon_vaddvq_u16:
8258  usgn = true;
8259  LLVM_FALLTHROUGH;
8260  case NEON::BI__builtin_neon_vaddvq_s16: {
8261  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
8262  Ty = Int32Ty;
8263  VTy = llvm::VectorType::get(Int16Ty, 8);
8264  llvm::Type *Tys[2] = { Ty, VTy };
8265  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8266  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
8267  return Builder.CreateTrunc(Ops[0], Int16Ty);
8268  }
8269  case NEON::BI__builtin_neon_vmaxv_u8: {
8270  Int = Intrinsic::aarch64_neon_umaxv;
8271  Ty = Int32Ty;
8272  VTy = llvm::VectorType::get(Int8Ty, 8);
8273  llvm::Type *Tys[2] = { Ty, VTy };
8274  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8275  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8276  return Builder.CreateTrunc(Ops[0], Int8Ty);
8277  }
8278  case NEON::BI__builtin_neon_vmaxv_u16: {
8279  Int = Intrinsic::aarch64_neon_umaxv;
8280  Ty = Int32Ty;
8281  VTy = llvm::VectorType::get(Int16Ty, 4);
8282  llvm::Type *Tys[2] = { Ty, VTy };
8283  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8284  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8285  return Builder.CreateTrunc(Ops[0], Int16Ty);
8286  }
8287  case NEON::BI__builtin_neon_vmaxvq_u8: {
8288  Int = Intrinsic::aarch64_neon_umaxv;
8289  Ty = Int32Ty;
8290  VTy = llvm::VectorType::get(Int8Ty, 16);
8291  llvm::Type *Tys[2] = { Ty, VTy };
8292  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8293  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8294  return Builder.CreateTrunc(Ops[0], Int8Ty);
8295  }
8296  case NEON::BI__builtin_neon_vmaxvq_u16: {
8297  Int = Intrinsic::aarch64_neon_umaxv;
8298  Ty = Int32Ty;
8299  VTy = llvm::VectorType::get(Int16Ty, 8);
8300  llvm::Type *Tys[2] = { Ty, VTy };
8301  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8302  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8303  return Builder.CreateTrunc(Ops[0], Int16Ty);
8304  }
8305  case NEON::BI__builtin_neon_vmaxv_s8: {
8306  Int = Intrinsic::aarch64_neon_smaxv;
8307  Ty = Int32Ty;
8308  VTy = llvm::VectorType::get(Int8Ty, 8);
8309  llvm::Type *Tys[2] = { Ty, VTy };
8310  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8311  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8312  return Builder.CreateTrunc(Ops[0], Int8Ty);
8313  }
8314  case NEON::BI__builtin_neon_vmaxv_s16: {
8315  Int = Intrinsic::aarch64_neon_smaxv;
8316  Ty = Int32Ty;
8317  VTy = llvm::VectorType::get(Int16Ty, 4);
8318  llvm::Type *Tys[2] = { Ty, VTy };
8319  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8320  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8321  return Builder.CreateTrunc(Ops[0], Int16Ty);
8322  }
8323  case NEON::BI__builtin_neon_vmaxvq_s8: {
8324  Int = Intrinsic::aarch64_neon_smaxv;
8325  Ty = Int32Ty;
8326  VTy = llvm::VectorType::get(Int8Ty, 16);
8327  llvm::Type *Tys[2] = { Ty, VTy };
8328  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8329  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8330  return Builder.CreateTrunc(Ops[0], Int8Ty);
8331  }
8332  case NEON::BI__builtin_neon_vmaxvq_s16: {
8333  Int = Intrinsic::aarch64_neon_smaxv;
8334  Ty = Int32Ty;
8335  VTy = llvm::VectorType::get(Int16Ty, 8);
8336  llvm::Type *Tys[2] = { Ty, VTy };
8337  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8338  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8339  return Builder.CreateTrunc(Ops[0], Int16Ty);
8340  }
8341  case NEON::BI__builtin_neon_vmaxv_f16: {
8342  Int = Intrinsic::aarch64_neon_fmaxv;
8343  Ty = HalfTy;
8344  VTy = llvm::VectorType::get(HalfTy, 4);
8345  llvm::Type *Tys[2] = { Ty, VTy };
8346  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8347  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8348  return Builder.CreateTrunc(Ops[0], HalfTy);
8349  }
8350  case NEON::BI__builtin_neon_vmaxvq_f16: {
8351  Int = Intrinsic::aarch64_neon_fmaxv;
8352  Ty = HalfTy;
8353  VTy = llvm::VectorType::get(HalfTy, 8);
8354  llvm::Type *Tys[2] = { Ty, VTy };
8355  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8356  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8357  return Builder.CreateTrunc(Ops[0], HalfTy);
8358  }
8359  case NEON::BI__builtin_neon_vminv_u8: {
8360  Int = Intrinsic::aarch64_neon_uminv;
8361  Ty = Int32Ty;
8362  VTy = llvm::VectorType::get(Int8Ty, 8);
8363  llvm::Type *Tys[2] = { Ty, VTy };
8364  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8365  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8366  return Builder.CreateTrunc(Ops[0], Int8Ty);
8367  }
8368  case NEON::BI__builtin_neon_vminv_u16: {
8369  Int = Intrinsic::aarch64_neon_uminv;
8370  Ty = Int32Ty;
8371  VTy = llvm::VectorType::get(Int16Ty, 4);
8372  llvm::Type *Tys[2] = { Ty, VTy };
8373  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8374  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8375  return Builder.CreateTrunc(Ops[0], Int16Ty);
8376  }
8377  case NEON::BI__builtin_neon_vminvq_u8: {
8378  Int = Intrinsic::aarch64_neon_uminv;
8379  Ty = Int32Ty;
8380  VTy = llvm::VectorType::get(Int8Ty, 16);
8381  llvm::Type *Tys[2] = { Ty, VTy };
8382  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8383  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8384  return Builder.CreateTrunc(Ops[0], Int8Ty);
8385  }
8386  case NEON::BI__builtin_neon_vminvq_u16: {
8387  Int = Intrinsic::aarch64_neon_uminv;
8388  Ty = Int32Ty;
8389  VTy = llvm::VectorType::get(Int16Ty, 8);
8390  llvm::Type *Tys[2] = { Ty, VTy };
8391  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8392  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8393  return Builder.CreateTrunc(Ops[0], Int16Ty);
8394  }
8395  case NEON::BI__builtin_neon_vminv_s8: {
8396  Int = Intrinsic::aarch64_neon_sminv;
8397  Ty = Int32Ty;
8398  VTy = llvm::VectorType::get(Int8Ty, 8);
8399  llvm::Type *Tys[2] = { Ty, VTy };
8400  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8401  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8402  return Builder.CreateTrunc(Ops[0], Int8Ty);
8403  }
8404  case NEON::BI__builtin_neon_vminv_s16: {
8405  Int = Intrinsic::aarch64_neon_sminv;
8406  Ty = Int32Ty;
8407  VTy = llvm::VectorType::get(Int16Ty, 4);
8408  llvm::Type *Tys[2] = { Ty, VTy };
8409  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8410  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8411  return Builder.CreateTrunc(Ops[0], Int16Ty);
8412  }
8413  case NEON::BI__builtin_neon_vminvq_s8: {
8414  Int = Intrinsic::aarch64_neon_sminv;
8415  Ty = Int32Ty;
8416  VTy = llvm::VectorType::get(Int8Ty, 16);
8417  llvm::Type *Tys[2] = { Ty, VTy };
8418  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8419  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8420  return Builder.CreateTrunc(Ops[0], Int8Ty);
8421  }
8422  case NEON::BI__builtin_neon_vminvq_s16: {
8423  Int = Intrinsic::aarch64_neon_sminv;
8424  Ty = Int32Ty;
8425  VTy = llvm::VectorType::get(Int16Ty, 8);
8426  llvm::Type *Tys[2] = { Ty, VTy };
8427  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8428  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8429  return Builder.CreateTrunc(Ops[0], Int16Ty);
8430  }
8431  case NEON::BI__builtin_neon_vminv_f16: {
8432  Int = Intrinsic::aarch64_neon_fminv;
8433  Ty = HalfTy;
8434  VTy = llvm::VectorType::get(HalfTy, 4);
8435  llvm::Type *Tys[2] = { Ty, VTy };
8436  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8437  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8438  return Builder.CreateTrunc(Ops[0], HalfTy);
8439  }
8440  case NEON::BI__builtin_neon_vminvq_f16: {
8441  Int = Intrinsic::aarch64_neon_fminv;
8442  Ty = HalfTy;
8443  VTy = llvm::VectorType::get(HalfTy, 8);
8444  llvm::Type *Tys[2] = { Ty, VTy };
8445  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8446  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8447  return Builder.CreateTrunc(Ops[0], HalfTy);
8448  }
8449  case NEON::BI__builtin_neon_vmaxnmv_f16: {
8450  Int = Intrinsic::aarch64_neon_fmaxnmv;
8451  Ty = HalfTy;
8452  VTy = llvm::VectorType::get(HalfTy, 4);
8453  llvm::Type *Tys[2] = { Ty, VTy };
8454  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8455  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
8456  return Builder.CreateTrunc(Ops[0], HalfTy);
8457  }
8458  case NEON::BI__builtin_neon_vmaxnmvq_f16: {
8459  Int = Intrinsic::aarch64_neon_fmaxnmv;
8460  Ty = HalfTy;
8461  VTy = llvm::VectorType::get(HalfTy, 8);
8462  llvm::Type *Tys[2] = { Ty, VTy };
8463  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8464  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
8465  return Builder.CreateTrunc(Ops[0], HalfTy);
8466  }
8467  case NEON::BI__builtin_neon_vminnmv_f16: {
8468  Int = Intrinsic::aarch64_neon_fminnmv;
8469  Ty = HalfTy;
8470  VTy = llvm::VectorType::get(HalfTy, 4);
8471  llvm::Type *Tys[2] = { Ty, VTy };
8472  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8473  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
8474  return Builder.CreateTrunc(Ops[0], HalfTy);
8475  }
8476  case NEON::BI__builtin_neon_vminnmvq_f16: {
8477  Int = Intrinsic::aarch64_neon_fminnmv;
8478  Ty = HalfTy;
8479  VTy = llvm::VectorType::get(HalfTy, 8);
8480  llvm::Type *Tys[2] = { Ty, VTy };
8481  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8482  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
8483  return Builder.CreateTrunc(Ops[0], HalfTy);
8484  }
8485  case NEON::BI__builtin_neon_vmul_n_f64: {
8486  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
8487  Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
8488  return Builder.CreateFMul(Ops[0], RHS);
8489  }
8490  case NEON::BI__builtin_neon_vaddlv_u8: {
8491  Int = Intrinsic::aarch64_neon_uaddlv;
8492  Ty = Int32Ty;
8493  VTy = llvm::VectorType::get(Int8Ty, 8);
8494  llvm::Type *Tys[2] = { Ty, VTy };
8495  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8496  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8497  return Builder.CreateTrunc(Ops[0], Int16Ty);
8498  }
8499  case NEON::BI__builtin_neon_vaddlv_u16: {
8500  Int = Intrinsic::aarch64_neon_uaddlv;
8501  Ty = Int32Ty;
8502  VTy = llvm::VectorType::get(Int16Ty, 4);
8503  llvm::Type *Tys[2] = { Ty, VTy };
8504  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8505  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8506  }
8507  case NEON::BI__builtin_neon_vaddlvq_u8: {
8508  Int = Intrinsic::aarch64_neon_uaddlv;
8509  Ty = Int32Ty;
8510  VTy = llvm::VectorType::get(Int8Ty, 16);
8511  llvm::Type *Tys[2] = { Ty, VTy };
8512  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8513  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8514  return Builder.CreateTrunc(Ops[0], Int16Ty);
8515  }
8516  case NEON::BI__builtin_neon_vaddlvq_u16: {
8517  Int = Intrinsic::aarch64_neon_uaddlv;
8518  Ty = Int32Ty;
8519  VTy = llvm::VectorType::get(Int16Ty, 8);
8520  llvm::Type *Tys[2] = { Ty, VTy };
8521  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8522  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8523  }
8524  case NEON::BI__builtin_neon_vaddlv_s8: {
8525  Int = Intrinsic::aarch64_neon_saddlv;
8526  Ty = Int32Ty;
8527  VTy = llvm::VectorType::get(Int8Ty, 8);
8528  llvm::Type *Tys[2] = { Ty, VTy };
8529  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8530  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8531  return Builder.CreateTrunc(Ops[0], Int16Ty);
8532  }
8533  case NEON::BI__builtin_neon_vaddlv_s16: {
8534  Int = Intrinsic::aarch64_neon_saddlv;
8535  Ty = Int32Ty;
8536  VTy = llvm::VectorType::get(Int16Ty, 4);
8537  llvm::Type *Tys[2] = { Ty, VTy };
8538  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8539  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8540  }
8541  case NEON::BI__builtin_neon_vaddlvq_s8: {
8542  Int = Intrinsic::aarch64_neon_saddlv;
8543  Ty = Int32Ty;
8544  VTy = llvm::VectorType::get(Int8Ty, 16);
8545  llvm::Type *Tys[2] = { Ty, VTy };
8546  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8547  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8548  return Builder.CreateTrunc(Ops[0], Int16Ty);
8549  }
8550  case NEON::BI__builtin_neon_vaddlvq_s16: {
8551  Int = Intrinsic::aarch64_neon_saddlv;
8552  Ty = Int32Ty;
8553  VTy = llvm::VectorType::get(Int16Ty, 8);
8554  llvm::Type *Tys[2] = { Ty, VTy };
8555  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8556  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8557  }
8558  case NEON::BI__builtin_neon_vsri_n_v:
8559  case NEON::BI__builtin_neon_vsriq_n_v: {
8560  Int = Intrinsic::aarch64_neon_vsri;
8561  llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
8562  return EmitNeonCall(Intrin, Ops, "vsri_n");
8563  }
8564  case NEON::BI__builtin_neon_vsli_n_v:
8565  case NEON::BI__builtin_neon_vsliq_n_v: {
8566  Int = Intrinsic::aarch64_neon_vsli;
8567  llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
8568  return EmitNeonCall(Intrin, Ops, "vsli_n");
8569  }
8570  case NEON::BI__builtin_neon_vsra_n_v:
8571  case NEON::BI__builtin_neon_vsraq_n_v:
8572  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8573  Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
8574  return Builder.CreateAdd(Ops[0], Ops[1]);
8575  case NEON::BI__builtin_neon_vrsra_n_v:
8576  case NEON::BI__builtin_neon_vrsraq_n_v: {
8577  Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
8579  TmpOps.push_back(Ops[1]);
8580  TmpOps.push_back(Ops[2]);
8581  Function* F = CGM.getIntrinsic(Int, Ty);
8582  llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
8583  Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8584  return Builder.CreateAdd(Ops[0], tmp);
8585  }
8586  case NEON::BI__builtin_neon_vld1_v:
8587  case NEON::BI__builtin_neon_vld1q_v: {
8588  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
8589  auto Alignment = CharUnits::fromQuantity(
8590  BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
8591  return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
8592  }
8593  case NEON::BI__builtin_neon_vst1_v:
8594  case NEON::BI__builtin_neon_vst1q_v:
8595  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
8596  Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
8597  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8598  case NEON::BI__builtin_neon_vld1_lane_v:
8599  case NEON::BI__builtin_neon_vld1q_lane_v: {
8600  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8601  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
8602  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8603  auto Alignment = CharUnits::fromQuantity(
8604  BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
8605  Ops[0] =
8606  Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
8607  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
8608  }
8609  case NEON::BI__builtin_neon_vld1_dup_v:
8610  case NEON::BI__builtin_neon_vld1q_dup_v: {
8611  Value *V = UndefValue::get(Ty);
8612  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
8613  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8614  auto Alignment = CharUnits::fromQuantity(
8615  BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
8616  Ops[0] =
8617  Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
8618  llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
8619  Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
8620  return EmitNeonSplat(Ops[0], CI);
8621  }
8622  case NEON::BI__builtin_neon_vst1_lane_v:
8623  case NEON::BI__builtin_neon_vst1q_lane_v:
8624  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8625  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
8626  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
8627  return Builder.CreateDefaultAlignedStore(Ops[1],
8628  Builder.CreateBitCast(Ops[0], Ty));
8629  case NEON::BI__builtin_neon_vld2_v:
8630  case NEON::BI__builtin_neon_vld2q_v: {
8631  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
8632  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8633  llvm::Type *Tys[2] = { VTy, PTy };
8634  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
8635  Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
8636  Ops[0] = Builder.CreateBitCast(Ops[0],
8637  llvm::PointerType::getUnqual(Ops[1]->getType()));
8638  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8639  }
8640  case NEON::BI__builtin_neon_vld3_v:
8641  case NEON::BI__builtin_neon_vld3q_v: {
8642  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
8643  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8644  llvm::Type *Tys[2] = { VTy, PTy };
8645  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
8646  Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
8647  Ops[0] = Builder.CreateBitCast(Ops[0],
8648  llvm::PointerType::getUnqual(Ops[1]->getType()));
8649  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8650  }
8651  case NEON::BI__builtin_neon_vld4_v:
8652  case NEON::BI__builtin_neon_vld4q_v: {
8653  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
8654  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8655  llvm::Type *Tys[2] = { VTy, PTy };
8656  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
8657  Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
8658  Ops[0] = Builder.CreateBitCast(Ops[0],
8659  llvm::PointerType::getUnqual(Ops[1]->getType()));
8660  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8661  }
8662  case NEON::BI__builtin_neon_vld2_dup_v:
8663  case NEON::BI__builtin_neon_vld2q_dup_v: {
8664  llvm::Type *PTy =
8665  llvm::PointerType::getUnqual(VTy->getElementType());
8666  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8667  llvm::Type *Tys[2] = { VTy, PTy };
8668  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
8669  Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
8670  Ops[0] = Builder.CreateBitCast(Ops[0],
8671  llvm::PointerType::getUnqual(Ops[1]->getType()));
8672  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8673  }
8674  case NEON::BI__builtin_neon_vld3_dup_v:
8675  case NEON::BI__builtin_neon_vld3q_dup_v: {
8676  llvm::Type *PTy =
8677  llvm::PointerType::getUnqual(VTy->getElementType());
8678  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8679  llvm::Type *Tys[2] = { VTy, PTy };
8680  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
8681  Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
8682  Ops[0] = Builder.CreateBitCast(Ops[0],
8683  llvm::PointerType::getUnqual(Ops[1]->getType()));
8684  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8685  }
8686  case NEON::BI__builtin_neon_vld4_dup_v:
8687  case NEON::BI__builtin_neon_vld4q_dup_v: {
8688  llvm::Type *PTy =
8689  llvm::PointerType::getUnqual(VTy->getElementType());
8690  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8691  llvm::Type *Tys[2] = { VTy, PTy };
8692  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
8693  Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
8694  Ops[0] = Builder.CreateBitCast(Ops[0],
8695  llvm::PointerType::getUnqual(Ops[1]->getType()));
8696  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8697  }
8698  case NEON::BI__builtin_neon_vld2_lane_v:
8699  case NEON::BI__builtin_neon_vld2q_lane_v: {
8700  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
8701  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
8702  Ops.push_back(Ops[1]);
8703  Ops.erase(Ops.begin()+1);
8704  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8705  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8706  Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8707  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
8708  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
8709  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8710  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8711  }
8712  case NEON::BI__builtin_neon_vld3_lane_v:
8713  case NEON::BI__builtin_neon_vld3q_lane_v: {
8714  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
8715  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
8716  Ops.push_back(Ops[1]);
8717  Ops.erase(Ops.begin()+1);
8718  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8719  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8720  Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
8721  Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
8722  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
8723  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
8724  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8725  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8726  }
8727  case NEON::BI__builtin_neon_vld4_lane_v:
8728  case NEON::BI__builtin_neon_vld4q_lane_v: {
8729  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
8730  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
8731  Ops.push_back(Ops[1]);
8732  Ops.erase(Ops.begin()+1);
8733  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8734  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8735  Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
8736  Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
8737  Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
8738  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
8739  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
8740  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8741  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8742  }
8743  case NEON::BI__builtin_neon_vst2_v:
8744  case NEON::BI__builtin_neon_vst2q_v: {
8745  Ops.push_back(Ops[0]);
8746  Ops.erase(Ops.begin());
8747  llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
8748  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
8749  Ops, "");
8750  }
8751  case NEON::BI__builtin_neon_vst2_lane_v:
8752  case NEON::BI__builtin_neon_vst2q_lane_v: {
8753  Ops.push_back(Ops[0]);
8754  Ops.erase(Ops.begin());
8755  Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8756  llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
8757  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
8758  Ops, "");
8759  }
8760  case NEON::BI__builtin_neon_vst3_v:
8761  case NEON::BI__builtin_neon_vst3q_v: {
8762  Ops.push_back(Ops[0]);
8763  Ops.erase(Ops.begin());
8764  llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
8765  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
8766  Ops, "");
8767  }
8768  case NEON::BI__builtin_neon_vst3_lane_v:
8769  case NEON::BI__builtin_neon_vst3q_lane_v: {
8770  Ops.push_back(Ops[0]);
8771  Ops.erase(Ops.begin());
8772  Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8773  llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
8774  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
8775  Ops, "");
8776  }
8777  case NEON::BI__builtin_neon_vst4_v:
8778  case NEON::BI__builtin_neon_vst4q_v: {
8779  Ops.push_back(Ops[0]);
8780  Ops.erase(Ops.begin());
8781  llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
8782  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
8783  Ops, "");
8784  }
8785  case NEON::BI__builtin_neon_vst4_lane_v:
8786  case NEON::BI__builtin_neon_vst4q_lane_v: {
8787  Ops.push_back(Ops[0]);
8788  Ops.erase(Ops.begin());
8789  Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
8790  llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
8791  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
8792  Ops, "");
8793  }
8794  case NEON::BI__builtin_neon_vtrn_v:
8795  case NEON::BI__builtin_neon_vtrnq_v: {
8796  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
8797  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8798  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8799  Value *SV = nullptr;
8800 
8801  for (unsigned vi = 0; vi != 2; ++vi) {
8802  SmallVector<uint32_t, 16> Indices;
8803  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8804  Indices.push_back(i+vi);
8805  Indices.push_back(i+e+vi);
8806  }
8807  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8808  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8809  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8810  }
8811  return SV;
8812  }
8813  case NEON::BI__builtin_neon_vuzp_v:
8814  case NEON::BI__builtin_neon_vuzpq_v: {
8815  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
8816  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8817  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8818  Value *SV = nullptr;
8819 
8820  for (unsigned vi = 0; vi != 2; ++vi) {
8821  SmallVector<uint32_t, 16> Indices;
8822  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8823  Indices.push_back(2*i+vi);
8824 
8825  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8826  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8827  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8828  }
8829  return SV;
8830  }
8831  case NEON::BI__builtin_neon_vzip_v:
8832  case NEON::BI__builtin_neon_vzipq_v: {
8833  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
8834  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8835  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8836  Value *SV = nullptr;
8837 
8838  for (unsigned vi = 0; vi != 2; ++vi) {
8839  SmallVector<uint32_t, 16> Indices;
8840  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8841  Indices.push_back((i + vi*e) >> 1);
8842  Indices.push_back(((i + vi*e) >> 1)+e);
8843  }
8844  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8845  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8846  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8847  }
8848  return SV;
8849  }
8850  case NEON::BI__builtin_neon_vqtbl1q_v: {
8851  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
8852  Ops, "vtbl1");
8853  }
8854  case NEON::BI__builtin_neon_vqtbl2q_v: {
8855  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
8856  Ops, "vtbl2");
8857  }
8858  case NEON::BI__builtin_neon_vqtbl3q_v: {
8859  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
8860  Ops, "vtbl3");
8861  }
8862  case NEON::BI__builtin_neon_vqtbl4q_v: {
8863  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
8864  Ops, "vtbl4");
8865  }
8866  case NEON::BI__builtin_neon_vqtbx1q_v: {
8867  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
8868  Ops, "vtbx1");
8869  }
8870  case NEON::BI__builtin_neon_vqtbx2q_v: {
8871  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
8872  Ops, "vtbx2");
8873  }
8874  case NEON::BI__builtin_neon_vqtbx3q_v: {
8875  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
8876  Ops, "vtbx3");
8877  }
8878  case NEON::BI__builtin_neon_vqtbx4q_v: {
8879  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
8880  Ops, "vtbx4");
8881  }
8882  case NEON::BI__builtin_neon_vsqadd_v:
8883  case NEON::BI__builtin_neon_vsqaddq_v: {
8884  Int = Intrinsic::aarch64_neon_usqadd;
8885  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
8886  }
8887  case NEON::BI__builtin_neon_vuqadd_v:
8888  case NEON::BI__builtin_neon_vuqaddq_v: {
8889  Int = Intrinsic::aarch64_neon_suqadd;
8890  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
8891  }
8892  case AArch64::BI__iso_volatile_load8:
8893  case AArch64::BI__iso_volatile_load16:
8894  case AArch64::BI__iso_volatile_load32:
8895  case AArch64::BI__iso_volatile_load64:
8896  return EmitISOVolatileLoad(E);
8897  case AArch64::BI__iso_volatile_store8:
8898  case AArch64::BI__iso_volatile_store16:
8899  case AArch64::BI__iso_volatile_store32:
8900  case AArch64::BI__iso_volatile_store64:
8901  return EmitISOVolatileStore(E);
8902  case AArch64::BI_BitScanForward:
8903  case AArch64::BI_BitScanForward64:
8904  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
8905  case AArch64::BI_BitScanReverse:
8906  case AArch64::BI_BitScanReverse64:
8907  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
8908  case AArch64::BI_InterlockedAnd64:
8909  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
8910  case AArch64::BI_InterlockedExchange64:
8911  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
8912  case AArch64::BI_InterlockedExchangeAdd64:
8913  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
8914  case AArch64::BI_InterlockedExchangeSub64:
8915  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
8916  case AArch64::BI_InterlockedOr64:
8917  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
8918  case AArch64::BI_InterlockedXor64:
8919  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
8920  case AArch64::BI_InterlockedDecrement64:
8921  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
8922  case AArch64::BI_InterlockedIncrement64:
8923  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
8924  case AArch64::BI_InterlockedExchangeAdd8_acq:
8925  case AArch64::BI_InterlockedExchangeAdd16_acq:
8926  case AArch64::BI_InterlockedExchangeAdd_acq:
8927  case AArch64::BI_InterlockedExchangeAdd64_acq:
8928  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
8929  case AArch64::BI_InterlockedExchangeAdd8_rel:
8930  case AArch64::BI_InterlockedExchangeAdd16_rel:
8931  case AArch64::BI_InterlockedExchangeAdd_rel:
8932  case AArch64::BI_InterlockedExchangeAdd64_rel:
8933  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
8934  case AArch64::BI_InterlockedExchangeAdd8_nf:
8935  case AArch64::BI_InterlockedExchangeAdd16_nf:
8936  case AArch64::BI_InterlockedExchangeAdd_nf:
8937  case AArch64::BI_InterlockedExchangeAdd64_nf:
8938  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
8939  case AArch64::BI_InterlockedExchange8_acq:
8940  case AArch64::BI_InterlockedExchange16_acq:
8941  case AArch64::BI_InterlockedExchange_acq:
8942  case AArch64::BI_InterlockedExchange64_acq:
8943  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
8944  case AArch64::BI_InterlockedExchange8_rel:
8945  case AArch64::BI_InterlockedExchange16_rel:
8946  case AArch64::BI_InterlockedExchange_rel:
8947  case AArch64::BI_InterlockedExchange64_rel:
8948  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
8949  case AArch64::BI_InterlockedExchange8_nf:
8950  case AArch64::BI_InterlockedExchange16_nf:
8951  case AArch64::BI_InterlockedExchange_nf:
8952  case AArch64::BI_InterlockedExchange64_nf:
8953  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
8954  case AArch64::BI_InterlockedCompareExchange8_acq:
8955  case AArch64::BI_InterlockedCompareExchange16_acq:
8956  case AArch64::BI_InterlockedCompareExchange_acq:
8957  case AArch64::BI_InterlockedCompareExchange64_acq:
8958  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
8959  case AArch64::BI_InterlockedCompareExchange8_rel:
8960  case AArch64::BI_InterlockedCompareExchange16_rel:
8961  case AArch64::BI_InterlockedCompareExchange_rel:
8962  case AArch64::BI_InterlockedCompareExchange64_rel:
8963  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
8964  case AArch64::BI_InterlockedCompareExchange8_nf:
8965  case AArch64::BI_InterlockedCompareExchange16_nf:
8966  case AArch64::BI_InterlockedCompareExchange_nf:
8967  case AArch64::BI_InterlockedCompareExchange64_nf:
8968  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
8969  case AArch64::BI_InterlockedOr8_acq:
8970  case AArch64::BI_InterlockedOr16_acq:
8971  case AArch64::BI_InterlockedOr_acq:
8972  case AArch64::BI_InterlockedOr64_acq:
8973  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
8974  case AArch64::BI_InterlockedOr8_rel:
8975  case AArch64::BI_InterlockedOr16_rel:
8976  case AArch64::BI_InterlockedOr_rel:
8977  case AArch64::BI_InterlockedOr64_rel:
8978  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
8979  case AArch64::BI_InterlockedOr8_nf:
8980  case AArch64::BI_InterlockedOr16_nf:
8981  case AArch64::BI_InterlockedOr_nf:
8982  case AArch64::BI_InterlockedOr64_nf:
8983  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
8984  case AArch64::BI_InterlockedXor8_acq:
8985  case AArch64::BI_InterlockedXor16_acq:
8986  case AArch64::BI_InterlockedXor_acq:
8987  case AArch64::BI_InterlockedXor64_acq:
8988  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
8989  case AArch64::BI_InterlockedXor8_rel:
8990  case AArch64::BI_InterlockedXor16_rel:
8991  case AArch64::BI_InterlockedXor_rel:
8992  case AArch64::BI_InterlockedXor64_rel:
8993  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
8994  case AArch64::BI_InterlockedXor8_nf:
8995  case AArch64::BI_InterlockedXor16_nf:
8996  case AArch64::BI_InterlockedXor_nf:
8997  case AArch64::BI_InterlockedXor64_nf:
8998  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
8999  case AArch64::BI_InterlockedAnd8_acq:
9000  case AArch64::BI_InterlockedAnd16_acq:
9001  case AArch64::BI_InterlockedAnd_acq:
9002  case AArch64::BI_InterlockedAnd64_acq:
9003  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
9004  case AArch64::BI_InterlockedAnd8_rel:
9005  case AArch64::BI_InterlockedAnd16_rel:
9006  case AArch64::BI_InterlockedAnd_rel:
9007  case AArch64::BI_InterlockedAnd64_rel:
9008  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
9009  case AArch64::BI_InterlockedAnd8_nf:
9010  case AArch64::BI_InterlockedAnd16_nf:
9011  case AArch64::BI_InterlockedAnd_nf:
9012  case AArch64::BI_InterlockedAnd64_nf:
9013  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
9014  case AArch64::BI_InterlockedIncrement16_acq:
9015  case AArch64::BI_InterlockedIncrement_acq:
9016  case AArch64::BI_InterlockedIncrement64_acq:
9017  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
9018  case AArch64::BI_InterlockedIncrement16_rel:
9019  case AArch64::BI_InterlockedIncrement_rel:
9020  case AArch64::BI_InterlockedIncrement64_rel:
9021  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
9022  case AArch64::BI_InterlockedIncrement16_nf:
9023  case AArch64::BI_InterlockedIncrement_nf:
9024  case AArch64::BI_InterlockedIncrement64_nf:
9025  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
9026  case AArch64::BI_InterlockedDecrement16_acq:
9027  case AArch64::BI_InterlockedDecrement_acq:
9028  case AArch64::BI_InterlockedDecrement64_acq:
9029  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
9030  case AArch64::BI_InterlockedDecrement16_rel:
9031  case AArch64::BI_InterlockedDecrement_rel:
9032  case AArch64::BI_InterlockedDecrement64_rel:
9033  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
9034  case AArch64::BI_InterlockedDecrement16_nf:
9035  case AArch64::BI_InterlockedDecrement_nf:
9036  case AArch64::BI_InterlockedDecrement64_nf:
9037  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
9038 
9039  case AArch64::BI_InterlockedAdd: {
9040  Value *Arg0 = EmitScalarExpr(E->getArg(0));
9041  Value *Arg1 = EmitScalarExpr(E->getArg(1));
9042  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
9043  AtomicRMWInst::Add, Arg0, Arg1,
9044  llvm::AtomicOrdering::SequentiallyConsistent);
9045  return Builder.CreateAdd(RMWI, Arg1);
9046  }
9047  }
9048 }
9049 
9052  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
9053  "Not a power-of-two sized vector!");
9054  bool AllConstants = true;
9055  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
9056  AllConstants &= isa<Constant>(Ops[i]);
9057 
9058  // If this is a constant vector, create a ConstantVector.
9059  if (AllConstants) {
9061  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
9062  CstOps.push_back(cast<Constant>(Ops[i]));
9063  return llvm::ConstantVector::get(CstOps);
9064  }
9065 
9066  // Otherwise, insertelement the values to build the vector.
9067  Value *Result =
9068  llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
9069 
9070  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
9071  Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
9072 
9073  return Result;
9074 }
9075 
9076 // Convert the mask from an integer type to a vector of i1.
9078  unsigned NumElts) {
9079 
9080  llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
9081  cast<IntegerType>(Mask->getType())->getBitWidth());
9082  Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
9083 
9084  // If we have less than 8 elements, then the starting mask was an i8 and
9085  // we need to extract down to the right number of elements.
9086  if (NumElts < 8) {
9087  uint32_t Indices[4];
9088  for (unsigned i = 0; i != NumElts; ++i)
9089  Indices[i] = i;
9090  MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
9091  makeArrayRef(Indices, NumElts),
9092  "extract");
9093  }
9094  return MaskVec;
9095 }
9096 
9098  ArrayRef<Value *> Ops,
9099  unsigned Align) {
9100  // Cast the pointer to right type.
9101  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
9102  llvm::PointerType::getUnqual(Ops[1]->getType()));
9103 
9104  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9105  Ops[1]->getType()->getVectorNumElements());
9106 
9107  return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Align, MaskVec);
9108 }
9109 
9111  ArrayRef<Value *> Ops, unsigned Align) {
9112  // Cast the pointer to right type.
9113  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
9114  llvm::PointerType::getUnqual(Ops[1]->getType()));
9115 
9116  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9117  Ops[1]->getType()->getVectorNumElements());
9118 
9119  return CGF.Builder.CreateMaskedLoad(Ptr, Align, MaskVec, Ops[1]);
9120 }
9121 
9123  ArrayRef<Value *> Ops) {
9124  llvm::Type *ResultTy = Ops[1]->getType();
9125  llvm::Type *PtrTy = ResultTy->getVectorElementType();
9126 
9127  // Cast the pointer to element type.
9128  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
9129  llvm::PointerType::getUnqual(PtrTy));
9130 
9131  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9132  ResultTy->getVectorNumElements());
9133 
9134  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
9135  ResultTy);
9136  return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
9137 }
9138 
9140  ArrayRef<Value *> Ops) {
9141  llvm::Type *ResultTy = Ops[1]->getType();
9142  llvm::Type *PtrTy = ResultTy->getVectorElementType();
9143 
9144  // Cast the pointer to element type.
9145  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
9146  llvm::PointerType::getUnqual(PtrTy));
9147 
9148  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9149  ResultTy->getVectorNumElements());
9150 
9151  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
9152  ResultTy);
9153  return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
9154 }
9155 
9156 static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
9157  ArrayRef<Value *> Ops,
9158  bool InvertLHS = false) {
9159  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
9160  Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
9161  Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
9162 
9163  if (InvertLHS)
9164  LHS = CGF.Builder.CreateNot(LHS);
9165 
9166  return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
9167  Ops[0]->getType());
9168 }
9169 
9171  Value *Amt, bool IsRight) {
9172  llvm::Type *Ty = Op0->getType();
9173 
9174  // Amount may be scalar immediate, in which case create a splat vector.
9175  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
9176  // we only care about the lowest log2 bits anyway.
9177  if (Amt->getType() != Ty) {
9178  unsigned NumElts = Ty->getVectorNumElements();
9179  Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
9180  Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
9181  }
9182 
9183  unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
9184  Value *F = CGF.CGM.getIntrinsic(IID, Ty);
9185  return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
9186 }
9187 
9189  Value *Mask, Value *Op0, Value *Op1) {
9190 
9191  // If the mask is all ones just return first argument.
9192  if (const auto *C = dyn_cast<Constant>(Mask))
9193  if (C->isAllOnesValue())
9194  return Op0;
9195 
9196  Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
9197 
9198  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
9199 }
9200 
9202  Value *Mask, Value *Op0, Value *Op1) {
9203  // If the mask is all ones just return first argument.
9204  if (const auto *C = dyn_cast<Constant>(Mask))
9205  if (C->isAllOnesValue())
9206  return Op0;
9207 
9208  llvm::VectorType *MaskTy =
9209  llvm::VectorType::get(CGF.Builder.getInt1Ty(),
9210  Mask->getType()->getIntegerBitWidth());
9211  Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
9212  Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
9213  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
9214 }
9215 
9217  unsigned NumElts, Value *MaskIn) {
9218  if (MaskIn) {
9219  const auto *C = dyn_cast<Constant>(MaskIn);
9220  if (!C || !C->isAllOnesValue())
9221  Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
9222  }
9223 
9224  if (NumElts < 8) {
9225  uint32_t Indices[8];
9226  for (unsigned i = 0; i != NumElts; ++i)
9227  Indices[i] = i;
9228  for (unsigned i = NumElts; i != 8; ++i)
9229  Indices[i] = i % NumElts + NumElts;
9230  Cmp = CGF.Builder.CreateShuffleVector(
9231  Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
9232  }
9233 
9234  return CGF.Builder.CreateBitCast(Cmp,
9235  IntegerType::get(CGF.getLLVMContext(),
9236  std::max(NumElts, 8U)));
9237 }
9238 
9239 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
9240  bool Signed, ArrayRef<Value *> Ops) {
9241  assert((Ops.size() == 2 || Ops.size() == 4) &&
9242  "Unexpected number of arguments");
9243  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
9244  Value *Cmp;
9245 
9246  if (CC == 3) {
9247  Cmp = Constant::getNullValue(
9248  llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
9249  } else if (CC == 7) {
9250  Cmp = Constant::getAllOnesValue(
9251  llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
9252  } else {
9253  ICmpInst::Predicate Pred;
9254  switch (CC) {
9255  default: llvm_unreachable("Unknown condition code");
9256  case 0: Pred = ICmpInst::ICMP_EQ; break;
9257  case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
9258  case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
9259  case 4: Pred = ICmpInst::ICMP_NE; break;
9260  case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
9261  case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
9262  }
9263  Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
9264  }
9265 
9266  Value *MaskIn = nullptr;
9267  if (Ops.size() == 4)
9268  MaskIn = Ops[3];
9269 
9270  return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
9271 }
9272 
9274  Value *Zero = Constant::getNullValue(In->getType());
9275  return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
9276 }
9277 
9279 
9280  llvm::Type *Ty = Ops[0]->getType();
9281  Value *Zero = llvm::Constant::getNullValue(Ty);
9282  Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]);
9283  Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero);
9284  Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub);
9285  return Res;
9286 }
9287 
9288 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
9289  ArrayRef<Value *> Ops) {
9290  Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
9291  Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
9292 
9293  assert(Ops.size() == 2);
9294  return Res;
9295 }
9296 
9297 // Lowers X86 FMA intrinsics to IR.
9299  unsigned BuiltinID, bool IsAddSub) {
9300 
9301  bool Subtract = false;
9302  Intrinsic::ID IID = Intrinsic::not_intrinsic;
9303  switch (BuiltinID) {
9304  default: break;
9305  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
9306  Subtract = true;
9307  LLVM_FALLTHROUGH;
9308  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
9309  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
9310  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
9311  IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
9312  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
9313  Subtract = true;
9314  LLVM_FALLTHROUGH;
9315  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
9316  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
9317  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
9318  IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
9319  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
9320  Subtract = true;
9321  LLVM_FALLTHROUGH;
9322  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
9323  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
9324  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
9325  IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
9326  break;
9327  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
9328  Subtract = true;
9329  LLVM_FALLTHROUGH;
9330  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
9331  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
9332  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
9333  IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
9334  break;
9335  }
9336 
9337  Value *A = Ops[0];
9338  Value *B = Ops[1];
9339  Value *C = Ops[2];
9340 
9341  if (Subtract)
9342  C = CGF.Builder.CreateFNeg(C);
9343 
9344  Value *Res;
9345 
9346  // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
9347  if (IID != Intrinsic::not_intrinsic &&
9348  cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4) {
9349  Function *Intr = CGF.CGM.getIntrinsic(IID);
9350  Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
9351  } else {
9352  llvm::Type *Ty = A->getType();
9353  Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
9354  Res = CGF.Builder.CreateCall(FMA, {A, B, C} );
9355 
9356  if (IsAddSub) {
9357  // Negate even elts in C using a mask.
9358  unsigned NumElts = Ty->getVectorNumElements();
9359  SmallVector<uint32_t, 16> Indices(NumElts);
9360  for (unsigned i = 0; i != NumElts; ++i)
9361  Indices[i] = i + (i % 2) * NumElts;
9362 
9363  Value *NegC = CGF.Builder.CreateFNeg(C);
9364  Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} );
9365  Res = CGF.Builder.CreateShuffleVector(FMSub, Res, Indices);
9366  }
9367  }
9368 
9369  // Handle any required masking.
9370  Value *MaskFalseVal = nullptr;
9371  switch (BuiltinID) {
9372  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
9373  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
9374  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
9375  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
9376  MaskFalseVal = Ops[0];
9377  break;
9378  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
9379  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
9380  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
9381  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
9382  MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
9383  break;
9384  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
9385  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
9386  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
9387  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
9388  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
9389  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
9390  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
9391  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
9392  MaskFalseVal = Ops[2];
9393  break;
9394  }
9395 
9396  if (MaskFalseVal)
9397  return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
9398 
9399  return Res;
9400 }
9401 
9402 static Value *
9404  Value *Upper, bool ZeroMask = false, unsigned PTIdx = 0,
9405  bool NegAcc = false) {
9406  unsigned Rnd = 4;
9407  if (Ops.size() > 4)
9408  Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
9409 
9410  if (NegAcc)
9411  Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
9412 
9413  Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
9414  Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
9415  Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
9416  Value *Res;
9417  if (Rnd != 4) {
9418  Intrinsic::ID IID = Ops[0]->getType()->getPrimitiveSizeInBits() == 32 ?
9419  Intrinsic::x86_avx512_vfmadd_f32 :
9420  Intrinsic::x86_avx512_vfmadd_f64;
9421  Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
9422  {Ops[0], Ops[1], Ops[2], Ops[4]});
9423  } else {
9424  Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
9425  Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
9426  }
9427  // If we have more than 3 arguments, we need to do masking.
9428  if (Ops.size() > 3) {
9429  Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
9430  : Ops[PTIdx];
9431 
9432  // If we negated the accumulator and the its the PassThru value we need to
9433  // bypass the negate. Conveniently Upper should be the same thing in this
9434  // case.
9435  if (NegAcc && PTIdx == 2)
9436  PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
9437 
9438  Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
9439  }
9440  return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
9441 }
9442 
9443 static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
9444  ArrayRef<Value *> Ops) {
9445  llvm::Type *Ty = Ops[0]->getType();
9446  // Arguments have a vXi32 type so cast to vXi64.
9447  Ty = llvm::VectorType::get(CGF.Int64Ty,
9448  Ty->getPrimitiveSizeInBits() / 64);
9449  Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
9450  Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
9451 
9452  if (IsSigned) {
9453  // Shift left then arithmetic shift right.
9454  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
9455  LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
9456  LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
9457  RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
9458  RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
9459  } else {
9460  // Clear the upper bits.
9461  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
9462  LHS = CGF.Builder.CreateAnd(LHS, Mask);
9463  RHS = CGF.Builder.CreateAnd(RHS, Mask);
9464  }
9465 
9466  return CGF.Builder.CreateMul(LHS, RHS);
9467 }
9468 
9469 // Emit a masked pternlog intrinsic. This only exists because the header has to
9470 // use a macro and we aren't able to pass the input argument to a pternlog
9471 // builtin and a select builtin without evaluating it twice.
9472 static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
9473  ArrayRef<Value *> Ops) {
9474  llvm::Type *Ty = Ops[0]->getType();
9475 
9476  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
9477  unsigned EltWidth = Ty->getScalarSizeInBits();
9478  Intrinsic::ID IID;
9479  if (VecWidth == 128 && EltWidth == 32)
9480  IID = Intrinsic::x86_avx512_pternlog_d_128;
9481  else if (VecWidth == 256 && EltWidth == 32)
9482  IID = Intrinsic::x86_avx512_pternlog_d_256;
9483  else if (VecWidth == 512 && EltWidth == 32)
9484  IID = Intrinsic::x86_avx512_pternlog_d_512;
9485  else if (VecWidth == 128 && EltWidth == 64)
9486  IID = Intrinsic::x86_avx512_pternlog_q_128;
9487  else if (VecWidth == 256 && EltWidth == 64)
9488  IID = Intrinsic::x86_avx512_pternlog_q_256;
9489  else if (VecWidth == 512 && EltWidth == 64)
9490  IID = Intrinsic::x86_avx512_pternlog_q_512;
9491  else
9492  llvm_unreachable("Unexpected intrinsic");
9493 
9494  Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
9495  Ops.drop_back());
9496  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
9497  return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
9498 }
9499 
9501  llvm::Type *DstTy) {
9502  unsigned NumberOfElements = DstTy->getVectorNumElements();
9503  Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
9504  return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
9505 }
9506 
9507 // Emit addition or subtraction with signed/unsigned saturation.
9509  ArrayRef<Value *> Ops, bool IsSigned,
9510  bool IsAddition) {
9511  Intrinsic::ID IID =
9512  IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
9513  : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
9514  llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType());
9515  return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]});
9516 }
9517 
9518 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
9519  const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
9520  StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
9521  return EmitX86CpuIs(CPUStr);
9522 }
9523 
9524 Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
9525 
9526  llvm::Type *Int32Ty = Builder.getInt32Ty();
9527 
9528  // Matching the struct layout from the compiler-rt/libgcc structure that is
9529  // filled in:
9530  // unsigned int __cpu_vendor;
9531  // unsigned int __cpu_type;
9532  // unsigned int __cpu_subtype;
9533  // unsigned int __cpu_features[1];
9534  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
9535  llvm::ArrayType::get(Int32Ty, 1));
9536 
9537  // Grab the global __cpu_model.
9538  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
9539  cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
9540 
9541  // Calculate the index needed to access the correct field based on the
9542  // range. Also adjust the expected value.
9543  unsigned Index;
9544  unsigned Value;
9545  std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
9546 #define X86_VENDOR(ENUM, STRING) \
9547  .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
9548 #define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS) \
9549  .Cases(STR, ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
9550 #define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) \
9551  .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
9552 #define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) \
9553  .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
9554 #include "llvm/Support/X86TargetParser.def"
9555  .Default({0, 0});
9556  assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
9557 
9558  // Grab the appropriate field from __cpu_model.
9559  llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
9560  ConstantInt::get(Int32Ty, Index)};
9561  llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
9562  CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4));
9563 
9564  // Check the value of the field against the requested value.
9565  return Builder.CreateICmpEQ(CpuValue,
9566  llvm::ConstantInt::get(Int32Ty, Value));
9567 }
9568 
9569 Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
9570  const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
9571  StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
9572  return EmitX86CpuSupports(FeatureStr);
9573 }
9574 
9575 uint64_t
9577  // Processor features and mapping to processor feature value.
9578  uint64_t FeaturesMask = 0;
9579  for (const StringRef &FeatureStr : FeatureStrs) {
9580  unsigned Feature =
9581  StringSwitch<unsigned>(FeatureStr)
9582 #define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL)
9583 #include "llvm/Support/X86TargetParser.def"
9584  ;
9585  FeaturesMask |= (1ULL << Feature);
9586  }
9587  return FeaturesMask;
9588 }
9589 
9590 Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
9591  return EmitX86CpuSupports(GetX86CpuSupportsMask(FeatureStrs));
9592 }
9593 
9594 llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
9595  uint32_t Features1 = Lo_32(FeaturesMask);
9596  uint32_t Features2 = Hi_32(FeaturesMask);
9597 
9598  Value *Result = Builder.getTrue();
9599 
9600  if (Features1 != 0) {
9601  // Matching the struct layout from the compiler-rt/libgcc structure that is
9602  // filled in:
9603  // unsigned int __cpu_vendor;
9604  // unsigned int __cpu_type;
9605  // unsigned int __cpu_subtype;
9606  // unsigned int __cpu_features[1];
9607  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
9608  llvm::ArrayType::get(Int32Ty, 1));
9609 
9610  // Grab the global __cpu_model.
9611  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
9612  cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
9613 
9614  // Grab the first (0th) element from the field __cpu_features off of the
9615  // global in the struct STy.
9616  Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
9617  Builder.getInt32(0)};
9618  Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
9619  Value *Features =
9620  Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
9621 
9622  // Check the value of the bit corresponding to the feature requested.
9623  Value *Mask = Builder.getInt32(Features1);
9624  Value *Bitset = Builder.CreateAnd(Features, Mask);
9625  Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
9626  Result = Builder.CreateAnd(Result, Cmp);
9627  }
9628 
9629  if (Features2 != 0) {
9630  llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty,
9631  "__cpu_features2");
9632  cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
9633 
9634  Value *Features =
9635  Builder.CreateAlignedLoad(CpuFeatures2, CharUnits::fromQuantity(4));
9636 
9637  // Check the value of the bit corresponding to the feature requested.
9638  Value *Mask = Builder.getInt32(Features2);
9639  Value *Bitset = Builder.CreateAnd(Features, Mask);
9640  Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
9641  Result = Builder.CreateAnd(Result, Cmp);
9642  }
9643 
9644  return Result;
9645 }
9646 
9647 Value *CodeGenFunction::EmitX86CpuInit() {
9648  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
9649  /*Variadic*/ false);
9650  llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
9651  cast<llvm::GlobalValue>(Func)->setDSOLocal(true);
9652  cast<llvm::GlobalValue>(Func)->setDLLStorageClass(
9653  llvm::GlobalValue::DefaultStorageClass);
9654  return Builder.CreateCall(Func);
9655 }
9656 
9658  const CallExpr *E) {
9659  if (BuiltinID == X86::BI__builtin_cpu_is)
9660  return EmitX86CpuIs(E);
9661  if (BuiltinID == X86::BI__builtin_cpu_supports)
9662  return EmitX86CpuSupports(E);
9663  if (BuiltinID == X86::BI__builtin_cpu_init)
9664  return EmitX86CpuInit();
9665 
9667 
9668  // Find out if any arguments are required to be integer constant expressions.
9669  unsigned ICEArguments = 0;
9671  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
9672  assert(Error == ASTContext::GE_None && "Should not codegen an error");
9673 
9674  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
9675  // If this is a normal argument, just emit it as a scalar.
9676  if ((ICEArguments & (1 << i)) == 0) {
9677  Ops.push_back(EmitScalarExpr(E->getArg(i)));
9678  continue;
9679  }
9680 
9681  // If this is required to be a constant, constant fold it so that we know
9682  // that the generated intrinsic gets a ConstantInt.
9683  llvm::APSInt Result;
9684  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
9685  assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
9686  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
9687  }
9688 
9689  // These exist so that the builtin that takes an immediate can be bounds
9690  // checked by clang to avoid passing bad immediates to the backend. Since
9691  // AVX has a larger immediate than SSE we would need separate builtins to
9692  // do the different bounds checking. Rather than create a clang specific
9693  // SSE only builtin, this implements eight separate builtins to match gcc
9694  // implementation.
9695  auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
9696  Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
9697  llvm::Function *F = CGM.getIntrinsic(ID);
9698  return Builder.CreateCall(F, Ops);
9699  };
9700 
9701  // For the vector forms of FP comparisons, translate the builtins directly to
9702  // IR.
9703  // TODO: The builtins could be removed if the SSE header files used vector
9704  // extension comparisons directly (vector ordered/unordered may need
9705  // additional support via __builtin_isnan()).
9706  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
9707  Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
9708  llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
9709  llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
9710  Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
9711  return Builder.CreateBitCast(Sext, FPVecTy);
9712  };
9713 
9714  switch (BuiltinID) {
9715  default: return nullptr;
9716  case X86::BI_mm_prefetch: {
9717  Value *Address = Ops[0];
9718  ConstantInt *C = cast<ConstantInt>(Ops[1]);
9719  Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
9720  Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
9721  Value *Data = ConstantInt::get(Int32Ty, 1);
9722  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
9723  return Builder.CreateCall(F, {Address, RW, Locality, Data});
9724  }
9725  case X86::BI_mm_clflush: {
9726  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
9727  Ops[0]);
9728  }
9729  case X86::BI_mm_lfence: {
9730  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
9731  }
9732  case X86::BI_mm_mfence: {
9733  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
9734  }
9735  case X86::BI_mm_sfence: {
9736  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
9737  }
9738  case X86::BI_mm_pause: {
9739  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
9740  }
9741  case X86::BI__rdtsc: {
9742  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
9743  }
9744  case X86::BI__builtin_ia32_rdtscp: {
9745  Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
9746  Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
9747  Ops[0]);
9748  return Builder.CreateExtractValue(Call, 0);
9749  }
9750  case X86::BI__builtin_ia32_lzcnt_u16:
9751  case X86::BI__builtin_ia32_lzcnt_u32:
9752  case X86::BI__builtin_ia32_lzcnt_u64: {
9753  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
9754  return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
9755  }
9756  case X86::BI__builtin_ia32_tzcnt_u16:
9757  case X86::BI__builtin_ia32_tzcnt_u32:
9758  case X86::BI__builtin_ia32_tzcnt_u64: {
9759  Value *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
9760  return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
9761  }
9762  case X86::BI__builtin_ia32_undef128:
9763  case X86::BI__builtin_ia32_undef256:
9764  case X86::BI__builtin_ia32_undef512:
9765  // The x86 definition of "undef" is not the same as the LLVM definition
9766  // (PR32176). We leave optimizing away an unnecessary zero constant to the
9767  // IR optimizer and backend.
9768  // TODO: If we had a "freeze" IR instruction to generate a fixed undef
9769  // value, we should use that here instead of a zero.
9770  return llvm::Constant::getNullValue(ConvertType(E->getType()));
9771  case X86::BI__builtin_ia32_vec_init_v8qi:
9772  case X86::BI__builtin_ia32_vec_init_v4hi:
9773  case X86::BI__builtin_ia32_vec_init_v2si:
9774  return Builder.CreateBitCast(BuildVector(Ops),
9775  llvm::Type::getX86_MMXTy(getLLVMContext()));
9776  case X86::BI__builtin_ia32_vec_ext_v2si:
9777  case X86::BI__builtin_ia32_vec_ext_v16qi:
9778  case X86::BI__builtin_ia32_vec_ext_v8hi:
9779  case X86::BI__builtin_ia32_vec_ext_v4si:
9780  case X86::BI__builtin_ia32_vec_ext_v4sf:
9781  case X86::BI__builtin_ia32_vec_ext_v2di:
9782  case X86::BI__builtin_ia32_vec_ext_v32qi:
9783  case X86::BI__builtin_ia32_vec_ext_v16hi:
9784  case X86::BI__builtin_ia32_vec_ext_v8si:
9785  case X86::BI__builtin_ia32_vec_ext_v4di: {
9786  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
9787  uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
9788  Index &= NumElts - 1;
9789  // These builtins exist so we can ensure the index is an ICE and in range.
9790  // Otherwise we could just do this in the header file.
9791  return Builder.CreateExtractElement(Ops[0], Index);
9792  }
9793  case X86::BI__builtin_ia32_vec_set_v16qi:
9794  case X86::BI__builtin_ia32_vec_set_v8hi:
9795  case X86::BI__builtin_ia32_vec_set_v4si:
9796  case X86::BI__builtin_ia32_vec_set_v2di:
9797  case X86::BI__builtin_ia32_vec_set_v32qi:
9798  case X86::BI__builtin_ia32_vec_set_v16hi:
9799  case X86::BI__builtin_ia32_vec_set_v8si:
9800  case X86::BI__builtin_ia32_vec_set_v4di: {
9801  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
9802  unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
9803  Index &= NumElts - 1;
9804  // These builtins exist so we can ensure the index is an ICE and in range.
9805  // Otherwise we could just do this in the header file.
9806  return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
9807  }
9808  case X86::BI_mm_setcsr:
9809  case X86::BI__builtin_ia32_ldmxcsr: {
9810  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
9811  Builder.CreateStore(Ops[0], Tmp);
9812  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
9813  Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
9814  }
9815  case X86::BI_mm_getcsr:
9816  case X86::BI__builtin_ia32_stmxcsr: {
9817  Address Tmp = CreateMemTemp(E->getType());
9818  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
9819  Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
9820  return Builder.CreateLoad(Tmp, "stmxcsr");
9821  }
9822  case X86::BI__builtin_ia32_xsave:
9823  case X86::BI__builtin_ia32_xsave64:
9824  case X86::BI__builtin_ia32_xrstor:
9825  case X86::BI__builtin_ia32_xrstor64:
9826  case X86::BI__builtin_ia32_xsaveopt:
9827  case X86::BI__builtin_ia32_xsaveopt64:
9828  case X86::BI__builtin_ia32_xrstors:
9829  case X86::BI__builtin_ia32_xrstors64:
9830  case X86::BI__builtin_ia32_xsavec:
9831  case X86::BI__builtin_ia32_xsavec64:
9832  case X86::BI__builtin_ia32_xsaves:
9833  case X86::BI__builtin_ia32_xsaves64: {
9834  Intrinsic::ID ID;
9835 #define INTRINSIC_X86_XSAVE_ID(NAME) \
9836  case X86::BI__builtin_ia32_##NAME: \
9837  ID = Intrinsic::x86_##NAME; \
9838  break
9839  switch (BuiltinID) {
9840  default: llvm_unreachable("Unsupported intrinsic!");
9841  INTRINSIC_X86_XSAVE_ID(xsave);
9842  INTRINSIC_X86_XSAVE_ID(xsave64);
9843  INTRINSIC_X86_XSAVE_ID(xrstor);
9844  INTRINSIC_X86_XSAVE_ID(xrstor64);
9845  INTRINSIC_X86_XSAVE_ID(xsaveopt);
9846  INTRINSIC_X86_XSAVE_ID(xsaveopt64);
9847  INTRINSIC_X86_XSAVE_ID(xrstors);
9848  INTRINSIC_X86_XSAVE_ID(xrstors64);
9849  INTRINSIC_X86_XSAVE_ID(xsavec);
9850  INTRINSIC_X86_XSAVE_ID(xsavec64);
9851  INTRINSIC_X86_XSAVE_ID(xsaves);
9852  INTRINSIC_X86_XSAVE_ID(xsaves64);
9853  }
9854 #undef INTRINSIC_X86_XSAVE_ID
9855  Value *Mhi = Builder.CreateTrunc(
9856  Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
9857  Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
9858  Ops[1] = Mhi;
9859  Ops.push_back(Mlo);
9860  return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
9861  }
9862  case X86::BI__builtin_ia32_storedqudi128_mask:
9863  case X86::BI__builtin_ia32_storedqusi128_mask:
9864  case X86::BI__builtin_ia32_storedquhi128_mask:
9865  case X86::BI__builtin_ia32_storedquqi128_mask:
9866  case X86::BI__builtin_ia32_storeupd128_mask:
9867  case X86::BI__builtin_ia32_storeups128_mask:
9868  case X86::BI__builtin_ia32_storedqudi256_mask:
9869  case X86::BI__builtin_ia32_storedqusi256_mask:
9870  case X86::BI__builtin_ia32_storedquhi256_mask:
9871  case X86::BI__builtin_ia32_storedquqi256_mask:
9872  case X86::BI__builtin_ia32_storeupd256_mask:
9873  case X86::BI__builtin_ia32_storeups256_mask:
9874  case X86::BI__builtin_ia32_storedqudi512_mask:
9875  case X86::BI__builtin_ia32_storedqusi512_mask:
9876  case X86::BI__builtin_ia32_storedquhi512_mask:
9877  case X86::BI__builtin_ia32_storedquqi512_mask:
9878  case X86::BI__builtin_ia32_storeupd512_mask:
9879  case X86::BI__builtin_ia32_storeups512_mask:
9880  return EmitX86MaskedStore(*this, Ops, 1);
9881 
9882  case X86::BI__builtin_ia32_storess128_mask:
9883  case X86::BI__builtin_ia32_storesd128_mask: {
9884  return EmitX86MaskedStore(*this, Ops, 1);
9885  }
9886  case X86::BI__builtin_ia32_vpopcntb_128:
9887  case X86::BI__builtin_ia32_vpopcntd_128:
9888  case X86::BI__builtin_ia32_vpopcntq_128:
9889  case X86::BI__builtin_ia32_vpopcntw_128:
9890  case X86::BI__builtin_ia32_vpopcntb_256:
9891  case X86::BI__builtin_ia32_vpopcntd_256:
9892  case X86::BI__builtin_ia32_vpopcntq_256:
9893  case X86::BI__builtin_ia32_vpopcntw_256:
9894  case X86::BI__builtin_ia32_vpopcntb_512:
9895  case X86::BI__builtin_ia32_vpopcntd_512:
9896  case X86::BI__builtin_ia32_vpopcntq_512:
9897  case X86::BI__builtin_ia32_vpopcntw_512: {
9898  llvm::Type *ResultType = ConvertType(E->getType());
9899  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
9900  return Builder.CreateCall(F, Ops);
9901  }
9902  case X86::BI__builtin_ia32_cvtmask2b128:
9903  case X86::BI__builtin_ia32_cvtmask2b256:
9904  case X86::BI__builtin_ia32_cvtmask2b512:
9905  case X86::BI__builtin_ia32_cvtmask2w128:
9906  case X86::BI__builtin_ia32_cvtmask2w256:
9907  case X86::BI__builtin_ia32_cvtmask2w512:
9908  case X86::BI__builtin_ia32_cvtmask2d128:
9909  case X86::BI__builtin_ia32_cvtmask2d256:
9910  case X86::BI__builtin_ia32_cvtmask2d512:
9911  case X86::BI__builtin_ia32_cvtmask2q128:
9912  case X86::BI__builtin_ia32_cvtmask2q256:
9913  case X86::BI__builtin_ia32_cvtmask2q512:
9914  return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
9915 
9916  case X86::BI__builtin_ia32_cvtb2mask128:
9917  case X86::BI__builtin_ia32_cvtb2mask256:
9918  case X86::BI__builtin_ia32_cvtb2mask512:
9919  case X86::BI__builtin_ia32_cvtw2mask128:
9920  case X86::BI__builtin_ia32_cvtw2mask256:
9921  case X86::BI__builtin_ia32_cvtw2mask512:
9922  case X86::BI__builtin_ia32_cvtd2mask128:
9923  case X86::BI__builtin_ia32_cvtd2mask256:
9924  case X86::BI__builtin_ia32_cvtd2mask512:
9925  case X86::BI__builtin_ia32_cvtq2mask128:
9926  case X86::BI__builtin_ia32_cvtq2mask256:
9927  case X86::BI__builtin_ia32_cvtq2mask512:
9928  return EmitX86ConvertToMask(*this, Ops[0]);
9929 
9930  case X86::BI__builtin_ia32_vfmaddss3:
9931  case X86::BI__builtin_ia32_vfmaddsd3:
9932  case X86::BI__builtin_ia32_vfmaddss3_mask:
9933  case X86::BI__builtin_ia32_vfmaddsd3_mask:
9934  return EmitScalarFMAExpr(*this, Ops, Ops[0]);
9935  case X86::BI__builtin_ia32_vfmaddss:
9936  case X86::BI__builtin_ia32_vfmaddsd:
9937  return EmitScalarFMAExpr(*this, Ops,
9938  Constant::getNullValue(Ops[0]->getType()));
9939  case X86::BI__builtin_ia32_vfmaddss3_maskz:
9940  case X86::BI__builtin_ia32_vfmaddsd3_maskz:
9941  return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true);
9942  case X86::BI__builtin_ia32_vfmaddss3_mask3:
9943  case X86::BI__builtin_ia32_vfmaddsd3_mask3:
9944  return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2);
9945  case X86::BI__builtin_ia32_vfmsubss3_mask3:
9946  case X86::BI__builtin_ia32_vfmsubsd3_mask3:
9947  return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2,
9948  /*NegAcc*/true);
9949  case X86::BI__builtin_ia32_vfmaddps:
9950  case X86::BI__builtin_ia32_vfmaddpd:
9951  case X86::BI__builtin_ia32_vfmaddps256:
9952  case X86::BI__builtin_ia32_vfmaddpd256:
9953  case X86::BI__builtin_ia32_vfmaddps512_mask:
9954  case X86::BI__builtin_ia32_vfmaddps512_maskz:
9955  case X86::BI__builtin_ia32_vfmaddps512_mask3:
9956  case X86::BI__builtin_ia32_vfmsubps512_mask3:
9957  case X86::BI__builtin_ia32_vfmaddpd512_mask:
9958  case X86::BI__builtin_ia32_vfmaddpd512_maskz:
9959  case X86::BI__builtin_ia32_vfmaddpd512_mask3:
9960  case X86::BI__builtin_ia32_vfmsubpd512_mask3:
9961  return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);
9962  case X86::BI__builtin_ia32_vfmaddsubps:
9963  case X86::BI__builtin_ia32_vfmaddsubpd:
9964  case X86::BI__builtin_ia32_vfmaddsubps256:
9965  case X86::BI__builtin_ia32_vfmaddsubpd256:
9966  case X86::BI__builtin_ia32_vfmaddsubps512_mask:
9967  case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
9968  case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
9969  case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
9970  case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
9971  case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
9972  case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
9973  case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
9974  return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true);
9975 
9976  case X86::BI__builtin_ia32_movdqa32store128_mask:
9977  case X86::BI__builtin_ia32_movdqa64store128_mask:
9978  case X86::BI__builtin_ia32_storeaps128_mask:
9979  case X86::BI__builtin_ia32_storeapd128_mask:
9980  case X86::BI__builtin_ia32_movdqa32store256_mask:
9981  case X86::BI__builtin_ia32_movdqa64store256_mask:
9982  case X86::BI__builtin_ia32_storeaps256_mask:
9983  case X86::BI__builtin_ia32_storeapd256_mask:
9984  case X86::BI__builtin_ia32_movdqa32store512_mask:
9985  case X86::BI__builtin_ia32_movdqa64store512_mask:
9986  case X86::BI__builtin_ia32_storeaps512_mask:
9987  case X86::BI__builtin_ia32_storeapd512_mask: {
9988  unsigned Align =
9989  getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
9990  return EmitX86MaskedStore(*this, Ops, Align);
9991  }
9992  case X86::BI__builtin_ia32_loadups128_mask:
9993  case X86::BI__builtin_ia32_loadups256_mask:
9994  case X86::BI__builtin_ia32_loadups512_mask:
9995  case X86::BI__builtin_ia32_loadupd128_mask:
9996  case X86::BI__builtin_ia32_loadupd256_mask:
9997  case X86::BI__builtin_ia32_loadupd512_mask:
9998  case X86::BI__builtin_ia32_loaddquqi128_mask:
9999  case X86::BI__builtin_ia32_loaddquqi256_mask:
10000  case X86::BI__builtin_ia32_loaddquqi512_mask:
10001  case X86::BI__builtin_ia32_loaddquhi128_mask:
10002  case X86::BI__builtin_ia32_loaddquhi256_mask:
10003  case X86::BI__builtin_ia32_loaddquhi512_mask:
10004  case X86::BI__builtin_ia32_loaddqusi128_mask:
10005  case X86::BI__builtin_ia32_loaddqusi256_mask:
10006  case X86::BI__builtin_ia32_loaddqusi512_mask:
10007  case X86::BI__builtin_ia32_loaddqudi128_mask:
10008  case X86::BI__builtin_ia32_loaddqudi256_mask:
10009  case X86::BI__builtin_ia32_loaddqudi512_mask:
10010  return EmitX86MaskedLoad(*this, Ops, 1);
10011 
10012  case X86::BI__builtin_ia32_loadss128_mask:
10013  case X86::BI__builtin_ia32_loadsd128_mask:
10014  return EmitX86MaskedLoad(*this, Ops, 1);
10015 
10016  case X86::BI__builtin_ia32_loadaps128_mask:
10017  case X86::BI__builtin_ia32_loadaps256_mask:
10018  case X86::BI__builtin_ia32_loadaps512_mask:
10019  case X86::BI__builtin_ia32_loadapd128_mask:
10020  case X86::BI__builtin_ia32_loadapd256_mask:
10021  case X86::BI__builtin_ia32_loadapd512_mask:
10022  case X86::BI__builtin_ia32_movdqa32load128_mask:
10023  case X86::BI__builtin_ia32_movdqa32load256_mask:
10024  case X86::BI__builtin_ia32_movdqa32load512_mask:
10025  case X86::BI__builtin_ia32_movdqa64load128_mask:
10026  case X86::BI__builtin_ia32_movdqa64load256_mask:
10027  case X86::BI__builtin_ia32_movdqa64load512_mask: {
10028  unsigned Align =
10029  getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
10030  return EmitX86MaskedLoad(*this, Ops, Align);
10031  }
10032 
10033  case X86::BI__builtin_ia32_expandloaddf128_mask:
10034  case X86::BI__builtin_ia32_expandloaddf256_mask:
10035  case X86::BI__builtin_ia32_expandloaddf512_mask:
10036  case X86::BI__builtin_ia32_expandloadsf128_mask:
10037  case X86::BI__builtin_ia32_expandloadsf256_mask:
10038  case X86::BI__builtin_ia32_expandloadsf512_mask:
10039  case X86::BI__builtin_ia32_expandloaddi128_mask:
10040  case X86::BI__builtin_ia32_expandloaddi256_mask:
10041  case X86::BI__builtin_ia32_expandloaddi512_mask:
10042  case X86::BI__builtin_ia32_expandloadsi128_mask:
10043  case X86::BI__builtin_ia32_expandloadsi256_mask:
10044  case X86::BI__builtin_ia32_expandloadsi512_mask:
10045  case X86::BI__builtin_ia32_expandloadhi128_mask:
10046  case X86::BI__builtin_ia32_expandloadhi256_mask:
10047  case X86::BI__builtin_ia32_expandloadhi512_mask:
10048  case X86::BI__builtin_ia32_expandloadqi128_mask:
10049  case X86::BI__builtin_ia32_expandloadqi256_mask:
10050  case X86::BI__builtin_ia32_expandloadqi512_mask:
10051  return EmitX86ExpandLoad(*this, Ops);
10052 
10053  case X86::BI__builtin_ia32_compressstoredf128_mask:
10054  case X86::BI__builtin_ia32_compressstoredf256_mask:
10055  case X86::BI__builtin_ia32_compressstoredf512_mask:
10056  case X86::BI__builtin_ia32_compressstoresf128_mask:
10057  case X86::BI__builtin_ia32_compressstoresf256_mask:
10058  case X86::BI__builtin_ia32_compressstoresf512_mask:
10059  case X86::BI__builtin_ia32_compressstoredi128_mask:
10060  case X86::BI__builtin_ia32_compressstoredi256_mask:
10061  case X86::BI__builtin_ia32_compressstoredi512_mask:
10062  case X86::BI__builtin_ia32_compressstoresi128_mask:
10063  case X86::BI__builtin_ia32_compressstoresi256_mask:
10064  case X86::BI__builtin_ia32_compressstoresi512_mask:
10065  case X86::BI__builtin_ia32_compressstorehi128_mask:
10066  case X86::BI__builtin_ia32_compressstorehi256_mask:
10067  case X86::BI__builtin_ia32_compressstorehi512_mask:
10068  case X86::BI__builtin_ia32_compressstoreqi128_mask:
10069  case X86::BI__builtin_ia32_compressstoreqi256_mask:
10070  case X86::BI__builtin_ia32_compressstoreqi512_mask:
10071  return EmitX86CompressStore(*this, Ops);
10072 
10073  case X86::BI__builtin_ia32_storehps:
10074  case X86::BI__builtin_ia32_storelps: {
10075  llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
10076  llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
10077 
10078  // cast val v2i64
10079  Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
10080 
10081  // extract (0, 1)
10082  unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
10083  Ops[1] = Builder.CreateExtractElement(Ops[1], Index, "extract");
10084 
10085  // cast pointer to i64 & store
10086  Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
10087  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
10088  }
10089  case X86::BI__builtin_ia32_vextractf128_pd256:
10090  case X86::BI__builtin_ia32_vextractf128_ps256:
10091  case X86::BI__builtin_ia32_vextractf128_si256:
10092  case X86::BI__builtin_ia32_extract128i256:
10093  case X86::BI__builtin_ia32_extractf64x4_mask:
10094  case X86::BI__builtin_ia32_extractf32x4_mask:
10095  case X86::BI__builtin_ia32_extracti64x4_mask:
10096  case X86::BI__builtin_ia32_extracti32x4_mask:
10097  case X86::BI__builtin_ia32_extractf32x8_mask:
10098  case X86::BI__builtin_ia32_extracti32x8_mask:
10099  case X86::BI__builtin_ia32_extractf32x4_256_mask:
10100  case X86::BI__builtin_ia32_extracti32x4_256_mask:
10101  case X86::BI__builtin_ia32_extractf64x2_256_mask:
10102  case X86::BI__builtin_ia32_extracti64x2_256_mask:
10103  case X86::BI__builtin_ia32_extractf64x2_512_mask:
10104  case X86::BI__builtin_ia32_extracti64x2_512_mask: {
10105  llvm::Type *DstTy = ConvertType(E->getType());
10106  unsigned NumElts = DstTy->getVectorNumElements();
10107  unsigned SrcNumElts = Ops[0]->getType()->getVectorNumElements();
10108  unsigned SubVectors = SrcNumElts / NumElts;
10109  unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
10110  assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
10111  Index &= SubVectors - 1; // Remove any extra bits.
10112  Index *= NumElts;
10113 
10114  uint32_t Indices[16];
10115  for (unsigned i = 0; i != NumElts; ++i)
10116  Indices[i] = i + Index;
10117 
10118  Value *Res = Builder.CreateShuffleVector(Ops[0],
10119  UndefValue::get(Ops[0]->getType()),
10120  makeArrayRef(Indices, NumElts),
10121  "extract");
10122 
10123  if (Ops.size() == 4)
10124  Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
10125 
10126  return Res;
10127  }
10128  case X86::BI__builtin_ia32_vinsertf128_pd256:
10129  case X86::BI__builtin_ia32_vinsertf128_ps256:
10130  case X86::BI__builtin_ia32_vinsertf128_si256:
10131  case X86::BI__builtin_ia32_insert128i256:
10132  case X86::BI__builtin_ia32_insertf64x4:
10133  case X86::BI__builtin_ia32_insertf32x4:
10134  case X86::BI__builtin_ia32_inserti64x4:
10135  case X86::BI__builtin_ia32_inserti32x4:
10136  case X86::BI__builtin_ia32_insertf32x8:
10137  case X86::BI__builtin_ia32_inserti32x8:
10138  case X86::BI__builtin_ia32_insertf32x4_256:
10139  case X86::BI__builtin_ia32_inserti32x4_256:
10140  case X86::BI__builtin_ia32_insertf64x2_256:
10141  case X86::BI__builtin_ia32_inserti64x2_256:
10142  case X86::BI__builtin_ia32_insertf64x2_512:
10143  case X86::BI__builtin_ia32_inserti64x2_512: {
10144  unsigned DstNumElts = Ops[0]->getType()->getVectorNumElements();
10145  unsigned SrcNumElts = Ops[1]->getType()->getVectorNumElements();
10146  unsigned SubVectors = DstNumElts / SrcNumElts;
10147  unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
10148  assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
10149  Index &= SubVectors - 1; // Remove any extra bits.
10150  Index *= SrcNumElts;
10151 
10152  uint32_t Indices[16];
10153  for (unsigned i = 0; i != DstNumElts; ++i)
10154  Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
10155 
10156  Value *Op1 = Builder.CreateShuffleVector(Ops[1],
10157  UndefValue::get(Ops[1]->getType()),
10158  makeArrayRef(Indices, DstNumElts),
10159  "widen");
10160 
10161  for (unsigned i = 0; i != DstNumElts; ++i) {
10162  if (i >= Index && i < (Index + SrcNumElts))
10163  Indices[i] = (i - Index) + DstNumElts;
10164  else
10165  Indices[i] = i;
10166  }
10167 
10168  return Builder.CreateShuffleVector(Ops[0], Op1,
10169  makeArrayRef(Indices, DstNumElts),
10170  "insert");
10171  }
10172  case X86::BI__builtin_ia32_pmovqd512_mask:
10173  case X86::BI__builtin_ia32_pmovwb512_mask: {
10174  Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
10175  return EmitX86Select(*this, Ops[2], Res, Ops[1]);
10176  }
10177  case X86::BI__builtin_ia32_pmovdb512_mask:
10178  case X86::BI__builtin_ia32_pmovdw512_mask:
10179  case X86::BI__builtin_ia32_pmovqw512_mask: {
10180  if (const auto *C = dyn_cast<Constant>(Ops[2]))
10181  if (C->isAllOnesValue())
10182  return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
10183 
10184  Intrinsic::ID IID;
10185  switch (BuiltinID) {
10186  default: llvm_unreachable("Unsupported intrinsic!");
10187  case X86::BI__builtin_ia32_pmovdb512_mask:
10188  IID = Intrinsic::x86_avx512_mask_pmov_db_512;
10189  break;
10190  case X86::BI__builtin_ia32_pmovdw512_mask:
10191  IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
10192  break;
10193  case X86::BI__builtin_ia32_pmovqw512_mask:
10194  IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
10195  break;
10196  }
10197 
10198  Function *Intr = CGM.getIntrinsic(IID);
10199  return Builder.CreateCall(Intr, Ops);
10200  }
10201  case X86::BI__builtin_ia32_pblendw128:
10202  case X86::BI__builtin_ia32_blendpd:
10203  case X86::BI__builtin_ia32_blendps:
10204  case X86::BI__builtin_ia32_blendpd256:
10205  case X86::BI__builtin_ia32_blendps256:
10206  case X86::BI__builtin_ia32_pblendw256:
10207  case X86::BI__builtin_ia32_pblendd128:
10208  case X86::BI__builtin_ia32_pblendd256: {
10209  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10210  unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
10211 
10212  uint32_t Indices[16];
10213  // If there are more than 8 elements, the immediate is used twice so make
10214  // sure we handle that.
10215  for (unsigned i = 0; i != NumElts; ++i)
10216  Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
10217 
10218  return Builder.CreateShuffleVector(Ops[0], Ops[1],
10219  makeArrayRef(Indices, NumElts),
10220  "blend");
10221  }
10222  case X86::BI__builtin_ia32_pshuflw:
10223  case X86::BI__builtin_ia32_pshuflw256:
10224  case X86::BI__builtin_ia32_pshuflw512: {
10225  uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10226  llvm::Type *Ty = Ops[0]->getType();
10227  unsigned NumElts = Ty->getVectorNumElements();
10228 
10229  // Splat the 8-bits of immediate 4 times to help the loop wrap around.
10230  Imm = (Imm & 0xff) * 0x01010101;
10231 
10232  uint32_t Indices[32];
10233  for (unsigned l = 0; l != NumElts; l += 8) {
10234  for (unsigned i = 0; i != 4; ++i) {
10235  Indices[l + i] = l + (Imm & 3);
10236  Imm >>= 2;
10237  }
10238  for (unsigned i = 4; i != 8; ++i)
10239  Indices[l + i] = l + i;
10240  }
10241 
10242  return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
10243  makeArrayRef(Indices, NumElts),
10244  "pshuflw");
10245  }
10246  case X86::BI__builtin_ia32_pshufhw:
10247  case X86::BI__builtin_ia32_pshufhw256:
10248  case X86::BI__builtin_ia32_pshufhw512: {
10249  uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10250  llvm::Type *Ty = Ops[0]->getType();
10251  unsigned NumElts = Ty->getVectorNumElements();
10252 
10253  // Splat the 8-bits of immediate 4 times to help the loop wrap around.
10254  Imm = (Imm & 0xff) * 0x01010101;
10255 
10256  uint32_t Indices[32];
10257  for (unsigned l = 0; l != NumElts; l += 8) {
10258  for (unsigned i = 0; i != 4; ++i)
10259  Indices[l + i] = l + i;
10260  for (unsigned i = 4; i != 8; ++i) {
10261  Indices[l + i] = l + 4 + (Imm & 3);
10262  Imm >>= 2;
10263  }
10264  }
10265 
10266  return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
10267  makeArrayRef(Indices, NumElts),
10268  "pshufhw");
10269  }
10270  case X86::BI__builtin_ia32_pshufd:
10271  case X86::BI__builtin_ia32_pshufd256:
10272  case X86::BI__builtin_ia32_pshufd512:
10273  case X86::BI__builtin_ia32_vpermilpd:
10274  case X86::BI__builtin_ia32_vpermilps:
10275  case X86::BI__builtin_ia32_vpermilpd256:
10276  case X86::BI__builtin_ia32_vpermilps256:
10277  case X86::BI__builtin_ia32_vpermilpd512:
10278  case X86::BI__builtin_ia32_vpermilps512: {
10279  uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10280  llvm::Type *Ty = Ops[0]->getType();
10281  unsigned NumElts = Ty->getVectorNumElements();
10282  unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
10283  unsigned NumLaneElts = NumElts / NumLanes;
10284 
10285  // Splat the 8-bits of immediate 4 times to help the loop wrap around.
10286  Imm = (Imm & 0xff) * 0x01010101;
10287 
10288  uint32_t Indices[16];
10289  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
10290  for (unsigned i = 0; i != NumLaneElts; ++i) {
10291  Indices[i + l] = (Imm % NumLaneElts) + l;
10292  Imm /= NumLaneElts;
10293  }
10294  }
10295 
10296  return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
10297  makeArrayRef(Indices, NumElts),
10298  "permil");
10299  }
10300  case X86::BI__builtin_ia32_shufpd:
10301  case X86::BI__builtin_ia32_shufpd256:
10302  case X86::BI__builtin_ia32_shufpd512:
10303  case X86::BI__builtin_ia32_shufps:
10304  case X86::BI__builtin_ia32_shufps256:
10305  case X86::BI__builtin_ia32_shufps512: {
10306  uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
10307  llvm::Type *Ty = Ops[0]->getType();
10308  unsigned NumElts = Ty->getVectorNumElements();
10309  unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
10310  unsigned NumLaneElts = NumElts / NumLanes;
10311 
10312  // Splat the 8-bits of immediate 4 times to help the loop wrap around.
10313  Imm = (Imm & 0xff) * 0x01010101;
10314 
10315  uint32_t Indices[16];
10316  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
10317  for (unsigned i = 0; i != NumLaneElts; ++i) {
10318  unsigned Index = Imm % NumLaneElts;
10319  Imm /= NumLaneElts;
10320  if (i >= (NumLaneElts / 2))
10321  Index += NumElts;
10322  Indices[l + i] = l + Index;
10323  }
10324  }
10325 
10326  return Builder.CreateShuffleVector(Ops[0], Ops[1],
10327  makeArrayRef(Indices, NumElts),
10328  "shufp");
10329  }
10330  case X86::BI__builtin_ia32_permdi256:
10331  case X86::BI__builtin_ia32_permdf256:
10332  case X86::BI__builtin_ia32_permdi512:
10333  case X86::BI__builtin_ia32_permdf512: {
10334  unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10335  llvm::Type *Ty = Ops[0]->getType();
10336  unsigned NumElts = Ty->getVectorNumElements();
10337 
10338  // These intrinsics operate on 256-bit lanes of four 64-bit elements.
10339  uint32_t Indices[8];
10340  for (unsigned l = 0; l != NumElts; l += 4)
10341  for (unsigned i = 0; i != 4; ++i)
10342  Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
10343 
10344  return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
10345  makeArrayRef(Indices, NumElts),
10346  "perm");
10347  }
10348  case X86::BI__builtin_ia32_palignr128:
10349  case X86::BI__builtin_ia32_palignr256:
10350  case X86::BI__builtin_ia32_palignr512: {
10351  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
10352 
10353  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10354  assert(NumElts % 16 == 0);
10355 
10356  // If palignr is shifting the pair of vectors more than the size of two
10357  // lanes, emit zero.
10358  if (ShiftVal >= 32)
10359  return llvm::Constant::getNullValue(ConvertType(E->getType()));
10360 
10361  // If palignr is shifting the pair of input vectors more than one lane,
10362  // but less than two lanes, convert to shifting in zeroes.
10363  if (ShiftVal > 16) {
10364  ShiftVal -= 16;
10365  Ops[1] = Ops[0];
10366  Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
10367  }
10368 
10369  uint32_t Indices[64];
10370  // 256-bit palignr operates on 128-bit lanes so we need to handle that
10371  for (unsigned l = 0; l != NumElts; l += 16) {
10372  for (unsigned i = 0; i != 16; ++i) {
10373  unsigned Idx = ShiftVal + i;
10374  if (Idx >= 16)
10375  Idx += NumElts - 16; // End of lane, switch operand.
10376  Indices[l + i] = Idx + l;
10377  }
10378  }
10379 
10380  return Builder.CreateShuffleVector(Ops[1], Ops[0],
10381  makeArrayRef(Indices, NumElts),
10382  "palignr");
10383  }
10384  case X86::BI__builtin_ia32_alignd128:
10385  case X86::BI__builtin_ia32_alignd256:
10386  case X86::BI__builtin_ia32_alignd512:
10387  case X86::BI__builtin_ia32_alignq128:
10388  case X86::BI__builtin_ia32_alignq256:
10389  case X86::BI__builtin_ia32_alignq512: {
10390  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10391  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
10392 
10393  // Mask the shift amount to width of two vectors.
10394  ShiftVal &= (2 * NumElts) - 1;
10395 
10396  uint32_t Indices[16];
10397  for (unsigned i = 0; i != NumElts; ++i)
10398  Indices[i] = i + ShiftVal;
10399 
10400  return Builder.CreateShuffleVector(Ops[1], Ops[0],
10401  makeArrayRef(Indices, NumElts),
10402  "valign");
10403  }
10404  case X86::BI__builtin_ia32_shuf_f32x4_256:
10405  case X86::BI__builtin_ia32_shuf_f64x2_256:
10406  case X86::BI__builtin_ia32_shuf_i32x4_256:
10407  case X86::BI__builtin_ia32_shuf_i64x2_256:
10408  case X86::BI__builtin_ia32_shuf_f32x4:
10409  case X86::BI__builtin_ia32_shuf_f64x2:
10410  case X86::BI__builtin_ia32_shuf_i32x4:
10411  case X86::BI__builtin_ia32_shuf_i64x2: {
10412  unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
10413  llvm::Type *Ty = Ops[0]->getType();
10414  unsigned NumElts = Ty->getVectorNumElements();
10415  unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
10416  unsigned NumLaneElts = NumElts / NumLanes;
10417 
10418  uint32_t Indices[16];
10419  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
10420  unsigned Index = (Imm % NumLanes) * NumLaneElts;
10421  Imm /= NumLanes; // Discard the bits we just used.
10422  if (l >= (NumElts / 2))
10423  Index += NumElts; // Switch to other source.
10424  for (unsigned i = 0; i != NumLaneElts; ++i) {
10425  Indices[l + i] = Index + i;
10426  }
10427  }
10428 
10429  return Builder.CreateShuffleVector(Ops[0], Ops[1],
10430  makeArrayRef(Indices, NumElts),
10431  "shuf");
10432  }
10433 
10434  case X86::BI__builtin_ia32_vperm2f128_pd256:
10435  case X86::BI__builtin_ia32_vperm2f128_ps256:
10436  case X86::BI__builtin_ia32_vperm2f128_si256:
10437  case X86::BI__builtin_ia32_permti256: {
10438  unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
10439  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10440 
10441  // This takes a very simple approach since there are two lanes and a
10442  // shuffle can have 2 inputs. So we reserve the first input for the first
10443  // lane and the second input for the second lane. This may result in
10444  // duplicate sources, but this can be dealt with in the backend.
10445 
10446  Value *OutOps[2];
10447  uint32_t Indices[8];
10448  for (unsigned l = 0; l != 2; ++l) {
10449  // Determine the source for this lane.
10450  if (Imm & (1 << ((l * 4) + 3)))
10451  OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
10452  else if (Imm & (1 << ((l * 4) + 1)))
10453  OutOps[l] = Ops[1];
10454  else
10455  OutOps[l] = Ops[0];
10456 
10457  for (unsigned i = 0; i != NumElts/2; ++i) {
10458  // Start with ith element of the source for this lane.
10459  unsigned Idx = (l * NumElts) + i;
10460  // If bit 0 of the immediate half is set, switch to the high half of
10461  // the source.
10462  if (Imm & (1 << (l * 4)))
10463  Idx += NumElts/2;
10464  Indices[(l * (NumElts/2)) + i] = Idx;
10465  }
10466  }
10467 
10468  return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
10469  makeArrayRef(Indices, NumElts),
10470  "vperm");
10471  }
10472 
10473  case X86::BI__builtin_ia32_pslldqi128_byteshift:
10474  case X86::BI__builtin_ia32_pslldqi256_byteshift:
10475  case X86::BI__builtin_ia32_pslldqi512_byteshift: {
10476  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
10477  llvm::Type *ResultType = Ops[0]->getType();
10478  // Builtin type is vXi64 so multiply by 8 to get bytes.
10479  unsigned NumElts = ResultType->getVectorNumElements() * 8;
10480 
10481  // If pslldq is shifting the vector more than 15 bytes, emit zero.
10482  if (ShiftVal >= 16)
10483  return llvm::Constant::getNullValue(ResultType);
10484 
10485  uint32_t Indices[64];
10486  // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
10487  for (unsigned l = 0; l != NumElts; l += 16) {
10488  for (unsigned i = 0; i != 16; ++i) {
10489  unsigned Idx = NumElts + i - ShiftVal;
10490  if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
10491  Indices[l + i] = Idx + l;
10492  }
10493  }
10494 
10495  llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
10496  Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
10497  Value *Zero = llvm::Constant::getNullValue(VecTy);
10498  Value *SV = Builder.CreateShuffleVector(Zero, Cast,
10499  makeArrayRef(Indices, NumElts),
10500  "pslldq");
10501  return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
10502  }
10503  case X86::BI__builtin_ia32_psrldqi128_byteshift:
10504  case X86::BI__builtin_ia32_psrldqi256_byteshift:
10505  case X86::BI__builtin_ia32_psrldqi512_byteshift: {
10506  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
10507  llvm::Type *ResultType = Ops[0]->getType();
10508  // Builtin type is vXi64 so multiply by 8 to get bytes.
10509  unsigned NumElts = ResultType->getVectorNumElements() * 8;
10510 
10511  // If psrldq is shifting the vector more than 15 bytes, emit zero.
10512  if (ShiftVal >= 16)
10513  return llvm::Constant::getNullValue(ResultType);
10514 
10515  uint32_t Indices[64];
10516  // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
10517  for (unsigned l = 0; l != NumElts; l += 16) {
10518  for (unsigned i = 0; i != 16; ++i) {
10519  unsigned Idx = i + ShiftVal;
10520  if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
10521  Indices[l + i] = Idx + l;
10522  }
10523  }
10524 
10525  llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
10526  Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
10527  Value *Zero = llvm::Constant::getNullValue(VecTy);
10528  Value *SV = Builder.CreateShuffleVector(Cast, Zero,
10529  makeArrayRef(Indices, NumElts),
10530  "psrldq");
10531  return Builder.CreateBitCast(SV, ResultType, "cast");
10532  }
10533  case X86::BI__builtin_ia32_kshiftliqi:
10534  case X86::BI__builtin_ia32_kshiftlihi:
10535  case X86::BI__builtin_ia32_kshiftlisi:
10536  case X86::BI__builtin_ia32_kshiftlidi: {
10537  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
10538  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10539 
10540  if (ShiftVal >= NumElts)
10541  return llvm::Constant::getNullValue(Ops[0]->getType());
10542 
10543  Value *In = getMaskVecValue(*this, Ops[0], NumElts);
10544 
10545  uint32_t Indices[64];
10546  for (unsigned i = 0; i != NumElts; ++i)
10547  Indices[i] = NumElts + i - ShiftVal;
10548 
10549  Value *Zero = llvm::Constant::getNullValue(In->getType());
10550  Value *SV = Builder.CreateShuffleVector(Zero, In,
10551  makeArrayRef(Indices, NumElts),
10552  "kshiftl");
10553  return Builder.CreateBitCast(SV, Ops[0]->getType());
10554  }
10555  case X86::BI__builtin_ia32_kshiftriqi:
10556  case X86::BI__builtin_ia32_kshiftrihi:
10557  case X86::BI__builtin_ia32_kshiftrisi:
10558  case X86::BI__builtin_ia32_kshiftridi: {
10559  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
10560  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10561 
10562  if (ShiftVal >= NumElts)
10563  return llvm::Constant::getNullValue(Ops[0]->getType());
10564 
10565  Value *In = getMaskVecValue(*this, Ops[0], NumElts);
10566 
10567  uint32_t Indices[64];
10568  for (unsigned i = 0; i != NumElts; ++i)
10569  Indices[i] = i + ShiftVal;
10570 
10571  Value *Zero = llvm::Constant::getNullValue(In->getType());
10572  Value *SV = Builder.CreateShuffleVector(In, Zero,
10573  makeArrayRef(Indices, NumElts),
10574  "kshiftr");
10575  return Builder.CreateBitCast(SV, Ops[0]->getType());
10576  }
10577  case X86::BI__builtin_ia32_movnti:
10578  case X86::BI__builtin_ia32_movnti64:
10579  case X86::BI__builtin_ia32_movntsd:
10580  case X86::BI__builtin_ia32_movntss: {
10581  llvm::MDNode *Node = llvm::MDNode::get(
10582  getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
10583 
10584  Value *Ptr = Ops[0];
10585  Value *Src = Ops[1];
10586 
10587  // Extract the 0'th element of the source vector.
10588  if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
10589  BuiltinID == X86::BI__builtin_ia32_movntss)
10590  Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
10591 
10592  // Convert the type of the pointer to a pointer to the stored type.
10593  Value *BC = Builder.CreateBitCast(
10594  Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
10595 
10596  // Unaligned nontemporal store of the scalar value.
10597  StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
10598  SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
10599  SI->setAlignment(1);
10600  return SI;
10601  }
10602  // Rotate is a special case of funnel shift - 1st 2 args are the same.
10603  case X86::BI__builtin_ia32_vprotb:
10604  case X86::BI__builtin_ia32_vprotw:
10605  case X86::BI__builtin_ia32_vprotd:
10606  case X86::BI__builtin_ia32_vprotq:
10607  case X86::BI__builtin_ia32_vprotbi:
10608  case X86::BI__builtin_ia32_vprotwi:
10609  case X86::BI__builtin_ia32_vprotdi:
10610  case X86::BI__builtin_ia32_vprotqi:
10611  case X86::BI__builtin_ia32_prold128:
10612  case X86::BI__builtin_ia32_prold256:
10613  case X86::BI__builtin_ia32_prold512:
10614  case X86::BI__builtin_ia32_prolq128:
10615  case X86::BI__builtin_ia32_prolq256:
10616  case X86::BI__builtin_ia32_prolq512:
10617  case X86::BI__builtin_ia32_prolvd128:
10618  case X86::BI__builtin_ia32_prolvd256:
10619  case X86::BI__builtin_ia32_prolvd512:
10620  case X86::BI__builtin_ia32_prolvq128:
10621  case X86::BI__builtin_ia32_prolvq256:
10622  case X86::BI__builtin_ia32_prolvq512:
10623  return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
10624  case X86::BI__builtin_ia32_prord128:
10625  case X86::BI__builtin_ia32_prord256:
10626  case X86::BI__builtin_ia32_prord512:
10627  case X86::BI__builtin_ia32_prorq128:
10628  case X86::BI__builtin_ia32_prorq256:
10629  case X86::BI__builtin_ia32_prorq512:
10630  case X86::BI__builtin_ia32_prorvd128:
10631  case X86::BI__builtin_ia32_prorvd256:
10632  case X86::BI__builtin_ia32_prorvd512:
10633  case X86::BI__builtin_ia32_prorvq128:
10634  case X86::BI__builtin_ia32_prorvq256:
10635  case X86::BI__builtin_ia32_prorvq512:
10636  return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
10637  case X86::BI__builtin_ia32_selectb_128:
10638  case X86::BI__builtin_ia32_selectb_256:
10639  case X86::BI__builtin_ia32_selectb_512:
10640  case X86::BI__builtin_ia32_selectw_128:
10641  case X86::BI__builtin_ia32_selectw_256:
10642  case X86::BI__builtin_ia32_selectw_512:
10643  case X86::BI__builtin_ia32_selectd_128:
10644  case X86::BI__builtin_ia32_selectd_256:
10645  case X86::BI__builtin_ia32_selectd_512:
10646  case X86::BI__builtin_ia32_selectq_128:
10647  case X86::BI__builtin_ia32_selectq_256:
10648  case X86::BI__builtin_ia32_selectq_512:
10649  case X86::BI__builtin_ia32_selectps_128:
10650  case X86::BI__builtin_ia32_selectps_256:
10651  case X86::BI__builtin_ia32_selectps_512:
10652  case X86::BI__builtin_ia32_selectpd_128:
10653  case X86::BI__builtin_ia32_selectpd_256:
10654  case X86::BI__builtin_ia32_selectpd_512:
10655  return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
10656  case X86::BI__builtin_ia32_selectss_128:
10657  case X86::BI__builtin_ia32_selectsd_128: {
10658  Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
10659  Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
10660  A = EmitX86ScalarSelect(*this, Ops[0], A, B);
10661  return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
10662  }
10663  case X86::BI__builtin_ia32_cmpb128_mask:
10664  case X86::BI__builtin_ia32_cmpb256_mask:
10665  case X86::BI__builtin_ia32_cmpb512_mask:
10666  case X86::BI__builtin_ia32_cmpw128_mask:
10667  case X86::BI__builtin_ia32_cmpw256_mask:
10668  case X86::BI__builtin_ia32_cmpw512_mask:
10669  case X86::BI__builtin_ia32_cmpd128_mask:
10670  case X86::BI__builtin_ia32_cmpd256_mask:
10671  case X86::BI__builtin_ia32_cmpd512_mask:
10672  case X86::BI__builtin_ia32_cmpq128_mask:
10673  case X86::BI__builtin_ia32_cmpq256_mask:
10674  case X86::BI__builtin_ia32_cmpq512_mask: {
10675  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
10676  return EmitX86MaskedCompare(*this, CC, true, Ops);
10677  }
10678  case X86::BI__builtin_ia32_ucmpb128_mask:
10679  case X86::BI__builtin_ia32_ucmpb256_mask:
10680  case X86::BI__builtin_ia32_ucmpb512_mask:
10681  case X86::BI__builtin_ia32_ucmpw128_mask:
10682  case X86::BI__builtin_ia32_ucmpw256_mask:
10683  case X86::BI__builtin_ia32_ucmpw512_mask:
10684  case X86::BI__builtin_ia32_ucmpd128_mask:
10685  case X86::BI__builtin_ia32_ucmpd256_mask:
10686  case X86::BI__builtin_ia32_ucmpd512_mask:
10687  case X86::BI__builtin_ia32_ucmpq128_mask:
10688  case X86::BI__builtin_ia32_ucmpq256_mask:
10689  case X86::BI__builtin_ia32_ucmpq512_mask: {
10690  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
10691  return EmitX86MaskedCompare(*this, CC, false, Ops);
10692  }
10693 
10694  case X86::BI__builtin_ia32_kortestcqi:
10695  case X86::BI__builtin_ia32_kortestchi:
10696  case X86::BI__builtin_ia32_kortestcsi:
10697  case X86::BI__builtin_ia32_kortestcdi: {
10698  Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
10699  Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
10700  Value *Cmp = Builder.CreateICmpEQ(Or, C);
10701  return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
10702  }
10703  case X86::BI__builtin_ia32_kortestzqi:
10704  case X86::BI__builtin_ia32_kortestzhi:
10705  case X86::BI__builtin_ia32_kortestzsi:
10706  case X86::BI__builtin_ia32_kortestzdi: {
10707  Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
10708  Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
10709  Value *Cmp = Builder.CreateICmpEQ(Or, C);
10710  return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
10711  }
10712 
10713  case X86::BI__builtin_ia32_ktestcqi:
10714  case X86::BI__builtin_ia32_ktestzqi:
10715  case X86::BI__builtin_ia32_ktestchi:
10716  case X86::BI__builtin_ia32_ktestzhi:
10717  case X86::BI__builtin_ia32_ktestcsi:
10718  case X86::BI__builtin_ia32_ktestzsi:
10719  case X86::BI__builtin_ia32_ktestcdi:
10720  case X86::BI__builtin_ia32_ktestzdi: {
10721  Intrinsic::ID IID;
10722  switch (BuiltinID) {
10723  default: llvm_unreachable("Unsupported intrinsic!");
10724  case X86::BI__builtin_ia32_ktestcqi:
10725  IID = Intrinsic::x86_avx512_ktestc_b;
10726  break;
10727  case X86::BI__builtin_ia32_ktestzqi:
10728  IID = Intrinsic::x86_avx512_ktestz_b;
10729  break;
10730  case X86::BI__builtin_ia32_ktestchi:
10731  IID = Intrinsic::x86_avx512_ktestc_w;
10732  break;
10733  case X86::BI__builtin_ia32_ktestzhi:
10734  IID = Intrinsic::x86_avx512_ktestz_w;
10735  break;
10736  case X86::BI__builtin_ia32_ktestcsi:
10737  IID = Intrinsic::x86_avx512_ktestc_d;
10738  break;
10739  case X86::BI__builtin_ia32_ktestzsi:
10740  IID = Intrinsic::x86_avx512_ktestz_d;
10741  break;
10742  case X86::BI__builtin_ia32_ktestcdi:
10743  IID = Intrinsic::x86_avx512_ktestc_q;
10744  break;
10745  case X86::BI__builtin_ia32_ktestzdi:
10746  IID = Intrinsic::x86_avx512_ktestz_q;
10747  break;
10748  }
10749 
10750  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10751  Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
10752  Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
10753  Function *Intr = CGM.getIntrinsic(IID);
10754  return Builder.CreateCall(Intr, {LHS, RHS});
10755  }
10756 
10757  case X86::BI__builtin_ia32_kaddqi:
10758  case X86::BI__builtin_ia32_kaddhi:
10759  case X86::BI__builtin_ia32_kaddsi:
10760  case X86::BI__builtin_ia32_kadddi: {
10761  Intrinsic::ID IID;
10762  switch (BuiltinID) {
10763  default: llvm_unreachable("Unsupported intrinsic!");
10764  case X86::BI__builtin_ia32_kaddqi:
10765  IID = Intrinsic::x86_avx512_kadd_b;
10766  break;
10767  case X86::BI__builtin_ia32_kaddhi:
10768  IID = Intrinsic::x86_avx512_kadd_w;
10769  break;
10770  case X86::BI__builtin_ia32_kaddsi:
10771  IID = Intrinsic::x86_avx512_kadd_d;
10772  break;
10773  case X86::BI__builtin_ia32_kadddi:
10774  IID = Intrinsic::x86_avx512_kadd_q;
10775  break;
10776  }
10777 
10778  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10779  Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
10780  Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
10781  Function *Intr = CGM.getIntrinsic(IID);
10782  Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
10783  return Builder.CreateBitCast(Res, Ops[0]->getType());
10784  }
10785  case X86::BI__builtin_ia32_kandqi:
10786  case X86::BI__builtin_ia32_kandhi:
10787  case X86::BI__builtin_ia32_kandsi:
10788  case X86::BI__builtin_ia32_kanddi:
10789  return EmitX86MaskLogic(*this, Instruction::And, Ops);
10790  case X86::BI__builtin_ia32_kandnqi:
10791  case X86::BI__builtin_ia32_kandnhi:
10792  case X86::BI__builtin_ia32_kandnsi:
10793  case X86::BI__builtin_ia32_kandndi:
10794  return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
10795  case X86::BI__builtin_ia32_korqi:
10796  case X86::BI__builtin_ia32_korhi:
10797  case X86::BI__builtin_ia32_korsi:
10798  case X86::BI__builtin_ia32_kordi:
10799  return EmitX86MaskLogic(*this, Instruction::Or, Ops);
10800  case X86::BI__builtin_ia32_kxnorqi:
10801  case X86::BI__builtin_ia32_kxnorhi:
10802  case X86::BI__builtin_ia32_kxnorsi:
10803  case X86::BI__builtin_ia32_kxnordi:
10804  return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
10805  case X86::BI__builtin_ia32_kxorqi:
10806  case X86::BI__builtin_ia32_kxorhi:
10807  case X86::BI__builtin_ia32_kxorsi:
10808  case X86::BI__builtin_ia32_kxordi:
10809  return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
10810  case X86::BI__builtin_ia32_knotqi:
10811  case X86::BI__builtin_ia32_knothi:
10812  case X86::BI__builtin_ia32_knotsi:
10813  case X86::BI__builtin_ia32_knotdi: {
10814  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10815  Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
10816  return Builder.CreateBitCast(Builder.CreateNot(Res),
10817  Ops[0]->getType());
10818  }
10819  case X86::BI__builtin_ia32_kmovb:
10820  case X86::BI__builtin_ia32_kmovw:
10821  case X86::BI__builtin_ia32_kmovd:
10822  case X86::BI__builtin_ia32_kmovq: {
10823  // Bitcast to vXi1 type and then back to integer. This gets the mask
10824  // register type into the IR, but might be optimized out depending on
10825  // what's around it.
10826  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10827  Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
10828  return Builder.CreateBitCast(Res, Ops[0]->getType());
10829  }
10830 
10831  case X86::BI__builtin_ia32_kunpckdi:
10832  case X86::BI__builtin_ia32_kunpcksi:
10833  case X86::BI__builtin_ia32_kunpckhi: {
10834  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10835  Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
10836  Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
10837  uint32_t Indices[64];
10838  for (unsigned i = 0; i != NumElts; ++i)
10839  Indices[i] = i;
10840 
10841  // First extract half of each vector. This gives better codegen than
10842  // doing it in a single shuffle.
10843  LHS = Builder.CreateShuffleVector(LHS, LHS,
10844  makeArrayRef(Indices, NumElts / 2));
10845  RHS = Builder.CreateShuffleVector(RHS, RHS,
10846  makeArrayRef(Indices, NumElts / 2));
10847  // Concat the vectors.
10848  // NOTE: Operands are swapped to match the intrinsic definition.
10849  Value *Res = Builder.CreateShuffleVector(RHS, LHS,
10850  makeArrayRef(Indices, NumElts));
10851  return Builder.CreateBitCast(Res, Ops[0]->getType());
10852  }
10853 
10854  case X86::BI__builtin_ia32_vplzcntd_128:
10855  case X86::BI__builtin_ia32_vplzcntd_256:
10856  case X86::BI__builtin_ia32_vplzcntd_512:
10857  case X86::BI__builtin_ia32_vplzcntq_128:
10858  case X86::BI__builtin_ia32_vplzcntq_256:
10859  case X86::BI__builtin_ia32_vplzcntq_512: {
10860  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
10861  return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
10862  }
10863  case X86::BI__builtin_ia32_sqrtss:
10864  case X86::BI__builtin_ia32_sqrtsd: {
10865  Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
10866  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
10867  A = Builder.CreateCall(F, {A});
10868  return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
10869  }
10870  case X86::BI__builtin_ia32_sqrtsd_round_mask:
10871  case X86::BI__builtin_ia32_sqrtss_round_mask: {
10872  unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
10873  // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
10874  // otherwise keep the intrinsic.
10875  if (CC != 4) {
10876  Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtsd_round_mask ?
10877  Intrinsic::x86_avx512_mask_sqrt_sd :
10878  Intrinsic::x86_avx512_mask_sqrt_ss;
10879  return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
10880  }
10881  Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
10882  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
10883  A = Builder.CreateCall(F, A);
10884  Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
10885  A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
10886  return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
10887  }
10888  case X86::BI__builtin_ia32_sqrtpd256:
10889  case X86::BI__builtin_ia32_sqrtpd:
10890  case X86::BI__builtin_ia32_sqrtps256:
10891  case X86::BI__builtin_ia32_sqrtps:
10892  case X86::BI__builtin_ia32_sqrtps512:
10893  case X86::BI__builtin_ia32_sqrtpd512: {
10894  if (Ops.size() == 2) {
10895  unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10896  // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
10897  // otherwise keep the intrinsic.
10898  if (CC != 4) {
10899  Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512 ?
10900  Intrinsic::x86_avx512_sqrt_ps_512 :
10901  Intrinsic::x86_avx512_sqrt_pd_512;
10902  return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
10903  }
10904  }
10905  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
10906  return Builder.CreateCall(F, Ops[0]);
10907  }
10908  case X86::BI__builtin_ia32_pabsb128:
10909  case X86::BI__builtin_ia32_pabsw128:
10910  case X86::BI__builtin_ia32_pabsd128:
10911  case X86::BI__builtin_ia32_pabsb256:
10912  case X86::BI__builtin_ia32_pabsw256:
10913  case X86::BI__builtin_ia32_pabsd256:
10914  case X86::BI__builtin_ia32_pabsq128:
10915  case X86::BI__builtin_ia32_pabsq256:
10916  case X86::BI__builtin_ia32_pabsb512:
10917  case X86::BI__builtin_ia32_pabsw512:
10918  case X86::BI__builtin_ia32_pabsd512:
10919  case X86::BI__builtin_ia32_pabsq512:
10920  return EmitX86Abs(*this, Ops);
10921 
10922  case X86::BI__builtin_ia32_pmaxsb128:
10923  case X86::BI__builtin_ia32_pmaxsw128:
10924  case X86::BI__builtin_ia32_pmaxsd128:
10925  case X86::BI__builtin_ia32_pmaxsq128:
10926  case X86::BI__builtin_ia32_pmaxsb256:
10927  case X86::BI__builtin_ia32_pmaxsw256:
10928  case X86::BI__builtin_ia32_pmaxsd256:
10929  case X86::BI__builtin_ia32_pmaxsq256:
10930  case X86::BI__builtin_ia32_pmaxsb512:
10931  case X86::BI__builtin_ia32_pmaxsw512:
10932  case X86::BI__builtin_ia32_pmaxsd512:
10933  case X86::BI__builtin_ia32_pmaxsq512:
10934  return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
10935  case X86::BI__builtin_ia32_pmaxub128:
10936  case X86::BI__builtin_ia32_pmaxuw128:
10937  case X86::BI__builtin_ia32_pmaxud128:
10938  case X86::BI__builtin_ia32_pmaxuq128:
10939  case X86::BI__builtin_ia32_pmaxub256:
10940  case X86::BI__builtin_ia32_pmaxuw256:
10941  case X86::BI__builtin_ia32_pmaxud256:
10942  case X86::BI__builtin_ia32_pmaxuq256:
10943  case X86::BI__builtin_ia32_pmaxub512:
10944  case X86::BI__builtin_ia32_pmaxuw512:
10945  case X86::BI__builtin_ia32_pmaxud512:
10946  case X86::BI__builtin_ia32_pmaxuq512:
10947  return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
10948  case X86::BI__builtin_ia32_pminsb128:
10949  case X86::BI__builtin_ia32_pminsw128:
10950  case X86::BI__builtin_ia32_pminsd128:
10951  case X86::BI__builtin_ia32_pminsq128:
10952  case X86::BI__builtin_ia32_pminsb256:
10953  case X86::BI__builtin_ia32_pminsw256:
10954  case X86::BI__builtin_ia32_pminsd256:
10955  case X86::BI__builtin_ia32_pminsq256:
10956  case X86::BI__builtin_ia32_pminsb512:
10957  case X86::BI__builtin_ia32_pminsw512:
10958  case X86::BI__builtin_ia32_pminsd512:
10959  case X86::BI__builtin_ia32_pminsq512:
10960  return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
10961  case X86::BI__builtin_ia32_pminub128:
10962  case X86::BI__builtin_ia32_pminuw128:
10963  case X86::BI__builtin_ia32_pminud128:
10964  case X86::BI__builtin_ia32_pminuq128:
10965  case X86::BI__builtin_ia32_pminub256:
10966  case X86::BI__builtin_ia32_pminuw256:
10967  case X86::BI__builtin_ia32_pminud256:
10968  case X86::BI__builtin_ia32_pminuq256:
10969  case X86::BI__builtin_ia32_pminub512:
10970  case X86::BI__builtin_ia32_pminuw512:
10971  case X86::BI__builtin_ia32_pminud512:
10972  case X86::BI__builtin_ia32_pminuq512:
10973  return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
10974 
10975  case X86::BI__builtin_ia32_pmuludq128:
10976  case X86::BI__builtin_ia32_pmuludq256:
10977  case X86::BI__builtin_ia32_pmuludq512:
10978  return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
10979 
10980  case X86::BI__builtin_ia32_pmuldq128:
10981  case X86::BI__builtin_ia32_pmuldq256:
10982  case X86::BI__builtin_ia32_pmuldq512:
10983  return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
10984 
10985  case X86::BI__builtin_ia32_pternlogd512_mask:
10986  case X86::BI__builtin_ia32_pternlogq512_mask:
10987  case X86::BI__builtin_ia32_pternlogd128_mask:
10988  case X86::BI__builtin_ia32_pternlogd256_mask:
10989  case X86::BI__builtin_ia32_pternlogq128_mask:
10990  case X86::BI__builtin_ia32_pternlogq256_mask:
10991  return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
10992 
10993  case X86::BI__builtin_ia32_pternlogd512_maskz:
10994  case X86::BI__builtin_ia32_pternlogq512_maskz:
10995  case X86::BI__builtin_ia32_pternlogd128_maskz:
10996  case X86::BI__builtin_ia32_pternlogd256_maskz:
10997  case X86::BI__builtin_ia32_pternlogq128_maskz:
10998  case X86::BI__builtin_ia32_pternlogq256_maskz:
10999  return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
11000 
11001  case X86::BI__builtin_ia32_vpshldd128:
11002  case X86::BI__builtin_ia32_vpshldd256:
11003  case X86::BI__builtin_ia32_vpshldd512:
11004  case X86::BI__builtin_ia32_vpshldq128:
11005  case X86::BI__builtin_ia32_vpshldq256:
11006  case X86::BI__builtin_ia32_vpshldq512:
11007  case X86::BI__builtin_ia32_vpshldw128:
11008  case X86::BI__builtin_ia32_vpshldw256:
11009  case X86::BI__builtin_ia32_vpshldw512:
11010  return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
11011 
11012  case X86::BI__builtin_ia32_vpshrdd128:
11013  case X86::BI__builtin_ia32_vpshrdd256:
11014  case X86::BI__builtin_ia32_vpshrdd512:
11015  case X86::BI__builtin_ia32_vpshrdq128:
11016  case X86::BI__builtin_ia32_vpshrdq256:
11017  case X86::BI__builtin_ia32_vpshrdq512:
11018  case X86::BI__builtin_ia32_vpshrdw128:
11019  case X86::BI__builtin_ia32_vpshrdw256:
11020  case X86::BI__builtin_ia32_vpshrdw512:
11021  // Ops 0 and 1 are swapped.
11022  return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
11023 
11024  case X86::BI__builtin_ia32_vpshldvd128:
11025  case X86::BI__builtin_ia32_vpshldvd256:
11026  case X86::BI__builtin_ia32_vpshldvd512:
11027  case X86::BI__builtin_ia32_vpshldvq128:
11028  case X86::BI__builtin_ia32_vpshldvq256:
11029  case X86::BI__builtin_ia32_vpshldvq512:
11030  case X86::BI__builtin_ia32_vpshldvw128:
11031  case X86::BI__builtin_ia32_vpshldvw256:
11032  case X86::BI__builtin_ia32_vpshldvw512:
11033  return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
11034 
11035  case X86::BI__builtin_ia32_vpshrdvd128:
11036  case X86::BI__builtin_ia32_vpshrdvd256:
11037  case X86::BI__builtin_ia32_vpshrdvd512:
11038  case X86::BI__builtin_ia32_vpshrdvq128:
11039  case X86::BI__builtin_ia32_vpshrdvq256:
11040  case X86::BI__builtin_ia32_vpshrdvq512:
11041  case X86::BI__builtin_ia32_vpshrdvw128:
11042  case X86::BI__builtin_ia32_vpshrdvw256:
11043  case X86::BI__builtin_ia32_vpshrdvw512:
11044  // Ops 0 and 1 are swapped.
11045  return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
11046 
11047  // 3DNow!
11048  case X86::BI__builtin_ia32_pswapdsf:
11049  case X86::BI__builtin_ia32_pswapdsi: {
11050  llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
11051  Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
11052  llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
11053  return Builder.CreateCall(F, Ops, "pswapd");
11054  }
11055  case X86::BI__builtin_ia32_rdrand16_step:
11056  case X86::BI__builtin_ia32_rdrand32_step:
11057  case X86::BI__builtin_ia32_rdrand64_step:
11058  case X86::BI__builtin_ia32_rdseed16_step:
11059  case X86::BI__builtin_ia32_rdseed32_step:
11060  case X86::BI__builtin_ia32_rdseed64_step: {
11061  Intrinsic::ID ID;
11062  switch (BuiltinID) {
11063  default: llvm_unreachable("Unsupported intrinsic!");
11064  case X86::BI__builtin_ia32_rdrand16_step:
11065  ID = Intrinsic::x86_rdrand_16;
11066  break;
11067  case X86::BI__builtin_ia32_rdrand32_step:
11068  ID = Intrinsic::x86_rdrand_32;
11069  break;
11070  case X86::BI__builtin_ia32_rdrand64_step:
11071  ID = Intrinsic::x86_rdrand_64;
11072  break;
11073  case X86::BI__builtin_ia32_rdseed16_step:
11074  ID = Intrinsic::x86_rdseed_16;
11075  break;
11076  case X86::BI__builtin_ia32_rdseed32_step:
11077  ID = Intrinsic::x86_rdseed_32;
11078  break;
11079  case X86::BI__builtin_ia32_rdseed64_step:
11080  ID = Intrinsic::x86_rdseed_64;
11081  break;
11082  }
11083 
11084  Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
11085  Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
11086  Ops[0]);
11087  return Builder.CreateExtractValue(Call, 1);
11088  }
11089  case X86::BI__builtin_ia32_addcarryx_u32:
11090  case X86::BI__builtin_ia32_addcarryx_u64:
11091  case X86::BI__builtin_ia32_subborrow_u32:
11092  case X86::BI__builtin_ia32_subborrow_u64: {
11093  Intrinsic::ID IID;
11094  switch (BuiltinID) {
11095  default: llvm_unreachable("Unsupported intrinsic!");
11096  case X86::BI__builtin_ia32_addcarryx_u32:
11097  IID = Intrinsic::x86_addcarry_32;
11098  break;
11099  case X86::BI__builtin_ia32_addcarryx_u64:
11100  IID = Intrinsic::x86_addcarry_64;
11101  break;
11102  case X86::BI__builtin_ia32_subborrow_u32:
11103  IID = Intrinsic::x86_subborrow_32;
11104  break;
11105  case X86::BI__builtin_ia32_subborrow_u64:
11106  IID = Intrinsic::x86_subborrow_64;
11107  break;
11108  }
11109 
11110  Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
11111  { Ops[0], Ops[1], Ops[2] });
11112  Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
11113  Ops[3]);
11114  return Builder.CreateExtractValue(Call, 0);
11115  }
11116 
11117  case X86::BI__builtin_ia32_fpclassps128_mask:
11118  case X86::BI__builtin_ia32_fpclassps256_mask:
11119  case X86::BI__builtin_ia32_fpclassps512_mask:
11120  case X86::BI__builtin_ia32_fpclasspd128_mask:
11121  case X86::BI__builtin_ia32_fpclasspd256_mask:
11122  case X86::BI__builtin_ia32_fpclasspd512_mask: {
11123  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
11124  Value *MaskIn = Ops[2];
11125  Ops.erase(&Ops[2]);
11126 
11127  Intrinsic::ID ID;
11128  switch (BuiltinID) {
11129  default: llvm_unreachable("Unsupported intrinsic!");
11130  case X86::BI__builtin_ia32_fpclassps128_mask:
11131  ID = Intrinsic::x86_avx512_fpclass_ps_128;
11132  break;
11133  case X86::BI__builtin_ia32_fpclassps256_mask:
11134  ID = Intrinsic::x86_avx512_fpclass_ps_256;
11135  break;
11136  case X86::BI__builtin_ia32_fpclassps512_mask:
11137  ID = Intrinsic::x86_avx512_fpclass_ps_512;
11138  break;
11139  case X86::BI__builtin_ia32_fpclasspd128_mask:
11140  ID = Intrinsic::x86_avx512_fpclass_pd_128;
11141  break;
11142  case X86::BI__builtin_ia32_fpclasspd256_mask:
11143  ID = Intrinsic::x86_avx512_fpclass_pd_256;
11144  break;
11145  case X86::BI__builtin_ia32_fpclasspd512_mask:
11146  ID = Intrinsic::x86_avx512_fpclass_pd_512;
11147  break;
11148  }
11149 
11150  Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
11151  return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
11152  }
11153 
11154  case X86::BI__builtin_ia32_vpmultishiftqb128:
11155  case X86::BI__builtin_ia32_vpmultishiftqb256:
11156  case X86::BI__builtin_ia32_vpmultishiftqb512: {
11157  Intrinsic::ID ID;
11158  switch (BuiltinID) {
11159  default: llvm_unreachable("Unsupported intrinsic!");
11160  case X86::BI__builtin_ia32_vpmultishiftqb128:
11161  ID = Intrinsic::x86_avx512_pmultishift_qb_128;
11162  break;
11163  case X86::BI__builtin_ia32_vpmultishiftqb256:
11164  ID = Intrinsic::x86_avx512_pmultishift_qb_256;
11165  break;
11166  case X86::BI__builtin_ia32_vpmultishiftqb512:
11167  ID = Intrinsic::x86_avx512_pmultishift_qb_512;
11168  break;
11169  }
11170 
11171  return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
11172  }
11173 
11174  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
11175  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
11176  case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
11177  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
11178  Value *MaskIn = Ops[2];
11179  Ops.erase(&Ops[2]);
11180 
11181  Intrinsic::ID ID;
11182  switch (BuiltinID) {
11183  default: llvm_unreachable("Unsupported intrinsic!");
11184  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
11185  ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
11186  break;
11187  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
11188  ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
11189  break;
11190  case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
11191  ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
11192  break;
11193  }
11194 
11195  Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
11196  return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
11197  }
11198 
11199  // packed comparison intrinsics
11200  case X86::BI__builtin_ia32_cmpeqps:
11201  case X86::BI__builtin_ia32_cmpeqpd:
11202  return getVectorFCmpIR(CmpInst::FCMP_OEQ);
11203  case X86::BI__builtin_ia32_cmpltps:
11204  case X86::BI__builtin_ia32_cmpltpd:
11205  return getVectorFCmpIR(CmpInst::FCMP_OLT);
11206  case X86::BI__builtin_ia32_cmpleps:
11207  case X86::BI__builtin_ia32_cmplepd:
11208  return getVectorFCmpIR(CmpInst::FCMP_OLE);
11209  case X86::BI__builtin_ia32_cmpunordps:
11210  case X86::BI__builtin_ia32_cmpunordpd:
11211  return getVectorFCmpIR(CmpInst::FCMP_UNO);
11212  case X86::BI__builtin_ia32_cmpneqps:
11213  case X86::BI__builtin_ia32_cmpneqpd:
11214  return getVectorFCmpIR(CmpInst::FCMP_UNE);
11215  case X86::BI__builtin_ia32_cmpnltps:
11216  case X86::BI__builtin_ia32_cmpnltpd:
11217  return getVectorFCmpIR(CmpInst::FCMP_UGE);
11218  case X86::BI__builtin_ia32_cmpnleps:
11219  case X86::BI__builtin_ia32_cmpnlepd:
11220  return getVectorFCmpIR(CmpInst::FCMP_UGT);
11221  case X86::BI__builtin_ia32_cmpordps:
11222  case X86::BI__builtin_ia32_cmpordpd:
11223  return getVectorFCmpIR(CmpInst::FCMP_ORD);
11224  case X86::BI__builtin_ia32_cmpps:
11225  case X86::BI__builtin_ia32_cmpps256:
11226  case X86::BI__builtin_ia32_cmppd:
11227  case X86::BI__builtin_ia32_cmppd256:
11228  case X86::BI__builtin_ia32_cmpps128_mask:
11229  case X86::BI__builtin_ia32_cmpps256_mask:
11230  case X86::BI__builtin_ia32_cmpps512_mask:
11231  case X86::BI__builtin_ia32_cmppd128_mask:
11232  case X86::BI__builtin_ia32_cmppd256_mask:
11233  case X86::BI__builtin_ia32_cmppd512_mask: {
11234  // Lowering vector comparisons to fcmp instructions, while
11235  // ignoring signalling behaviour requested
11236  // ignoring rounding mode requested
11237  // This is is only possible as long as FENV_ACCESS is not implemented.
11238  // See also: https://reviews.llvm.org/D45616
11239 
11240  // The third argument is the comparison condition, and integer in the
11241  // range [0, 31]
11242  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
11243 
11244  // Lowering to IR fcmp instruction.
11245  // Ignoring requested signaling behaviour,
11246  // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
11247  FCmpInst::Predicate Pred;
11248  switch (CC) {
11249  case 0x00: Pred = FCmpInst::FCMP_OEQ; break;
11250  case 0x01: Pred = FCmpInst::FCMP_OLT; break;
11251  case 0x02: Pred = FCmpInst::FCMP_OLE; break;
11252  case 0x03: Pred = FCmpInst::FCMP_UNO; break;
11253  case 0x04: Pred = FCmpInst::FCMP_UNE; break;
11254  case 0x05: Pred = FCmpInst::FCMP_UGE; break;
11255  case 0x06: Pred = FCmpInst::FCMP_UGT; break;
11256  case 0x07: Pred = FCmpInst::FCMP_ORD; break;
11257  case 0x08: Pred = FCmpInst::FCMP_UEQ; break;
11258  case 0x09: Pred = FCmpInst::FCMP_ULT; break;
11259  case 0x0a: Pred = FCmpInst::FCMP_ULE; break;
11260  case 0x0b: Pred = FCmpInst::FCMP_FALSE; break;
11261  case 0x0c: Pred = FCmpInst::FCMP_ONE; break;
11262  case 0x0d: Pred = FCmpInst::FCMP_OGE; break;
11263  case 0x0e: Pred = FCmpInst::FCMP_OGT; break;
11264  case 0x0f: Pred = FCmpInst::FCMP_TRUE; break;
11265  case 0x10: Pred = FCmpInst::FCMP_OEQ; break;
11266  case 0x11: Pred = FCmpInst::FCMP_OLT; break;
11267  case 0x12: Pred = FCmpInst::FCMP_OLE; break;
11268  case 0x13: Pred = FCmpInst::FCMP_UNO; break;
11269  case 0x14: Pred = FCmpInst::FCMP_UNE; break;
11270  case 0x15: Pred = FCmpInst::FCMP_UGE; break;
11271  case 0x16: Pred = FCmpInst::FCMP_UGT; break;
11272  case 0x17: Pred = FCmpInst::FCMP_ORD; break;
11273  case 0x18: Pred = FCmpInst::FCMP_UEQ; break;
11274  case 0x19: Pred = FCmpInst::FCMP_ULT; break;
11275  case 0x1a: Pred = FCmpInst::FCMP_ULE; break;
11276  case 0x1b: Pred = FCmpInst::FCMP_FALSE; break;
11277  case 0x1c: Pred = FCmpInst::FCMP_ONE; break;
11278  case 0x1d: Pred = FCmpInst::FCMP_OGE; break;
11279  case 0x1e: Pred = FCmpInst::FCMP_OGT; break;
11280  case 0x1f: Pred = FCmpInst::FCMP_TRUE; break;
11281  default: llvm_unreachable("Unhandled CC");
11282  }
11283 
11284  // Builtins without the _mask suffix return a vector of integers
11285  // of the same width as the input vectors
11286  switch (BuiltinID) {
11287  case X86::BI__builtin_ia32_cmpps512_mask:
11288  case X86::BI__builtin_ia32_cmppd512_mask:
11289  case X86::BI__builtin_ia32_cmpps128_mask:
11290  case X86::BI__builtin_ia32_cmpps256_mask:
11291  case X86::BI__builtin_ia32_cmppd128_mask:
11292  case X86::BI__builtin_ia32_cmppd256_mask: {
11293  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
11294  Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
11295  return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
11296  }
11297  default:
11298  return getVectorFCmpIR(Pred);
11299  }
11300  }
11301 
11302  // SSE scalar comparison intrinsics
11303  case X86::BI__builtin_ia32_cmpeqss:
11304  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
11305  case X86::BI__builtin_ia32_cmpltss:
11306  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
11307  case X86::BI__builtin_ia32_cmpless:
11308  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
11309  case X86::BI__builtin_ia32_cmpunordss:
11310  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
11311  case X86::BI__builtin_ia32_cmpneqss:
11312  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
11313  case X86::BI__builtin_ia32_cmpnltss:
11314  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
11315  case X86::BI__builtin_ia32_cmpnless:
11316  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
11317  case X86::BI__builtin_ia32_cmpordss:
11318  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
11319  case X86::BI__builtin_ia32_cmpeqsd:
11320  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
11321  case X86::BI__builtin_ia32_cmpltsd:
11322  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
11323  case X86::BI__builtin_ia32_cmplesd:
11324  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
11325  case X86::BI__builtin_ia32_cmpunordsd:
11326  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
11327  case X86::BI__builtin_ia32_cmpneqsd:
11328  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
11329  case X86::BI__builtin_ia32_cmpnltsd:
11330  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
11331  case X86::BI__builtin_ia32_cmpnlesd:
11332  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
11333  case X86::BI__builtin_ia32_cmpordsd:
11334  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
11335 
11336  case X86::BI__emul:
11337  case X86::BI__emulu: {
11338  llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
11339  bool isSigned = (BuiltinID == X86::BI__emul);
11340  Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
11341  Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
11342  return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
11343  }
11344  case X86::BI__mulh:
11345  case X86::BI__umulh:
11346  case X86::BI_mul128:
11347  case X86::BI_umul128: {
11348  llvm::Type *ResType = ConvertType(E->getType());
11349  llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11350 
11351  bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
11352  Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
11353  Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
11354 
11355  Value *MulResult, *HigherBits;
11356  if (IsSigned) {
11357  MulResult = Builder.CreateNSWMul(LHS, RHS);
11358  HigherBits = Builder.CreateAShr(MulResult, 64);
11359  } else {
11360  MulResult = Builder.CreateNUWMul(LHS, RHS);
11361  HigherBits = Builder.CreateLShr(MulResult, 64);
11362  }
11363  HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11364 
11365  if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
11366  return HigherBits;
11367 
11368  Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
11369  Builder.CreateStore(HigherBits, HighBitsAddress);
11370  return Builder.CreateIntCast(MulResult, ResType, IsSigned);
11371  }
11372 
11373  case X86::BI__faststorefence: {
11374  return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11376  }
11377  case X86::BI__shiftleft128:
11378  case X86::BI__shiftright128: {
11379  // FIXME: Once fshl/fshr no longer add an unneeded and and cmov, do this:
11380  // llvm::Function *F = CGM.getIntrinsic(
11381  // BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
11382  // Int64Ty);
11383  // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
11384  // return Builder.CreateCall(F, Ops);
11385  llvm::Type *Int128Ty = Builder.getInt128Ty();
11386  Value *Val = Builder.CreateOr(
11387  Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64),
11388  Builder.CreateZExt(Ops[0], Int128Ty));
11389  Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty),
11390  llvm::ConstantInt::get(Int128Ty, 0x3f));
11391  Value *Res;
11392  if (BuiltinID == X86::BI__shiftleft128)
11393  Res = Builder.CreateLShr(Builder.CreateShl(Val, Amt), 64);
11394  else
11395  Res = Builder.CreateLShr(Val, Amt);
11396  return Builder.CreateTrunc(Res, Int64Ty);
11397  }
11398  case X86::BI_ReadWriteBarrier:
11399  case X86::BI_ReadBarrier:
11400  case X86::BI_WriteBarrier: {
11401  return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11402  llvm::SyncScope::SingleThread);
11403  }
11404  case X86::BI_BitScanForward:
11405  case X86::BI_BitScanForward64:
11406  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
11407  case X86::BI_BitScanReverse:
11408  case X86::BI_BitScanReverse64:
11409  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
11410 
11411  case X86::BI_InterlockedAnd64:
11412  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
11413  case X86::BI_InterlockedExchange64:
11414  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
11415  case X86::BI_InterlockedExchangeAdd64:
11416  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
11417  case X86::BI_InterlockedExchangeSub64:
11418  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
11419  case X86::BI_InterlockedOr64:
11420  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
11421  case X86::BI_InterlockedXor64:
11422  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
11423  case X86::BI_InterlockedDecrement64:
11424  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
11425  case X86::BI_InterlockedIncrement64:
11426  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
11427  case X86::BI_InterlockedCompareExchange128: {
11428  // InterlockedCompareExchange128 doesn't directly refer to 128bit ints,
11429  // instead it takes pointers to 64bit ints for Destination and
11430  // ComparandResult, and exchange is taken as two 64bit ints (high & low).
11431  // The previous value is written to ComparandResult, and success is
11432  // returned.
11433 
11434  llvm::Type *Int128Ty = Builder.getInt128Ty();
11435  llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
11436 
11437  Value *Destination =
11438  Builder.CreateBitCast(Ops[0], Int128PtrTy);
11439  Value *ExchangeHigh128 = Builder.CreateZExt(Ops[1], Int128Ty);
11440  Value *ExchangeLow128 = Builder.CreateZExt(Ops[2], Int128Ty);
11441  Address ComparandResult(Builder.CreateBitCast(Ops[3], Int128PtrTy),
11442  getContext().toCharUnitsFromBits(128));
11443 
11444  Value *Exchange = Builder.CreateOr(
11445  Builder.CreateShl(ExchangeHigh128, 64, "", false, false),
11446  ExchangeLow128);
11447 
11448  Value *Comparand = Builder.CreateLoad(ComparandResult);
11449 
11450  AtomicCmpXchgInst *CXI =
11451  Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
11452  AtomicOrdering::SequentiallyConsistent,
11453  AtomicOrdering::SequentiallyConsistent);
11454  CXI->setVolatile(true);
11455 
11456  // Write the result back to the inout pointer.
11457  Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult);
11458 
11459  // Get the success boolean and zero extend it to i8.
11460  Value *Success = Builder.CreateExtractValue(CXI, 1);
11461  return Builder.CreateZExt(Success, ConvertType(E->getType()));
11462  }
11463 
11464  case X86::BI_AddressOfReturnAddress: {
11465  Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
11466  return Builder.CreateCall(F);
11467  }
11468  case X86::BI__stosb: {
11469  // We treat __stosb as a volatile memset - it may not generate "rep stosb"
11470  // instruction, but it will create a memset that won't be optimized away.
11471  return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
11472  }
11473  case X86::BI__ud2:
11474  // llvm.trap makes a ud2a instruction on x86.
11475  return EmitTrapCall(Intrinsic::trap);
11476  case X86::BI__int2c: {
11477  // This syscall signals a driver assertion failure in x86 NT kernels.
11478  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
11479  llvm::InlineAsm *IA =
11480  llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
11481  llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
11482  getLLVMContext(), llvm::AttributeList::FunctionIndex,
11483  llvm::Attribute::NoReturn);
11484  CallSite CS = Builder.CreateCall(IA);
11485  CS.setAttributes(NoReturnAttr);
11486  return CS.getInstruction();
11487  }
11488  case X86::BI__readfsbyte:
11489  case X86::BI__readfsword:
11490  case X86::BI__readfsdword:
11491  case X86::BI__readfsqword: {
11492  llvm::Type *IntTy = ConvertType(E->getType());
11493  Value *Ptr =
11494  Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257));
11495  LoadInst *Load = Builder.CreateAlignedLoad(
11496  IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
11497  Load->setVolatile(true);
11498  return Load;
11499  }
11500  case X86::BI__readgsbyte:
11501  case X86::BI__readgsword:
11502  case X86::BI__readgsdword:
11503  case X86::BI__readgsqword: {
11504  llvm::Type *IntTy = ConvertType(E->getType());
11505  Value *Ptr =
11506  Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 256));
11507  LoadInst *Load = Builder.CreateAlignedLoad(
11508  IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
11509  Load->setVolatile(true);
11510  return Load;
11511  }
11512  case X86::BI__builtin_ia32_paddsb512:
11513  case X86::BI__builtin_ia32_paddsw512:
11514  case X86::BI__builtin_ia32_paddsb256:
11515  case X86::BI__builtin_ia32_paddsw256:
11516  case X86::BI__builtin_ia32_paddsb128:
11517  case X86::BI__builtin_ia32_paddsw128:
11518  return EmitX86AddSubSatExpr(*this, Ops, true, true);
11519  case X86::BI__builtin_ia32_paddusb512:
11520  case X86::BI__builtin_ia32_paddusw512:
11521  case X86::BI__builtin_ia32_paddusb256:
11522  case X86::BI__builtin_ia32_paddusw256:
11523  case X86::BI__builtin_ia32_paddusb128:
11524  case X86::BI__builtin_ia32_paddusw128:
11525  return EmitX86AddSubSatExpr(*this, Ops, false, true);
11526  case X86::BI__builtin_ia32_psubsb512:
11527  case X86::BI__builtin_ia32_psubsw512:
11528  case X86::BI__builtin_ia32_psubsb256:
11529  case X86::BI__builtin_ia32_psubsw256:
11530  case X86::BI__builtin_ia32_psubsb128:
11531  case X86::BI__builtin_ia32_psubsw128:
11532  return EmitX86AddSubSatExpr(*this, Ops, true, false);
11533  case X86::BI__builtin_ia32_psubusb512:
11534  case X86::BI__builtin_ia32_psubusw512:
11535  case X86::BI__builtin_ia32_psubusb256:
11536  case X86::BI__builtin_ia32_psubusw256:
11537  case X86::BI__builtin_ia32_psubusb128:
11538  case X86::BI__builtin_ia32_psubusw128:
11539  return EmitX86AddSubSatExpr(*this, Ops, false, false);
11540  }
11541 }
11542 
11544  const CallExpr *E) {
11546 
11547  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
11548  Ops.push_back(EmitScalarExpr(E->getArg(i)));
11549 
11550  Intrinsic::ID ID = Intrinsic::not_intrinsic;
11551 
11552  switch (BuiltinID) {
11553  default: return nullptr;
11554 
11555  // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
11556  // call __builtin_readcyclecounter.
11557  case PPC::BI__builtin_ppc_get_timebase:
11558  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
11559 
11560  // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
11561  case PPC::BI__builtin_altivec_lvx:
11562  case PPC::BI__builtin_altivec_lvxl:
11563  case PPC::BI__builtin_altivec_lvebx:
11564  case PPC::BI__builtin_altivec_lvehx:
11565  case PPC::BI__builtin_altivec_lvewx:
11566  case PPC::BI__builtin_altivec_lvsl:
11567  case PPC::BI__builtin_altivec_lvsr:
11568  case PPC::BI__builtin_vsx_lxvd2x:
11569  case PPC::BI__builtin_vsx_lxvw4x:
11570  case PPC::BI__builtin_vsx_lxvd2x_be:
11571  case PPC::BI__builtin_vsx_lxvw4x_be:
11572  case PPC::BI__builtin_vsx_lxvl:
11573  case PPC::BI__builtin_vsx_lxvll:
11574  {
11575  if(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
11576  BuiltinID == PPC::BI__builtin_vsx_lxvll){
11577  Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
11578  }else {
11579  Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
11580  Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
11581  Ops.pop_back();
11582  }
11583 
11584  switch (BuiltinID) {
11585  default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
11586  case PPC::BI__builtin_altivec_lvx:
11587  ID = Intrinsic::ppc_altivec_lvx;
11588  break;
11589  case PPC::BI__builtin_altivec_lvxl:
11590  ID = Intrinsic::ppc_altivec_lvxl;
11591  break;
11592  case PPC::BI__builtin_altivec_lvebx:
11593  ID = Intrinsic::ppc_altivec_lvebx;
11594  break;
11595  case PPC::BI__builtin_altivec_lvehx:
11596  ID = Intrinsic::ppc_altivec_lvehx;
11597  break;
11598  case PPC::BI__builtin_altivec_lvewx:
11599  ID = Intrinsic::ppc_altivec_lvewx;
11600  break;
11601  case PPC::BI__builtin_altivec_lvsl:
11602  ID = Intrinsic::ppc_altivec_lvsl;
11603  break;
11604  case PPC::BI__builtin_altivec_lvsr:
11605  ID = Intrinsic::ppc_altivec_lvsr;
11606  break;
11607  case PPC::BI__builtin_vsx_lxvd2x:
11608  ID = Intrinsic::ppc_vsx_lxvd2x;
11609  break;
11610  case PPC::BI__builtin_vsx_lxvw4x:
11611  ID = Intrinsic::ppc_vsx_lxvw4x;
11612  break;
11613  case PPC::BI__builtin_vsx_lxvd2x_be:
11614  ID = Intrinsic::ppc_vsx_lxvd2x_be;
11615  break;
11616  case PPC::BI__builtin_vsx_lxvw4x_be:
11617  ID = Intrinsic::ppc_vsx_lxvw4x_be;
11618  break;
11619  case PPC::BI__builtin_vsx_lxvl:
11620  ID = Intrinsic::ppc_vsx_lxvl;
11621  break;
11622  case PPC::BI__builtin_vsx_lxvll:
11623  ID = Intrinsic::ppc_vsx_lxvll;
11624  break;
11625  }
11626  llvm::Function *F = CGM.getIntrinsic(ID);
11627  return Builder.CreateCall(F, Ops, "");
11628  }
11629 
11630  // vec_st, vec_xst_be
11631  case PPC::BI__builtin_altivec_stvx:
11632  case PPC::BI__builtin_altivec_stvxl:
11633  case PPC::BI__builtin_altivec_stvebx:
11634  case PPC::BI__builtin_altivec_stvehx:
11635  case PPC::BI__builtin_altivec_stvewx:
11636  case PPC::BI__builtin_vsx_stxvd2x:
11637  case PPC::BI__builtin_vsx_stxvw4x:
11638  case PPC::BI__builtin_vsx_stxvd2x_be:
11639  case PPC::BI__builtin_vsx_stxvw4x_be:
11640  case PPC::BI__builtin_vsx_stxvl:
11641  case PPC::BI__builtin_vsx_stxvll:
11642  {
11643  if(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
11644  BuiltinID == PPC::BI__builtin_vsx_stxvll ){
11645  Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
11646  }else {
11647  Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
11648  Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
11649  Ops.pop_back();
11650  }
11651 
11652  switch (BuiltinID) {
11653  default: llvm_unreachable("Unsupported st intrinsic!");
11654  case PPC::BI__builtin_altivec_stvx:
11655  ID = Intrinsic::ppc_altivec_stvx;
11656  break;
11657  case PPC::BI__builtin_altivec_stvxl:
11658  ID = Intrinsic::ppc_altivec_stvxl;
11659  break;
11660  case PPC::BI__builtin_altivec_stvebx:
11661  ID = Intrinsic::ppc_altivec_stvebx;
11662  break;
11663  case PPC::BI__builtin_altivec_stvehx:
11664  ID = Intrinsic::ppc_altivec_stvehx;
11665  break;
11666  case PPC::BI__builtin_altivec_stvewx:
11667  ID = Intrinsic::ppc_altivec_stvewx;
11668  break;
11669  case PPC::BI__builtin_vsx_stxvd2x:
11670  ID = Intrinsic::ppc_vsx_stxvd2x;
11671  break;
11672  case PPC::BI__builtin_vsx_stxvw4x:
11673  ID = Intrinsic::ppc_vsx_stxvw4x;
11674  break;
11675  case PPC::BI__builtin_vsx_stxvd2x_be:
11676  ID = Intrinsic::ppc_vsx_stxvd2x_be;
11677  break;
11678  case PPC::BI__builtin_vsx_stxvw4x_be:
11679  ID = Intrinsic::ppc_vsx_stxvw4x_be;
11680  break;
11681  case PPC::BI__builtin_vsx_stxvl:
11682  ID = Intrinsic::ppc_vsx_stxvl;
11683  break;
11684  case PPC::BI__builtin_vsx_stxvll:
11685  ID = Intrinsic::ppc_vsx_stxvll;
11686  break;
11687  }
11688  llvm::Function *F = CGM.getIntrinsic(ID);
11689  return Builder.CreateCall(F, Ops, "");
11690  }
11691  // Square root
11692  case PPC::BI__builtin_vsx_xvsqrtsp:
11693  case PPC::BI__builtin_vsx_xvsqrtdp: {
11694  llvm::Type *ResultType = ConvertType(E->getType());
11695  Value *X = EmitScalarExpr(E->getArg(0));
11696  ID = Intrinsic::sqrt;
11697  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
11698  return Builder.CreateCall(F, X);
11699  }
11700  // Count leading zeros
11701  case PPC::BI__builtin_altivec_vclzb:
11702  case PPC::BI__builtin_altivec_vclzh:
11703  case PPC::BI__builtin_altivec_vclzw:
11704  case PPC::BI__builtin_altivec_vclzd: {
11705  llvm::Type *ResultType = ConvertType(E->getType());
11706  Value *X = EmitScalarExpr(E->getArg(0));
11707  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
11708  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
11709  return Builder.CreateCall(F, {X, Undef});
11710  }
11711  case PPC::BI__builtin_altivec_vctzb:
11712  case PPC::BI__builtin_altivec_vctzh:
11713  case PPC::BI__builtin_altivec_vctzw:
11714  case PPC::BI__builtin_altivec_vctzd: {
11715  llvm::Type *ResultType = ConvertType(E->getType());
11716  Value *X = EmitScalarExpr(E->getArg(0));
11717  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
11718  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
11719  return Builder.CreateCall(F, {X, Undef});
11720  }
11721  case PPC::BI__builtin_altivec_vpopcntb:
11722  case PPC::BI__builtin_altivec_vpopcnth:
11723  case PPC::BI__builtin_altivec_vpopcntw:
11724  case PPC::BI__builtin_altivec_vpopcntd: {
11725  llvm::Type *ResultType = ConvertType(E->getType());
11726  Value *X = EmitScalarExpr(E->getArg(0));
11727  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
11728  return Builder.CreateCall(F, X);
11729  }
11730  // Copy sign
11731  case PPC::BI__builtin_vsx_xvcpsgnsp:
11732  case PPC::BI__builtin_vsx_xvcpsgndp: {
11733  llvm::Type *ResultType = ConvertType(E->getType());
11734  Value *X = EmitScalarExpr(E->getArg(0));
11735  Value *Y = EmitScalarExpr(E->getArg(1));
11736  ID = Intrinsic::copysign;
11737  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
11738  return Builder.CreateCall(F, {X, Y});
11739  }
11740  // Rounding/truncation
11741  case PPC::BI__builtin_vsx_xvrspip:
11742  case PPC::BI__builtin_vsx_xvrdpip:
11743  case PPC::BI__builtin_vsx_xvrdpim:
11744  case PPC::BI__builtin_vsx_xvrspim:
11745  case PPC::BI__builtin_vsx_xvrdpi:
11746  case PPC::BI__builtin_vsx_xvrspi:
11747  case PPC::BI__builtin_vsx_xvrdpic:
11748  case PPC::BI__builtin_vsx_xvrspic:
11749  case PPC::BI__builtin_vsx_xvrdpiz:
11750  case PPC::BI__builtin_vsx_xvrspiz: {
11751  llvm::Type *ResultType = ConvertType(E->getType());
11752  Value *X = EmitScalarExpr(E->getArg(0));
11753  if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
11754  BuiltinID == PPC::BI__builtin_vsx_xvrspim)
11755  ID = Intrinsic::floor;
11756  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
11757  BuiltinID == PPC::BI__builtin_vsx_xvrspi)
11758  ID = Intrinsic::round;
11759  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
11760  BuiltinID == PPC::BI__builtin_vsx_xvrspic)
11761  ID = Intrinsic::nearbyint;
11762  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
11763  BuiltinID == PPC::BI__builtin_vsx_xvrspip)
11764  ID = Intrinsic::ceil;
11765  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
11766  BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
11767  ID = Intrinsic::trunc;
11768  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
11769  return Builder.CreateCall(F, X);
11770  }
11771 
11772  // Absolute value
11773  case PPC::BI__builtin_vsx_xvabsdp:
11774  case PPC::BI__builtin_vsx_xvabssp: {
11775  llvm::Type *ResultType = ConvertType(E->getType());
11776  Value *X = EmitScalarExpr(E->getArg(0));
11777  llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
11778  return Builder.CreateCall(F, X);
11779  }
11780 
11781  // FMA variations
11782  case PPC::BI__builtin_vsx_xvmaddadp:
11783  case PPC::BI__builtin_vsx_xvmaddasp:
11784  case PPC::BI__builtin_vsx_xvnmaddadp:
11785  case PPC::BI__builtin_vsx_xvnmaddasp:
11786  case PPC::BI__builtin_vsx_xvmsubadp:
11787  case PPC::BI__builtin_vsx_xvmsubasp:
11788  case PPC::BI__builtin_vsx_xvnmsubadp:
11789  case PPC::BI__builtin_vsx_xvnmsubasp: {
11790  llvm::Type *ResultType = ConvertType(E->getType());
11791  Value *X = EmitScalarExpr(E->getArg(0));
11792  Value *Y = EmitScalarExpr(E->getArg(1));
11793  Value *Z = EmitScalarExpr(E->getArg(2));
11794  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
11795  llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
11796  switch (BuiltinID) {
11797  case PPC::BI__builtin_vsx_xvmaddadp:
11798  case PPC::BI__builtin_vsx_xvmaddasp:
11799  return Builder.CreateCall(F, {X, Y, Z});
11800  case PPC::BI__builtin_vsx_xvnmaddadp:
11801  case PPC::BI__builtin_vsx_xvnmaddasp:
11802  return Builder.CreateFSub(Zero,
11803  Builder.CreateCall(F, {X, Y, Z}), "sub");
11804  case PPC::BI__builtin_vsx_xvmsubadp:
11805  case PPC::BI__builtin_vsx_xvmsubasp:
11806  return Builder.CreateCall(F,
11807  {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
11808  case PPC::BI__builtin_vsx_xvnmsubadp:
11809  case PPC::BI__builtin_vsx_xvnmsubasp:
11810  Value *FsubRes =
11811  Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
11812  return Builder.CreateFSub(Zero, FsubRes, "sub");
11813  }
11814  llvm_unreachable("Unknown FMA operation");
11815  return nullptr; // Suppress no-return warning
11816  }
11817 
11818  case PPC::BI__builtin_vsx_insertword: {
11819  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
11820 
11821  // Third argument is a compile time constant int. It must be clamped to
11822  // to the range [0, 12].
11823  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
11824  assert(ArgCI &&
11825  "Third arg to xxinsertw intrinsic must be constant integer");
11826  const int64_t MaxIndex = 12;
11827  int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
11828 
11829  // The builtin semantics don't exactly match the xxinsertw instructions
11830  // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
11831  // word from the first argument, and inserts it in the second argument. The
11832  // instruction extracts the word from its second input register and inserts
11833  // it into its first input register, so swap the first and second arguments.
11834  std::swap(Ops[0], Ops[1]);
11835 
11836  // Need to cast the second argument from a vector of unsigned int to a
11837  // vector of long long.
11838  Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
11839 
11840  if (getTarget().isLittleEndian()) {
11841  // Create a shuffle mask of (1, 0)
11842  Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
11843  ConstantInt::get(Int32Ty, 0)
11844  };
11845  Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
11846 
11847  // Reverse the double words in the vector we will extract from.
11848  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
11849  Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
11850 
11851  // Reverse the index.
11852  Index = MaxIndex - Index;
11853  }
11854 
11855  // Intrinsic expects the first arg to be a vector of int.
11856  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
11857  Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
11858  return Builder.CreateCall(F, Ops);
11859  }
11860 
11861  case PPC::BI__builtin_vsx_extractuword: {
11862  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
11863 
11864  // Intrinsic expects the first argument to be a vector of doublewords.
11865  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
11866 
11867  // The second argument is a compile time constant int that needs to
11868  // be clamped to the range [0, 12].
11869  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
11870  assert(ArgCI &&
11871  "Second Arg to xxextractuw intrinsic must be a constant integer!");
11872  const int64_t MaxIndex = 12;
11873  int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
11874 
11875  if (getTarget().isLittleEndian()) {
11876  // Reverse the index.
11877  Index = MaxIndex - Index;
11878  Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
11879 
11880  // Emit the call, then reverse the double words of the results vector.
11881  Value *Call = Builder.CreateCall(F, Ops);
11882 
11883  // Create a shuffle mask of (1, 0)
11884  Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
11885  ConstantInt::get(Int32Ty, 0)
11886  };
11887  Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
11888 
11889  Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
11890  return ShuffleCall;
11891  } else {
11892  Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
11893  return Builder.CreateCall(F, Ops);
11894  }
11895  }
11896 
11897  case PPC::BI__builtin_vsx_xxpermdi: {
11898  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
11899  assert(ArgCI && "Third arg must be constant integer!");
11900 
11901  unsigned Index = ArgCI->getZExtValue();
11902  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
11903  Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
11904 
11905  // Account for endianness by treating this as just a shuffle. So we use the
11906  // same indices for both LE and BE in order to produce expected results in
11907  // both cases.
11908  unsigned ElemIdx0 = (Index & 2) >> 1;
11909  unsigned ElemIdx1 = 2 + (Index & 1);
11910 
11911  Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
11912  ConstantInt::get(Int32Ty, ElemIdx1)};
11913  Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
11914 
11915  Value *ShuffleCall =
11916  Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
11917  QualType BIRetType = E->getType();
11918  auto RetTy = ConvertType(BIRetType);
11919  return Builder.CreateBitCast(ShuffleCall, RetTy);
11920  }
11921 
11922  case PPC::BI__builtin_vsx_xxsldwi: {
11923  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
11924  assert(ArgCI && "Third argument must be a compile time constant");
11925  unsigned Index = ArgCI->getZExtValue() & 0x3;
11926  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
11927  Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
11928 
11929  // Create a shuffle mask
11930  unsigned ElemIdx0;
11931  unsigned ElemIdx1;
11932  unsigned ElemIdx2;
11933  unsigned ElemIdx3;
11934  if (getTarget().isLittleEndian()) {
11935  // Little endian element N comes from element 8+N-Index of the
11936  // concatenated wide vector (of course, using modulo arithmetic on
11937  // the total number of elements).
11938  ElemIdx0 = (8 - Index) % 8;
11939  ElemIdx1 = (9 - Index) % 8;
11940  ElemIdx2 = (10 - Index) % 8;
11941  ElemIdx3 = (11 - Index) % 8;
11942  } else {
11943  // Big endian ElemIdx<N> = Index + N
11944  ElemIdx0 = Index;
11945  ElemIdx1 = Index + 1;
11946  ElemIdx2 = Index + 2;
11947  ElemIdx3 = Index + 3;
11948  }
11949 
11950  Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
11951  ConstantInt::get(Int32Ty, ElemIdx1),
11952  ConstantInt::get(Int32Ty, ElemIdx2),
11953  ConstantInt::get(Int32Ty, ElemIdx3)};
11954 
11955  Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
11956  Value *ShuffleCall =
11957  Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
11958  QualType BIRetType = E->getType();
11959  auto RetTy = ConvertType(BIRetType);
11960  return Builder.CreateBitCast(ShuffleCall, RetTy);
11961  }
11962 
11963  case PPC::BI__builtin_pack_vector_int128: {
11964  bool isLittleEndian = getTarget().isLittleEndian();
11965  Value *UndefValue =
11966  llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), 2));
11967  Value *Res = Builder.CreateInsertElement(
11968  UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0));
11969  Res = Builder.CreateInsertElement(Res, Ops[1],
11970  (uint64_t)(isLittleEndian ? 0 : 1));
11971  return Builder.CreateBitCast(Res, ConvertType(E->getType()));
11972  }
11973 
11974  case PPC::BI__builtin_unpack_vector_int128: {
11975  ConstantInt *Index = cast<ConstantInt>(Ops[1]);
11976  Value *Unpacked = Builder.CreateBitCast(
11977  Ops[0], llvm::VectorType::get(ConvertType(E->getType()), 2));
11978 
11979  if (getTarget().isLittleEndian())
11980  Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
11981 
11982  return Builder.CreateExtractElement(Unpacked, Index);
11983  }
11984  }
11985 }
11986 
11988  const CallExpr *E) {
11989  switch (BuiltinID) {
11990  case AMDGPU::BI__builtin_amdgcn_div_scale:
11991  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
11992  // Translate from the intrinsics's struct return to the builtin's out
11993  // argument.
11994 
11995  Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
11996 
11997  llvm::Value *X = EmitScalarExpr(E->getArg(0));
11998  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
11999  llvm::Value *Z = EmitScalarExpr(E->getArg(2));
12000 
12001  llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
12002  X->getType());
12003 
12004  llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
12005 
12006  llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
12007  llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
12008 
12009  llvm::Type *RealFlagType
12010  = FlagOutPtr.getPointer()->getType()->getPointerElementType();
12011 
12012  llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
12013  Builder.CreateStore(FlagExt, FlagOutPtr);
12014  return Result;
12015  }
12016  case AMDGPU::BI__builtin_amdgcn_div_fmas:
12017  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
12018  llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
12019  llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
12020  llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
12021  llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
12022 
12023  llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
12024  Src0->getType());
12025  llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
12026  return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
12027  }
12028 
12029  case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
12030  return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
12031  case AMDGPU::BI__builtin_amdgcn_mov_dpp:
12032  case AMDGPU::BI__builtin_amdgcn_update_dpp: {
12034  for (unsigned I = 0; I != E->getNumArgs(); ++I)
12035  Args.push_back(EmitScalarExpr(E->getArg(I)));
12036  assert(Args.size() == 5 || Args.size() == 6);
12037  if (Args.size() == 5)
12038  Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType()));
12039  Value *F =
12040  CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
12041  return Builder.CreateCall(F, Args);
12042  }
12043  case AMDGPU::BI__builtin_amdgcn_div_fixup:
12044  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
12045  case AMDGPU::BI__builtin_amdgcn_div_fixuph:
12046  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
12047  case AMDGPU::BI__builtin_amdgcn_trig_preop:
12048  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
12049  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
12050  case AMDGPU::BI__builtin_amdgcn_rcp:
12051  case AMDGPU::BI__builtin_amdgcn_rcpf:
12052  case AMDGPU::BI__builtin_amdgcn_rcph:
12053  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
12054  case AMDGPU::BI__builtin_amdgcn_rsq:
12055  case AMDGPU::BI__builtin_amdgcn_rsqf:
12056  case AMDGPU::BI__builtin_amdgcn_rsqh:
12057  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
12058  case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
12059  case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
12060  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
12061  case AMDGPU::BI__builtin_amdgcn_sinf:
12062  case AMDGPU::BI__builtin_amdgcn_sinh:
12063  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
12064  case AMDGPU::BI__builtin_amdgcn_cosf:
12065  case AMDGPU::BI__builtin_amdgcn_cosh:
12066  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
12067  case AMDGPU::BI__builtin_amdgcn_log_clampf:
12068  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
12069  case AMDGPU::BI__builtin_amdgcn_ldexp:
12070  case AMDGPU::BI__builtin_amdgcn_ldexpf:
12071  case AMDGPU::BI__builtin_amdgcn_ldexph:
12072  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
12073  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
12074  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
12075  case AMDGPU::BI__builtin_amdgcn_frexp_manth:
12076  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
12077  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
12078  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
12079  Value *Src0 = EmitScalarExpr(E->getArg(0));
12080  Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
12081  { Builder.getInt32Ty(), Src0->getType() });
12082  return Builder.CreateCall(F, Src0);
12083  }
12084  case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
12085  Value *Src0 = EmitScalarExpr(E->getArg(0));
12086  Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
12087  { Builder.getInt16Ty(), Src0->getType() });
12088  return Builder.CreateCall(F, Src0);
12089  }
12090  case AMDGPU::BI__builtin_amdgcn_fract:
12091  case AMDGPU::BI__builtin_amdgcn_fractf:
12092  case AMDGPU::BI__builtin_amdgcn_fracth:
12093  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
12094  case AMDGPU::BI__builtin_amdgcn_lerp:
12095  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
12096  case AMDGPU::BI__builtin_amdgcn_uicmp:
12097  case AMDGPU::BI__builtin_amdgcn_uicmpl:
12098  case AMDGPU::BI__builtin_amdgcn_sicmp:
12099  case AMDGPU::BI__builtin_amdgcn_sicmpl:
12100  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
12101  case AMDGPU::BI__builtin_amdgcn_fcmp:
12102  case AMDGPU::BI__builtin_amdgcn_fcmpf:
12103  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
12104  case AMDGPU::BI__builtin_amdgcn_class:
12105  case AMDGPU::BI__builtin_amdgcn_classf:
12106  case AMDGPU::BI__builtin_amdgcn_classh:
12107  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
12108  case AMDGPU::BI__builtin_amdgcn_fmed3f:
12109  case AMDGPU::BI__builtin_amdgcn_fmed3h:
12110  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
12111  case AMDGPU::BI__builtin_amdgcn_read_exec: {
12112  CallInst *CI = cast<CallInst>(
12113  EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
12114  CI->setConvergent();
12115  return CI;
12116  }
12117  case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
12118  case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
12119  StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
12120  "exec_lo" : "exec_hi";
12121  CallInst *CI = cast<CallInst>(
12122  EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName));
12123  CI->setConvergent();
12124  return CI;
12125  }
12126  // amdgcn workitem
12127  case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
12128  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
12129  case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
12130  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
12131  case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
12132  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
12133 
12134  // r600 intrinsics
12135  case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
12136  case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
12137  return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
12138  case AMDGPU::BI__builtin_r600_read_tidig_x:
12139  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
12140  case AMDGPU::BI__builtin_r600_read_tidig_y:
12141  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
12142  case AMDGPU::BI__builtin_r600_read_tidig_z:
12143  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
12144  default:
12145  return nullptr;
12146  }
12147 }
12148 
12149 /// Handle a SystemZ function in which the final argument is a pointer
12150 /// to an int that receives the post-instruction CC value. At the LLVM level
12151 /// this is represented as a function that returns a {result, cc} pair.
12153  unsigned IntrinsicID,
12154  const CallExpr *E) {
12155  unsigned NumArgs = E->getNumArgs() - 1;
12156  SmallVector<Value *, 8> Args(NumArgs);
12157  for (unsigned I = 0; I < NumArgs; ++I)
12158  Args[I] = CGF.EmitScalarExpr(E->getArg(I));
12159  Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
12160  Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
12161  Value *Call = CGF.Builder.CreateCall(F, Args);
12162  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
12163  CGF.Builder.CreateStore(CC, CCPtr);
12164  return CGF.Builder.CreateExtractValue(Call, 0);
12165 }
12166 
12168  const CallExpr *E) {
12169  switch (BuiltinID) {
12170  case SystemZ::BI__builtin_tbegin: {
12171  Value *TDB = EmitScalarExpr(E->getArg(0));
12172  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
12173  Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
12174  return Builder.CreateCall(F, {TDB, Control});
12175  }
12176  case SystemZ::BI__builtin_tbegin_nofloat: {
12177  Value *TDB = EmitScalarExpr(E->getArg(0));
12178  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
12179  Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
12180  return Builder.CreateCall(F, {TDB, Control});
12181  }
12182  case SystemZ::BI__builtin_tbeginc: {
12183  Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
12184  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
12185  Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
12186  return Builder.CreateCall(F, {TDB, Control});
12187  }
12188  case SystemZ::BI__builtin_tabort: {
12189  Value *Data = EmitScalarExpr(E->getArg(0));
12190  Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
12191  return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
12192  }
12193  case SystemZ::BI__builtin_non_tx_store: {
12194  Value *Address = EmitScalarExpr(E->getArg(0));
12195  Value *Data = EmitScalarExpr(E->getArg(1));
12196  Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
12197  return Builder.CreateCall(F, {Data, Address});
12198  }
12199 
12200  // Vector builtins. Note that most vector builtins are mapped automatically
12201  // to target-specific LLVM intrinsics. The ones handled specially here can
12202  // be represented via standard LLVM IR, which is preferable to enable common
12203  // LLVM optimizations.
12204 
12205  case SystemZ::BI__builtin_s390_vpopctb:
12206  case SystemZ::BI__builtin_s390_vpopcth:
12207  case SystemZ::BI__builtin_s390_vpopctf:
12208  case SystemZ::BI__builtin_s390_vpopctg: {
12209  llvm::Type *ResultType = ConvertType(E->getType());
12210  Value *X = EmitScalarExpr(E->getArg(0));
12211  Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
12212  return Builder.CreateCall(F, X);
12213  }
12214 
12215  case SystemZ::BI__builtin_s390_vclzb:
12216  case SystemZ::BI__builtin_s390_vclzh:
12217  case SystemZ::BI__builtin_s390_vclzf:
12218  case SystemZ::BI__builtin_s390_vclzg: {
12219  llvm::Type *ResultType = ConvertType(E->getType());
12220  Value *X = EmitScalarExpr(E->getArg(0));
12221  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
12222  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
12223  return Builder.CreateCall(F, {X, Undef});
12224  }
12225 
12226  case SystemZ::BI__builtin_s390_vctzb:
12227  case SystemZ::BI__builtin_s390_vctzh:
12228  case SystemZ::BI__builtin_s390_vctzf:
12229  case SystemZ::BI__builtin_s390_vctzg: {
12230  llvm::Type *ResultType = ConvertType(E->getType());
12231  Value *X = EmitScalarExpr(E->getArg(0));
12232  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
12233  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
12234  return Builder.CreateCall(F, {X, Undef});
12235  }
12236 
12237  case SystemZ::BI__builtin_s390_vfsqsb:
12238  case SystemZ::BI__builtin_s390_vfsqdb: {
12239  llvm::Type *ResultType = ConvertType(E->getType());
12240  Value *X = EmitScalarExpr(E->getArg(0));
12241  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
12242  return Builder.CreateCall(F, X);
12243  }
12244  case SystemZ::BI__builtin_s390_vfmasb:
12245  case SystemZ::BI__builtin_s390_vfmadb: {
12246  llvm::Type *ResultType = ConvertType(E->getType());
12247  Value *X = EmitScalarExpr(E->getArg(0));
12248  Value *Y = EmitScalarExpr(E->getArg(1));
12249  Value *Z = EmitScalarExpr(E->getArg(2));
12250  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12251  return Builder.CreateCall(F, {X, Y, Z});
12252  }
12253  case SystemZ::BI__builtin_s390_vfmssb:
12254  case SystemZ::BI__builtin_s390_vfmsdb: {
12255  llvm::Type *ResultType = ConvertType(E->getType());
12256  Value *X = EmitScalarExpr(E->getArg(0));
12257  Value *Y = EmitScalarExpr(E->getArg(1));
12258  Value *Z = EmitScalarExpr(E->getArg(2));
12259  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12260  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12261  return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
12262  }
12263  case SystemZ::BI__builtin_s390_vfnmasb:
12264  case SystemZ::BI__builtin_s390_vfnmadb: {
12265  llvm::Type *ResultType = ConvertType(E->getType());
12266  Value *X = EmitScalarExpr(E->getArg(0));
12267  Value *Y = EmitScalarExpr(E->getArg(1));
12268  Value *Z = EmitScalarExpr(E->getArg(2));
12269  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12270  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12271  return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
12272  }
12273  case SystemZ::BI__builtin_s390_vfnmssb:
12274  case SystemZ::BI__builtin_s390_vfnmsdb: {
12275  llvm::Type *ResultType = ConvertType(E->getType());
12276  Value *X = EmitScalarExpr(E->getArg(0));
12277  Value *Y = EmitScalarExpr(E->getArg(1));
12278  Value *Z = EmitScalarExpr(E->getArg(2));
12279  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12280  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12281  Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
12282  return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
12283  }
12284  case SystemZ::BI__builtin_s390_vflpsb:
12285  case SystemZ::BI__builtin_s390_vflpdb: {
12286  llvm::Type *ResultType = ConvertType(E->getType());
12287  Value *X = EmitScalarExpr(E->getArg(0));
12288  Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
12289  return Builder.CreateCall(F, X);
12290  }
12291  case SystemZ::BI__builtin_s390_vflnsb:
12292  case SystemZ::BI__builtin_s390_vflndb: {
12293  llvm::Type *ResultType = ConvertType(E->getType());
12294  Value *X = EmitScalarExpr(E->getArg(0));
12295  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12296  Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
12297  return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
12298  }
12299  case SystemZ::BI__builtin_s390_vfisb:
12300  case SystemZ::BI__builtin_s390_vfidb: {
12301  llvm::Type *ResultType = ConvertType(E->getType());
12302  Value *X = EmitScalarExpr(E->getArg(0));
12303  // Constant-fold the M4 and M5 mask arguments.
12304  llvm::APSInt M4, M5;
12305  bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
12306  bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
12307  assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
12308  (void)IsConstM4; (void)IsConstM5;
12309  // Check whether this instance can be represented via a LLVM standard
12310  // intrinsic. We only support some combinations of M4 and M5.
12311  Intrinsic::ID ID = Intrinsic::not_intrinsic;
12312  switch (M4.getZExtValue()) {
12313  default: break;
12314  case 0: // IEEE-inexact exception allowed
12315  switch (M5.getZExtValue()) {
12316  default: break;
12317  case 0: ID = Intrinsic::rint; break;
12318  }
12319  break;
12320  case 4: // IEEE-inexact exception suppressed
12321  switch (M5.getZExtValue()) {
12322  default: break;
12323  case 0: ID = Intrinsic::nearbyint; break;
12324  case 1: ID = Intrinsic::round; break;
12325  case 5: ID = Intrinsic::trunc; break;
12326  case 6: ID = Intrinsic::ceil; break;
12327  case 7: ID = Intrinsic::floor; break;
12328  }
12329  break;
12330  }
12331  if (ID != Intrinsic::not_intrinsic) {
12332  Function *F = CGM.getIntrinsic(ID, ResultType);
12333  return Builder.CreateCall(F, X);
12334  }
12335  switch (BuiltinID) {
12336  case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
12337  case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
12338  default: llvm_unreachable("Unknown BuiltinID");
12339  }
12340  Function *F = CGM.getIntrinsic(ID);
12341  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
12342  Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
12343  return Builder.CreateCall(F, {X, M4Value, M5Value});
12344  }
12345  case SystemZ::BI__builtin_s390_vfmaxsb:
12346  case SystemZ::BI__builtin_s390_vfmaxdb: {
12347  llvm::Type *ResultType = ConvertType(E->getType());
12348  Value *X = EmitScalarExpr(E->getArg(0));
12349  Value *Y = EmitScalarExpr(E->getArg(1));
12350  // Constant-fold the M4 mask argument.
12351  llvm::APSInt M4;
12352  bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
12353  assert(IsConstM4 && "Constant arg isn't actually constant?");
12354  (void)IsConstM4;
12355  // Check whether this instance can be represented via a LLVM standard
12356  // intrinsic. We only support some values of M4.
12357  Intrinsic::ID ID = Intrinsic::not_intrinsic;
12358  switch (M4.getZExtValue()) {
12359  default: break;
12360  case 4: ID = Intrinsic::maxnum; break;
12361  }
12362  if (ID != Intrinsic::not_intrinsic) {
12363  Function *F = CGM.getIntrinsic(ID, ResultType);
12364  return Builder.CreateCall(F, {X, Y});
12365  }
12366  switch (BuiltinID) {
12367  case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
12368  case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
12369  default: llvm_unreachable("Unknown BuiltinID");
12370  }
12371  Function *F = CGM.getIntrinsic(ID);
12372  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
12373  return Builder.CreateCall(F, {X, Y, M4Value});
12374  }
12375  case SystemZ::BI__builtin_s390_vfminsb:
12376  case SystemZ::BI__builtin_s390_vfmindb: {
12377  llvm::Type *ResultType = ConvertType(E->getType());
12378  Value *X = EmitScalarExpr(E->getArg(0));
12379  Value *Y = EmitScalarExpr(E->getArg(1));
12380  // Constant-fold the M4 mask argument.
12381  llvm::APSInt M4;
12382  bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
12383  assert(IsConstM4 && "Constant arg isn't actually constant?");
12384  (void)IsConstM4;
12385  // Check whether this instance can be represented via a LLVM standard
12386  // intrinsic. We only support some values of M4.
12387  Intrinsic::ID ID = Intrinsic::not_intrinsic;
12388  switch (M4.getZExtValue()) {
12389  default: break;
12390  case 4: ID = Intrinsic::minnum; break;
12391  }
12392  if (ID != Intrinsic::not_intrinsic) {
12393  Function *F = CGM.getIntrinsic(ID, ResultType);
12394  return Builder.CreateCall(F, {X, Y});
12395  }
12396  switch (BuiltinID) {
12397  case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
12398  case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
12399  default: llvm_unreachable("Unknown BuiltinID");
12400  }
12401  Function *F = CGM.getIntrinsic(ID);
12402  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
12403  return Builder.CreateCall(F, {X, Y, M4Value});
12404  }
12405 
12406  // Vector intrinsics that output the post-instruction CC value.
12407 
12408 #define INTRINSIC_WITH_CC(NAME) \
12409  case SystemZ::BI__builtin_##NAME: \
12410  return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
12411 
12412  INTRINSIC_WITH_CC(s390_vpkshs);
12413  INTRINSIC_WITH_CC(s390_vpksfs);
12414  INTRINSIC_WITH_CC(s390_vpksgs);
12415 
12416  INTRINSIC_WITH_CC(s390_vpklshs);
12417  INTRINSIC_WITH_CC(s390_vpklsfs);
12418  INTRINSIC_WITH_CC(s390_vpklsgs);
12419 
12420  INTRINSIC_WITH_CC(s390_vceqbs);
12421  INTRINSIC_WITH_CC(s390_vceqhs);
12422  INTRINSIC_WITH_CC(s390_vceqfs);
12423  INTRINSIC_WITH_CC(s390_vceqgs);
12424 
12425  INTRINSIC_WITH_CC(s390_vchbs);
12426  INTRINSIC_WITH_CC(s390_vchhs);
12427  INTRINSIC_WITH_CC(s390_vchfs);
12428  INTRINSIC_WITH_CC(s390_vchgs);
12429 
12430  INTRINSIC_WITH_CC(s390_vchlbs);
12431  INTRINSIC_WITH_CC(s390_vchlhs);
12432  INTRINSIC_WITH_CC(s390_vchlfs);
12433  INTRINSIC_WITH_CC(s390_vchlgs);
12434 
12435  INTRINSIC_WITH_CC(s390_vfaebs);
12436  INTRINSIC_WITH_CC(s390_vfaehs);
12437  INTRINSIC_WITH_CC(s390_vfaefs);
12438 
12439  INTRINSIC_WITH_CC(s390_vfaezbs);
12440  INTRINSIC_WITH_CC(s390_vfaezhs);
12441  INTRINSIC_WITH_CC(s390_vfaezfs);
12442 
12443  INTRINSIC_WITH_CC(s390_vfeebs);
12444  INTRINSIC_WITH_CC(s390_vfeehs);
12445  INTRINSIC_WITH_CC(s390_vfeefs);
12446 
12447  INTRINSIC_WITH_CC(s390_vfeezbs);
12448  INTRINSIC_WITH_CC(s390_vfeezhs);
12449  INTRINSIC_WITH_CC(s390_vfeezfs);
12450 
12451  INTRINSIC_WITH_CC(s390_vfenebs);
12452  INTRINSIC_WITH_CC(s390_vfenehs);
12453  INTRINSIC_WITH_CC(s390_vfenefs);
12454 
12455  INTRINSIC_WITH_CC(s390_vfenezbs);
12456  INTRINSIC_WITH_CC(s390_vfenezhs);
12457  INTRINSIC_WITH_CC(s390_vfenezfs);
12458 
12459  INTRINSIC_WITH_CC(s390_vistrbs);
12460  INTRINSIC_WITH_CC(s390_vistrhs);
12461  INTRINSIC_WITH_CC(s390_vistrfs);
12462 
12463  INTRINSIC_WITH_CC(s390_vstrcbs);
12464  INTRINSIC_WITH_CC(s390_vstrchs);
12465  INTRINSIC_WITH_CC(s390_vstrcfs);
12466 
12467  INTRINSIC_WITH_CC(s390_vstrczbs);
12468  INTRINSIC_WITH_CC(s390_vstrczhs);
12469  INTRINSIC_WITH_CC(s390_vstrczfs);
12470 
12471  INTRINSIC_WITH_CC(s390_vfcesbs);
12472  INTRINSIC_WITH_CC(s390_vfcedbs);
12473  INTRINSIC_WITH_CC(s390_vfchsbs);
12474  INTRINSIC_WITH_CC(s390_vfchdbs);
12475  INTRINSIC_WITH_CC(s390_vfchesbs);
12476  INTRINSIC_WITH_CC(s390_vfchedbs);
12477 
12478  INTRINSIC_WITH_CC(s390_vftcisb);
12479  INTRINSIC_WITH_CC(s390_vftcidb);
12480 
12481 #undef INTRINSIC_WITH_CC
12482 
12483  default:
12484  return nullptr;
12485  }
12486 }
12487 
12489  const CallExpr *E) {
12490  auto MakeLdg = [&](unsigned IntrinsicID) {
12491  Value *Ptr = EmitScalarExpr(E->getArg(0));
12492  clang::CharUnits Align =
12493  getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
12494  return Builder.CreateCall(
12495  CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
12496  Ptr->getType()}),
12497  {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
12498  };
12499  auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
12500  Value *Ptr = EmitScalarExpr(E->getArg(0));
12501  return Builder.CreateCall(
12502  CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
12503  Ptr->getType()}),
12504  {Ptr, EmitScalarExpr(E->getArg(1))});
12505  };
12506  switch (BuiltinID) {
12507  case NVPTX::BI__nvvm_atom_add_gen_i:
12508  case NVPTX::BI__nvvm_atom_add_gen_l:
12509  case NVPTX::BI__nvvm_atom_add_gen_ll:
12510  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
12511 
12512  case NVPTX::BI__nvvm_atom_sub_gen_i:
12513  case NVPTX::BI__nvvm_atom_sub_gen_l:
12514  case NVPTX::BI__nvvm_atom_sub_gen_ll:
12515  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
12516 
12517  case NVPTX::BI__nvvm_atom_and_gen_i:
12518  case NVPTX::BI__nvvm_atom_and_gen_l:
12519  case NVPTX::BI__nvvm_atom_and_gen_ll:
12521 
12522  case NVPTX::BI__nvvm_atom_or_gen_i:
12523  case NVPTX::BI__nvvm_atom_or_gen_l:
12524  case NVPTX::BI__nvvm_atom_or_gen_ll:
12525  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
12526 
12527  case NVPTX::BI__nvvm_atom_xor_gen_i:
12528  case NVPTX::BI__nvvm_atom_xor_gen_l:
12529  case NVPTX::BI__nvvm_atom_xor_gen_ll:
12530  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
12531 
12532  case NVPTX::BI__nvvm_atom_xchg_gen_i:
12533  case NVPTX::BI__nvvm_atom_xchg_gen_l:
12534  case NVPTX::BI__nvvm_atom_xchg_gen_ll:
12535  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
12536 
12537  case NVPTX::BI__nvvm_atom_max_gen_i:
12538  case NVPTX::BI__nvvm_atom_max_gen_l:
12539  case NVPTX::BI__nvvm_atom_max_gen_ll:
12540  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
12541 
12542  case NVPTX::BI__nvvm_atom_max_gen_ui:
12543  case NVPTX::BI__nvvm_atom_max_gen_ul:
12544  case NVPTX::BI__nvvm_atom_max_gen_ull:
12545  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
12546 
12547  case NVPTX::BI__nvvm_atom_min_gen_i:
12548  case NVPTX::BI__nvvm_atom_min_gen_l:
12549  case NVPTX::BI__nvvm_atom_min_gen_ll:
12550  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
12551 
12552  case NVPTX::BI__nvvm_atom_min_gen_ui:
12553  case NVPTX::BI__nvvm_atom_min_gen_ul:
12554  case NVPTX::BI__nvvm_atom_min_gen_ull:
12555  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
12556 
12557  case NVPTX::BI__nvvm_atom_cas_gen_i:
12558  case NVPTX::BI__nvvm_atom_cas_gen_l:
12559  case NVPTX::BI__nvvm_atom_cas_gen_ll:
12560  // __nvvm_atom_cas_gen_* should return the old value rather than the
12561  // success flag.
12562  return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
12563 
12564  case NVPTX::BI__nvvm_atom_add_gen_f: {
12565  Value *Ptr = EmitScalarExpr(E->getArg(0));
12566  Value *Val = EmitScalarExpr(E->getArg(1));
12567  // atomicrmw only deals with integer arguments so we need to use
12568  // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
12569  Value *FnALAF32 =
12570  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
12571  return Builder.CreateCall(FnALAF32, {Ptr, Val});
12572  }
12573 
12574  case NVPTX::BI__nvvm_atom_add_gen_d: {
12575  Value *Ptr = EmitScalarExpr(E->getArg(0));
12576  Value *Val = EmitScalarExpr(E->getArg(1));
12577  // atomicrmw only deals with integer arguments, so we need to use
12578  // LLVM's nvvm_atomic_load_add_f64 intrinsic.
12579  Value *FnALAF64 =
12580  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType());
12581  return Builder.CreateCall(FnALAF64, {Ptr, Val});
12582  }
12583 
12584  case NVPTX::BI__nvvm_atom_inc_gen_ui: {
12585  Value *Ptr = EmitScalarExpr(E->getArg(0));
12586  Value *Val = EmitScalarExpr(E->getArg(1));
12587  Value *FnALI32 =
12588  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
12589  return Builder.CreateCall(FnALI32, {Ptr, Val});
12590  }
12591 
12592  case NVPTX::BI__nvvm_atom_dec_gen_ui: {
12593  Value *Ptr = EmitScalarExpr(E->getArg(0));
12594  Value *Val = EmitScalarExpr(E->getArg(1));
12595  Value *FnALD32 =
12596  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
12597  return Builder.CreateCall(FnALD32, {Ptr, Val});
12598  }
12599 
12600  case NVPTX::BI__nvvm_ldg_c:
12601  case NVPTX::BI__nvvm_ldg_c2:
12602  case NVPTX::BI__nvvm_ldg_c4:
12603  case NVPTX::BI__nvvm_ldg_s:
12604  case NVPTX::BI__nvvm_ldg_s2:
12605  case NVPTX::BI__nvvm_ldg_s4:
12606  case NVPTX::BI__nvvm_ldg_i:
12607  case NVPTX::BI__nvvm_ldg_i2:
12608  case NVPTX::BI__nvvm_ldg_i4:
12609  case NVPTX::BI__nvvm_ldg_l:
12610  case NVPTX::BI__nvvm_ldg_ll:
12611  case NVPTX::BI__nvvm_ldg_ll2:
12612  case NVPTX::BI__nvvm_ldg_uc:
12613  case NVPTX::BI__nvvm_ldg_uc2:
12614  case NVPTX::BI__nvvm_ldg_uc4:
12615  case NVPTX::BI__nvvm_ldg_us:
12616  case NVPTX::BI__nvvm_ldg_us2:
12617  case NVPTX::BI__nvvm_ldg_us4:
12618  case NVPTX::BI__nvvm_ldg_ui:
12619  case NVPTX::BI__nvvm_ldg_ui2:
12620  case NVPTX::BI__nvvm_ldg_ui4:
12621  case NVPTX::BI__nvvm_ldg_ul:
12622  case NVPTX::BI__nvvm_ldg_ull:
12623  case NVPTX::BI__nvvm_ldg_ull2:
12624  // PTX Interoperability section 2.2: "For a vector with an even number of
12625  // elements, its alignment is set to number of elements times the alignment
12626  // of its member: n*alignof(t)."
12627  return MakeLdg(Intrinsic::nvvm_ldg_global_i);
12628  case NVPTX::BI__nvvm_ldg_f:
12629  case NVPTX::BI__nvvm_ldg_f2:
12630  case NVPTX::BI__nvvm_ldg_f4:
12631  case NVPTX::BI__nvvm_ldg_d:
12632  case NVPTX::BI__nvvm_ldg_d2:
12633  return MakeLdg(Intrinsic::nvvm_ldg_global_f);
12634 
12635  case NVPTX::BI__nvvm_atom_cta_add_gen_i:
12636  case NVPTX::BI__nvvm_atom_cta_add_gen_l:
12637  case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
12638  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
12639  case NVPTX::BI__nvvm_atom_sys_add_gen_i:
12640  case NVPTX::BI__nvvm_atom_sys_add_gen_l:
12641  case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
12642  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
12643  case NVPTX::BI__nvvm_atom_cta_add_gen_f:
12644  case NVPTX::BI__nvvm_atom_cta_add_gen_d:
12645  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
12646  case NVPTX::BI__nvvm_atom_sys_add_gen_f:
12647  case NVPTX::BI__nvvm_atom_sys_add_gen_d:
12648  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
12649  case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
12650  case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
12651  case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
12652  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
12653  case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
12654  case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
12655  case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
12656  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
12657  case NVPTX::BI__nvvm_atom_cta_max_gen_i:
12658  case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
12659  case NVPTX::BI__nvvm_atom_cta_max_gen_l:
12660  case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
12661  case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
12662  case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
12663  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
12664  case NVPTX::BI__nvvm_atom_sys_max_gen_i:
12665  case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
12666  case NVPTX::BI__nvvm_atom_sys_max_gen_l:
12667  case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
12668  case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
12669  case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
12670  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
12671  case NVPTX::BI__nvvm_atom_cta_min_gen_i:
12672  case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
12673  case NVPTX::BI__nvvm_atom_cta_min_gen_l:
12674  case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
12675  case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
12676  case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
12677  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
12678  case NVPTX::BI__nvvm_atom_sys_min_gen_i:
12679  case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
12680  case NVPTX::BI__nvvm_atom_sys_min_gen_l:
12681  case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
12682  case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
12683  case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
12684  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
12685  case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
12686  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
12687  case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
12688  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
12689  case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
12690  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
12691  case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
12692  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
12693  case NVPTX::BI__nvvm_atom_cta_and_gen_i:
12694  case NVPTX::BI__nvvm_atom_cta_and_gen_l:
12695  case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
12696  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
12697  case NVPTX::BI__nvvm_atom_sys_and_gen_i:
12698  case NVPTX::BI__nvvm_atom_sys_and_gen_l:
12699  case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
12700  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
12701  case NVPTX::BI__nvvm_atom_cta_or_gen_i:
12702  case NVPTX::BI__nvvm_atom_cta_or_gen_l:
12703  case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
12704  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
12705  case NVPTX::BI__nvvm_atom_sys_or_gen_i:
12706  case NVPTX::BI__nvvm_atom_sys_or_gen_l:
12707  case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
12708  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
12709  case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
12710  case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
12711  case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
12712  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
12713  case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
12714  case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
12715  case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
12716  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
12717  case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
12718  case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
12719  case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
12720  Value *Ptr = EmitScalarExpr(E->getArg(0));
12721  return Builder.CreateCall(
12722  CGM.getIntrinsic(
12723  Intrinsic::nvvm_atomic_cas_gen_i_cta,
12724  {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
12725  {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
12726  }
12727  case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
12728  case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
12729  case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
12730  Value *Ptr = EmitScalarExpr(E->getArg(0));
12731  return Builder.CreateCall(
12732  CGM.getIntrinsic(
12733  Intrinsic::nvvm_atomic_cas_gen_i_sys,
12734  {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
12735  {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
12736  }
12737  case NVPTX::BI__nvvm_match_all_sync_i32p:
12738  case NVPTX::BI__nvvm_match_all_sync_i64p: {
12739  Value *Mask = EmitScalarExpr(E->getArg(0));
12740  Value *Val = EmitScalarExpr(E->getArg(1));
12741  Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
12742  Value *ResultPair = Builder.CreateCall(
12743  CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
12744  ? Intrinsic::nvvm_match_all_sync_i32p
12745  : Intrinsic::nvvm_match_all_sync_i64p),
12746  {Mask, Val});
12747  Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
12748  PredOutPtr.getElementType());
12749  Builder.CreateStore(Pred, PredOutPtr);
12750  return Builder.CreateExtractValue(ResultPair, 0);
12751  }
12752  case NVPTX::BI__hmma_m16n16k16_ld_a:
12753  case NVPTX::BI__hmma_m16n16k16_ld_b:
12754  case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
12755  case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
12756  case NVPTX::BI__hmma_m32n8k16_ld_a:
12757  case NVPTX::BI__hmma_m32n8k16_ld_b:
12758  case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
12759  case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
12760  case NVPTX::BI__hmma_m8n32k16_ld_a:
12761  case NVPTX::BI__hmma_m8n32k16_ld_b:
12762  case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
12763  case NVPTX::BI__hmma_m8n32k16_ld_c_f32: {
12764  Address Dst = EmitPointerWithAlignment(E->getArg(0));
12765  Value *Src = EmitScalarExpr(E->getArg(1));
12766  Value *Ldm = EmitScalarExpr(E->getArg(2));
12767  llvm::APSInt isColMajorArg;
12768  if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
12769  return nullptr;
12770  bool isColMajor = isColMajorArg.getSExtValue();
12771  unsigned IID;
12772  unsigned NumResults;
12773  switch (BuiltinID) {
12774  case NVPTX::BI__hmma_m16n16k16_ld_a:
12775  IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride
12776  : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride;
12777  NumResults = 8;
12778  break;
12779  case NVPTX::BI__hmma_m16n16k16_ld_b:
12780  IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride
12781  : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride;
12782  NumResults = 8;
12783  break;
12784  case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
12785  IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride
12786  : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride;
12787  NumResults = 4;
12788  break;
12789  case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
12790  IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride
12791  : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride;
12792  NumResults = 8;
12793  break;
12794  case NVPTX::BI__hmma_m32n8k16_ld_a:
12795  IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride
12796  : Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride;
12797  NumResults = 8;
12798  break;
12799  case NVPTX::BI__hmma_m32n8k16_ld_b:
12800  IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride
12801  : Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride;
12802  NumResults = 8;
12803  break;
12804  case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
12805  IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride
12806  : Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride;
12807  NumResults = 4;
12808  break;
12809  case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
12810  IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride
12811  : Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride;
12812  NumResults = 8;
12813  break;
12814  case NVPTX::BI__hmma_m8n32k16_ld_a:
12815  IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride
12816  : Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride;
12817  NumResults = 8;
12818  break;
12819  case NVPTX::BI__hmma_m8n32k16_ld_b:
12820  IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride
12821  : Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride;
12822  NumResults = 8;
12823  break;
12824  case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
12825  IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride
12826  : Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride;
12827  NumResults = 4;
12828  break;
12829  case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
12830  IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride
12831  : Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride;
12832  NumResults = 8;
12833  break;
12834  default:
12835  llvm_unreachable("Unexpected builtin ID.");
12836  }
12837  Value *Result =
12838  Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
12839 
12840  // Save returned values.
12841  for (unsigned i = 0; i < NumResults; ++i) {
12842  Builder.CreateAlignedStore(
12843  Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
12844  Dst.getElementType()),
12845  Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
12847  }
12848  return Result;
12849  }
12850 
12851  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
12852  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
12853  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
12854  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
12855  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
12856  case NVPTX::BI__hmma_m8n32k16_st_c_f32: {
12857  Value *Dst = EmitScalarExpr(E->getArg(0));
12858  Address Src = EmitPointerWithAlignment(E->getArg(1));
12859  Value *Ldm = EmitScalarExpr(E->getArg(2));
12860  llvm::APSInt isColMajorArg;
12861  if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
12862  return nullptr;
12863  bool isColMajor = isColMajorArg.getSExtValue();
12864  unsigned IID;
12865  unsigned NumResults = 8;
12866  // PTX Instructions (and LLVM intrinsics) are defined for slice _d_, yet
12867  // for some reason nvcc builtins use _c_.
12868  switch (BuiltinID) {
12869  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
12870  IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride
12871  : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride;
12872  NumResults = 4;
12873  break;
12874  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
12875  IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride
12876  : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride;
12877  break;
12878  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
12879  IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride
12880  : Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride;
12881  NumResults = 4;
12882  break;
12883  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
12884  IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride
12885  : Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride;
12886  break;
12887  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
12888  IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride
12889  : Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride;
12890  NumResults = 4;
12891  break;
12892  case NVPTX::BI__hmma_m8n32k16_st_c_f32:
12893  IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride
12894  : Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride;
12895  break;
12896  default:
12897  llvm_unreachable("Unexpected builtin ID.");
12898  }
12899  Function *Intrinsic = CGM.getIntrinsic(IID, Dst->getType());
12900  llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
12901  SmallVector<Value *, 10> Values = {Dst};
12902  for (unsigned i = 0; i < NumResults; ++i) {
12903  Value *V = Builder.CreateAlignedLoad(
12904  Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)),
12906  Values.push_back(Builder.CreateBitCast(V, ParamType));
12907  }
12908  Values.push_back(Ldm);
12909  Value *Result = Builder.CreateCall(Intrinsic, Values);
12910  return Result;
12911  }
12912 
12913  // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
12914  // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
12915  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
12916  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
12917  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
12918  case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
12919  case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
12920  case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
12921  case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
12922  case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
12923  case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
12924  case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
12925  case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
12926  case NVPTX::BI__hmma_m8n32k16_mma_f16f32: {
12927  Address Dst = EmitPointerWithAlignment(E->getArg(0));
12928  Address SrcA = EmitPointerWithAlignment(E->getArg(1));
12929  Address SrcB = EmitPointerWithAlignment(E->getArg(2));
12930  Address SrcC = EmitPointerWithAlignment(E->getArg(3));
12931  llvm::APSInt LayoutArg;
12932  if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext()))
12933  return nullptr;
12934  int Layout = LayoutArg.getSExtValue();
12935  if (Layout < 0 || Layout > 3)
12936  return nullptr;
12937  llvm::APSInt SatfArg;
12938  if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext()))
12939  return nullptr;
12940  bool Satf = SatfArg.getSExtValue();
12941 
12942  // clang-format off
12943 #define MMA_VARIANTS(geom, type) {{ \
12944  Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
12945  Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
12946  Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
12947  Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
12948  Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
12949  Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
12950  Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \
12951  Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \
12952  }}
12953  // clang-format on
12954 
12955  auto getMMAIntrinsic = [Layout, Satf](std::array<unsigned, 8> Variants) {
12956  unsigned Index = Layout * 2 + Satf;
12957  assert(Index < 8);
12958  return Variants[Index];
12959  };
12960  unsigned IID;
12961  unsigned NumEltsC;
12962  unsigned NumEltsD;
12963  switch (BuiltinID) {
12964  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
12965  IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f16));
12966  NumEltsC = 4;
12967  NumEltsD = 4;
12968  break;
12969  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
12970  IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f16));
12971  NumEltsC = 4;
12972  NumEltsD = 8;
12973  break;
12974  case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
12975  IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f32));
12976  NumEltsC = 8;
12977  NumEltsD = 4;
12978  break;
12979  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
12980  IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f32));
12981  NumEltsC = 8;
12982  NumEltsD = 8;
12983  break;
12984  case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
12985  IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f16));
12986  NumEltsC = 4;
12987  NumEltsD = 4;
12988  break;
12989  case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
12990  IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f16));
12991  NumEltsC = 4;
12992  NumEltsD = 8;
12993  break;
12994  case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
12995  IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f32));
12996  NumEltsC = 8;
12997  NumEltsD = 4;
12998  break;
12999  case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
13000  IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f32));
13001  NumEltsC = 8;
13002  NumEltsD = 8;
13003  break;
13004  case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
13005  IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f16));
13006  NumEltsC = 4;
13007  NumEltsD = 4;
13008  break;
13009  case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
13010  IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f16));
13011  NumEltsC = 4;
13012  NumEltsD = 8;
13013  break;
13014  case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
13015  IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f32));
13016  NumEltsC = 8;
13017  NumEltsD = 4;
13018  break;
13019  case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
13020  IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f32));
13021  NumEltsC = 8;
13022  NumEltsD = 8;
13023  break;
13024  default:
13025  llvm_unreachable("Unexpected builtin ID.");
13026  }
13027 #undef MMA_VARIANTS
13028 
13029  SmallVector<Value *, 24> Values;
13030  Function *Intrinsic = CGM.getIntrinsic(IID);
13031  llvm::Type *ABType = Intrinsic->getFunctionType()->getParamType(0);
13032  // Load A
13033  for (unsigned i = 0; i < 8; ++i) {
13034  Value *V = Builder.CreateAlignedLoad(
13035  Builder.CreateGEP(SrcA.getPointer(),
13036  llvm::ConstantInt::get(IntTy, i)),
13038  Values.push_back(Builder.CreateBitCast(V, ABType));
13039  }
13040  // Load B
13041  for (unsigned i = 0; i < 8; ++i) {
13042  Value *V = Builder.CreateAlignedLoad(
13043  Builder.CreateGEP(SrcB.getPointer(),
13044  llvm::ConstantInt::get(IntTy, i)),
13046  Values.push_back(Builder.CreateBitCast(V, ABType));
13047  }
13048  // Load C
13049  llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(16);
13050  for (unsigned i = 0; i < NumEltsC; ++i) {
13051  Value *V = Builder.CreateAlignedLoad(
13052  Builder.CreateGEP(SrcC.getPointer(),
13053  llvm::ConstantInt::get(IntTy, i)),
13055  Values.push_back(Builder.CreateBitCast(V, CType));
13056  }
13057  Value *Result = Builder.CreateCall(Intrinsic, Values);
13058  llvm::Type *DType = Dst.getElementType();
13059  for (unsigned i = 0; i < NumEltsD; ++i)
13060  Builder.CreateAlignedStore(
13061  Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
13062  Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
13064  return Result;
13065  }
13066  default:
13067  return nullptr;
13068  }
13069 }
13070 
13072  const CallExpr *E) {
13073  switch (BuiltinID) {
13074  case WebAssembly::BI__builtin_wasm_memory_size: {
13075  llvm::Type *ResultType = ConvertType(E->getType());
13076  Value *I = EmitScalarExpr(E->getArg(0));
13077  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
13078  return Builder.CreateCall(Callee, I);
13079  }
13080  case WebAssembly::BI__builtin_wasm_memory_grow: {
13081  llvm::Type *ResultType = ConvertType(E->getType());
13082  Value *Args[] = {
13083  EmitScalarExpr(E->getArg(0)),
13084  EmitScalarExpr(E->getArg(1))
13085  };
13086  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
13087  return Builder.CreateCall(Callee, Args);
13088  }
13089  case WebAssembly::BI__builtin_wasm_throw: {
13090  Value *Tag = EmitScalarExpr(E->getArg(0));
13091  Value *Obj = EmitScalarExpr(E->getArg(1));
13092  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
13093  return Builder.CreateCall(Callee, {Tag, Obj});
13094  }
13095  case WebAssembly::BI__builtin_wasm_rethrow: {
13096  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
13097  return Builder.CreateCall(Callee);
13098  }
13099  case WebAssembly::BI__builtin_wasm_atomic_wait_i32: {
13100  Value *Addr = EmitScalarExpr(E->getArg(0));
13101  Value *Expected = EmitScalarExpr(E->getArg(1));
13102  Value *Timeout = EmitScalarExpr(E->getArg(2));
13103  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32);
13104  return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
13105  }
13106  case WebAssembly::BI__builtin_wasm_atomic_wait_i64: {
13107  Value *Addr = EmitScalarExpr(E->getArg(0));
13108  Value *Expected = EmitScalarExpr(E->getArg(1));
13109  Value *Timeout = EmitScalarExpr(E->getArg(2));
13110  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64);
13111  return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
13112  }
13113  case WebAssembly::BI__builtin_wasm_atomic_notify: {
13114  Value *Addr = EmitScalarExpr(E->getArg(0));
13115  Value *Count = EmitScalarExpr(E->getArg(1));
13116  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify);
13117  return Builder.CreateCall(Callee, {Addr, Count});
13118  }
13119  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
13120  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
13121  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
13122  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
13123  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4:
13124  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: {
13125  Value *Src = EmitScalarExpr(E->getArg(0));
13126  llvm::Type *ResT = ConvertType(E->getType());
13127  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed,
13128  {ResT, Src->getType()});
13129  return Builder.CreateCall(Callee, {Src});
13130  }
13131  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
13132  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
13133  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
13134  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
13135  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4:
13136  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: {
13137  Value *Src = EmitScalarExpr(E->getArg(0));
13138  llvm::Type *ResT = ConvertType(E->getType());
13139  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned,
13140  {ResT, Src->getType()});
13141  return Builder.CreateCall(Callee, {Src});
13142  }
13143  case WebAssembly::BI__builtin_wasm_min_f32:
13144  case WebAssembly::BI__builtin_wasm_min_f64:
13145  case WebAssembly::BI__builtin_wasm_min_f32x4:
13146  case WebAssembly::BI__builtin_wasm_min_f64x2: {
13147  Value *LHS = EmitScalarExpr(E->getArg(0));
13148  Value *RHS = EmitScalarExpr(E->getArg(1));
13149  Value *Callee = CGM.getIntrinsic(Intrinsic::minimum,
13150  ConvertType(E->getType()));
13151  return Builder.CreateCall(Callee, {LHS, RHS});
13152  }
13153  case WebAssembly::BI__builtin_wasm_max_f32:
13154  case WebAssembly::BI__builtin_wasm_max_f64:
13155  case WebAssembly::BI__builtin_wasm_max_f32x4:
13156  case WebAssembly::BI__builtin_wasm_max_f64x2: {
13157  Value *LHS = EmitScalarExpr(E->getArg(0));
13158  Value *RHS = EmitScalarExpr(E->getArg(1));
13159  Value *Callee = CGM.getIntrinsic(Intrinsic::maximum,
13160  ConvertType(E->getType()));
13161  return Builder.CreateCall(Callee, {LHS, RHS});
13162  }
13163  case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
13164  case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
13165  case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
13166  case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
13167  case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
13168  case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
13169  case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
13170  case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: {
13171  llvm::APSInt LaneConst;
13172  if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
13173  llvm_unreachable("Constant arg isn't actually constant?");
13174  Value *Vec = EmitScalarExpr(E->getArg(0));
13175  Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
13176  Value *Extract = Builder.CreateExtractElement(Vec, Lane);
13177  switch (BuiltinID) {
13178  case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
13179  case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
13180  return Builder.CreateSExt(Extract, ConvertType(E->getType()));
13181  case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
13182  case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
13183  return Builder.CreateZExt(Extract, ConvertType(E->getType()));
13184  case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
13185  case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
13186  case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
13187  case WebAssembly::BI__builtin_wasm_extract_lane_f64x2:
13188  return Extract;
13189  default:
13190  llvm_unreachable("unexpected builtin ID");
13191  }
13192  }
13193  case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
13194  case WebAssembly::BI__builtin_wasm_replace_lane_i16x8:
13195  case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
13196  case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
13197  case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
13198  case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: {
13199  llvm::APSInt LaneConst;
13200  if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
13201  llvm_unreachable("Constant arg isn't actually constant?");
13202  Value *Vec = EmitScalarExpr(E->getArg(0));
13203  Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
13204  Value *Val = EmitScalarExpr(E->getArg(2));
13205  switch (BuiltinID) {
13206  case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
13207  case WebAssembly::BI__builtin_wasm_replace_lane_i16x8: {
13208  llvm::Type *ElemType = ConvertType(E->getType())->getVectorElementType();
13209  Value *Trunc = Builder.CreateTrunc(Val, ElemType);
13210  return Builder.CreateInsertElement(Vec, Trunc, Lane);
13211  }
13212  case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
13213  case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
13214  case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
13215  case WebAssembly::BI__builtin_wasm_replace_lane_f64x2:
13216  return Builder.CreateInsertElement(Vec, Val, Lane);
13217  default:
13218  llvm_unreachable("unexpected builtin ID");
13219  }
13220  }
13221  case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
13222  case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
13223  case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
13224  case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
13225  case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
13226  case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
13227  case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
13228  case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8: {
13229  unsigned IntNo;
13230  switch (BuiltinID) {
13231  case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
13232  case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
13233  IntNo = Intrinsic::sadd_sat;
13234  break;
13235  case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
13236  case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
13237  IntNo = Intrinsic::uadd_sat;
13238  break;
13239  case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
13240  case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
13241  IntNo = Intrinsic::wasm_sub_saturate_signed;
13242  break;
13243  case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
13244  case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8:
13245  IntNo = Intrinsic::wasm_sub_saturate_unsigned;
13246  break;
13247  default:
13248  llvm_unreachable("unexpected builtin ID");
13249  }
13250  Value *LHS = EmitScalarExpr(E->getArg(0));
13251  Value *RHS = EmitScalarExpr(E->getArg(1));
13252  Value *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
13253  return Builder.CreateCall(Callee, {LHS, RHS});
13254  }
13255  case WebAssembly::BI__builtin_wasm_bitselect: {
13256  Value *V1 = EmitScalarExpr(E->getArg(0));
13257  Value *V2 = EmitScalarExpr(E->getArg(1));
13258  Value *C = EmitScalarExpr(E->getArg(2));
13259  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect,
13260  ConvertType(E->getType()));
13261  return Builder.CreateCall(Callee, {V1, V2, C});
13262  }
13263  case WebAssembly::BI__builtin_wasm_any_true_i8x16:
13264  case WebAssembly::BI__builtin_wasm_any_true_i16x8:
13265  case WebAssembly::BI__builtin_wasm_any_true_i32x4:
13266  case WebAssembly::BI__builtin_wasm_any_true_i64x2:
13267  case WebAssembly::BI__builtin_wasm_all_true_i8x16:
13268  case WebAssembly::BI__builtin_wasm_all_true_i16x8:
13269  case WebAssembly::BI__builtin_wasm_all_true_i32x4:
13270  case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
13271  unsigned IntNo;
13272  switch (BuiltinID) {
13273  case WebAssembly::BI__builtin_wasm_any_true_i8x16:
13274  case WebAssembly::BI__builtin_wasm_any_true_i16x8:
13275  case WebAssembly::BI__builtin_wasm_any_true_i32x4:
13276  case WebAssembly::BI__builtin_wasm_any_true_i64x2:
13277  IntNo = Intrinsic::wasm_anytrue;
13278  break;
13279  case WebAssembly::BI__builtin_wasm_all_true_i8x16:
13280  case WebAssembly::BI__builtin_wasm_all_true_i16x8:
13281  case WebAssembly::BI__builtin_wasm_all_true_i32x4:
13282  case WebAssembly::BI__builtin_wasm_all_true_i64x2:
13283  IntNo = Intrinsic::wasm_alltrue;
13284  break;
13285  default:
13286  llvm_unreachable("unexpected builtin ID");
13287  }
13288  Value *Vec = EmitScalarExpr(E->getArg(0));
13289  Value *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
13290  return Builder.CreateCall(Callee, {Vec});
13291  }
13292  case WebAssembly::BI__builtin_wasm_abs_f32x4:
13293  case WebAssembly::BI__builtin_wasm_abs_f64x2: {
13294  Value *Vec = EmitScalarExpr(E->getArg(0));
13295  Value *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
13296  return Builder.CreateCall(Callee, {Vec});
13297  }
13298  case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
13299  case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
13300  Value *Vec = EmitScalarExpr(E->getArg(0));
13301  Value *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
13302  return Builder.CreateCall(Callee, {Vec});
13303  }
13304 
13305  default:
13306  return nullptr;
13307  }
13308 }
13309 
13311  const CallExpr *E) {
13313  Intrinsic::ID ID = Intrinsic::not_intrinsic;
13314 
13315  auto MakeCircLd = [&](unsigned IntID, bool HasImm) {
13316  // The base pointer is passed by address, so it needs to be loaded.
13317  Address BP = EmitPointerWithAlignment(E->getArg(0));
13318  BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
13319  BP.getAlignment());
13320  llvm::Value *Base = Builder.CreateLoad(BP);
13321  // Operands are Base, Increment, Modifier, Start.
13322  if (HasImm)
13323  Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
13324  EmitScalarExpr(E->getArg(3)) };
13325  else
13326  Ops = { Base, EmitScalarExpr(E->getArg(1)),
13327  EmitScalarExpr(E->getArg(2)) };
13328 
13329  llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
13330  llvm::Value *NewBase = Builder.CreateExtractValue(Result, 1);
13331  llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
13332  NewBase->getType()->getPointerTo());
13333  Address Dest = EmitPointerWithAlignment(E->getArg(0));
13334  // The intrinsic generates two results. The new value for the base pointer
13335  // needs to be stored.
13336  Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
13337  return Builder.CreateExtractValue(Result, 0);
13338  };
13339 
13340  auto MakeCircSt = [&](unsigned IntID, bool HasImm) {
13341  // The base pointer is passed by address, so it needs to be loaded.
13342  Address BP = EmitPointerWithAlignment(E->getArg(0));
13343  BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
13344  BP.getAlignment());
13345  llvm::Value *Base = Builder.CreateLoad(BP);
13346  // Operands are Base, Increment, Modifier, Value, Start.
13347  if (HasImm)
13348  Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
13349  EmitScalarExpr(E->getArg(3)), EmitScalarExpr(E->getArg(4)) };
13350  else
13351  Ops = { Base, EmitScalarExpr(E->getArg(1)),
13352  EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)) };
13353 
13354  llvm::Value *NewBase = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
13355  llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
13356  NewBase->getType()->getPointerTo());
13357  Address Dest = EmitPointerWithAlignment(E->getArg(0));
13358  // The intrinsic generates one result, which is the new value for the base
13359  // pointer. It needs to be stored.
13360  return Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
13361  };
13362 
13363  // Handle the conversion of bit-reverse load intrinsics to bit code.
13364  // The intrinsic call after this function only reads from memory and the
13365  // write to memory is dealt by the store instruction.
13366  auto MakeBrevLd = [&](unsigned IntID, llvm::Type *DestTy) {
13367  // The intrinsic generates one result, which is the new value for the base
13368  // pointer. It needs to be returned. The result of the load instruction is
13369  // passed to intrinsic by address, so the value needs to be stored.
13370  llvm::Value *BaseAddress =
13371  Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
13372 
13373  // Expressions like &(*pt++) will be incremented per evaluation.
13374  // EmitPointerWithAlignment and EmitScalarExpr evaluates the expression
13375  // per call.
13376  Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
13377  DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy),
13378  DestAddr.getAlignment());
13379  llvm::Value *DestAddress = DestAddr.getPointer();
13380 
13381  // Operands are Base, Dest, Modifier.
13382  // The intrinsic format in LLVM IR is defined as
13383  // { ValueType, i8* } (i8*, i32).
13384  Ops = {BaseAddress, EmitScalarExpr(E->getArg(2))};
13385 
13386  llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
13387  // The value needs to be stored as the variable is passed by reference.
13388  llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
13389 
13390  // The store needs to be truncated to fit the destination type.
13391  // While i32 and i64 are natively supported on Hexagon, i8 and i16 needs
13392  // to be handled with stores of respective destination type.
13393  DestVal = Builder.CreateTrunc(DestVal, DestTy);
13394 
13395  llvm::Value *DestForStore =
13396  Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo());
13397  Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment());
13398  // The updated value of the base pointer is returned.
13399  return Builder.CreateExtractValue(Result, 1);
13400  };
13401 
13402  switch (BuiltinID) {
13403  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
13404  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: {
13405  Address Dest = EmitPointerWithAlignment(E->getArg(2));
13406  unsigned Size;
13407  if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) {
13408  Size = 512;
13409  ID = Intrinsic::hexagon_V6_vaddcarry;
13410  } else {
13411  Size = 1024;
13412  ID = Intrinsic::hexagon_V6_vaddcarry_128B;
13413  }
13414  Dest = Builder.CreateBitCast(Dest,
13415  llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
13416  LoadInst *QLd = Builder.CreateLoad(Dest);
13417  Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
13418  llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
13419  llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
13420  llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
13421  Vprd->getType()->getPointerTo(0));
13422  Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
13423  return Builder.CreateExtractValue(Result, 0);
13424  }
13425  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
13426  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
13427  Address Dest = EmitPointerWithAlignment(E->getArg(2));
13428  unsigned Size;
13429  if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) {
13430  Size = 512;
13431  ID = Intrinsic::hexagon_V6_vsubcarry;
13432  } else {
13433  Size = 1024;
13434  ID = Intrinsic::hexagon_V6_vsubcarry_128B;
13435  }
13436  Dest = Builder.CreateBitCast(Dest,
13437  llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
13438  LoadInst *QLd = Builder.CreateLoad(Dest);
13439  Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
13440  llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
13441  llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
13442  llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
13443  Vprd->getType()->getPointerTo(0));
13444  Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
13445  return Builder.CreateExtractValue(Result, 0);
13446  }
13447  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
13448  return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pci, /*HasImm*/true);
13449  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
13450  return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pci, /*HasImm*/true);
13451  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
13452  return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pci, /*HasImm*/true);
13453  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
13454  return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pci, /*HasImm*/true);
13455  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
13456  return MakeCircLd(Intrinsic::hexagon_L2_loadri_pci, /*HasImm*/true);
13457  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
13458  return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pci, /*HasImm*/true);
13459  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
13460  return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pcr, /*HasImm*/false);
13461  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
13462  return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pcr, /*HasImm*/false);
13463  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
13464  return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pcr, /*HasImm*/false);
13465  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
13466  return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pcr, /*HasImm*/false);
13467  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
13468  return MakeCircLd(Intrinsic::hexagon_L2_loadri_pcr, /*HasImm*/false);
13469  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
13470  return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pcr, /*HasImm*/false);
13471  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
13472  return MakeCircSt(Intrinsic::hexagon_S2_storerb_pci, /*HasImm*/true);
13473  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
13474  return MakeCircSt(Intrinsic::hexagon_S2_storerh_pci, /*HasImm*/true);
13475  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
13476  return MakeCircSt(Intrinsic::hexagon_S2_storerf_pci, /*HasImm*/true);
13477  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
13478  return MakeCircSt(Intrinsic::hexagon_S2_storeri_pci, /*HasImm*/true);
13479  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
13480  return MakeCircSt(Intrinsic::hexagon_S2_storerd_pci, /*HasImm*/true);
13481  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
13482  return MakeCircSt(Intrinsic::hexagon_S2_storerb_pcr, /*HasImm*/false);
13483  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
13484  return MakeCircSt(Intrinsic::hexagon_S2_storerh_pcr, /*HasImm*/false);
13485  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
13486  return MakeCircSt(Intrinsic::hexagon_S2_storerf_pcr, /*HasImm*/false);
13487  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
13488  return MakeCircSt(Intrinsic::hexagon_S2_storeri_pcr, /*HasImm*/false);
13489  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
13490  return MakeCircSt(Intrinsic::hexagon_S2_storerd_pcr, /*HasImm*/false);
13491  case Hexagon::BI__builtin_brev_ldub:
13492  return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
13493  case Hexagon::BI__builtin_brev_ldb:
13494  return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
13495  case Hexagon::BI__builtin_brev_lduh:
13496  return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
13497  case Hexagon::BI__builtin_brev_ldh:
13498  return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
13499  case Hexagon::BI__builtin_brev_ldw:
13500  return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
13501  case Hexagon::BI__builtin_brev_ldd:
13502  return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
13503  default:
13504  break;
13505  } // switch
13506 
13507  return nullptr;
13508 }
ReturnValueSlot - Contains the address where the return value of a function can be stored...
Definition: CGCall.h:361
Defines the clang::ASTContext interface.
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:338
Represents a function declaration or definition.
Definition: Decl.h:1738
llvm::IntegerType * IntTy
int
Other implicit parameter.
Definition: Decl.h:1510
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name="")
Definition: CGBuiltin.cpp:5607
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef< Value *> Ops, unsigned BuiltinID, bool IsAddSub)
Definition: CGBuiltin.cpp:9298
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2537
CanQualType VoidPtrTy
Definition: ASTContext.h:1044
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value *> Ops)
Definition: CGBuiltin.cpp:9122
A (possibly-)qualified type.
Definition: Type.h:638
bool isBlockPointerType() const
Definition: Type.h:6304
#define fma(__x, __y, __z)
Definition: tgmath.h:758
bool isArrayType() const
Definition: Type.h:6345
static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type)
Definition: CGBuiltin.cpp:436
unsigned char getSummaryByte() const
Definition: OSLog.h:140
const CodeGenOptions & getCodeGenOpts() const
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:2553
static Value * emitBinaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:315
static struct WidthAndSignedness EncompassingIntegerType(ArrayRef< struct WidthAndSignedness > Types)
Definition: CGBuiltin.cpp:448
llvm::Value * EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::LLVMContext & getLLVMContext()
DominatorTree GraphTraits specialization so the DominatorTree can be iterable by generic graph iterat...
Definition: Dominators.h:30
bool isBigEndian() const
Definition: TargetInfo.h:1219
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer...
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D...
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:2540
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:505
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:955
#define trunc(__x)
Definition: tgmath.h:1232
static const Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:21
static Value * EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, ArrayRef< Value *> Ops)
Definition: CGBuiltin.cpp:9288
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition: CGExpr.cpp:1029
constexpr XRayInstrMask Typed
Definition: XRayInstr.h:41
bool isRecordType() const
Definition: Type.h:6369
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
Definition: CGBuiltin.cpp:9077
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value *> Ops)
Definition: CGBuiltin.cpp:9472
StringRef P
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:848
static Value * EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType)
Emit the conversions required to turn the given value into an integer of the given size...
Definition: CGBuiltin.cpp:75
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, bool IsRead, StringRef SysReg="")
Definition: CGBuiltin.cpp:5704
The base class of the type hierarchy.
Definition: Type.h:1407
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value *> Ops)
Definition: CGBuiltin.cpp:9443
#define log2(__x)
Definition: tgmath.h:986
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:4235
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
static CanQualType getOSLogArgType(ASTContext &C, int Size)
Get the argument type for arguments to os_log_helper.
Definition: CGBuiltin.cpp:1047
RValue emitRotate(const CallExpr *E, bool IsRotateRight)
Definition: CGBuiltin.cpp:1458
virtual llvm::Value * getPipeElemAlign(const Expr *PipeArg)
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::Instruction **callOrInvoke, SourceLocation Loc)
EmitCall - Generate a call of the given function, expecting the given result type, and using the given argument list which specifies both the LLVM arguments and the types they were derived from.
Definition: CGCall.cpp:3807
llvm::Value * EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind)
Emits an argument for a call to a builtin.
Definition: CGBuiltin.cpp:1026
Objects with "hidden" visibility are not seen by the dynamic linker.
Definition: Visibility.h:37
static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Determine if a binop is a checked mixed-sign multiply we can specialize.
Definition: CGBuiltin.cpp:1233
RAII object to set/unset CodeGenFunction::IsSanitizerScope.
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value *> Ops, unsigned Align)
Definition: CGBuiltin.cpp:9110
const T * getAs() const
Member-template getAs<specific type>&#39;.
Definition: Type.h:6748
const void * Store
Store - This opaque type encapsulates an immutable mapping from locations to values.
Definition: StoreRef.h:28
static Value * EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E)
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static int64_t clamp(int64_t Value, int64_t Low, int64_t High)
Definition: CGBuiltin.cpp:45
llvm::Value * getPointer() const
Definition: Address.h:38
Represents a parameter to a function.
Definition: Decl.h:1550
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition: CGBuiltin.cpp:5767
The collection of all-type qualifiers we support.
Definition: Type.h:141
void add(RValue rvalue, QualType type)
Definition: CGCall.h:285
PipeType - OpenCL20.
Definition: Type.h:6002
Expr * IgnoreImpCasts() LLVM_READONLY
IgnoreImpCasts - Skip past any implicit casts which might surround this expression.
Definition: Expr.h:3167
Represents a struct/union/class.
Definition: Decl.h:3593
void __ovld prefetch(const __global char *p, size_t num_elements)
Prefetch num_elements * sizeof(gentype) bytes into the global cache.
const TargetInfo & getTarget() const
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
Definition: CGBuiltin.cpp:1451
One of these records is kept for each identifier that is lexed.
Address getAddress() const
Definition: CGValue.h:327
#define pow(__x, __y)
Definition: tgmath.h:506
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:155
RecordDecl * getDefinition() const
Returns the RecordDecl that actually defines this struct/union/class.
Definition: Decl.h:3774
field_range fields() const
Definition: Decl.h:3784
llvm::Value * BuildVector(ArrayRef< llvm::Value *> Ops)
Definition: CGBuiltin.cpp:9051
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:288
Represents a member of a struct/union/class.
Definition: Decl.h:2579
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value *> Ops)
Definition: CGBuiltin.cpp:9139
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
Definition: CGBuiltin.cpp:9170
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6766
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef< Value *> Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
Definition: CGBuiltin.cpp:9403
static bool NEONSIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:4908
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value *> &Ops, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6609
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based Intrinsic::ID and the expression node, where the return value is the result of the operation.
Definition: CGBuiltin.cpp:158
bool isFloat() const
Definition: APValue.h:235
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
Definition: CGBuiltin.cpp:634
IdentifierTable & Idents
Definition: ASTContext.h:566
static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:4521
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
static Value * EmitX86AddSubSatExpr(CodeGenFunction &CGF, ArrayRef< Value *> Ops, bool IsSigned, bool IsAddition)
Definition: CGBuiltin.cpp:9508
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value *> Ops, bool InvertLHS=false)
Definition: CGBuiltin.cpp:9156
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:715
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:5036
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value *> Ops, unsigned Align)
Definition: CGBuiltin.cpp:9097
unsigned char getNumArgsByte() const
Definition: OSLog.h:149
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e...
Definition: Builtins.h:134
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:290
static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[]
Definition: CGBuiltin.cpp:4678
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:573
#define INTRINSIC_WITH_CC(NAME)
CharUnits getAlignment() const
Return the alignment of this pointer.
Definition: Address.h:67
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition: Type.h:6142
static bool AArch64SISDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:4911
__DEVICE__ double powi(double __a, int __b)
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
Definition: CGBuiltin.cpp:9201
Expr * IgnoreParenCasts() LLVM_READONLY
IgnoreParenCasts - Ignore parentheses and casts.
Definition: Expr.cpp:2595
bool isInt() const
Definition: APValue.h:234
#define sin(__x)
Definition: tgmath.h:302
bool isUnsigned() const
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition: OSLog.cpp:180
static FunctionDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation NLoc, DeclarationName N, QualType T, TypeSourceInfo *TInfo, StorageClass SC, bool isInlineSpecified=false, bool hasWrittenPrototype=true, bool isConstexprSpecified=false)
Definition: Decl.h:1875
static llvm::Value * dumpRecord(CodeGenFunction &CGF, QualType RType, Value *&RecordPtr, CharUnits Align, Value *Func, int Lvl)
Definition: CGBuiltin.cpp:1332
llvm::Constant * CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false)
Create a new runtime function with the specified type and name.
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available...
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
bool hasAttr() const
Definition: DeclBase.h:531
llvm::Type * HalfTy
float, double
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition: CharUnits.h:58
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1613
Represents a prototype with parameter type info, e.g.
Definition: Type.h:3687
llvm::Value * EmitISOVolatileStore(const CallExpr *E)
Definition: CGBuiltin.cpp:5814
#define NEONMAP0(NameBase)
Definition: CGBuiltin.cpp:4232
RValue - This trivial value class is used to represent the result of an expression that is evaluated...
Definition: CGValue.h:39
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
Definition: CGBuiltin.cpp:1473
const char * getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition: Builtins.h:86
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:179
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value *> Ops)
Definition: CGBuiltin.cpp:9239
unsigned Offset
Definition: Format.cpp:1631
ASTRecordLayout - This class contains layout information for one RecordDecl, which is a struct/union/...
Definition: RecordLayout.h:39
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3101
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm.va_end.
Definition: CGBuiltin.cpp:472
Exposes information about the current target.
Definition: TargetInfo.h:54
#define copysign(__x, __y)
Definition: tgmath.h:634
This represents one expression.
Definition: Expr.h:106
SourceLocation End
static Address invalid()
Definition: Address.h:35
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
Definition: CGBuiltin.cpp:387
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:6811
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition: CGCall.h:134
Expr * getCallee()
Definition: Expr.h:2514
static uint64_t GetX86CpuSupportsMask(ArrayRef< StringRef > FeatureStrs)
Definition: CGBuiltin.cpp:9576
#define INTRINSIC_X86_XSAVE_ID(NAME)
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
Definition: CGBuiltin.cpp:9188
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation...
Definition: CGExpr.cpp:1677
static Value * emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:305
SourceLocation Begin
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
FunctionDecl * getDirectCallee()
If the callee is a FunctionDecl, return it. Otherwise return null.
Definition: Expr.h:2532
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
Definition: CGBuiltin.cpp:86
static SVal getValue(SVal val, SValBuilder &svalBuilder)
int Depth
Definition: ASTDiff.cpp:191
llvm::LLVMContext & getLLVMContext()
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char, signed char, short, int, long..], or an enum decl which has a signed representation.
Definition: Type.cpp:1844
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
Definition: CGBuiltin.cpp:417
QualType getType() const
Definition: Expr.h:128
static const NeonIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:4244
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p, To) is correct.
Definition: CGBuiltin.cpp:484
QualType getFunctionType(QualType ResultTy, ArrayRef< QualType > Args, const FunctionProtoType::ExtProtoInfo &EPI) const
Return a normal function type with a typed argument list.
Definition: ASTContext.h:1380
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
Definition: CGBuiltin.cpp:1160
static const NeonIntrinsicInfo * findNeonIntrinsicInMap(ArrayRef< NeonIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
Definition: CGBuiltin.cpp:4915
ASTContext & getContext() const
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:141
virtual llvm::Value * getPipeElemSize(const Expr *PipeArg)
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:9657
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:35
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:277
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.
Definition: CGBuiltin.cpp:402
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
Definition: RecordLayout.h:191
EltType getEltType() const
#define log(__x)
Definition: tgmath.h:476
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy t...
Enumerates target-specific builtins in their own namespaces within namespace clang.
Address CreateBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Definition: CGBuilder.h:142
MSVCIntrin
Definition: CGBuiltin.cpp:810
Kind
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition: DeclBase.cpp:218
Encodes a location in the source.
static RValue getIgnored()
Definition: CGValue.h:81
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:6188
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation...
Definition: CGExpr.cpp:1691
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
Definition: CGBuiltin.cpp:9216
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition: CGBuiltin.cpp:4120
llvm::CallSite EmitRuntimeCallOrInvoke(llvm::Value *callee, ArrayRef< llvm::Value *> args, const Twine &name="")
Emits a call or invoke instruction to the given runtime function.
Definition: CGCall.cpp:3766
#define rint(__x)
Definition: tgmath.h:1147
ASTContext & getASTContext() const LLVM_READONLY
Definition: DeclBase.cpp:376
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:127
const Decl * getDecl() const
Definition: GlobalDecl.h:69
APFloat & getFloat()
Definition: APValue.h:266
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
Definition: CGBuiltin.cpp:207
#define round(__x)
Definition: tgmath.h:1164
#define exp2(__x)
Definition: tgmath.h:686
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
Definition: CGBuiltin.cpp:666
OpenMPLinearClauseKind Modifier
Modifier of &#39;linear&#39; clause.
Definition: OpenMPClause.h:102
#define MMA_VARIANTS(geom, type)
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2285
#define cos(__x)
Definition: tgmath.h:273
constexpr XRayInstrMask Custom
Definition: XRayInstr.h:40
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
Represents a canonical, potentially-qualified type.
Definition: CanonicalType.h:66
CanQualType VoidTy
Definition: ASTContext.h:1016
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded...
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
Definition: CGBuiltin.cpp:1052
arg_range arguments()
Definition: Expr.h:2590
An aligned address.
Definition: Address.h:25
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
Definition: CGBuiltin.cpp:770
All available information about a concrete callee.
Definition: CGCall.h:67
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
Definition: CGBuiltin.cpp:9500
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft&#39;s _InterlockedCompareExchange* i...
Definition: CGBuiltin.cpp:251
char __ovld __cnfn rotate(char v, char i)
For each element in v, the bits are shifted left by the number of bits given by the corresponding ele...
#define exp(__x)
Definition: tgmath.h:447
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
Definition: ASTContext.h:2293
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:215
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to .fabs().
Definition: CGBuiltin.cpp:349
Like Angled, but marks system directories.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type, returning the result.
FunctionArgList - Type for representing both the decl and type of parameters to a function...
Definition: CGCall.h:356
ast_type_traits::DynTypedNode Node
#define log10(__x)
Definition: tgmath.h:952
CGFunctionInfo - Class to encapsulate the information about a function definition.
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
Definition: CGBuiltin.cpp:4162
This class organizes the cross-function state that is used while generating LLVM code.
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl *> &Seen)
Definition: CGBuiltin.cpp:1423
Dataflow Directional Tag Classes.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:4239
bool hasSideEffects() const
Definition: Expr.h:565
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:93
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:571
MSVCSetJmpKind
Definition: CGBuiltin.cpp:760
Address CreateStructGEP(Address Addr, unsigned Index, CharUnits Offset, const llvm::Twine &Name="")
Definition: CGBuilder.h:172
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value *> &O, const char *name, unsigned shift=0, bool rightshift=false)
Definition: CGBuiltin.cpp:4141
void EmitARCIntrinsicUse(ArrayRef< llvm::Value *> values)
Given a number of pointers, inform the optimizer that they&#39;re being intrinsically used up until this ...
Definition: CGObjC.cpp:1893
#define ceil(__x)
Definition: tgmath.h:617
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:70
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
Definition: CGBuiltin.cpp:1245
static std::string getAsString(SplitQualType split, const PrintingPolicy &Policy)
Definition: Type.h:971
bool isBooleanType() const
Definition: Type.h:6657
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:4035
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type *> Tys=None)
bool isIntegerConstantExpr(llvm::APSInt &Result, const ASTContext &Ctx, SourceLocation *Loc=nullptr, bool isEvaluated=true) const
isIntegerConstantExpr - Return true if this expression is a valid integer constant expression...
#define floor(__x)
Definition: tgmath.h:738
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:108
Flags to identify the types for overloaded Neon builtins.
QualType getCallReturnType(const ASTContext &Ctx) const
getCallReturnType - Get the return type of the call expr.
Definition: Expr.cpp:1396
Expr * IgnoreParenImpCasts() LLVM_READONLY
IgnoreParenImpCasts - Ignore parentheses and implicit casts.
Definition: Expr.cpp:2693
SmallVector< OSLogBufferItem, 4 > Items
Definition: OSLog.h:114
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx)
Definition: CGBuiltin.cpp:4135
#define X86_VENDOR(ENUM, STRING)
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of structs/unions/cl...
Definition: Type.h:4370
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
Definition: CGBuiltin.cpp:6757
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
Definition: CGBuiltin.cpp:492
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:6578
T * getAttr() const
Definition: DeclBase.h:527
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:52
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
Definition: CGBuiltin.cpp:4155
bool isFunctionType() const
Definition: Type.h:6292
#define nearbyint(__x)
Definition: tgmath.h:1054
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value *> Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition: CGBuiltin.cpp:5628
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
Definition: CGBuiltin.cpp:681
void setNontemporal(bool Value)
Definition: CGValue.h:292
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:5829
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2070
CanQualType getCanonicalType(QualType T) const
Return the canonical (structural) type corresponding to the specified potentially non-canonical type ...
Definition: ASTContext.h:2253
X
Add a minimal nested name specifier fixit hint to allow lookup of a tag name from an outer enclosing ...
Definition: SemaDecl.cpp:13954
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
static Value * EmitX86Abs(CodeGenFunction &CGF, ArrayRef< Value *> Ops)
Definition: CGBuiltin.cpp:9278
TranslationUnitDecl * getTranslationUnitDecl() const
Definition: ASTContext.h:1009
bool isVoidType() const
Definition: Type.h:6544
TypeInfo getTypeInfo(const Type *T) const
Get the size and alignment of the specified complete type in bits.
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
Definition: CGBuiltin.cpp:4934
llvm::Type * ConvertType(QualType T)
BuiltinCheckKind
Specifies which type of sanitizer check to apply when handling a particular builtin.
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const NeonIntrinsicInfo &SISDInfo, SmallVectorImpl< Value *> &Ops, const CallExpr *E)
Definition: CGBuiltin.cpp:4973
#define sqrt(__x)
Definition: tgmath.h:536
Builtin::Context & BuiltinInfo
Definition: ASTContext.h:568
static char bitActionToX86BTCode(BitTest::ActionKind A)
Definition: CGBuiltin.cpp:624
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition: CGBuiltin.cpp:51
#define fabs(__x)
Definition: tgmath.h:565
Defines the clang::TargetInfo interface.
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2396
static Value * emitTernaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:326
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:276
uint64_t Width
Definition: ASTContext.h:144
__DEVICE__ int max(int __a, int __b)
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
Definition: CGBuiltin.cpp:358
bool isReadOnly() const
Definition: Type.h:6035
static RValue get(llvm::Value *V)
Definition: CGValue.h:86
bool isUnion() const
Definition: Decl.h:3252
bool isPointerType() const
Definition: Type.h:6296
__DEVICE__ int min(int __a, int __b)
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:732
QualType getType() const
Definition: Decl.h:648
LValue - This represents an lvalue references.
Definition: CGValue.h:167
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:147
APSInt & getInt()
Definition: APValue.h:252
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
Definition: CGBuiltin.cpp:9273
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node...
Definition: CGBuiltin.cpp:99
CallArgList - Type for representing both the value and type of arguments in a call.
Definition: CGCall.h:260
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
Definition: CGBuiltin.cpp:149
#define X86_FEATURE_COMPAT(VAL, ENUM, STR)
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
EmitTargetBuiltinExpr - Emit the given builtin call.
Definition: CGBuiltin.cpp:4072
static llvm::VectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false)
Definition: CGBuiltin.cpp:4085
static bool AArch64SIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:4910
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth, signed/unsigned.
llvm::Value * EmitISOVolatileLoad(const CallExpr *E)
Definition: CGBuiltin.cpp:5801
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
static OMPLinearClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef< Expr *> VL, ArrayRef< Expr *> PL, ArrayRef< Expr *> IL, Expr *Step, Expr *CalcStep, Stmt *PreInit, Expr *PostUpdate)
Creates clause with a list of variables VL and a linear step Step.
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.