clang 8.0.0
CGBuiltin.cpp
1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CGRecordLayout.h"
18 #include "CodeGenFunction.h"
19 #include "CodeGenModule.h"
20 #include "ConstantEmitter.h"
21 #include "TargetInfo.h"
22 #include "clang/AST/ASTContext.h"
23 #include "clang/AST/Decl.h"
24 #include "clang/AST/OSLog.h"
26 #include "clang/Basic/TargetInfo.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/IR/CallSite.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/InlineAsm.h"
33 #include "llvm/IR/Intrinsics.h"
34 #include "llvm/IR/MDBuilder.h"
35 #include "llvm/Support/ConvertUTF.h"
36 #include "llvm/Support/ScopedPrinter.h"
37 #include "llvm/Support/TargetParser.h"
38 #include <sstream>
39 
40 using namespace clang;
41 using namespace CodeGen;
42 using namespace llvm;
43 
44 static
45 int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
46  return std::min(High, std::max(Low, Value));
47 }
48 
49 /// getBuiltinLibFunction - Given a builtin id for a function like
50 /// "__builtin_fabsf", return a Function* for "fabsf".
51 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
52  unsigned BuiltinID) {
53  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
54 
55  // Get the name, skip over the __builtin_ prefix (if necessary).
56  StringRef Name;
57  GlobalDecl D(FD);
58 
59  // If the builtin has been declared explicitly with an assembler label,
60  // use the mangled name. This differs from the plain label on platforms
61  // that prefix labels.
62  if (FD->hasAttr<AsmLabelAttr>())
63  Name = getMangledName(D);
64  else
65  Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
66 
67  llvm::FunctionType *Ty =
68  cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
69 
70  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
71 }
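// [Editor's note: illustrative example, not part of the original source.]
// BuiltinInfo.getName() returns the full spelling "__builtin_fabsf"; the "+ 10"
// above skips the 10-character "__builtin_" prefix, so a call such as
//
//   float y = __builtin_fabsf(x);
//
// ends up referencing a plain library declaration, roughly:
//
//   declare float @fabsf(float)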
72 
73 /// Emit the conversions required to turn the given value into an
74 /// integer of the given size.
75 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
76  QualType T, llvm::IntegerType *IntType) {
77  V = CGF.EmitToMemory(V, T);
78 
79  if (V->getType()->isPointerTy())
80  return CGF.Builder.CreatePtrToInt(V, IntType);
81 
82  assert(V->getType() == IntType);
83  return V;
84 }
85 
86 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
87  QualType T, llvm::Type *ResultType) {
88  V = CGF.EmitFromMemory(V, T);
89 
90  if (ResultType->isPointerTy())
91  return CGF.Builder.CreateIntToPtr(V, ResultType);
92 
93  assert(V->getType() == ResultType);
94  return V;
95 }
96 
97 /// Utility to insert an atomic instruction based on Intrinsic::ID
98 /// and the expression node.
99 static Value *MakeBinaryAtomicValue(
100  CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
101  AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
102  QualType T = E->getType();
103  assert(E->getArg(0)->getType()->isPointerType());
104  assert(CGF.getContext().hasSameUnqualifiedType(T,
105  E->getArg(0)->getType()->getPointeeType()));
106  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
107 
108  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
109  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
110 
111  llvm::IntegerType *IntType =
112  llvm::IntegerType::get(CGF.getLLVMContext(),
113  CGF.getContext().getTypeSize(T));
114  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
115 
116  llvm::Value *Args[2];
117  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
118  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
119  llvm::Type *ValueType = Args[1]->getType();
120  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
121 
122  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
123  Kind, Args[0], Args[1], Ordering);
124  return EmitFromInt(CGF, Result, T, ValueType);
125 }
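// [Editor's note: illustrative example, not part of the original source.]
// A fetch-style sync builtin such as
//
//   int old = __sync_fetch_and_add(&counter, 1);
//
// reaches MakeBinaryAtomicValue with Kind == AtomicRMWInst::Add and is emitted
// as a single sequentially consistent read-modify-write:
//
//   %old = atomicrmw add i32* %counter, i32 1 seq_cst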
126 
127 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
128  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
129  Value *Address = CGF.EmitScalarExpr(E->getArg(1));
130 
131  // Convert the type of the pointer to a pointer to the stored type.
132  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
133  Value *BC = CGF.Builder.CreateBitCast(
134  Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
135  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
136  LV.setNontemporal(true);
137  CGF.EmitStoreOfScalar(Val, LV, false);
138  return nullptr;
139 }
140 
141 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
142  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
143 
144  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
145  LV.setNontemporal(true);
146  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
147 }
148 
149 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
150  llvm::AtomicRMWInst::BinOp Kind,
151  const CallExpr *E) {
152  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
153 }
154 
155 /// Utility to insert an atomic instruction based on Intrinsic::ID and
156 /// the expression node, where the return value is the result of the
157 /// operation.
158 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
159  llvm::AtomicRMWInst::BinOp Kind,
160  const CallExpr *E,
161  Instruction::BinaryOps Op,
162  bool Invert = false) {
163  QualType T = E->getType();
164  assert(E->getArg(0)->getType()->isPointerType());
165  assert(CGF.getContext().hasSameUnqualifiedType(T,
166  E->getArg(0)->getType()->getPointeeType()));
167  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
168 
169  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
170  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
171 
172  llvm::IntegerType *IntType =
173  llvm::IntegerType::get(CGF.getLLVMContext(),
174  CGF.getContext().getTypeSize(T));
175  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
176 
177  llvm::Value *Args[2];
178  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
179  llvm::Type *ValueType = Args[1]->getType();
180  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
181  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
182 
183  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
184  Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
185  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
186  if (Invert)
187  Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
188  llvm::ConstantInt::get(IntType, -1));
189  Result = EmitFromInt(CGF, Result, T, ValueType);
190  return RValue::get(Result);
191 }
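// [Editor's note: illustrative example, not part of the original source.]
// The post-op variants recompute the new value because atomicrmw only yields
// the old one. For example,
//
//   int v = __sync_add_and_fetch(&counter, 4);
//
// becomes roughly:
//
//   %old = atomicrmw add i32* %counter, i32 4 seq_cst
//   %v   = add i32 %old, 4
//
// The Invert flag covers __sync_nand_and_fetch, which needs a trailing xor
// with -1.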
192 
193 /// Utility to insert an atomic cmpxchg instruction.
194 ///
195 /// @param CGF The current codegen function.
196 /// @param E Builtin call expression to convert to cmpxchg.
197 /// arg0 - address to operate on
198 /// arg1 - value to compare with
199 /// arg2 - new value
200 /// @param ReturnBool Specifies whether to return success flag of
201 /// cmpxchg result or the old value.
202 ///
203 /// @returns result of cmpxchg, according to ReturnBool
204 ///
205 /// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics,
206 /// invoke the function EmitAtomicCmpXchgForMSIntrin.
207 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
208  bool ReturnBool) {
209  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
210  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
211  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
212 
213  llvm::IntegerType *IntType = llvm::IntegerType::get(
214  CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
215  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
216 
217  Value *Args[3];
218  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
219  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
220  llvm::Type *ValueType = Args[1]->getType();
221  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
222  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
223 
224  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
225  Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
226  llvm::AtomicOrdering::SequentiallyConsistent);
227  if (ReturnBool)
228  // Extract boolean success flag and zext it to int.
229  return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
230  CGF.ConvertType(E->getType()));
231  else
232  // Extract old value and emit it using the same type as compare value.
233  return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
234  ValueType);
235 }
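// [Editor's note: illustrative example, not part of the original source.]
// Both cmpxchg-style sync builtins funnel through MakeAtomicCmpXchgValue; only
// the element extracted from the {old value, success} pair differs:
//
//   bool ok  = __sync_bool_compare_and_swap(&x, expected, desired); // ReturnBool = true
//   int  old = __sync_val_compare_and_swap(&x, expected, desired);  // ReturnBool = false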
236 
237 /// This function should be invoked to emit atomic cmpxchg for Microsoft's
238 /// _InterlockedCompareExchange* intrinsics which have the following signature:
239 /// T _InterlockedCompareExchange(T volatile *Destination,
240 /// T Exchange,
241 /// T Comparand);
242 ///
243 /// Whereas the llvm 'cmpxchg' instruction has the following syntax:
244 /// cmpxchg *Destination, Comparand, Exchange.
245 /// So we need to swap Comparand and Exchange when invoking
246 /// CreateAtomicCmpXchg. That is the reason we could not use the above utility
247 /// function MakeAtomicCmpXchgValue since it expects the arguments to be
248 /// already swapped.
249 
250 static
251 Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
252  AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
253  assert(E->getArg(0)->getType()->isPointerType());
254  assert(CGF.getContext().hasSameUnqualifiedType(
255  E->getType(), E->getArg(0)->getType()->getPointeeType()));
256  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
257  E->getArg(1)->getType()));
258  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
259  E->getArg(2)->getType()));
260 
261  auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
262  auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
263  auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
264 
265  // For Release ordering, the failure ordering should be Monotonic.
266  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
267  AtomicOrdering::Monotonic :
268  SuccessOrdering;
269 
270  auto *Result = CGF.Builder.CreateAtomicCmpXchg(
271  Destination, Comparand, Exchange,
272  SuccessOrdering, FailureOrdering);
273  Result->setVolatile(true);
274  return CGF.Builder.CreateExtractValue(Result, 0);
275 }
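// [Editor's note: illustrative example, not part of the original source.]
// For a 32-bit operand,
//
//   long prev = _InterlockedCompareExchange(&Dest, Exchange, Comparand);
//
// is emitted with the last two operands swapped to match cmpxchg's
// (pointer, compare, new) order:
//
//   %pair = cmpxchg volatile i32* %Dest, i32 %Comparand, i32 %Exchange seq_cst seq_cst
//   %prev = extractvalue { i32, i1 } %pair, 0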
276 
277 static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
278  AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
279  assert(E->getArg(0)->getType()->isPointerType());
280 
281  auto *IntTy = CGF.ConvertType(E->getType());
282  auto *Result = CGF.Builder.CreateAtomicRMW(
283  AtomicRMWInst::Add,
284  CGF.EmitScalarExpr(E->getArg(0)),
285  ConstantInt::get(IntTy, 1),
286  Ordering);
287  return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
288 }
289 
290 static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E,
291  AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
292  assert(E->getArg(0)->getType()->isPointerType());
293 
294  auto *IntTy = CGF.ConvertType(E->getType());
295  auto *Result = CGF.Builder.CreateAtomicRMW(
296  AtomicRMWInst::Sub,
297  CGF.EmitScalarExpr(E->getArg(0)),
298  ConstantInt::get(IntTy, 1),
299  Ordering);
300  return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
301 }
302 
303 // Emit a simple mangled intrinsic that has 1 argument and a return type
304 // matching the argument type.
305 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
306  const CallExpr *E,
307  unsigned IntrinsicID) {
308  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
309 
310  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
311  return CGF.Builder.CreateCall(F, Src0);
312 }
313 
314 // Emit an intrinsic that has 2 operands of the same type as its result.
315 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
316  const CallExpr *E,
317  unsigned IntrinsicID) {
318  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
319  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
320 
321  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
322  return CGF.Builder.CreateCall(F, { Src0, Src1 });
323 }
324 
325 // Emit an intrinsic that has 3 operands of the same type as its result.
326 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
327  const CallExpr *E,
328  unsigned IntrinsicID) {
329  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
330  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
331  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
332 
333  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
334  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
335 }
336 
337 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
338 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
339  const CallExpr *E,
340  unsigned IntrinsicID) {
341  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
342  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
343 
344  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
345  return CGF.Builder.CreateCall(F, {Src0, Src1});
346 }
347 
348 /// EmitFAbs - Emit a call to @llvm.fabs().
349 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
350  Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
351  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
352  Call->setDoesNotAccessMemory();
353  return Call;
354 }
355 
356 /// Emit the computation of the sign bit for a floating point value. Returns
357 /// the i1 sign bit value.
358 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
359  LLVMContext &C = CGF.CGM.getLLVMContext();
360 
361  llvm::Type *Ty = V->getType();
362  int Width = Ty->getPrimitiveSizeInBits();
363  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
364  V = CGF.Builder.CreateBitCast(V, IntTy);
365  if (Ty->isPPC_FP128Ty()) {
366  // We want the sign bit of the higher-order double. The bitcast we just
367  // did works as if the double-double was stored to memory and then
368  // read as an i128. The "store" will put the higher-order double in the
369  // lower address in both little- and big-Endian modes, but the "load"
370  // will treat those bits as a different part of the i128: the low bits in
371  // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
372  // we need to shift the high bits down to the low before truncating.
373  Width >>= 1;
374  if (CGF.getTarget().isBigEndian()) {
375  Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
376  V = CGF.Builder.CreateLShr(V, ShiftCst);
377  }
378  // We are truncating value in order to extract the higher-order
379  // double, which we will be using to extract the sign from.
380  IntTy = llvm::IntegerType::get(C, Width);
381  V = CGF.Builder.CreateTrunc(V, IntTy);
382  }
383  Value *Zero = llvm::Constant::getNullValue(IntTy);
384  return CGF.Builder.CreateICmpSLT(V, Zero);
385 }
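// [Editor's note: illustrative example, not part of the original source.]
// For an ordinary double, the sign-bit computation above reduces to a bitcast
// plus a signed comparison against zero:
//
//   %bits = bitcast double %x to i64
//   %sign = icmp slt i64 %bits, 0
//
// The ppc_fp128 branch only changes which 64-bit half feeds the comparison.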
386 
387 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
388  const CallExpr *E, llvm::Constant *calleeValue) {
389  CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
390  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
391 }
392 
393 /// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
394 /// depending on IntrinsicID.
395 ///
396 /// \arg CGF The current codegen function.
397 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
398 /// \arg X The first argument to the llvm.*.with.overflow.*.
399 /// \arg Y The second argument to the llvm.*.with.overflow.*.
400 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
401 /// \returns The result (i.e. sum/product) returned by the intrinsic.
402 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
403  const llvm::Intrinsic::ID IntrinsicID,
404  llvm::Value *X, llvm::Value *Y,
405  llvm::Value *&Carry) {
406  // Make sure we have integers of the same width.
407  assert(X->getType() == Y->getType() &&
408  "Arguments must be the same type. (Did you forget to make sure both "
409  "arguments have the same integer width?)");
410 
411  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
412  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
413  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
414  return CGF.Builder.CreateExtractValue(Tmp, 0);
415 }
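// [Editor's note: illustrative example, not part of the original source.]
// For IntrinsicID == llvm::Intrinsic::sadd_with_overflow and i32 operands the
// emitted sequence is:
//
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
//   %sum = extractvalue { i32, i1 } %res, 0   ; returned
//   %ovf = extractvalue { i32, i1 } %res, 1   ; stored into Carry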
416 
417 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
418  unsigned IntrinsicID,
419  int low, int high) {
420  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
421  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
422  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
423  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
424  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
425  return Call;
426 }
427 
428 namespace {
429  struct WidthAndSignedness {
430  unsigned Width;
431  bool Signed;
432  };
433 }
434 
435 static WidthAndSignedness
436 getIntegerWidthAndSignedness(const clang::ASTContext &context,
437  const clang::QualType Type) {
438  assert(Type->isIntegerType() && "Given type is not an integer.");
439  unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
440  bool Signed = Type->isSignedIntegerType();
441  return {Width, Signed};
442 }
443 
444 // Given one or more integer types, this function produces an integer type that
445 // encompasses them: any value in one of the given types could be expressed in
446 // the encompassing type.
447 static struct WidthAndSignedness
448 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
449  assert(Types.size() > 0 && "Empty list of types.");
450 
451  // If any of the given types is signed, we must return a signed type.
452  bool Signed = false;
453  for (const auto &Type : Types) {
454  Signed |= Type.Signed;
455  }
456 
457  // The encompassing type must have a width greater than or equal to the width
458  // of the specified types. Additionally, if the encompassing type is signed,
459  // its width must be strictly greater than the width of any unsigned types
460  // given.
461  unsigned Width = 0;
462  for (const auto &Type : Types) {
463  unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
464  if (Width < MinWidth) {
465  Width = MinWidth;
466  }
467  }
468 
469  return {Width, Signed};
470 }
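// [Editor's note: worked example, not part of the original source.]
// For operands of type 'int' (32, signed) and 'unsigned' (32, unsigned) the
// result is signed and the unsigned member forces MinWidth = 32 + 1, so the
// encompassing type is a 33-bit signed integer that can represent every value
// of both operands; the generic __builtin_*_overflow lowering computes in that
// type before truncating to the result.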
471 
472 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
473  llvm::Type *DestType = Int8PtrTy;
474  if (ArgValue->getType() != DestType)
475  ArgValue =
476  Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
477 
478  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
479  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
480 }
481 
482 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
483 /// __builtin_object_size(p, @p To) is correct
484 static bool areBOSTypesCompatible(int From, int To) {
485  // Note: Our __builtin_object_size implementation currently treats Type=0 and
486  // Type=2 identically. Encoding this implementation detail here may make
487  // improving __builtin_object_size difficult in the future, so it's omitted.
488  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
489 }
490 
491 static llvm::Value *
492 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
493  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
494 }
495 
496 llvm::Value *
497 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
498  llvm::IntegerType *ResType,
499  llvm::Value *EmittedE) {
500  uint64_t ObjectSize;
501  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
502  return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
503  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
504 }
505 
506 /// Returns a Value corresponding to the size of the given expression.
507 /// This Value may be either of the following:
508 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
509 /// it)
510 /// - A call to the @llvm.objectsize intrinsic
511 ///
512 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
513 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
514 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
515 llvm::Value *
516 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
517  llvm::IntegerType *ResType,
518  llvm::Value *EmittedE) {
519  // We need to reference an argument if the pointer is a parameter with the
520  // pass_object_size attribute.
521  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
522  auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
523  auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
524  if (Param != nullptr && PS != nullptr &&
525  areBOSTypesCompatible(PS->getType(), Type)) {
526  auto Iter = SizeArguments.find(Param);
527  assert(Iter != SizeArguments.end());
528 
529  const ImplicitParamDecl *D = Iter->second;
530  auto DIter = LocalDeclMap.find(D);
531  assert(DIter != LocalDeclMap.end());
532 
533  return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
534  getContext().getSizeType(), E->getBeginLoc());
535  }
536  }
537 
538  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
539  // evaluate E for side-effects. In either case, we shouldn't lower to
540  // @llvm.objectsize.
541  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
542  return getDefaultBuiltinObjectSizeResult(Type, ResType);
543 
544  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
545  assert(Ptr->getType()->isPointerTy() &&
546  "Non-pointer passed to __builtin_object_size?");
547 
548  Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
549 
550  // LLVM only supports 0 and 2; make sure we pass that along as a boolean.
551  Value *Min = Builder.getInt1((Type & 2) != 0);
552  // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
553  Value *NullIsUnknown = Builder.getTrue();
554  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
555 }
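// [Editor's note: illustrative example, not part of the original source.]
//
//   char buf[16];
//   size_t n = __builtin_object_size(buf + 4, 0);   // folds to 12 in the front end
//   size_t m = __builtin_object_size(p, 2);         // p an unknown pointer
//
// The second call cannot be folded and is lowered to the intrinsic below, with
// Min = true (type 2) and the trailing 'true' meaning "null is unknown size":
//
//   call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 true, i1 true)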
556 
557 namespace {
558 /// A struct to generically describe a bit test intrinsic.
559 struct BitTest {
560  enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
561  enum InterlockingKind : uint8_t {
562  Unlocked,
563  Sequential,
564  Acquire,
565  Release,
566  NoFence
567  };
568 
569  ActionKind Action;
570  InterlockingKind Interlocking;
571  bool Is64Bit;
572 
573  static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
574 };
575 } // namespace
576 
577 BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
578  switch (BuiltinID) {
579  // Main portable variants.
580  case Builtin::BI_bittest:
581  return {TestOnly, Unlocked, false};
582  case Builtin::BI_bittestandcomplement:
583  return {Complement, Unlocked, false};
584  case Builtin::BI_bittestandreset:
585  return {Reset, Unlocked, false};
586  case Builtin::BI_bittestandset:
587  return {Set, Unlocked, false};
588  case Builtin::BI_interlockedbittestandreset:
589  return {Reset, Sequential, false};
590  case Builtin::BI_interlockedbittestandset:
591  return {Set, Sequential, false};
592 
593  // X86-specific 64-bit variants.
594  case Builtin::BI_bittest64:
595  return {TestOnly, Unlocked, true};
596  case Builtin::BI_bittestandcomplement64:
597  return {Complement, Unlocked, true};
598  case Builtin::BI_bittestandreset64:
599  return {Reset, Unlocked, true};
600  case Builtin::BI_bittestandset64:
601  return {Set, Unlocked, true};
602  case Builtin::BI_interlockedbittestandreset64:
603  return {Reset, Sequential, true};
604  case Builtin::BI_interlockedbittestandset64:
605  return {Set, Sequential, true};
606 
607  // ARM/AArch64-specific ordering variants.
608  case Builtin::BI_interlockedbittestandset_acq:
609  return {Set, Acquire, false};
610  case Builtin::BI_interlockedbittestandset_rel:
611  return {Set, Release, false};
612  case Builtin::BI_interlockedbittestandset_nf:
613  return {Set, NoFence, false};
614  case Builtin::BI_interlockedbittestandreset_acq:
615  return {Reset, Acquire, false};
616  case Builtin::BI_interlockedbittestandreset_rel:
617  return {Reset, Release, false};
618  case Builtin::BI_interlockedbittestandreset_nf:
619  return {Reset, NoFence, false};
620  }
621  llvm_unreachable("expected only bittest intrinsics");
622 }
623 
624 static char bitActionToX86BTCode(BitTest::ActionKind A) {
625  switch (A) {
626  case BitTest::TestOnly: return '\0';
627  case BitTest::Complement: return 'c';
628  case BitTest::Reset: return 'r';
629  case BitTest::Set: return 's';
630  }
631  llvm_unreachable("invalid action");
632 }
633 
634 static Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
635  BitTest BT,
636  const CallExpr *E, Value *BitBase,
637  Value *BitPos) {
638  char Action = bitActionToX86BTCode(BT.Action);
639  char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
640 
641  // Build the assembly.
642  SmallString<64> Asm;
643  raw_svector_ostream AsmOS(Asm);
644  if (BT.Interlocking != BitTest::Unlocked)
645  AsmOS << "lock ";
646  AsmOS << "bt";
647  if (Action)
648  AsmOS << Action;
649  AsmOS << SizeSuffix << " $2, ($1)\n\tsetc ${0:b}";
650 
651  // Build the constraints. FIXME: We should support immediates when possible.
652  std::string Constraints = "=r,r,r,~{cc},~{flags},~{fpsr}";
653  llvm::IntegerType *IntType = llvm::IntegerType::get(
654  CGF.getLLVMContext(),
655  CGF.getContext().getTypeSize(E->getArg(1)->getType()));
656  llvm::Type *IntPtrType = IntType->getPointerTo();
657  llvm::FunctionType *FTy =
658  llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false);
659 
660  llvm::InlineAsm *IA =
661  llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
662  return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
663 }
664 
665 static llvm::AtomicOrdering
666 getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
667  switch (I) {
668  case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
669  case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
670  case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
671  case BitTest::Release: return llvm::AtomicOrdering::Release;
672  case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
673  }
674  llvm_unreachable("invalid interlocking");
675 }
676 
677 /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
678 /// bits and a bit position and read and optionally modify the bit at that
679 /// position. The position index can be arbitrarily large, i.e. it can be larger
680 /// than 31 or 63, so we need an indexed load in the general case.
681 static Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
682  unsigned BuiltinID,
683  const CallExpr *E) {
684  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
685  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
686 
687  BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
688 
689  // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
690  // indexing operation internally. Use them if possible.
691  llvm::Triple::ArchType Arch = CGF.getTarget().getTriple().getArch();
692  if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64)
693  return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
694 
695  // Otherwise, use generic code to load one byte and test the bit. Use all but
696  // the bottom three bits as the array index, and the bottom three bits to form
697  // a mask.
698  // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
699  Value *ByteIndex = CGF.Builder.CreateAShr(
700  BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
701  Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
702  Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
703  ByteIndex, "bittest.byteaddr"),
704  CharUnits::One());
705  Value *PosLow =
706  CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
707  llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
708 
709  // The updating instructions will need a mask.
710  Value *Mask = nullptr;
711  if (BT.Action != BitTest::TestOnly) {
712  Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
713  "bittest.mask");
714  }
715 
716  // Check the action and ordering of the interlocked intrinsics.
717  llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
718 
719  Value *OldByte = nullptr;
720  if (Ordering != llvm::AtomicOrdering::NotAtomic) {
721  // Emit a combined atomicrmw load/store operation for the interlocked
722  // intrinsics.
723  llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
724  if (BT.Action == BitTest::Reset) {
725  Mask = CGF.Builder.CreateNot(Mask);
726  RMWOp = llvm::AtomicRMWInst::And;
727  }
728  OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
729  Ordering);
730  } else {
731  // Emit a plain load for the non-interlocked intrinsics.
732  OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
733  Value *NewByte = nullptr;
734  switch (BT.Action) {
735  case BitTest::TestOnly:
736  // Don't store anything.
737  break;
738  case BitTest::Complement:
739  NewByte = CGF.Builder.CreateXor(OldByte, Mask);
740  break;
741  case BitTest::Reset:
742  NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
743  break;
744  case BitTest::Set:
745  NewByte = CGF.Builder.CreateOr(OldByte, Mask);
746  break;
747  }
748  if (NewByte)
749  CGF.Builder.CreateStore(NewByte, ByteAddr);
750  }
751 
752  // However we loaded the old byte, either by plain load or atomicrmw, shift
753  // the bit into the low position and mask it to 0 or 1.
754  Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
755  return CGF.Builder.CreateAnd(
756  ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
757 }
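// [Editor's note: illustrative example, not part of the original source.]
//
//   long Flags[4];
//   unsigned char was_set = _bittestandset(Flags, 70);
//
// On x86 this becomes the inline asm built above ("btsl $2, ($1); setc ${0:b}");
// elsewhere it uses the generic path: load byte (70 >> 3) of the bit base, OR in
// (1 << (70 & 7)), store it back, and return the previously loaded bit.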
758 
759 namespace {
760 enum class MSVCSetJmpKind {
761  _setjmpex,
762  _setjmp3,
763  _setjmp
764 };
765 }
766 
767 /// MSVC handles setjmp a bit differently on different platforms. On every
768 /// architecture except 32-bit x86, the frame address is passed. On x86, extra
769 /// parameters can be passed as variadic arguments, but we always pass none.
770 static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
771  const CallExpr *E) {
772  llvm::Value *Arg1 = nullptr;
773  llvm::Type *Arg1Ty = nullptr;
774  StringRef Name;
775  bool IsVarArg = false;
776  if (SJKind == MSVCSetJmpKind::_setjmp3) {
777  Name = "_setjmp3";
778  Arg1Ty = CGF.Int32Ty;
779  Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
780  IsVarArg = true;
781  } else {
782  Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
783  Arg1Ty = CGF.Int8PtrTy;
784  if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
785  Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::sponentry));
786  } else
787  Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress),
788  llvm::ConstantInt::get(CGF.Int32Ty, 0));
789  }
790 
791  // Mark the call site and declaration with ReturnsTwice.
792  llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
793  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
794  CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
795  llvm::Attribute::ReturnsTwice);
796  llvm::Constant *SetJmpFn = CGF.CGM.CreateRuntimeFunction(
797  llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
798  ReturnsTwiceAttr, /*Local=*/true);
799 
800  llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
801  CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
802  llvm::Value *Args[] = {Buf, Arg1};
803  llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
804  CS.setAttributes(ReturnsTwiceAttr);
805  return RValue::get(CS.getInstruction());
806 }
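// [Editor's note: illustrative example, not part of the original source.]
// When targeting the MSVC runtime on 32-bit x86, setjmp(env) takes the
// _setjmp3 path and is emitted roughly as the variadic, returns_twice call
//
//   call i32 (i8*, i32, ...) @_setjmp3(i8* %env, i32 0)
//
// while other architectures pass the frame address (or, on AArch64, the
// llvm.sponentry value) as the second argument of _setjmp/_setjmpex.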
807 
808 // Many of the MSVC builtins are available on x64, ARM, and AArch64; to avoid repeating code,
809 // we handle them here.
810 enum class MSVCIntrin {
811  _BitScanForward,
812  _BitScanReverse,
813  _InterlockedAnd,
814  _InterlockedDecrement,
815  _InterlockedExchange,
816  _InterlockedExchangeAdd,
817  _InterlockedExchangeSub,
818  _InterlockedIncrement,
819  _InterlockedOr,
820  _InterlockedXor,
821  _InterlockedExchangeAdd_acq,
822  _InterlockedExchangeAdd_rel,
823  _InterlockedExchangeAdd_nf,
824  _InterlockedExchange_acq,
825  _InterlockedExchange_rel,
826  _InterlockedExchange_nf,
827  _InterlockedCompareExchange_acq,
828  _InterlockedCompareExchange_rel,
829  _InterlockedCompareExchange_nf,
830  _InterlockedOr_acq,
831  _InterlockedOr_rel,
832  _InterlockedOr_nf,
833  _InterlockedXor_acq,
834  _InterlockedXor_rel,
835  _InterlockedXor_nf,
836  _InterlockedAnd_acq,
837  _InterlockedAnd_rel,
838  _InterlockedAnd_nf,
839  _InterlockedIncrement_acq,
840  _InterlockedIncrement_rel,
841  _InterlockedIncrement_nf,
842  _InterlockedDecrement_acq,
843  _InterlockedDecrement_rel,
844  _InterlockedDecrement_nf,
845  __fastfail,
846 };
847 
848 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
849  const CallExpr *E) {
850  switch (BuiltinID) {
851  case MSVCIntrin::_BitScanForward:
852  case MSVCIntrin::_BitScanReverse: {
853  Value *ArgValue = EmitScalarExpr(E->getArg(1));
854 
855  llvm::Type *ArgType = ArgValue->getType();
856  llvm::Type *IndexType =
857  EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
858  llvm::Type *ResultType = ConvertType(E->getType());
859 
860  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
861  Value *ResZero = llvm::Constant::getNullValue(ResultType);
862  Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
863 
864  BasicBlock *Begin = Builder.GetInsertBlock();
865  BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
866  Builder.SetInsertPoint(End);
867  PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
868 
869  Builder.SetInsertPoint(Begin);
870  Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
871  BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
872  Builder.CreateCondBr(IsZero, End, NotZero);
873  Result->addIncoming(ResZero, Begin);
874 
875  Builder.SetInsertPoint(NotZero);
876  Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
877 
878  if (BuiltinID == MSVCIntrin::_BitScanForward) {
879  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
880  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
881  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
882  Builder.CreateStore(ZeroCount, IndexAddress, false);
883  } else {
884  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
885  Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
886 
887  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
888  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
889  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
890  Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
891  Builder.CreateStore(Index, IndexAddress, false);
892  }
893  Builder.CreateBr(End);
894  Result->addIncoming(ResOne, NotZero);
895 
896  Builder.SetInsertPoint(End);
897  return Result;
898  }
899  case MSVCIntrin::_InterlockedAnd:
900  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
901  case MSVCIntrin::_InterlockedExchange:
902  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
903  case MSVCIntrin::_InterlockedExchangeAdd:
904  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
905  case MSVCIntrin::_InterlockedExchangeSub:
906  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
907  case MSVCIntrin::_InterlockedOr:
908  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
909  case MSVCIntrin::_InterlockedXor:
910  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
911  case MSVCIntrin::_InterlockedExchangeAdd_acq:
912  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
913  AtomicOrdering::Acquire);
914  case MSVCIntrin::_InterlockedExchangeAdd_rel:
915  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
916  AtomicOrdering::Release);
917  case MSVCIntrin::_InterlockedExchangeAdd_nf:
918  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
919  AtomicOrdering::Monotonic);
920  case MSVCIntrin::_InterlockedExchange_acq:
921  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
922  AtomicOrdering::Acquire);
923  case MSVCIntrin::_InterlockedExchange_rel:
924  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
925  AtomicOrdering::Release);
926  case MSVCIntrin::_InterlockedExchange_nf:
927  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
928  AtomicOrdering::Monotonic);
929  case MSVCIntrin::_InterlockedCompareExchange_acq:
930  return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
931  case MSVCIntrin::_InterlockedCompareExchange_rel:
932  return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
933  case MSVCIntrin::_InterlockedCompareExchange_nf:
934  return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
935  case MSVCIntrin::_InterlockedOr_acq:
936  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
937  AtomicOrdering::Acquire);
938  case MSVCIntrin::_InterlockedOr_rel:
939  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
940  AtomicOrdering::Release);
941  case MSVCIntrin::_InterlockedOr_nf:
942  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
943  AtomicOrdering::Monotonic);
944  case MSVCIntrin::_InterlockedXor_acq:
945  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
946  AtomicOrdering::Acquire);
947  case MSVCIntrin::_InterlockedXor_rel:
948  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
949  AtomicOrdering::Release);
950  case MSVCIntrin::_InterlockedXor_nf:
951  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
952  AtomicOrdering::Monotonic);
953  case MSVCIntrin::_InterlockedAnd_acq:
954  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
955  AtomicOrdering::Acquire);
956  case MSVCIntrin::_InterlockedAnd_rel:
957  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
958  AtomicOrdering::Release);
959  case MSVCIntrin::_InterlockedAnd_nf:
960  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
961  AtomicOrdering::Monotonic);
962  case MSVCIntrin::_InterlockedIncrement_acq:
963  return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
964  case MSVCIntrin::_InterlockedIncrement_rel:
965  return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
966  case MSVCIntrin::_InterlockedIncrement_nf:
967  return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
968  case MSVCIntrin::_InterlockedDecrement_acq:
969  return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
970  case MSVCIntrin::_InterlockedDecrement_rel:
971  return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
972  case MSVCIntrin::_InterlockedDecrement_nf:
973  return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
974 
975  case MSVCIntrin::_InterlockedDecrement:
976  return EmitAtomicDecrementValue(*this, E);
977  case MSVCIntrin::_InterlockedIncrement:
978  return EmitAtomicIncrementValue(*this, E);
979 
980  case MSVCIntrin::__fastfail: {
981  // Request immediate process termination from the kernel. The instruction
982  // sequences to do this are documented on MSDN:
983  // https://msdn.microsoft.com/en-us/library/dn774154.aspx
984  llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
985  StringRef Asm, Constraints;
986  switch (ISA) {
987  default:
988  ErrorUnsupported(E, "__fastfail call for this architecture");
989  break;
990  case llvm::Triple::x86:
991  case llvm::Triple::x86_64:
992  Asm = "int $$0x29";
993  Constraints = "{cx}";
994  break;
995  case llvm::Triple::thumb:
996  Asm = "udf #251";
997  Constraints = "{r0}";
998  break;
999  }
1000  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
1001  llvm::InlineAsm *IA =
1002  llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
1003  llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
1004  getLLVMContext(), llvm::AttributeList::FunctionIndex,
1005  llvm::Attribute::NoReturn);
1006  CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
1007  CS.setAttributes(NoReturnAttr);
1008  return CS.getInstruction();
1009  }
1010  }
1011  llvm_unreachable("Incorrect MSVC intrinsic!");
1012 }
1013 
1014 namespace {
1015 // ARC cleanup for __builtin_os_log_format
1016 struct CallObjCArcUse final : EHScopeStack::Cleanup {
1017  CallObjCArcUse(llvm::Value *object) : object(object) {}
1018  llvm::Value *object;
1019 
1020  void Emit(CodeGenFunction &CGF, Flags flags) override {
1021  CGF.EmitARCIntrinsicUse(object);
1022  }
1023 };
1024 }
1025 
1026 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
1027  BuiltinCheckKind Kind) {
1028  assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
1029  && "Unsupported builtin check kind");
1030 
1031  Value *ArgValue = EmitScalarExpr(E);
1032  if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
1033  return ArgValue;
1034 
1035  SanitizerScope SanScope(this);
1036  Value *Cond = Builder.CreateICmpNE(
1037  ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
1038  EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
1039  SanitizerHandler::InvalidBuiltin,
1040  {EmitCheckSourceLocation(E->getExprLoc()),
1041  llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
1042  None);
1043  return ArgValue;
1044 }
1045 
1046 /// Get the argument type for arguments to os_log_helper.
1047 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
1048  QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
1049  return C.getCanonicalType(UnsignedTy);
1050 }
1051 
1052 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
1053  const analyze_os_log::OSLogBufferLayout &Layout,
1054  CharUnits BufferAlignment) {
1055  ASTContext &Ctx = getContext();
1056 
1057  llvm::SmallString<64> Name;
1058  {
1059  raw_svector_ostream OS(Name);
1060  OS << "__os_log_helper";
1061  OS << "_" << BufferAlignment.getQuantity();
1062  OS << "_" << int(Layout.getSummaryByte());
1063  OS << "_" << int(Layout.getNumArgsByte());
1064  for (const auto &Item : Layout.Items)
1065  OS << "_" << int(Item.getSizeByte()) << "_"
1066  << int(Item.getDescriptorByte());
1067  }
1068 
1069  if (llvm::Function *F = CGM.getModule().getFunction(Name))
1070  return F;
1071 
1072  llvm::SmallVector<ImplicitParamDecl, 4> Params;
1073  llvm::SmallVector<QualType, 4> ArgTys;
1074  Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
1075  Ctx.VoidPtrTy, ImplicitParamDecl::Other);
1076  ArgTys.emplace_back(Ctx.VoidPtrTy);
1077 
1078  for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
1079  char Size = Layout.Items[I].getSizeByte();
1080  if (!Size)
1081  continue;
1082 
1083  QualType ArgTy = getOSLogArgType(Ctx, Size);
1084  Params.emplace_back(
1085  Ctx, nullptr, SourceLocation(),
1086  &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
1087  ImplicitParamDecl::Other);
1088  ArgTys.emplace_back(ArgTy);
1089  }
1090 
1091  FunctionArgList Args;
1092  for (auto &P : Params)
1093  Args.push_back(&P);
1094 
1095  QualType ReturnTy = Ctx.VoidTy;
1096  QualType FuncionTy = Ctx.getFunctionType(ReturnTy, ArgTys, {});
1097 
1098  // The helper function has linkonce_odr linkage to enable the linker to merge
1099  // identical functions. To ensure the merging always happens, 'noinline' is
1100  // attached to the function when compiling with -Oz.
1101  const CGFunctionInfo &FI =
1102  CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
1103  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
1104  llvm::Function *Fn = llvm::Function::Create(
1105  FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
1106  Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
1107  CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn);
1108  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
1109 
1110  // Attach 'noinline' at -Oz.
1111  if (CGM.getCodeGenOpts().OptimizeSize == 2)
1112  Fn->addFnAttr(llvm::Attribute::NoInline);
1113 
1114  auto NL = ApplyDebugLocation::CreateEmpty(*this);
1115  IdentifierInfo *II = &Ctx.Idents.get(Name);
1116  FunctionDecl *FD = FunctionDecl::Create(
1117  Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
1118  FuncionTy, nullptr, SC_PrivateExtern, false, false);
1119 
1120  StartFunction(FD, ReturnTy, Fn, FI, Args);
1121 
1122  // Create a scope with an artificial location for the body of this function.
1123  auto AL = ApplyDebugLocation::CreateArtificial(*this);
1124 
1125  CharUnits Offset;
1126  Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"),
1127  BufferAlignment);
1128  Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
1129  Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
1130  Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
1131  Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
1132 
1133  unsigned I = 1;
1134  for (const auto &Item : Layout.Items) {
1135  Builder.CreateStore(
1136  Builder.getInt8(Item.getDescriptorByte()),
1137  Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
1138  Builder.CreateStore(
1139  Builder.getInt8(Item.getSizeByte()),
1140  Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
1141 
1142  CharUnits Size = Item.size();
1143  if (!Size.getQuantity())
1144  continue;
1145 
1146  Address Arg = GetAddrOfLocalVar(&Params[I]);
1147  Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
1148  Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
1149  "argDataCast");
1150  Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
1151  Offset += Size;
1152  ++I;
1153  }
1154 
1155  FinishFunction();
1156 
1157  return Fn;
1158 }
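// [Editor's note: illustrative example, not part of the original source.]
// The mangling above encodes the whole layout. For a hypothetical 16-byte
// aligned buffer with summary byte 0, numArgs byte 2, and items of sizes 4 and
// 8 with descriptor bytes 0, the helper is named
//
//   __os_log_helper_16_0_2_4_0_8_0
//
// so identical layouts from different translation units collapse into a single
// linkonce_odr definition.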
1159 
1160 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
1161  assert(E.getNumArgs() >= 2 &&
1162  "__builtin_os_log_format takes at least 2 arguments");
1163  ASTContext &Ctx = getContext();
1164  analyze_os_log::OSLogBufferLayout Layout;
1165  analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
1166  Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
1167  llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
1168 
1169  // Ignore argument 1, the format string. It is not currently used.
1170  CallArgList Args;
1171  Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
1172 
1173  for (const auto &Item : Layout.Items) {
1174  int Size = Item.getSizeByte();
1175  if (!Size)
1176  continue;
1177 
1178  llvm::Value *ArgVal;
1179 
1180  if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
1181  uint64_t Val = 0;
1182  for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
1183  Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
1184  ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
1185  } else if (const Expr *TheExpr = Item.getExpr()) {
1186  ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
1187 
1188  // Check if this is a retainable type.
1189  if (TheExpr->getType()->isObjCRetainableType()) {
1190  assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
1191  "Only scalar can be a ObjC retainable type");
1192  // Check if the object is constant, if not, save it in
1193  // RetainableOperands.
1194  if (!isa<Constant>(ArgVal))
1195  RetainableOperands.push_back(ArgVal);
1196  }
1197  } else {
1198  ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
1199  }
1200 
1201  unsigned ArgValSize =
1202  CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
1203  llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
1204  ArgValSize);
1205  ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
1206  CanQualType ArgTy = getOSLogArgType(Ctx, Size);
1207  // If ArgVal has type x86_fp80, zero-extend ArgVal.
1208  ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
1209  Args.add(RValue::get(ArgVal), ArgTy);
1210  }
1211 
1212  const CGFunctionInfo &FI =
1213  CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
1214  llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
1215  Layout, BufAddr.getAlignment());
1216  EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
1217 
1218  // Push a clang.arc.use cleanup for each object in RetainableOperands. The
1219  // cleanup will cause the use to appear after the final log call, keeping
1220  // the object valid while it's held in the log buffer. Note that if there's
1221  // a release cleanup on the object, it will already be active; since
1222  // cleanups are emitted in reverse order, the use will occur before the
1223  // object is released.
1224  if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
1225  CGM.getCodeGenOpts().OptimizationLevel != 0)
1226  for (llvm::Value *Object : RetainableOperands)
1227  pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object);
1228 
1229  return RValue::get(BufAddr.getPointer());
1230 }
1231 
1232 /// Determine if a binop is a checked mixed-sign multiply we can specialize.
1233 static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
1234  WidthAndSignedness Op1Info,
1235  WidthAndSignedness Op2Info,
1236  WidthAndSignedness ResultInfo) {
1237  return BuiltinID == Builtin::BI__builtin_mul_overflow &&
1238  std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
1239  Op1Info.Signed != Op2Info.Signed;
1240 }
1241 
1242 /// Emit a checked mixed-sign multiply. This is a cheaper specialization of
1243 /// the generic checked-binop irgen.
1244 static RValue
1245 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
1246  WidthAndSignedness Op1Info, const clang::Expr *Op2,
1247  WidthAndSignedness Op2Info,
1248  const clang::Expr *ResultArg, QualType ResultQTy,
1249  WidthAndSignedness ResultInfo) {
1250  assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
1251  Op2Info, ResultInfo) &&
1252  "Not a mixed-sign multipliction we can specialize");
1253 
1254  // Emit the signed and unsigned operands.
1255  const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
1256  const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
1257  llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
1258  llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
1259  unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
1260  unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
1261 
1262  // One of the operands may be smaller than the other. If so, [s|z]ext it.
1263  if (SignedOpWidth < UnsignedOpWidth)
1264  Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
1265  if (UnsignedOpWidth < SignedOpWidth)
1266  Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
1267 
1268  llvm::Type *OpTy = Signed->getType();
1269  llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
1270  Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
1271  llvm::Type *ResTy = ResultPtr.getElementType();
1272  unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
1273 
1274  // Take the absolute value of the signed operand.
1275  llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
1276  llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
1277  llvm::Value *AbsSigned =
1278  CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
1279 
1280  // Perform a checked unsigned multiplication.
1281  llvm::Value *UnsignedOverflow;
1282  llvm::Value *UnsignedResult =
1283  EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
1284  Unsigned, UnsignedOverflow);
1285 
1286  llvm::Value *Overflow, *Result;
1287  if (ResultInfo.Signed) {
1288  // Signed overflow occurs if the result is greater than INT_MAX or less
1289  // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
1290  auto IntMax =
1291  llvm::APInt::getSignedMaxValue(ResultInfo.Width).zextOrSelf(OpWidth);
1292  llvm::Value *MaxResult =
1293  CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
1294  CGF.Builder.CreateZExt(IsNegative, OpTy));
1295  llvm::Value *SignedOverflow =
1296  CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
1297  Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
1298 
1299  // Prepare the signed result (possibly by negating it).
1300  llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
1301  llvm::Value *SignedResult =
1302  CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
1303  Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
1304  } else {
1305  // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
1306  llvm::Value *Underflow = CGF.Builder.CreateAnd(
1307  IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
1308  Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
1309  if (ResultInfo.Width < OpWidth) {
1310  auto IntMax =
1311  llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
1312  llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
1313  UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
1314  Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
1315  }
1316 
1317  // Negate the product if it would be negative in infinite precision.
1318  Result = CGF.Builder.CreateSelect(
1319  IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
1320 
1321  Result = CGF.Builder.CreateTrunc(Result, ResTy);
1322  }
1323  assert(Overflow && Result && "Missing overflow or result");
1324 
1325  bool isVolatile =
1326  ResultArg->getType()->getPointeeType().isVolatileQualified();
1327  CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
1328  isVolatile);
1329  return RValue::get(Overflow);
1330 }
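// [Editor's note: illustrative example, not part of the original source.]
//
//   int res;
//   bool ovf = __builtin_mul_overflow(-3, 5u, &res);   // signed * unsigned
//
// takes this specialized path: |-3| * 5 is computed with
// @llvm.umul.with.overflow.i32, the product is negated because the signed
// operand was negative, and the overflow flag additionally checks the signed
// result range, so res == -15 and ovf == false.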
1331 
1333  Value *&RecordPtr, CharUnits Align, Value *Func,
1334  int Lvl) {
1335  const auto *RT = RType->getAs<RecordType>();
1336  ASTContext &Context = CGF.getContext();
1337  RecordDecl *RD = RT->getDecl()->getDefinition();
1338  ASTContext &Ctx = RD->getASTContext();
1339  const ASTRecordLayout &RL = Ctx.getASTRecordLayout(RD);
1340  std::string Pad = std::string(Lvl * 4, ' ');
1341 
1342  Value *GString =
1343  CGF.Builder.CreateGlobalStringPtr(RType.getAsString() + " {\n");
1344  Value *Res = CGF.Builder.CreateCall(Func, {GString});
1345 
1346  static llvm::DenseMap<QualType, const char *> Types;
1347  if (Types.empty()) {
1348  Types[Context.CharTy] = "%c";
1349  Types[Context.BoolTy] = "%d";
1350  Types[Context.SignedCharTy] = "%hhd";
1351  Types[Context.UnsignedCharTy] = "%hhu";
1352  Types[Context.IntTy] = "%d";
1353  Types[Context.UnsignedIntTy] = "%u";
1354  Types[Context.LongTy] = "%ld";
1355  Types[Context.UnsignedLongTy] = "%lu";
1356  Types[Context.LongLongTy] = "%lld";
1357  Types[Context.UnsignedLongLongTy] = "%llu";
1358  Types[Context.ShortTy] = "%hd";
1359  Types[Context.UnsignedShortTy] = "%hu";
1360  Types[Context.VoidPtrTy] = "%p";
1361  Types[Context.FloatTy] = "%f";
1362  Types[Context.DoubleTy] = "%f";
1363  Types[Context.LongDoubleTy] = "%Lf";
1364  Types[Context.getPointerType(Context.CharTy)] = "%s";
1365  Types[Context.getPointerType(Context.getConstType(Context.CharTy))] = "%s";
1366  }
1367 
1368  for (const auto *FD : RD->fields()) {
1369  uint64_t Off = RL.getFieldOffset(FD->getFieldIndex());
1370  Off = Ctx.toCharUnitsFromBits(Off).getQuantity();
1371 
1372  Value *FieldPtr = RecordPtr;
1373  if (RD->isUnion())
1374  FieldPtr = CGF.Builder.CreatePointerCast(
1375  FieldPtr, CGF.ConvertType(Context.getPointerType(FD->getType())));
1376  else
1377  FieldPtr = CGF.Builder.CreateStructGEP(CGF.ConvertType(RType), FieldPtr,
1378  FD->getFieldIndex());
1379 
1380  GString = CGF.Builder.CreateGlobalStringPtr(
1381  llvm::Twine(Pad)
1382  .concat(FD->getType().getAsString())
1383  .concat(llvm::Twine(' '))
1384  .concat(FD->getNameAsString())
1385  .concat(" : ")
1386  .str());
1387  Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
1388  Res = CGF.Builder.CreateAdd(Res, TmpRes);
1389 
1390  QualType CanonicalType =
1391  FD->getType().getUnqualifiedType().getCanonicalType();
1392 
1393  // If the field is itself a record type, recurse into it.
1394  if (CanonicalType->isRecordType()) {
1395  Value *TmpRes =
1396  dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1);
1397  Res = CGF.Builder.CreateAdd(TmpRes, Res);
1398  continue;
1399  }
1400 
1401  // We try to determine the best format to print the current field
1402  llvm::Twine Format = Types.find(CanonicalType) == Types.end()
1403  ? Types[Context.VoidPtrTy]
1404  : Types[CanonicalType];
1405 
1406  Address FieldAddress = Address(FieldPtr, Align);
1407  FieldPtr = CGF.Builder.CreateLoad(FieldAddress);
1408 
1409  // FIXME Need to handle bitfield here
1410  GString = CGF.Builder.CreateGlobalStringPtr(
1411  Format.concat(llvm::Twine('\n')).str());
1412  TmpRes = CGF.Builder.CreateCall(Func, {GString, FieldPtr});
1413  Res = CGF.Builder.CreateAdd(Res, TmpRes);
1414  }
1415 
1416  GString = CGF.Builder.CreateGlobalStringPtr(Pad + "}\n");
1417  Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
1418  Res = CGF.Builder.CreateAdd(Res, TmpRes);
1419  return Res;
1420 }
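// [Editor's note: illustrative example, not part of the original source.]
// dumpRecord backs __builtin_dump_struct:
//
//   struct S { int i; float f; };
//   struct S s = {1, 2.0f};
//   __builtin_dump_struct(&s, &printf);
//
// emits one call to the supplied print function per line, roughly
// "struct S {", "    int i : ", "1", ..., "}", and returns the accumulated
// result of those calls.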
1421 
1422 static bool
1423 TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
1424  llvm::SmallPtrSetImpl<const Decl *> &Seen) {
1425  if (const auto *Arr = Ctx.getAsArrayType(Ty))
1426  Ty = Ctx.getBaseElementType(Arr);
1427 
1428  const auto *Record = Ty->getAsCXXRecordDecl();
1429  if (!Record)
1430  return false;
1431 
1432  // We've already checked this type, or are in the process of checking it.
1433  if (!Seen.insert(Record).second)
1434  return false;
1435 
1436  assert(Record->hasDefinition() &&
1437  "Incomplete types should already be diagnosed");
1438 
1439  if (Record->isDynamicClass())
1440  return true;
1441 
1442  for (FieldDecl *F : Record->fields()) {
1443  if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
1444  return true;
1445  }
1446  return false;
1447 }
1448 
1449 /// Determine if the specified type requires laundering by checking if it is a
1450 /// dynamic class type or contains a subobject which is a dynamic class type.
1451 bool CodeGenFunction::typeRequiresBuiltinLaunder(QualType Ty) {
1452  if (!CGM.getCodeGenOpts().StrictVTablePointers)
1453  return false;
1454  llvm::SmallPtrSet<const Decl *, 16> Seen;
1455  return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
1456 }
1457 
1458 RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
1459  llvm::Value *Src = EmitScalarExpr(E->getArg(0));
1460  llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
1461 
1462  // The builtin's shift arg may have a different type than the source arg and
1463  // result, but the LLVM intrinsic uses the same type for all values.
1464  llvm::Type *Ty = Src->getType();
1465  ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
1466 
1467  // Rotate is a special case of LLVM funnel shift: the first two args are the same.
1468  unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1469  Value *F = CGM.getIntrinsic(IID, Ty);
1470  return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
1471 }
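// A minimal caller-side sketch (illustrative, not part of this file): both
// data operands of the funnel shift are the same value, so bits shifted out
// on one side re-enter on the other, i.e. a rotate.
//
//   unsigned rotl32(unsigned x, unsigned n) {
//     return __builtin_rotateleft32(x, n);  // lowers to llvm.fshl.i32(x, x, n)
//   }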
1472 
1473 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
1474  const CallExpr *E,
1475  ReturnValueSlot ReturnValue) {
1476  const FunctionDecl *FD = GD.getDecl()->getAsFunction();
1477  // See if we can constant fold this builtin. If so, don't emit it at all.
1478  Expr::EvalResult Result;
1479  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
1480  !Result.hasSideEffects()) {
1481  if (Result.Val.isInt())
1482  return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
1483  Result.Val.getInt()));
1484  if (Result.Val.isFloat())
1485  return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
1486  Result.Val.getFloat()));
1487  }
1488 
1489  // There are LLVM math intrinsics/instructions corresponding to math library
1490  // functions, except that the LLVM op will never set errno while the math
1491  // library might. Also, math builtins have the same semantics as their math
1492  // library twins. Thus, we can transform math library and builtin calls to
1493  // their LLVM counterparts if the call is marked 'const' (known never to set errno).
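  // Hedged example of the effect described above (illustrative, not part of
  // this file): when the library declaration carries the 'const' attribute,
  // e.g. under -fno-math-errno,
  //
  //   double r = sqrt(x);
  //
  // is emitted as the errno-free llvm.sqrt.f64 intrinsic rather than a call
  // to the libm function.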
1494  if (FD->hasAttr<ConstAttr>()) {
1495  switch (BuiltinID) {
1496  case Builtin::BIceil:
1497  case Builtin::BIceilf:
1498  case Builtin::BIceill:
1499  case Builtin::BI__builtin_ceil:
1500  case Builtin::BI__builtin_ceilf:
1501  case Builtin::BI__builtin_ceill:
1502  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
1503 
1504  case Builtin::BIcopysign:
1505  case Builtin::BIcopysignf:
1506  case Builtin::BIcopysignl:
1507  case Builtin::BI__builtin_copysign:
1508  case Builtin::BI__builtin_copysignf:
1509  case Builtin::BI__builtin_copysignl:
1510  case Builtin::BI__builtin_copysignf128:
1511  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
1512 
1513  case Builtin::BIcos:
1514  case Builtin::BIcosf:
1515  case Builtin::BIcosl:
1516  case Builtin::BI__builtin_cos:
1517  case Builtin::BI__builtin_cosf:
1518  case Builtin::BI__builtin_cosl:
1519  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos));
1520 
1521  case Builtin::BIexp:
1522  case Builtin::BIexpf:
1523  case Builtin::BIexpl:
1524  case Builtin::BI__builtin_exp:
1525  case Builtin::BI__builtin_expf:
1526  case Builtin::BI__builtin_expl:
1527  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp));
1528 
1529  case Builtin::BIexp2:
1530  case Builtin::BIexp2f:
1531  case Builtin::BIexp2l:
1532  case Builtin::BI__builtin_exp2:
1533  case Builtin::BI__builtin_exp2f:
1534  case Builtin::BI__builtin_exp2l:
1535  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2));
1536 
1537  case Builtin::BIfabs:
1538  case Builtin::BIfabsf:
1539  case Builtin::BIfabsl:
1540  case Builtin::BI__builtin_fabs:
1541  case Builtin::BI__builtin_fabsf:
1542  case Builtin::BI__builtin_fabsl:
1543  case Builtin::BI__builtin_fabsf128:
1544  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
1545 
1546  case Builtin::BIfloor:
1547  case Builtin::BIfloorf:
1548  case Builtin::BIfloorl:
1549  case Builtin::BI__builtin_floor:
1550  case Builtin::BI__builtin_floorf:
1551  case Builtin::BI__builtin_floorl:
1552  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
1553 
1554  case Builtin::BIfma:
1555  case Builtin::BIfmaf:
1556  case Builtin::BIfmal:
1557  case Builtin::BI__builtin_fma:
1558  case Builtin::BI__builtin_fmaf:
1559  case Builtin::BI__builtin_fmal:
1560  return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma));
1561 
1562  case Builtin::BIfmax:
1563  case Builtin::BIfmaxf:
1564  case Builtin::BIfmaxl:
1565  case Builtin::BI__builtin_fmax:
1566  case Builtin::BI__builtin_fmaxf:
1567  case Builtin::BI__builtin_fmaxl:
1568  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
1569 
1570  case Builtin::BIfmin:
1571  case Builtin::BIfminf:
1572  case Builtin::BIfminl:
1573  case Builtin::BI__builtin_fmin:
1574  case Builtin::BI__builtin_fminf:
1575  case Builtin::BI__builtin_fminl:
1576  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
1577 
1578  // fmod() is a special case: it maps to the frem instruction rather than an
1579  // LLVM intrinsic.
1580  case Builtin::BIfmod:
1581  case Builtin::BIfmodf:
1582  case Builtin::BIfmodl:
1583  case Builtin::BI__builtin_fmod:
1584  case Builtin::BI__builtin_fmodf:
1585  case Builtin::BI__builtin_fmodl: {
1586  Value *Arg1 = EmitScalarExpr(E->getArg(0));
1587  Value *Arg2 = EmitScalarExpr(E->getArg(1));
1588  return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
1589  }
1590 
1591  case Builtin::BIlog:
1592  case Builtin::BIlogf:
1593  case Builtin::BIlogl:
1594  case Builtin::BI__builtin_log:
1595  case Builtin::BI__builtin_logf:
1596  case Builtin::BI__builtin_logl:
1597  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log));
1598 
1599  case Builtin::BIlog10:
1600  case Builtin::BIlog10f:
1601  case Builtin::BIlog10l:
1602  case Builtin::BI__builtin_log10:
1603  case Builtin::BI__builtin_log10f:
1604  case Builtin::BI__builtin_log10l:
1605  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10));
1606 
1607  case Builtin::BIlog2:
1608  case Builtin::BIlog2f:
1609  case Builtin::BIlog2l:
1610  case Builtin::BI__builtin_log2:
1611  case Builtin::BI__builtin_log2f:
1612  case Builtin::BI__builtin_log2l:
1613  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2));
1614 
1615  case Builtin::BInearbyint:
1616  case Builtin::BInearbyintf:
1617  case Builtin::BInearbyintl:
1618  case Builtin::BI__builtin_nearbyint:
1619  case Builtin::BI__builtin_nearbyintf:
1620  case Builtin::BI__builtin_nearbyintl:
1621  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
1622 
1623  case Builtin::BIpow:
1624  case Builtin::BIpowf:
1625  case Builtin::BIpowl:
1626  case Builtin::BI__builtin_pow:
1627  case Builtin::BI__builtin_powf:
1628  case Builtin::BI__builtin_powl:
1629  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow));
1630 
1631  case Builtin::BIrint:
1632  case Builtin::BIrintf:
1633  case Builtin::BIrintl:
1634  case Builtin::BI__builtin_rint:
1635  case Builtin::BI__builtin_rintf:
1636  case Builtin::BI__builtin_rintl:
1637  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
1638 
1639  case Builtin::BIround:
1640  case Builtin::BIroundf:
1641  case Builtin::BIroundl:
1642  case Builtin::BI__builtin_round:
1643  case Builtin::BI__builtin_roundf:
1644  case Builtin::BI__builtin_roundl:
1645  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
1646 
1647  case Builtin::BIsin:
1648  case Builtin::BIsinf:
1649  case Builtin::BIsinl:
1650  case Builtin::BI__builtin_sin:
1651  case Builtin::BI__builtin_sinf:
1652  case Builtin::BI__builtin_sinl:
1653  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin));
1654 
1655  case Builtin::BIsqrt:
1656  case Builtin::BIsqrtf:
1657  case Builtin::BIsqrtl:
1658  case Builtin::BI__builtin_sqrt:
1659  case Builtin::BI__builtin_sqrtf:
1660  case Builtin::BI__builtin_sqrtl:
1661  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt));
1662 
1663  case Builtin::BItrunc:
1664  case Builtin::BItruncf:
1665  case Builtin::BItruncl:
1666  case Builtin::BI__builtin_trunc:
1667  case Builtin::BI__builtin_truncf:
1668  case Builtin::BI__builtin_truncl:
1669  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
1670 
1671  default:
1672  break;
1673  }
1674  }
1675 
1676  switch (BuiltinID) {
1677  default: break;
1678  case Builtin::BI__builtin___CFStringMakeConstantString:
1679  case Builtin::BI__builtin___NSStringMakeConstantString:
1680  return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
1681  case Builtin::BI__builtin_stdarg_start:
1682  case Builtin::BI__builtin_va_start:
1683  case Builtin::BI__va_start:
1684  case Builtin::BI__builtin_va_end:
1685  return RValue::get(
1686  EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
1687  ? EmitScalarExpr(E->getArg(0))
1688  : EmitVAListRef(E->getArg(0)).getPointer(),
1689  BuiltinID != Builtin::BI__builtin_va_end));
1690  case Builtin::BI__builtin_va_copy: {
1691  Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
1692  Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
1693 
1694  llvm::Type *Type = Int8PtrTy;
1695 
1696  DstPtr = Builder.CreateBitCast(DstPtr, Type);
1697  SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
1698  return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
1699  {DstPtr, SrcPtr}));
1700  }
1701  case Builtin::BI__builtin_abs:
1702  case Builtin::BI__builtin_labs:
1703  case Builtin::BI__builtin_llabs: {
1704  // X < 0 ? -X : X
1705  // The negation has 'nsw' because abs of INT_MIN is undefined.
1706  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1707  Value *NegOp = Builder.CreateNSWNeg(ArgValue, "neg");
1708  Constant *Zero = llvm::Constant::getNullValue(ArgValue->getType());
1709  Value *CmpResult = Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
1710  Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs");
1711  return RValue::get(Result);
1712  }
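  // A small sketch of the lowering above (illustrative, not part of this
  // file): the builtin becomes a compare-and-select rather than a libcall.
  //
  //   int my_abs(int x) {
  //     return __builtin_abs(x);  // -> %neg = sub nsw 0, x; select x < 0, %neg, x
  //   }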
1713  case Builtin::BI__builtin_conj:
1714  case Builtin::BI__builtin_conjf:
1715  case Builtin::BI__builtin_conjl: {
1716  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1717  Value *Real = ComplexVal.first;
1718  Value *Imag = ComplexVal.second;
1719  Value *Zero =
1720  Imag->getType()->isFPOrFPVectorTy()
1721  ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
1722  : llvm::Constant::getNullValue(Imag->getType());
1723 
1724  Imag = Builder.CreateFSub(Zero, Imag, "sub");
1725  return RValue::getComplex(std::make_pair(Real, Imag));
1726  }
1727  case Builtin::BI__builtin_creal:
1728  case Builtin::BI__builtin_crealf:
1729  case Builtin::BI__builtin_creall:
1730  case Builtin::BIcreal:
1731  case Builtin::BIcrealf:
1732  case Builtin::BIcreall: {
1733  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1734  return RValue::get(ComplexVal.first);
1735  }
1736 
1737  case Builtin::BI__builtin_dump_struct: {
1738  Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts());
1739  CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment();
1740 
1741  const Expr *Arg0 = E->getArg(0)->IgnoreImpCasts();
1742  QualType Arg0Type = Arg0->getType()->getPointeeType();
1743 
1744  Value *RecordPtr = EmitScalarExpr(Arg0);
1745  Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align, Func, 0);
1746  return RValue::get(Res);
1747  }
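  // A hedged usage sketch (illustrative, not part of this file): the second
  // argument is a printf-like function that dumpRecord above calls once per
  // field, using the formats from the Types map.
  //
  //   struct Point { int x; float y; };
  //   void demo(struct Point *p) {
  //     __builtin_dump_struct(p, &printf);
  //   }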
1748 
1749  case Builtin::BI__builtin_cimag:
1750  case Builtin::BI__builtin_cimagf:
1751  case Builtin::BI__builtin_cimagl:
1752  case Builtin::BIcimag:
1753  case Builtin::BIcimagf:
1754  case Builtin::BIcimagl: {
1755  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1756  return RValue::get(ComplexVal.second);
1757  }
1758 
1759  case Builtin::BI__builtin_clrsb:
1760  case Builtin::BI__builtin_clrsbl:
1761  case Builtin::BI__builtin_clrsbll: {
1762  // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
1763  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1764 
1765  llvm::Type *ArgType = ArgValue->getType();
1766  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1767 
1768  llvm::Type *ResultType = ConvertType(E->getType());
1769  Value *Zero = llvm::Constant::getNullValue(ArgType);
1770  Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
1771  Value *Inverse = Builder.CreateNot(ArgValue, "not");
1772  Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
1773  Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
1774  Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
1775  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1776  "cast");
1777  return RValue::get(Result);
1778  }
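  // Worked examples of the expansion above (illustrative): clrsb returns the
  // number of leading redundant sign bits, here for a 32-bit int.
  //
  //   __builtin_clrsb(0)  == 31   // ctlz(0) == 32 with ZeroIsUndef false
  //   __builtin_clrsb(1)  == 30   // ctlz(1) == 31
  //   __builtin_clrsb(-1) == 31   // ~(-1) == 0, ctlz == 32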
1779  case Builtin::BI__builtin_ctzs:
1780  case Builtin::BI__builtin_ctz:
1781  case Builtin::BI__builtin_ctzl:
1782  case Builtin::BI__builtin_ctzll: {
1783  Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
1784 
1785  llvm::Type *ArgType = ArgValue->getType();
1786  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1787 
1788  llvm::Type *ResultType = ConvertType(E->getType());
1789  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1790  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1791  if (Result->getType() != ResultType)
1792  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1793  "cast");
1794  return RValue::get(Result);
1795  }
1796  case Builtin::BI__builtin_clzs:
1797  case Builtin::BI__builtin_clz:
1798  case Builtin::BI__builtin_clzl:
1799  case Builtin::BI__builtin_clzll: {
1800  Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
1801 
1802  llvm::Type *ArgType = ArgValue->getType();
1803  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1804 
1805  llvm::Type *ResultType = ConvertType(E->getType());
1806  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1807  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1808  if (Result->getType() != ResultType)
1809  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1810  "cast");
1811  return RValue::get(Result);
1812  }
1813  case Builtin::BI__builtin_ffs:
1814  case Builtin::BI__builtin_ffsl:
1815  case Builtin::BI__builtin_ffsll: {
1816  // ffs(x) -> x ? cttz(x) + 1 : 0
1817  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1818 
1819  llvm::Type *ArgType = ArgValue->getType();
1820  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1821 
1822  llvm::Type *ResultType = ConvertType(E->getType());
1823  Value *Tmp =
1824  Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
1825  llvm::ConstantInt::get(ArgType, 1));
1826  Value *Zero = llvm::Constant::getNullValue(ArgType);
1827  Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
1828  Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
1829  if (Result->getType() != ResultType)
1830  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1831  "cast");
1832  return RValue::get(Result);
1833  }
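  // Worked examples of the expansion above (illustrative): ffs is the
  // 1-based index of the least significant set bit, or 0 for a zero input.
  //
  //   __builtin_ffs(0)  == 0   // the select takes the zero arm
  //   __builtin_ffs(8)  == 4   // cttz(8) + 1
  //   __builtin_ffs(-1) == 1   // cttz(-1) + 1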
1834  case Builtin::BI__builtin_parity:
1835  case Builtin::BI__builtin_parityl:
1836  case Builtin::BI__builtin_parityll: {
1837  // parity(x) -> ctpop(x) & 1
1838  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1839 
1840  llvm::Type *ArgType = ArgValue->getType();
1841  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1842 
1843  llvm::Type *ResultType = ConvertType(E->getType());
1844  Value *Tmp = Builder.CreateCall(F, ArgValue);
1845  Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
1846  if (Result->getType() != ResultType)
1847  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1848  "cast");
1849  return RValue::get(Result);
1850  }
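  // Worked examples (illustrative): parity is the population count modulo 2.
  //
  //   __builtin_parity(0xB) == 1   // ctpop == 3
  //   __builtin_parity(0x6) == 0   // ctpop == 2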
1851  case Builtin::BI__lzcnt16:
1852  case Builtin::BI__lzcnt:
1853  case Builtin::BI__lzcnt64: {
1854  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1855 
1856  llvm::Type *ArgType = ArgValue->getType();
1857  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1858 
1859  llvm::Type *ResultType = ConvertType(E->getType());
1860  Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
1861  if (Result->getType() != ResultType)
1862  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1863  "cast");
1864  return RValue::get(Result);
1865  }
1866  case Builtin::BI__popcnt16:
1867  case Builtin::BI__popcnt:
1868  case Builtin::BI__popcnt64:
1869  case Builtin::BI__builtin_popcount:
1870  case Builtin::BI__builtin_popcountl:
1871  case Builtin::BI__builtin_popcountll: {
1872  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1873 
1874  llvm::Type *ArgType = ArgValue->getType();
1875  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1876 
1877  llvm::Type *ResultType = ConvertType(E->getType());
1878  Value *Result = Builder.CreateCall(F, ArgValue);
1879  if (Result->getType() != ResultType)
1880  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1881  "cast");
1882  return RValue::get(Result);
1883  }
1884  case Builtin::BI__builtin_unpredictable: {
1885  // Always return the argument of __builtin_unpredictable. LLVM does not
1886  // handle this builtin. Metadata for this builtin should be added directly
1887  // to instructions such as branches or switches that use it.
1888  return RValue::get(EmitScalarExpr(E->getArg(0)));
1889  }
1890  case Builtin::BI__builtin_expect: {
1891  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1892  llvm::Type *ArgType = ArgValue->getType();
1893 
1894  Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
1895  // Don't generate llvm.expect on -O0 as the backend won't use it for
1896  // anything.
1897  // Note, we still IRGen ExpectedValue because it could have side-effects.
1898  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
1899  return RValue::get(ArgValue);
1900 
1901  Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
1902  Value *Result =
1903  Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
1904  return RValue::get(Result);
1905  }
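  // A hedged usage sketch (illustrative, not part of this file; handle_error
  // is a hypothetical helper): above -O0 the call becomes llvm.expect, which
  // only feeds branch-weight metadata.
  //
  //   if (__builtin_expect(err != 0, 0))   // hint: the error path is unlikely
  //     handle_error(err);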
1906  case Builtin::BI__builtin_assume_aligned: {
1907  const Expr *Ptr = E->getArg(0);
1908  Value *PtrValue = EmitScalarExpr(Ptr);
1909  Value *OffsetValue =
1910  (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
1911 
1912  Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
1913  ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
1914  unsigned Alignment = (unsigned)AlignmentCI->getZExtValue();
1915 
1916  EmitAlignmentAssumption(PtrValue, Ptr, /*The expr loc is sufficient.*/ SourceLocation(),
1917  Alignment, OffsetValue);
1918  return RValue::get(PtrValue);
1919  }
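  // A minimal usage sketch (illustrative, not part of this file): the
  // returned pointer carries an alignment assumption for later passes.
  //
  //   float *v = (float *)__builtin_assume_aligned(p, 64);  // p promised 64-byte aligned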
1920  case Builtin::BI__assume:
1921  case Builtin::BI__builtin_assume: {
1922  if (E->getArg(0)->HasSideEffects(getContext()))
1923  return RValue::get(nullptr);
1924 
1925  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1926  Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
1927  return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
1928  }
1929  case Builtin::BI__builtin_bswap16:
1930  case Builtin::BI__builtin_bswap32:
1931  case Builtin::BI__builtin_bswap64: {
1932  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
1933  }
1934  case Builtin::BI__builtin_bitreverse8:
1935  case Builtin::BI__builtin_bitreverse16:
1936  case Builtin::BI__builtin_bitreverse32:
1937  case Builtin::BI__builtin_bitreverse64: {
1938  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
1939  }
1940  case Builtin::BI__builtin_rotateleft8:
1941  case Builtin::BI__builtin_rotateleft16:
1942  case Builtin::BI__builtin_rotateleft32:
1943  case Builtin::BI__builtin_rotateleft64:
1944  case Builtin::BI_rotl8: // Microsoft variants of rotate left
1945  case Builtin::BI_rotl16:
1946  case Builtin::BI_rotl:
1947  case Builtin::BI_lrotl:
1948  case Builtin::BI_rotl64:
1949  return emitRotate(E, false);
1950 
1951  case Builtin::BI__builtin_rotateright8:
1952  case Builtin::BI__builtin_rotateright16:
1953  case Builtin::BI__builtin_rotateright32:
1954  case Builtin::BI__builtin_rotateright64:
1955  case Builtin::BI_rotr8: // Microsoft variants of rotate right
1956  case Builtin::BI_rotr16:
1957  case Builtin::BI_rotr:
1958  case Builtin::BI_lrotr:
1959  case Builtin::BI_rotr64:
1960  return emitRotate(E, true);
1961 
1962  case Builtin::BI__builtin_constant_p: {
1963  llvm::Type *ResultType = ConvertType(E->getType());
1964  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
1965  // At -O0, we don't perform inlining, so we don't need to delay the
1966  // processing.
1967  return RValue::get(ConstantInt::get(ResultType, 0));
1968 
1969  const Expr *Arg = E->getArg(0);
1970  QualType ArgType = Arg->getType();
1971  if (!hasScalarEvaluationKind(ArgType) || ArgType->isFunctionType())
1972  // We can only reason about scalar types.
1973  return RValue::get(ConstantInt::get(ResultType, 0));
1974 
1975  Value *ArgValue = EmitScalarExpr(Arg);
1976  Value *F = CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
1977  Value *Result = Builder.CreateCall(F, ArgValue);
1978  if (Result->getType() != ResultType)
1979  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
1980  return RValue::get(Result);
1981  }
1982  case Builtin::BI__builtin_object_size: {
1983  unsigned Type =
1984  E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
1985  auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
1986 
1987  // We pass this builtin onto the optimizer so that it can figure out the
1988  // object size in more complex cases.
1989  return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
1990  /*EmittedE=*/nullptr));
1991  }
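  // Hedged example (illustrative): the second argument selects the
  // closest-subobject / maximum-vs-minimum behavior; unknown sizes evaluate
  // to -1 for types 0 and 1 and to 0 for types 2 and 3.
  //
  //   char buf[16];
  //   size_t n = __builtin_object_size(buf + 4, 0);  // 12 once provable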
1992  case Builtin::BI__builtin_prefetch: {
1993  Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
1994  // FIXME: Technically these constants should be of type 'int', yes?
1995  RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1996  llvm::ConstantInt::get(Int32Ty, 0);
1997  Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1998  llvm::ConstantInt::get(Int32Ty, 3);
1999  Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
2000  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
2001  return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
2002  }
2003  case Builtin::BI__builtin_readcyclecounter: {
2004  Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
2005  return RValue::get(Builder.CreateCall(F));
2006  }
2007  case Builtin::BI__builtin___clear_cache: {
2008  Value *Begin = EmitScalarExpr(E->getArg(0));
2009  Value *End = EmitScalarExpr(E->getArg(1));
2010  Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
2011  return RValue::get(Builder.CreateCall(F, {Begin, End}));
2012  }
2013  case Builtin::BI__builtin_trap:
2014  return RValue::get(EmitTrapCall(Intrinsic::trap));
2015  case Builtin::BI__debugbreak:
2016  return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
2017  case Builtin::BI__builtin_unreachable: {
2018  EmitUnreachable(E->getExprLoc());
2019 
2020  // We do need to preserve an insertion point.
2021  EmitBlock(createBasicBlock("unreachable.cont"));
2022 
2023  return RValue::get(nullptr);
2024  }
2025 
2026  case Builtin::BI__builtin_powi:
2027  case Builtin::BI__builtin_powif:
2028  case Builtin::BI__builtin_powil: {
2029  Value *Base = EmitScalarExpr(E->getArg(0));
2030  Value *Exponent = EmitScalarExpr(E->getArg(1));
2031  llvm::Type *ArgType = Base->getType();
2032  Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
2033  return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
2034  }
2035 
2036  case Builtin::BI__builtin_isgreater:
2037  case Builtin::BI__builtin_isgreaterequal:
2038  case Builtin::BI__builtin_isless:
2039  case Builtin::BI__builtin_islessequal:
2040  case Builtin::BI__builtin_islessgreater:
2041  case Builtin::BI__builtin_isunordered: {
2042  // Ordered comparisons: we know the arguments to these are matching scalar
2043  // floating point values.
2044  Value *LHS = EmitScalarExpr(E->getArg(0));
2045  Value *RHS = EmitScalarExpr(E->getArg(1));
2046 
2047  switch (BuiltinID) {
2048  default: llvm_unreachable("Unknown ordered comparison");
2049  case Builtin::BI__builtin_isgreater:
2050  LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
2051  break;
2052  case Builtin::BI__builtin_isgreaterequal:
2053  LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
2054  break;
2055  case Builtin::BI__builtin_isless:
2056  LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
2057  break;
2058  case Builtin::BI__builtin_islessequal:
2059  LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
2060  break;
2061  case Builtin::BI__builtin_islessgreater:
2062  LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
2063  break;
2064  case Builtin::BI__builtin_isunordered:
2065  LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
2066  break;
2067  }
2068  // ZExt bool to int type.
2069  return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
2070  }
2071  case Builtin::BI__builtin_isnan: {
2072  Value *V = EmitScalarExpr(E->getArg(0));
2073  V = Builder.CreateFCmpUNO(V, V, "cmp");
2074  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
2075  }
2076 
2077  case Builtin::BIfinite:
2078  case Builtin::BI__finite:
2079  case Builtin::BIfinitef:
2080  case Builtin::BI__finitef:
2081  case Builtin::BIfinitel:
2082  case Builtin::BI__finitel:
2083  case Builtin::BI__builtin_isinf:
2084  case Builtin::BI__builtin_isfinite: {
2085  // isinf(x) --> fabs(x) == infinity
2086  // isfinite(x) --> fabs(x) != infinity
2087  // x != NaN via the ordered compare in either case.
2088  Value *V = EmitScalarExpr(E->getArg(0));
2089  Value *Fabs = EmitFAbs(*this, V);
2090  Constant *Infinity = ConstantFP::getInfinity(V->getType());
2091  CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
2092  ? CmpInst::FCMP_OEQ
2093  : CmpInst::FCMP_ONE;
2094  Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
2095  return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
2096  }
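  // Worked summary of the mapping above (illustrative): both compares are
  // ordered, so a NaN input yields 0 in either case.
  //
  //   __builtin_isinf(x)     // fabs(x) == +inf
  //   __builtin_isfinite(x)  // fabs(x) != +inf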
2097 
2098  case Builtin::BI__builtin_isinf_sign: {
2099  // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
2100  Value *Arg = EmitScalarExpr(E->getArg(0));
2101  Value *AbsArg = EmitFAbs(*this, Arg);
2102  Value *IsInf = Builder.CreateFCmpOEQ(
2103  AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
2104  Value *IsNeg = EmitSignBit(*this, Arg);
2105 
2106  llvm::Type *IntTy = ConvertType(E->getType());
2107  Value *Zero = Constant::getNullValue(IntTy);
2108  Value *One = ConstantInt::get(IntTy, 1);
2109  Value *NegativeOne = ConstantInt::get(IntTy, -1);
2110  Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
2111  Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
2112  return RValue::get(Result);
2113  }
2114 
2115  case Builtin::BI__builtin_isnormal: {
2116  // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
2117  Value *V = EmitScalarExpr(E->getArg(0));
2118  Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
2119 
2120  Value *Abs = EmitFAbs(*this, V);
2121  Value *IsLessThanInf =
2122  Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
2123  APFloat Smallest = APFloat::getSmallestNormalized(
2124  getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
2125  Value *IsNormal =
2126  Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
2127  "isnormal");
2128  V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
2129  V = Builder.CreateAnd(V, IsNormal, "and");
2130  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
2131  }
2132 
2133  case Builtin::BI__builtin_fpclassify: {
2134  Value *V = EmitScalarExpr(E->getArg(5));
2135  llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
2136 
2137  // Create Result
2138  BasicBlock *Begin = Builder.GetInsertBlock();
2139  BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
2140  Builder.SetInsertPoint(End);
2141  PHINode *Result =
2142  Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
2143  "fpclassify_result");
2144 
2145  // if (V==0) return FP_ZERO
2146  Builder.SetInsertPoint(Begin);
2147  Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
2148  "iszero");
2149  Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
2150  BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
2151  Builder.CreateCondBr(IsZero, End, NotZero);
2152  Result->addIncoming(ZeroLiteral, Begin);
2153 
2154  // if (V != V) return FP_NAN
2155  Builder.SetInsertPoint(NotZero);
2156  Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
2157  Value *NanLiteral = EmitScalarExpr(E->getArg(0));
2158  BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
2159  Builder.CreateCondBr(IsNan, End, NotNan);
2160  Result->addIncoming(NanLiteral, NotZero);
2161 
2162  // if (fabs(V) == infinity) return FP_INFINITY
2163  Builder.SetInsertPoint(NotNan);
2164  Value *VAbs = EmitFAbs(*this, V);
2165  Value *IsInf =
2166  Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
2167  "isinf");
2168  Value *InfLiteral = EmitScalarExpr(E->getArg(1));
2169  BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
2170  Builder.CreateCondBr(IsInf, End, NotInf);
2171  Result->addIncoming(InfLiteral, NotNan);
2172 
2173  // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
2174  Builder.SetInsertPoint(NotInf);
2175  APFloat Smallest = APFloat::getSmallestNormalized(
2176  getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
2177  Value *IsNormal =
2178  Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
2179  "isnormal");
2180  Value *NormalResult =
2181  Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
2182  EmitScalarExpr(E->getArg(3)));
2183  Builder.CreateBr(End);
2184  Result->addIncoming(NormalResult, NotInf);
2185 
2186  // return Result
2187  Builder.SetInsertPoint(End);
2188  return RValue::get(Result);
2189  }
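  // A hedged usage sketch (illustrative, not part of this file): the first
  // five arguments are the values returned for NaN, infinity, normal,
  // subnormal and zero inputs, in that order.
  //
  //   int k = __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL,
  //                                FP_SUBNORMAL, FP_ZERO, x);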
2190 
2191  case Builtin::BIalloca:
2192  case Builtin::BI_alloca:
2193  case Builtin::BI__builtin_alloca: {
2194  Value *Size = EmitScalarExpr(E->getArg(0));
2195  const TargetInfo &TI = getContext().getTargetInfo();
2196  // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
2197  unsigned SuitableAlignmentInBytes =
2198  CGM.getContext()
2199  .toCharUnitsFromBits(TI.getSuitableAlign())
2200  .getQuantity();
2201  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
2202  AI->setAlignment(SuitableAlignmentInBytes);
2203  return RValue::get(AI);
2204  }
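  // Hedged usage sketch (illustrative): the allocation is aligned to
  // __BIGGEST_ALIGNMENT__, e.g. 16 bytes on typical x86-64 targets, and is
  // released automatically when the caller returns.
  //
  //   void *scratch = __builtin_alloca(n);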
2205 
2206  case Builtin::BI__builtin_alloca_with_align: {
2207  Value *Size = EmitScalarExpr(E->getArg(0));
2208  Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
2209  auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
2210  unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
2211  unsigned AlignmentInBytes =
2212  CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
2213  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
2214  AI->setAlignment(AlignmentInBytes);
2215  return RValue::get(AI);
2216  }
2217 
2218  case Builtin::BIbzero:
2219  case Builtin::BI__builtin_bzero: {
2220  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2221  Value *SizeVal = EmitScalarExpr(E->getArg(1));
2222  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
2223  E->getArg(0)->getExprLoc(), FD, 0);
2224  Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
2225  return RValue::get(nullptr);
2226  }
2227  case Builtin::BImemcpy:
2228  case Builtin::BI__builtin_memcpy: {
2229  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2230  Address Src = EmitPointerWithAlignment(E->getArg(1));
2231  Value *SizeVal = EmitScalarExpr(E->getArg(2));
2232  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
2233  E->getArg(0)->getExprLoc(), FD, 0);
2234  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
2235  E->getArg(1)->getExprLoc(), FD, 1);
2236  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
2237  return RValue::get(Dest.getPointer());
2238  }
2239 
2240  case Builtin::BI__builtin_char_memchr:
2241  BuiltinID = Builtin::BI__builtin_memchr;
2242  break;
2243 
2244  case Builtin::BI__builtin___memcpy_chk: {
2245  // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
2246  Expr::EvalResult SizeResult, DstSizeResult;
2247  if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
2248  !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
2249  break;
2250  llvm::APSInt Size = SizeResult.Val.getInt();
2251  llvm::APSInt DstSize = DstSizeResult.Val.getInt();
2252  if (Size.ugt(DstSize))
2253  break;
2254  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2255  Address Src = EmitPointerWithAlignment(E->getArg(1));
2256  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
2257  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
2258  return RValue::get(Dest.getPointer());
2259  }
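  // Hedged example of the fold above (illustrative): when both sizes are
  // constants and the copy fits, the _chk form degrades to a plain memcpy
  // with no runtime check. This is the shape _FORTIFY_SOURCE typically
  // expands memcpy into.
  //
  //   __builtin___memcpy_chk(dst, src, 8, __builtin_object_size(dst, 0));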
2260 
2261  case Builtin::BI__builtin_objc_memmove_collectable: {
2262  Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
2263  Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
2264  Value *SizeVal = EmitScalarExpr(E->getArg(2));
2265  CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
2266  DestAddr, SrcAddr, SizeVal);
2267  return RValue::get(DestAddr.getPointer());
2268  }
2269 
2270  case Builtin::BI__builtin___memmove_chk: {
2271  // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
2272  Expr::EvalResult SizeResult, DstSizeResult;
2273  if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
2274  !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
2275  break;
2276  llvm::APSInt Size = SizeResult.Val.getInt();
2277  llvm::APSInt DstSize = DstSizeResult.Val.getInt();
2278  if (Size.ugt(DstSize))
2279  break;
2280  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2281  Address Src = EmitPointerWithAlignment(E->getArg(1));
2282  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
2283  Builder.CreateMemMove(Dest, Src, SizeVal, false);
2284  return RValue::get(Dest.getPointer());
2285  }
2286 
2287  case Builtin::BImemmove:
2288  case Builtin::BI__builtin_memmove: {
2289  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2290  Address Src = EmitPointerWithAlignment(E->getArg(1));
2291  Value *SizeVal = EmitScalarExpr(E->getArg(2));
2292  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
2293  E->getArg(0)->getExprLoc(), FD, 0);
2294  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
2295  E->getArg(1)->getExprLoc(), FD, 1);
2296  Builder.CreateMemMove(Dest, Src, SizeVal, false);
2297  return RValue::get(Dest.getPointer());
2298  }
2299  case Builtin::BImemset:
2300  case Builtin::BI__builtin_memset: {
2301  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2302  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
2303  Builder.getInt8Ty());
2304  Value *SizeVal = EmitScalarExpr(E->getArg(2));
2305  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
2306  E->getArg(0)->getExprLoc(), FD, 0);
2307  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
2308  return RValue::get(Dest.getPointer());
2309  }
2310  case Builtin::BI__builtin___memset_chk: {
2311  // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
2312  Expr::EvalResult SizeResult, DstSizeResult;
2313  if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
2314  !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
2315  break;
2316  llvm::APSInt Size = SizeResult.Val.getInt();
2317  llvm::APSInt DstSize = DstSizeResult.Val.getInt();
2318  if (Size.ugt(DstSize))
2319  break;
2320  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2321  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
2322  Builder.getInt8Ty());
2323  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
2324  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
2325  return RValue::get(Dest.getPointer());
2326  }
2327  case Builtin::BI__builtin_wmemcmp: {
2328  // The MSVC runtime library does not provide a definition of wmemcmp, so we
2329  // need an inline implementation.
2330  if (!getTarget().getTriple().isOSMSVCRT())
2331  break;
2332 
2333  llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
2334 
2335  Value *Dst = EmitScalarExpr(E->getArg(0));
2336  Value *Src = EmitScalarExpr(E->getArg(1));
2337  Value *Size = EmitScalarExpr(E->getArg(2));
2338 
2339  BasicBlock *Entry = Builder.GetInsertBlock();
2340  BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
2341  BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
2342  BasicBlock *Next = createBasicBlock("wmemcmp.next");
2343  BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
2344  Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
2345  Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
2346 
2347  EmitBlock(CmpGT);
2348  PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
2349  DstPhi->addIncoming(Dst, Entry);
2350  PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
2351  SrcPhi->addIncoming(Src, Entry);
2352  PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
2353  SizePhi->addIncoming(Size, Entry);
2354  CharUnits WCharAlign =
2355  getContext().getTypeAlignInChars(getContext().WCharTy);
2356  Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
2357  Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
2358  Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
2359  Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
2360 
2361  EmitBlock(CmpLT);
2362  Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
2363  Builder.CreateCondBr(DstLtSrc, Exit, Next);
2364 
2365  EmitBlock(Next);
2366  Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
2367  Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
2368  Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
2369  Value *NextSizeEq0 =
2370  Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
2371  Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
2372  DstPhi->addIncoming(NextDst, Next);
2373  SrcPhi->addIncoming(NextSrc, Next);
2374  SizePhi->addIncoming(NextSize, Next);
2375 
2376  EmitBlock(Exit);
2377  PHINode *Ret = Builder.CreatePHI(IntTy, 4);
2378  Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
2379  Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
2380  Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
2381  Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
2382  return RValue::get(Ret);
2383  }
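  // A hedged C-level sketch of the inline expansion above (illustrative, not
  // the emitted IR): wide characters are compared as unsigned values until a
  // difference is found or the count is exhausted.
  //
  //   int wmemcmp_ref(const wchar_t *a, const wchar_t *b, size_t n) {
  //     for (; n; --n, ++a, ++b) {
  //       if (*a > *b) return 1;
  //       if (*a < *b) return -1;
  //     }
  //     return 0;
  //   }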
2384  case Builtin::BI__builtin_dwarf_cfa: {
2385  // The offset in bytes from the first argument to the CFA.
2386  //
2387  // Why on earth is this in the frontend? Is there any reason at
2388  // all that the backend can't reasonably determine this while
2389  // lowering llvm.eh.dwarf.cfa()?
2390  //
2391  // TODO: If there's a satisfactory reason, add a target hook for
2392  // this instead of hard-coding 0, which is correct for most targets.
2393  int32_t Offset = 0;
2394 
2395  Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
2396  return RValue::get(Builder.CreateCall(F,
2397  llvm::ConstantInt::get(Int32Ty, Offset)));
2398  }
2399  case Builtin::BI__builtin_return_address: {
2400  Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
2401  getContext().UnsignedIntTy);
2402  Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
2403  return RValue::get(Builder.CreateCall(F, Depth));
2404  }
2405  case Builtin::BI_ReturnAddress: {
2406  Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
2407  return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
2408  }
2409  case Builtin::BI__builtin_frame_address: {
2410  Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
2411  getContext().UnsignedIntTy);
2412  Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
2413  return RValue::get(Builder.CreateCall(F, Depth));
2414  }
2415  case Builtin::BI__builtin_extract_return_addr: {
2416  Value *Address = EmitScalarExpr(E->getArg(0));
2417  Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
2418  return RValue::get(Result);
2419  }
2420  case Builtin::BI__builtin_frob_return_addr: {
2421  Value *Address = EmitScalarExpr(E->getArg(0));
2422  Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
2423  return RValue::get(Result);
2424  }
2425  case Builtin::BI__builtin_dwarf_sp_column: {
2426  llvm::IntegerType *Ty
2427  = cast<llvm::IntegerType>(ConvertType(E->getType()));
2428  int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
2429  if (Column == -1) {
2430  CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
2431  return RValue::get(llvm::UndefValue::get(Ty));
2432  }
2433  return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
2434  }
2435  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
2436  Value *Address = EmitScalarExpr(E->getArg(0));
2437  if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
2438  CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
2439  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
2440  }
2441  case Builtin::BI__builtin_eh_return: {
2442  Value *Int = EmitScalarExpr(E->getArg(0));
2443  Value *Ptr = EmitScalarExpr(E->getArg(1));
2444 
2445  llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
2446  assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
2447  "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
2448  Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
2449  ? Intrinsic::eh_return_i32
2450  : Intrinsic::eh_return_i64);
2451  Builder.CreateCall(F, {Int, Ptr});
2452  Builder.CreateUnreachable();
2453 
2454  // We do need to preserve an insertion point.
2455  EmitBlock(createBasicBlock("builtin_eh_return.cont"));
2456 
2457  return RValue::get(nullptr);
2458  }
2459  case Builtin::BI__builtin_unwind_init: {
2460  Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
2461  return RValue::get(Builder.CreateCall(F));
2462  }
2463  case Builtin::BI__builtin_extend_pointer: {
2464  // Extends a pointer to the size of an _Unwind_Word, which is
2465  // uint64_t on all platforms. Generally this gets poked into a
2466  // register and eventually used as an address, so if the
2467  // addressing registers are wider than pointers and the platform
2468  // doesn't implicitly ignore high-order bits when doing
2469  // addressing, we need to make sure we zext / sext based on
2470  // the platform's expectations.
2471  //
2472  // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
2473 
2474  // Cast the pointer to intptr_t.
2475  Value *Ptr = EmitScalarExpr(E->getArg(0));
2476  Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
2477 
2478  // If that's 64 bits, we're done.
2479  if (IntPtrTy->getBitWidth() == 64)
2480  return RValue::get(Result);
2481 
2482  // Otherwise, ask the target hooks what to do.
2483  if (getTargetHooks().extendPointerWithSExt())
2484  return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
2485  else
2486  return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
2487  }
2488  case Builtin::BI__builtin_setjmp: {
2489  // Buffer is a void**.
2490  Address Buf = EmitPointerWithAlignment(E->getArg(0));
2491 
2492  // Store the frame pointer to the setjmp buffer.
2493  Value *FrameAddr =
2494  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2495  ConstantInt::get(Int32Ty, 0));
2496  Builder.CreateStore(FrameAddr, Buf);
2497 
2498  // Store the stack pointer to the setjmp buffer.
2499  Value *StackAddr =
2500  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
2501  Address StackSaveSlot =
2502  Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
2503  Builder.CreateStore(StackAddr, StackSaveSlot);
2504 
2505  // Call LLVM's EH setjmp, which is lightweight.
2506  Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
2507  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
2508  return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
2509  }
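  // Hedged layout note (illustrative): the buffer is treated as a void*[5];
  // slot 0 receives the frame pointer and slot 2 the saved stack pointer, as
  // expected by llvm.eh.sjlj.setjmp / llvm.eh.sjlj.longjmp.
  //
  //   void *buf[5];
  //   if (__builtin_setjmp(buf) == 0) { /* ... */ __builtin_longjmp(buf, 1); }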
2510  case Builtin::BI__builtin_longjmp: {
2511  Value *Buf = EmitScalarExpr(E->getArg(0));
2512  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
2513 
2514  // Call LLVM's EH longjmp, which is lightweight.
2515  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
2516 
2517  // longjmp doesn't return; mark this as unreachable.
2518  Builder.CreateUnreachable();
2519 
2520  // We do need to preserve an insertion point.
2521  EmitBlock(createBasicBlock("longjmp.cont"));
2522 
2523  return RValue::get(nullptr);
2524  }
2525  case Builtin::BI__builtin_launder: {
2526  const Expr *Arg = E->getArg(0);
2527  QualType ArgTy = Arg->getType()->getPointeeType();
2528  Value *Ptr = EmitScalarExpr(Arg);
2529  if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
2530  Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
2531 
2532  return RValue::get(Ptr);
2533  }
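  // A hedged usage sketch (illustrative, not part of this file): under
  // -fstrict-vtable-pointers the barrier is only emitted for types that are,
  // or contain, dynamic classes, since only their vptr loads are optimized
  // through invariant.group metadata.
  //
  //   struct Dyn { virtual ~Dyn(); };
  //   Dyn *reuse(void *storage) {
  //     return __builtin_launder(reinterpret_cast<Dyn *>(storage));
  //   }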
2534  case Builtin::BI__sync_fetch_and_add:
2535  case Builtin::BI__sync_fetch_and_sub:
2536  case Builtin::BI__sync_fetch_and_or:
2537  case Builtin::BI__sync_fetch_and_and:
2538  case Builtin::BI__sync_fetch_and_xor:
2539  case Builtin::BI__sync_fetch_and_nand:
2540  case Builtin::BI__sync_add_and_fetch:
2541  case Builtin::BI__sync_sub_and_fetch:
2542  case Builtin::BI__sync_and_and_fetch:
2543  case Builtin::BI__sync_or_and_fetch:
2544  case Builtin::BI__sync_xor_and_fetch:
2545  case Builtin::BI__sync_nand_and_fetch:
2546  case Builtin::BI__sync_val_compare_and_swap:
2547  case Builtin::BI__sync_bool_compare_and_swap:
2548  case Builtin::BI__sync_lock_test_and_set:
2549  case Builtin::BI__sync_lock_release:
2550  case Builtin::BI__sync_swap:
2551  llvm_unreachable("Shouldn't make it through sema");
2552  case Builtin::BI__sync_fetch_and_add_1:
2553  case Builtin::BI__sync_fetch_and_add_2:
2554  case Builtin::BI__sync_fetch_and_add_4:
2555  case Builtin::BI__sync_fetch_and_add_8:
2556  case Builtin::BI__sync_fetch_and_add_16:
2557  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
2558  case Builtin::BI__sync_fetch_and_sub_1:
2559  case Builtin::BI__sync_fetch_and_sub_2:
2560  case Builtin::BI__sync_fetch_and_sub_4:
2561  case Builtin::BI__sync_fetch_and_sub_8:
2562  case Builtin::BI__sync_fetch_and_sub_16:
2563  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
2564  case Builtin::BI__sync_fetch_and_or_1:
2565  case Builtin::BI__sync_fetch_and_or_2:
2566  case Builtin::BI__sync_fetch_and_or_4:
2567  case Builtin::BI__sync_fetch_and_or_8:
2568  case Builtin::BI__sync_fetch_and_or_16:
2569  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
2570  case Builtin::BI__sync_fetch_and_and_1:
2571  case Builtin::BI__sync_fetch_and_and_2:
2572  case Builtin::BI__sync_fetch_and_and_4:
2573  case Builtin::BI__sync_fetch_and_and_8:
2574  case Builtin::BI__sync_fetch_and_and_16:
2575  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
2576  case Builtin::BI__sync_fetch_and_xor_1:
2577  case Builtin::BI__sync_fetch_and_xor_2:
2578  case Builtin::BI__sync_fetch_and_xor_4:
2579  case Builtin::BI__sync_fetch_and_xor_8:
2580  case Builtin::BI__sync_fetch_and_xor_16:
2581  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
2582  case Builtin::BI__sync_fetch_and_nand_1:
2583  case Builtin::BI__sync_fetch_and_nand_2:
2584  case Builtin::BI__sync_fetch_and_nand_4:
2585  case Builtin::BI__sync_fetch_and_nand_8:
2586  case Builtin::BI__sync_fetch_and_nand_16:
2587  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
2588 
2589  // Clang extensions: not overloaded yet.
2590  case Builtin::BI__sync_fetch_and_min:
2591  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
2592  case Builtin::BI__sync_fetch_and_max:
2593  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
2594  case Builtin::BI__sync_fetch_and_umin:
2595  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
2596  case Builtin::BI__sync_fetch_and_umax:
2597  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
2598 
2599  case Builtin::BI__sync_add_and_fetch_1:
2600  case Builtin::BI__sync_add_and_fetch_2:
2601  case Builtin::BI__sync_add_and_fetch_4:
2602  case Builtin::BI__sync_add_and_fetch_8:
2603  case Builtin::BI__sync_add_and_fetch_16:
2604  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
2605  llvm::Instruction::Add);
2606  case Builtin::BI__sync_sub_and_fetch_1:
2607  case Builtin::BI__sync_sub_and_fetch_2:
2608  case Builtin::BI__sync_sub_and_fetch_4:
2609  case Builtin::BI__sync_sub_and_fetch_8:
2610  case Builtin::BI__sync_sub_and_fetch_16:
2611  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
2612  llvm::Instruction::Sub);
2613  case Builtin::BI__sync_and_and_fetch_1:
2614  case Builtin::BI__sync_and_and_fetch_2:
2615  case Builtin::BI__sync_and_and_fetch_4:
2616  case Builtin::BI__sync_and_and_fetch_8:
2617  case Builtin::BI__sync_and_and_fetch_16:
2618  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
2619  llvm::Instruction::And);
2620  case Builtin::BI__sync_or_and_fetch_1:
2621  case Builtin::BI__sync_or_and_fetch_2:
2622  case Builtin::BI__sync_or_and_fetch_4:
2623  case Builtin::BI__sync_or_and_fetch_8:
2624  case Builtin::BI__sync_or_and_fetch_16:
2625  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
2626  llvm::Instruction::Or);
2627  case Builtin::BI__sync_xor_and_fetch_1:
2628  case Builtin::BI__sync_xor_and_fetch_2:
2629  case Builtin::BI__sync_xor_and_fetch_4:
2630  case Builtin::BI__sync_xor_and_fetch_8:
2631  case Builtin::BI__sync_xor_and_fetch_16:
2632  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
2633  llvm::Instruction::Xor);
2634  case Builtin::BI__sync_nand_and_fetch_1:
2635  case Builtin::BI__sync_nand_and_fetch_2:
2636  case Builtin::BI__sync_nand_and_fetch_4:
2637  case Builtin::BI__sync_nand_and_fetch_8:
2638  case Builtin::BI__sync_nand_and_fetch_16:
2639  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
2640  llvm::Instruction::And, true);
2641 
2642  case Builtin::BI__sync_val_compare_and_swap_1:
2643  case Builtin::BI__sync_val_compare_and_swap_2:
2644  case Builtin::BI__sync_val_compare_and_swap_4:
2645  case Builtin::BI__sync_val_compare_and_swap_8:
2646  case Builtin::BI__sync_val_compare_and_swap_16:
2647  return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
2648 
2649  case Builtin::BI__sync_bool_compare_and_swap_1:
2650  case Builtin::BI__sync_bool_compare_and_swap_2:
2651  case Builtin::BI__sync_bool_compare_and_swap_4:
2652  case Builtin::BI__sync_bool_compare_and_swap_8:
2653  case Builtin::BI__sync_bool_compare_and_swap_16:
2654  return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
2655 
2656  case Builtin::BI__sync_swap_1:
2657  case Builtin::BI__sync_swap_2:
2658  case Builtin::BI__sync_swap_4:
2659  case Builtin::BI__sync_swap_8:
2660  case Builtin::BI__sync_swap_16:
2661  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
2662 
2663  case Builtin::BI__sync_lock_test_and_set_1:
2664  case Builtin::BI__sync_lock_test_and_set_2:
2665  case Builtin::BI__sync_lock_test_and_set_4:
2666  case Builtin::BI__sync_lock_test_and_set_8:
2667  case Builtin::BI__sync_lock_test_and_set_16:
2668  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
2669 
2670  case Builtin::BI__sync_lock_release_1:
2671  case Builtin::BI__sync_lock_release_2:
2672  case Builtin::BI__sync_lock_release_4:
2673  case Builtin::BI__sync_lock_release_8:
2674  case Builtin::BI__sync_lock_release_16: {
2675  Value *Ptr = EmitScalarExpr(E->getArg(0));
2676  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
2677  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
2678  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
2679  StoreSize.getQuantity() * 8);
2680  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
2681  llvm::StoreInst *Store =
2682  Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
2683  StoreSize);
2684  Store->setAtomic(llvm::AtomicOrdering::Release);
2685  return RValue::get(nullptr);
2686  }
2687 
2688  case Builtin::BI__sync_synchronize: {
2689  // We assume this is supposed to correspond to a C++0x-style
2690  // sequentially-consistent fence (i.e. this is only usable for
2691  // synchronization, not device I/O or anything like that). This intrinsic
2692  // is really badly designed in the sense that in theory, there isn't
2693  // any way to safely use it... but in practice, it mostly works
2694  // to use it with non-atomic loads and stores to get acquire/release
2695  // semantics.
2696  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
2697  return RValue::get(nullptr);
2698  }
2699 
2700  case Builtin::BI__builtin_nontemporal_load:
2701  return RValue::get(EmitNontemporalLoad(*this, E));
2702  case Builtin::BI__builtin_nontemporal_store:
2703  return RValue::get(EmitNontemporalStore(*this, E));
2704  case Builtin::BI__c11_atomic_is_lock_free:
2705  case Builtin::BI__atomic_is_lock_free: {
2706  // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
2707  // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
2708  // _Atomic(T) is always properly-aligned.
2709  const char *LibCallName = "__atomic_is_lock_free";
2710  CallArgList Args;
2711  Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
2712  getContext().getSizeType());
2713  if (BuiltinID == Builtin::BI__atomic_is_lock_free)
2714  Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
2715  getContext().VoidPtrTy);
2716  else
2717  Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
2718  getContext().VoidPtrTy);
2719  const CGFunctionInfo &FuncInfo =
2720  CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
2721  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
2722  llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
2723  return EmitCall(FuncInfo, CGCallee::forDirect(Func),
2724  ReturnValueSlot(), Args);
2725  }
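  // A minimal usage sketch (illustrative; obj is a placeholder object): when
  // the answer cannot be folded to a constant earlier, the builtin becomes a
  // call to the __atomic_is_lock_free runtime routine; the __c11 form passes
  // a null "properly aligned" pointer instead of the object address.
  //
  //   _Bool ok = __atomic_is_lock_free(sizeof obj, &obj);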
2726 
2727  case Builtin::BI__atomic_test_and_set: {
2728  // Look at the argument type to determine whether this is a volatile
2729  // operation. The parameter type is always volatile.
2730  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
2731  bool Volatile =
2732  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
2733 
2734  Value *Ptr = EmitScalarExpr(E->getArg(0));
2735  unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
2736  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2737  Value *NewVal = Builder.getInt8(1);
2738  Value *Order = EmitScalarExpr(E->getArg(1));
2739  if (isa<llvm::ConstantInt>(Order)) {
2740  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2741  AtomicRMWInst *Result = nullptr;
2742  switch (ord) {
2743  case 0: // memory_order_relaxed
2744  default: // invalid order
2745  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2746  llvm::AtomicOrdering::Monotonic);
2747  break;
2748  case 1: // memory_order_consume
2749  case 2: // memory_order_acquire
2750  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2751  llvm::AtomicOrdering::Acquire);
2752  break;
2753  case 3: // memory_order_release
2754  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2755  llvm::AtomicOrdering::Release);
2756  break;
2757  case 4: // memory_order_acq_rel
2758 
2759  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2760  llvm::AtomicOrdering::AcquireRelease);
2761  break;
2762  case 5: // memory_order_seq_cst
2763  Result = Builder.CreateAtomicRMW(
2764  llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2765  llvm::AtomicOrdering::SequentiallyConsistent);
2766  break;
2767  }
2768  Result->setVolatile(Volatile);
2769  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2770  }
2771 
2772  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2773 
2774  llvm::BasicBlock *BBs[5] = {
2775  createBasicBlock("monotonic", CurFn),
2776  createBasicBlock("acquire", CurFn),
2777  createBasicBlock("release", CurFn),
2778  createBasicBlock("acqrel", CurFn),
2779  createBasicBlock("seqcst", CurFn)
2780  };
2781  llvm::AtomicOrdering Orders[5] = {
2782  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
2783  llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
2784  llvm::AtomicOrdering::SequentiallyConsistent};
2785 
2786  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2787  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2788 
2789  Builder.SetInsertPoint(ContBB);
2790  PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
2791 
2792  for (unsigned i = 0; i < 5; ++i) {
2793  Builder.SetInsertPoint(BBs[i]);
2794  AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
2795  Ptr, NewVal, Orders[i]);
2796  RMW->setVolatile(Volatile);
2797  Result->addIncoming(RMW, BBs[i]);
2798  Builder.CreateBr(ContBB);
2799  }
2800 
2801  SI->addCase(Builder.getInt32(0), BBs[0]);
2802  SI->addCase(Builder.getInt32(1), BBs[1]);
2803  SI->addCase(Builder.getInt32(2), BBs[1]);
2804  SI->addCase(Builder.getInt32(3), BBs[2]);
2805  SI->addCase(Builder.getInt32(4), BBs[3]);
2806  SI->addCase(Builder.getInt32(5), BBs[4]);
2807 
2808  Builder.SetInsertPoint(ContBB);
2809  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2810  }
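 // For example, with a constant ordering argument,
 //   __atomic_test_and_set(p, __ATOMIC_SEQ_CST)
 // lowers to (value names illustrative)
 //   %old = atomicrmw xchg i8* %p, i8 1 seq_cst
 //   %tobool = icmp ne i8 %old, 0
 // while a non-constant ordering goes through the switch over the five
 // orderings emitted above, merging the exchanged bytes in the PHI.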
2811 
2812  case Builtin::BI__atomic_clear: {
2813  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
2814  bool Volatile =
2815  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
2816 
2817  Address Ptr = EmitPointerWithAlignment(E->getArg(0));
2818  unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
2819  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2820  Value *NewVal = Builder.getInt8(0);
2821  Value *Order = EmitScalarExpr(E->getArg(1));
2822  if (isa<llvm::ConstantInt>(Order)) {
2823  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2824  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
2825  switch (ord) {
2826  case 0: // memory_order_relaxed
2827  default: // invalid order
2828  Store->setOrdering(llvm::AtomicOrdering::Monotonic);
2829  break;
2830  case 3: // memory_order_release
2831  Store->setOrdering(llvm::AtomicOrdering::Release);
2832  break;
2833  case 5: // memory_order_seq_cst
2834  Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
2835  break;
2836  }
2837  return RValue::get(nullptr);
2838  }
2839 
2840  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2841 
2842  llvm::BasicBlock *BBs[3] = {
2843  createBasicBlock("monotonic", CurFn),
2844  createBasicBlock("release", CurFn),
2845  createBasicBlock("seqcst", CurFn)
2846  };
2847  llvm::AtomicOrdering Orders[3] = {
2848  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
2849  llvm::AtomicOrdering::SequentiallyConsistent};
2850 
2851  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2852  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2853 
2854  for (unsigned i = 0; i < 3; ++i) {
2855  Builder.SetInsertPoint(BBs[i]);
2856  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
2857  Store->setOrdering(Orders[i]);
2858  Builder.CreateBr(ContBB);
2859  }
2860 
2861  SI->addCase(Builder.getInt32(0), BBs[0]);
2862  SI->addCase(Builder.getInt32(3), BBs[1]);
2863  SI->addCase(Builder.getInt32(5), BBs[2]);
2864 
2865  Builder.SetInsertPoint(ContBB);
2866  return RValue::get(nullptr);
2867  }
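 // For example, with a constant ordering argument,
 //   __atomic_clear(p, __ATOMIC_RELEASE)
 // lowers to a single atomic store of zero (alignment illustrative):
 //   store atomic i8 0, i8* %p release, align 1
 // and a non-constant ordering selects monotonic, release, or seq_cst
 // through the switch emitted above.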
2868 
2869  case Builtin::BI__atomic_thread_fence:
2870  case Builtin::BI__atomic_signal_fence:
2871  case Builtin::BI__c11_atomic_thread_fence:
2872  case Builtin::BI__c11_atomic_signal_fence: {
2873  llvm::SyncScope::ID SSID;
2874  if (BuiltinID == Builtin::BI__atomic_signal_fence ||
2875  BuiltinID == Builtin::BI__c11_atomic_signal_fence)
2876  SSID = llvm::SyncScope::SingleThread;
2877  else
2878  SSID = llvm::SyncScope::System;
2879  Value *Order = EmitScalarExpr(E->getArg(0));
2880  if (isa<llvm::ConstantInt>(Order)) {
2881  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2882  switch (ord) {
2883  case 0: // memory_order_relaxed
2884  default: // invalid order
2885  break;
2886  case 1: // memory_order_consume
2887  case 2: // memory_order_acquire
2888  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2889  break;
2890  case 3: // memory_order_release
2891  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2892  break;
2893  case 4: // memory_order_acq_rel
2894  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2895  break;
2896  case 5: // memory_order_seq_cst
2897  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2898  break;
2899  }
2900  return RValue::get(nullptr);
2901  }
2902 
2903  llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
2904  AcquireBB = createBasicBlock("acquire", CurFn);
2905  ReleaseBB = createBasicBlock("release", CurFn);
2906  AcqRelBB = createBasicBlock("acqrel", CurFn);
2907  SeqCstBB = createBasicBlock("seqcst", CurFn);
2908  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2909 
2910  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2911  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
2912 
2913  Builder.SetInsertPoint(AcquireBB);
2914  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2915  Builder.CreateBr(ContBB);
2916  SI->addCase(Builder.getInt32(1), AcquireBB);
2917  SI->addCase(Builder.getInt32(2), AcquireBB);
2918 
2919  Builder.SetInsertPoint(ReleaseBB);
2920  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2921  Builder.CreateBr(ContBB);
2922  SI->addCase(Builder.getInt32(3), ReleaseBB);
2923 
2924  Builder.SetInsertPoint(AcqRelBB);
2925  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2926  Builder.CreateBr(ContBB);
2927  SI->addCase(Builder.getInt32(4), AcqRelBB);
2928 
2929  Builder.SetInsertPoint(SeqCstBB);
2930  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2931  Builder.CreateBr(ContBB);
2932  SI->addCase(Builder.getInt32(5), SeqCstBB);
2933 
2934  Builder.SetInsertPoint(ContBB);
2935  return RValue::get(nullptr);
2936  }
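 // For example, __atomic_thread_fence(__ATOMIC_ACQUIRE) lowers to
 //   fence acquire
 // whereas __atomic_signal_fence(__ATOMIC_ACQUIRE) lowers to
 //   fence syncscope("singlethread") acquire
 // since a signal fence only needs to order against the current thread.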
2937 
2938  case Builtin::BI__builtin_signbit:
2939  case Builtin::BI__builtin_signbitf:
2940  case Builtin::BI__builtin_signbitl: {
2941  return RValue::get(
2942  Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
2943  ConvertType(E->getType())));
2944  }
2945  case Builtin::BI__annotation: {
2946  // Re-encode each wide string to UTF8 and make an MDString.
 2947  SmallVector<Metadata *, 1> Strings;
 2948  for (const Expr *Arg : E->arguments()) {
2949  const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
2950  assert(Str->getCharByteWidth() == 2);
2951  StringRef WideBytes = Str->getBytes();
2952  std::string StrUtf8;
2953  if (!convertUTF16ToUTF8String(
2954  makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
2955  CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
2956  continue;
2957  }
2958  Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
2959  }
2960 
 2961  // Build an MDTuple of MDStrings and emit the intrinsic call.
2962  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
2963  MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
2964  Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
2965  return RValue::getIgnored();
2966  }
2967  case Builtin::BI__builtin_annotation: {
2968  llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
2969  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
2970  AnnVal->getType());
2971 
 2972  // Get the annotation string, go through casts. Sema requires this to be a
 2973  // non-wide string literal, potentially cast, so the cast<> is safe.
2974  const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
2975  StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
2976  return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
2977  }
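 // For example (operand details illustrative), __builtin_annotation(x, "tag")
 // on an i32 value becomes a call to the annotation intrinsic overloaded on
 // the value's type, roughly
 //   %r = call i32 @llvm.annotation.i32(i32 %x, i8* <"tag">, i8* <file>, i32 <line>)
 // with the string and source-location operands filled in by
 // EmitAnnotationCall.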
2978  case Builtin::BI__builtin_addcb:
2979  case Builtin::BI__builtin_addcs:
2980  case Builtin::BI__builtin_addc:
2981  case Builtin::BI__builtin_addcl:
2982  case Builtin::BI__builtin_addcll:
2983  case Builtin::BI__builtin_subcb:
2984  case Builtin::BI__builtin_subcs:
2985  case Builtin::BI__builtin_subc:
2986  case Builtin::BI__builtin_subcl:
2987  case Builtin::BI__builtin_subcll: {
2988 
2989  // We translate all of these builtins from expressions of the form:
2990  // int x = ..., y = ..., carryin = ..., carryout, result;
2991  // result = __builtin_addc(x, y, carryin, &carryout);
2992  //
2993  // to LLVM IR of the form:
2994  //
2995  // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
2996  // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
2997  // %carry1 = extractvalue {i32, i1} %tmp1, 1
2998  // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
2999  // i32 %carryin)
3000  // %result = extractvalue {i32, i1} %tmp2, 0
3001  // %carry2 = extractvalue {i32, i1} %tmp2, 1
3002  // %tmp3 = or i1 %carry1, %carry2
3003  // %tmp4 = zext i1 %tmp3 to i32
3004  // store i32 %tmp4, i32* %carryout
3005 
3006  // Scalarize our inputs.
3007  llvm::Value *X = EmitScalarExpr(E->getArg(0));
3008  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
3009  llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
3010  Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
3011 
3012  // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
3013  llvm::Intrinsic::ID IntrinsicId;
3014  switch (BuiltinID) {
3015  default: llvm_unreachable("Unknown multiprecision builtin id.");
3016  case Builtin::BI__builtin_addcb:
3017  case Builtin::BI__builtin_addcs:
3018  case Builtin::BI__builtin_addc:
3019  case Builtin::BI__builtin_addcl:
3020  case Builtin::BI__builtin_addcll:
3021  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
3022  break;
3023  case Builtin::BI__builtin_subcb:
3024  case Builtin::BI__builtin_subcs:
3025  case Builtin::BI__builtin_subc:
3026  case Builtin::BI__builtin_subcl:
3027  case Builtin::BI__builtin_subcll:
3028  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
3029  break;
3030  }
3031 
3032  // Construct our resulting LLVM IR expression.
3033  llvm::Value *Carry1;
3034  llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
3035  X, Y, Carry1);
3036  llvm::Value *Carry2;
3037  llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
3038  Sum1, Carryin, Carry2);
3039  llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
3040  X->getType());
3041  Builder.CreateStore(CarryOut, CarryOutPtr);
3042  return RValue::get(Sum2);
3043  }
3044 
3045  case Builtin::BI__builtin_add_overflow:
3046  case Builtin::BI__builtin_sub_overflow:
3047  case Builtin::BI__builtin_mul_overflow: {
3048  const clang::Expr *LeftArg = E->getArg(0);
3049  const clang::Expr *RightArg = E->getArg(1);
3050  const clang::Expr *ResultArg = E->getArg(2);
3051 
3052  clang::QualType ResultQTy =
3053  ResultArg->getType()->castAs<PointerType>()->getPointeeType();
3054 
3055  WidthAndSignedness LeftInfo =
3056  getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
3057  WidthAndSignedness RightInfo =
3058  getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
3059  WidthAndSignedness ResultInfo =
3060  getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
3061 
3062  // Handle mixed-sign multiplication as a special case, because adding
3063  // runtime or backend support for our generic irgen would be too expensive.
3064  if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
3065  return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
3066  RightInfo, ResultArg, ResultQTy,
3067  ResultInfo);
3068 
3069  WidthAndSignedness EncompassingInfo =
3070  EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
3071 
3072  llvm::Type *EncompassingLLVMTy =
3073  llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
3074 
3075  llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
3076 
3077  llvm::Intrinsic::ID IntrinsicId;
3078  switch (BuiltinID) {
3079  default:
3080  llvm_unreachable("Unknown overflow builtin id.");
3081  case Builtin::BI__builtin_add_overflow:
3082  IntrinsicId = EncompassingInfo.Signed
3083  ? llvm::Intrinsic::sadd_with_overflow
3084  : llvm::Intrinsic::uadd_with_overflow;
3085  break;
3086  case Builtin::BI__builtin_sub_overflow:
3087  IntrinsicId = EncompassingInfo.Signed
3088  ? llvm::Intrinsic::ssub_with_overflow
3089  : llvm::Intrinsic::usub_with_overflow;
3090  break;
3091  case Builtin::BI__builtin_mul_overflow:
3092  IntrinsicId = EncompassingInfo.Signed
3093  ? llvm::Intrinsic::smul_with_overflow
3094  : llvm::Intrinsic::umul_with_overflow;
3095  break;
3096  }
3097 
3098  llvm::Value *Left = EmitScalarExpr(LeftArg);
3099  llvm::Value *Right = EmitScalarExpr(RightArg);
3100  Address ResultPtr = EmitPointerWithAlignment(ResultArg);
3101 
3102  // Extend each operand to the encompassing type.
3103  Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
3104  Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
3105 
3106  // Perform the operation on the extended values.
3107  llvm::Value *Overflow, *Result;
3108  Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
3109 
3110  if (EncompassingInfo.Width > ResultInfo.Width) {
3111  // The encompassing type is wider than the result type, so we need to
3112  // truncate it.
3113  llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
3114 
3115  // To see if the truncation caused an overflow, we will extend
3116  // the result and then compare it to the original result.
3117  llvm::Value *ResultTruncExt = Builder.CreateIntCast(
3118  ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
3119  llvm::Value *TruncationOverflow =
3120  Builder.CreateICmpNE(Result, ResultTruncExt);
3121 
3122  Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
3123  Result = ResultTrunc;
3124  }
3125 
3126  // Finally, store the result using the pointer.
3127  bool isVolatile =
3128  ResultArg->getType()->getPointeeType().isVolatileQualified();
3129  Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
3130 
3131  return RValue::get(Overflow);
3132  }
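 // A sketch of the generic path above, assuming an LP64 target (types and
 // names illustrative):
 //   bool o = __builtin_add_overflow(a /*int*/, b /*unsigned*/, &r /*long*/);
 // The encompassing type is a signed 64-bit integer, so both operands are
 // sign-/zero-extended to i64 and @llvm.sadd.with.overflow.i64 is emitted.
 // Since 'long' is already 64 bits wide here, no truncation check is needed;
 // the sum is stored through &r and the intrinsic's overflow bit is returned.
 // A narrower result type would additionally compare the truncated value,
 // re-extended, against the full-width result, as coded above.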
3133 
3134  case Builtin::BI__builtin_uadd_overflow:
3135  case Builtin::BI__builtin_uaddl_overflow:
3136  case Builtin::BI__builtin_uaddll_overflow:
3137  case Builtin::BI__builtin_usub_overflow:
3138  case Builtin::BI__builtin_usubl_overflow:
3139  case Builtin::BI__builtin_usubll_overflow:
3140  case Builtin::BI__builtin_umul_overflow:
3141  case Builtin::BI__builtin_umull_overflow:
3142  case Builtin::BI__builtin_umulll_overflow:
3143  case Builtin::BI__builtin_sadd_overflow:
3144  case Builtin::BI__builtin_saddl_overflow:
3145  case Builtin::BI__builtin_saddll_overflow:
3146  case Builtin::BI__builtin_ssub_overflow:
3147  case Builtin::BI__builtin_ssubl_overflow:
3148  case Builtin::BI__builtin_ssubll_overflow:
3149  case Builtin::BI__builtin_smul_overflow:
3150  case Builtin::BI__builtin_smull_overflow:
3151  case Builtin::BI__builtin_smulll_overflow: {
3152 
3153  // We translate all of these builtins directly to the relevant llvm IR node.
3154 
3155  // Scalarize our inputs.
3156  llvm::Value *X = EmitScalarExpr(E->getArg(0));
3157  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
3158  Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
3159 
3160  // Decide which of the overflow intrinsics we are lowering to:
3161  llvm::Intrinsic::ID IntrinsicId;
3162  switch (BuiltinID) {
3163  default: llvm_unreachable("Unknown overflow builtin id.");
3164  case Builtin::BI__builtin_uadd_overflow:
3165  case Builtin::BI__builtin_uaddl_overflow:
3166  case Builtin::BI__builtin_uaddll_overflow:
3167  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
3168  break;
3169  case Builtin::BI__builtin_usub_overflow:
3170  case Builtin::BI__builtin_usubl_overflow:
3171  case Builtin::BI__builtin_usubll_overflow:
3172  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
3173  break;
3174  case Builtin::BI__builtin_umul_overflow:
3175  case Builtin::BI__builtin_umull_overflow:
3176  case Builtin::BI__builtin_umulll_overflow:
3177  IntrinsicId = llvm::Intrinsic::umul_with_overflow;
3178  break;
3179  case Builtin::BI__builtin_sadd_overflow:
3180  case Builtin::BI__builtin_saddl_overflow:
3181  case Builtin::BI__builtin_saddll_overflow:
3182  IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
3183  break;
3184  case Builtin::BI__builtin_ssub_overflow:
3185  case Builtin::BI__builtin_ssubl_overflow:
3186  case Builtin::BI__builtin_ssubll_overflow:
3187  IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
3188  break;
3189  case Builtin::BI__builtin_smul_overflow:
3190  case Builtin::BI__builtin_smull_overflow:
3191  case Builtin::BI__builtin_smulll_overflow:
3192  IntrinsicId = llvm::Intrinsic::smul_with_overflow;
3193  break;
3194  }
3195 
3196 
3197  llvm::Value *Carry;
3198  llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
3199  Builder.CreateStore(Sum, SumOutPtr);
3200 
3201  return RValue::get(Carry);
3202  }
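 // For example, __builtin_sadd_overflow(x, y, &sum) with 'int' operands maps
 // to (value names illustrative):
 //   %pair = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
 //   %sum1 = extractvalue { i32, i1 } %pair, 0   ; stored through &sum
 //   %ovf  = extractvalue { i32, i1 } %pair, 1   ; returned as the bool result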
3203  case Builtin::BI__builtin_addressof:
3204  return RValue::get(EmitLValue(E->getArg(0)).getPointer());
3205  case Builtin::BI__builtin_operator_new:
3206  return EmitBuiltinNewDeleteCall(
3207  E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
3208  case Builtin::BI__builtin_operator_delete:
3209  return EmitBuiltinNewDeleteCall(
3210  E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
3211 
3212  case Builtin::BI__noop:
3213  // __noop always evaluates to an integer literal zero.
3214  return RValue::get(ConstantInt::get(IntTy, 0));
3215  case Builtin::BI__builtin_call_with_static_chain: {
3216  const CallExpr *Call = cast<CallExpr>(E->getArg(0));
3217  const Expr *Chain = E->getArg(1);
3218  return EmitCall(Call->getCallee()->getType(),
3219  EmitCallee(Call->getCallee()), Call, ReturnValue,
3220  EmitScalarExpr(Chain));
3221  }
3222  case Builtin::BI_InterlockedExchange8:
3223  case Builtin::BI_InterlockedExchange16:
3224  case Builtin::BI_InterlockedExchange:
3225  case Builtin::BI_InterlockedExchangePointer:
3226  return RValue::get(
3227  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
3228  case Builtin::BI_InterlockedCompareExchangePointer:
3229  case Builtin::BI_InterlockedCompareExchangePointer_nf: {
3230  llvm::Type *RTy;
3231  llvm::IntegerType *IntType =
3232  IntegerType::get(getLLVMContext(),
3233  getContext().getTypeSize(E->getType()));
3234  llvm::Type *IntPtrType = IntType->getPointerTo();
3235 
3236  llvm::Value *Destination =
3237  Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
3238 
3239  llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
3240  RTy = Exchange->getType();
3241  Exchange = Builder.CreatePtrToInt(Exchange, IntType);
3242 
3243  llvm::Value *Comparand =
3244  Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
3245 
3246  auto Ordering =
3247  BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
3248  AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
3249 
3250  auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
3251  Ordering, Ordering);
3252  Result->setVolatile(true);
3253 
3254  return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
3255  0),
3256  RTy));
3257  }
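 // A sketch of the sequence above on a 64-bit target (value names
 // illustrative): the operands are converted to i64 and a volatile cmpxchg
 // is emitted,
 //   %pair = cmpxchg volatile i64* %dst, i64 %comparand, i64 %exchange seq_cst seq_cst
 //   %old  = extractvalue { i64, i1 } %pair, 0   ; inttoptr'd back to the result type
 // with monotonic ordering used instead for the _nf ("no fence") variant.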
3258  case Builtin::BI_InterlockedCompareExchange8:
3259  case Builtin::BI_InterlockedCompareExchange16:
3260  case Builtin::BI_InterlockedCompareExchange:
3261  case Builtin::BI_InterlockedCompareExchange64:
3262  return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
3263  case Builtin::BI_InterlockedIncrement16:
3264  case Builtin::BI_InterlockedIncrement:
3265  return RValue::get(
3266  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
3267  case Builtin::BI_InterlockedDecrement16:
3268  case Builtin::BI_InterlockedDecrement:
3269  return RValue::get(
3270  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
3271  case Builtin::BI_InterlockedAnd8:
3272  case Builtin::BI_InterlockedAnd16:
3273  case Builtin::BI_InterlockedAnd:
3274  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
3275  case Builtin::BI_InterlockedExchangeAdd8:
3276  case Builtin::BI_InterlockedExchangeAdd16:
3277  case Builtin::BI_InterlockedExchangeAdd:
3278  return RValue::get(
3279  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
3280  case Builtin::BI_InterlockedExchangeSub8:
3281  case Builtin::BI_InterlockedExchangeSub16:
3282  case Builtin::BI_InterlockedExchangeSub:
3283  return RValue::get(
3284  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
3285  case Builtin::BI_InterlockedOr8:
3286  case Builtin::BI_InterlockedOr16:
3287  case Builtin::BI_InterlockedOr:
3288  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
3289  case Builtin::BI_InterlockedXor8:
3290  case Builtin::BI_InterlockedXor16:
3291  case Builtin::BI_InterlockedXor:
3292  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
3293 
3294  case Builtin::BI_bittest64:
3295  case Builtin::BI_bittest:
3296  case Builtin::BI_bittestandcomplement64:
3297  case Builtin::BI_bittestandcomplement:
3298  case Builtin::BI_bittestandreset64:
3299  case Builtin::BI_bittestandreset:
3300  case Builtin::BI_bittestandset64:
3301  case Builtin::BI_bittestandset:
3302  case Builtin::BI_interlockedbittestandreset:
3303  case Builtin::BI_interlockedbittestandreset64:
3304  case Builtin::BI_interlockedbittestandset64:
3305  case Builtin::BI_interlockedbittestandset:
3306  case Builtin::BI_interlockedbittestandset_acq:
3307  case Builtin::BI_interlockedbittestandset_rel:
3308  case Builtin::BI_interlockedbittestandset_nf:
3309  case Builtin::BI_interlockedbittestandreset_acq:
3310  case Builtin::BI_interlockedbittestandreset_rel:
3311  case Builtin::BI_interlockedbittestandreset_nf:
3312  return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
3313 
3314  case Builtin::BI__exception_code:
3315  case Builtin::BI_exception_code:
3316  return RValue::get(EmitSEHExceptionCode());
3317  case Builtin::BI__exception_info:
3318  case Builtin::BI_exception_info:
3319  return RValue::get(EmitSEHExceptionInfo());
3320  case Builtin::BI__abnormal_termination:
3321  case Builtin::BI_abnormal_termination:
3322  return RValue::get(EmitSEHAbnormalTermination());
3323  case Builtin::BI_setjmpex:
3324  if (getTarget().getTriple().isOSMSVCRT())
3325  return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
3326  break;
3327  case Builtin::BI_setjmp:
3328  if (getTarget().getTriple().isOSMSVCRT()) {
3329  if (getTarget().getTriple().getArch() == llvm::Triple::x86)
3330  return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
3331  else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
3332  return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
3333  return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
3334  }
3335  break;
3336 
3337  case Builtin::BI__GetExceptionInfo: {
3338  if (llvm::GlobalVariable *GV =
3339  CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
3340  return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
3341  break;
3342  }
3343 
3344  case Builtin::BI__fastfail:
3345  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
3346 
3347  case Builtin::BI__builtin_coro_size: {
3348  auto & Context = getContext();
3349  auto SizeTy = Context.getSizeType();
3350  auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
3351  Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
3352  return RValue::get(Builder.CreateCall(F));
3353  }
3354 
3355  case Builtin::BI__builtin_coro_id:
3356  return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
3357  case Builtin::BI__builtin_coro_promise:
3358  return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
3359  case Builtin::BI__builtin_coro_resume:
3360  return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
3361  case Builtin::BI__builtin_coro_frame:
3362  return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
3363  case Builtin::BI__builtin_coro_noop:
3364  return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
3365  case Builtin::BI__builtin_coro_free:
3366  return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
3367  case Builtin::BI__builtin_coro_destroy:
3368  return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
3369  case Builtin::BI__builtin_coro_done:
3370  return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
3371  case Builtin::BI__builtin_coro_alloc:
3372  return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
3373  case Builtin::BI__builtin_coro_begin:
3374  return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
3375  case Builtin::BI__builtin_coro_end:
3376  return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
3377  case Builtin::BI__builtin_coro_suspend:
3378  return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
3379  case Builtin::BI__builtin_coro_param:
3380  return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
3381 
3382  // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
3383  case Builtin::BIread_pipe:
3384  case Builtin::BIwrite_pipe: {
3385  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
3386  *Arg1 = EmitScalarExpr(E->getArg(1));
3387  CGOpenCLRuntime OpenCLRT(CGM);
3388  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
3389  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
3390 
3391  // Type of the generic packet parameter.
3392  unsigned GenericAS =
3393  getContext().getTargetAddressSpace(LangAS::opencl_generic);
3394  llvm::Type *I8PTy = llvm::PointerType::get(
3395  llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
3396 
 3397  // Test which overloaded version we should generate the call for.
3398  if (2U == E->getNumArgs()) {
3399  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
3400  : "__write_pipe_2";
 3401  // Create a generic function type so the call works with any builtin or
 3402  // user-defined type.
3403  llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
3404  llvm::FunctionType *FTy = llvm::FunctionType::get(
3405  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3406  Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
3407  return RValue::get(
3408  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3409  {Arg0, BCast, PacketSize, PacketAlign}));
3410  } else {
3411  assert(4 == E->getNumArgs() &&
3412  "Illegal number of parameters to pipe function");
3413  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
3414  : "__write_pipe_4";
3415 
3416  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
3417  Int32Ty, Int32Ty};
3418  Value *Arg2 = EmitScalarExpr(E->getArg(2)),
3419  *Arg3 = EmitScalarExpr(E->getArg(3));
3420  llvm::FunctionType *FTy = llvm::FunctionType::get(
3421  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3422  Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
3423  // We know the third argument is an integer type, but we may need to cast
3424  // it to i32.
3425  if (Arg2->getType() != Int32Ty)
3426  Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
3427  return RValue::get(Builder.CreateCall(
3428  CGM.CreateRuntimeFunction(FTy, Name),
3429  {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
3430  }
3431  }
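 // For example, the two-argument form read_pipe(p, &val) above calls the
 // __read_pipe_2 runtime function with the packet pointer cast to a generic
 // address-space i8*, plus the packet size and alignment that CGOpenCLRuntime
 // derives from the pipe's element type; the four-argument (reserved) form
 // calls __read_pipe_4 with the reservation id and index as well.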
 3432  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
3433  // functions
3434  case Builtin::BIreserve_read_pipe:
3435  case Builtin::BIreserve_write_pipe:
3436  case Builtin::BIwork_group_reserve_read_pipe:
3437  case Builtin::BIwork_group_reserve_write_pipe:
3438  case Builtin::BIsub_group_reserve_read_pipe:
3439  case Builtin::BIsub_group_reserve_write_pipe: {
3440  // Composing the mangled name for the function.
3441  const char *Name;
3442  if (BuiltinID == Builtin::BIreserve_read_pipe)
3443  Name = "__reserve_read_pipe";
3444  else if (BuiltinID == Builtin::BIreserve_write_pipe)
3445  Name = "__reserve_write_pipe";
3446  else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
3447  Name = "__work_group_reserve_read_pipe";
3448  else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
3449  Name = "__work_group_reserve_write_pipe";
3450  else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
3451  Name = "__sub_group_reserve_read_pipe";
3452  else
3453  Name = "__sub_group_reserve_write_pipe";
3454 
3455  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
3456  *Arg1 = EmitScalarExpr(E->getArg(1));
3457  llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
3458  CGOpenCLRuntime OpenCLRT(CGM);
3459  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
3460  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
3461 
3462  // Building the generic function prototype.
3463  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
3464  llvm::FunctionType *FTy = llvm::FunctionType::get(
3465  ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3466  // We know the second argument is an integer type, but we may need to cast
3467  // it to i32.
3468  if (Arg1->getType() != Int32Ty)
3469  Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
3470  return RValue::get(
3471  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3472  {Arg0, Arg1, PacketSize, PacketAlign}));
3473  }
3474  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
3475  // functions
3476  case Builtin::BIcommit_read_pipe:
3477  case Builtin::BIcommit_write_pipe:
3478  case Builtin::BIwork_group_commit_read_pipe:
3479  case Builtin::BIwork_group_commit_write_pipe:
3480  case Builtin::BIsub_group_commit_read_pipe:
3481  case Builtin::BIsub_group_commit_write_pipe: {
3482  const char *Name;
3483  if (BuiltinID == Builtin::BIcommit_read_pipe)
3484  Name = "__commit_read_pipe";
3485  else if (BuiltinID == Builtin::BIcommit_write_pipe)
3486  Name = "__commit_write_pipe";
3487  else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
3488  Name = "__work_group_commit_read_pipe";
3489  else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
3490  Name = "__work_group_commit_write_pipe";
3491  else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
3492  Name = "__sub_group_commit_read_pipe";
3493  else
3494  Name = "__sub_group_commit_write_pipe";
3495 
3496  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
3497  *Arg1 = EmitScalarExpr(E->getArg(1));
3498  CGOpenCLRuntime OpenCLRT(CGM);
3499  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
3500  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
3501 
3502  // Building the generic function prototype.
3503  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
3504  llvm::FunctionType *FTy =
3505  llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
3506  llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3507 
3508  return RValue::get(
3509  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3510  {Arg0, Arg1, PacketSize, PacketAlign}));
3511  }
3512  // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
3513  case Builtin::BIget_pipe_num_packets:
3514  case Builtin::BIget_pipe_max_packets: {
3515  const char *BaseName;
3516  const PipeType *PipeTy = E->getArg(0)->getType()->getAs<PipeType>();
3517  if (BuiltinID == Builtin::BIget_pipe_num_packets)
3518  BaseName = "__get_pipe_num_packets";
3519  else
3520  BaseName = "__get_pipe_max_packets";
3521  auto Name = std::string(BaseName) +
3522  std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
3523 
3524  // Building the generic function prototype.
3525  Value *Arg0 = EmitScalarExpr(E->getArg(0));
3526  CGOpenCLRuntime OpenCLRT(CGM);
3527  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
3528  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
3529  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
3530  llvm::FunctionType *FTy = llvm::FunctionType::get(
3531  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3532 
3533  return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3534  {Arg0, PacketSize, PacketAlign}));
3535  }
3536 
3537  // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
3538  case Builtin::BIto_global:
3539  case Builtin::BIto_local:
3540  case Builtin::BIto_private: {
3541  auto Arg0 = EmitScalarExpr(E->getArg(0));
3542  auto NewArgT = llvm::PointerType::get(Int8Ty,
3543  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
3544  auto NewRetT = llvm::PointerType::get(Int8Ty,
3545  CGM.getContext().getTargetAddressSpace(
 3546  E->getType()->getPointeeType().getAddressSpace()));
 3547  auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
3548  llvm::Value *NewArg;
3549  if (Arg0->getType()->getPointerAddressSpace() !=
3550  NewArgT->getPointerAddressSpace())
3551  NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
3552  else
3553  NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
3554  auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
3555  auto NewCall =
3556  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
3557  return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
3558  ConvertType(E->getType())));
3559  }
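 // For example, to_global(p) above is emitted as a call to a runtime function
 // named __to_global that takes the pointer in the generic address space; the
 // returned pointer is then cast back to the source-level result type.
 // to_local and to_private follow the same pattern, with the name formed from
 // the callee's name.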
3560 
3561  // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
3562  // It contains four different overload formats specified in Table 6.13.17.1.
3563  case Builtin::BIenqueue_kernel: {
3564  StringRef Name; // Generated function call name
3565  unsigned NumArgs = E->getNumArgs();
3566 
3567  llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
3568  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3569  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3570 
3571  llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
3572  llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
3573  LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
3574  llvm::Value *Range = NDRangeL.getAddress().getPointer();
3575  llvm::Type *RangeTy = NDRangeL.getAddress().getType();
3576 
3577  if (NumArgs == 4) {
3578  // The most basic form of the call with parameters:
3579  // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
3580  Name = "__enqueue_kernel_basic";
3581  llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
3582  GenericVoidPtrTy};
3583  llvm::FunctionType *FTy = llvm::FunctionType::get(
3584  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3585 
3586  auto Info =
3587  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
3588  llvm::Value *Kernel =
3589  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3590  llvm::Value *Block =
3591  Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3592 
3593  AttrBuilder B;
3594  B.addAttribute(Attribute::ByVal);
3595  llvm::AttributeList ByValAttrSet =
3596  llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
3597 
3598  auto RTCall =
3599  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
3600  {Queue, Flags, Range, Kernel, Block});
3601  RTCall->setAttributes(ByValAttrSet);
3602  return RValue::get(RTCall);
3603  }
3604  assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
3605 
3606  // Create a temporary array to hold the sizes of local pointer arguments
3607  // for the block. \p First is the position of the first size argument.
3608  auto CreateArrayForSizeVar = [=](unsigned First)
3609  -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
3610  llvm::APInt ArraySize(32, NumArgs - First);
3611  QualType SizeArrayTy = getContext().getConstantArrayType(
3612  getContext().getSizeType(), ArraySize, ArrayType::Normal,
3613  /*IndexTypeQuals=*/0);
3614  auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
3615  llvm::Value *TmpPtr = Tmp.getPointer();
3616  llvm::Value *TmpSize = EmitLifetimeStart(
3617  CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
3618  llvm::Value *ElemPtr;
3619  // Each of the following arguments specifies the size of the corresponding
3620  // argument passed to the enqueued block.
3621  auto *Zero = llvm::ConstantInt::get(IntTy, 0);
3622  for (unsigned I = First; I < NumArgs; ++I) {
3623  auto *Index = llvm::ConstantInt::get(IntTy, I - First);
3624  auto *GEP = Builder.CreateGEP(TmpPtr, {Zero, Index});
3625  if (I == First)
3626  ElemPtr = GEP;
3627  auto *V =
3628  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
3629  Builder.CreateAlignedStore(
3630  V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
3631  }
3632  return std::tie(ElemPtr, TmpSize, TmpPtr);
3633  };
3634 
3635  // Could have events and/or varargs.
3636  if (E->getArg(3)->getType()->isBlockPointerType()) {
3637  // No events passed, but has variadic arguments.
3638  Name = "__enqueue_kernel_varargs";
3639  auto Info =
3640  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
3641  llvm::Value *Kernel =
3642  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3643  auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3644  llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
3645  std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
3646 
3647  // Create a vector of the arguments, as well as a constant value to
3648  // express to the runtime the number of variadic arguments.
3649  std::vector<llvm::Value *> Args = {
3650  Queue, Flags, Range,
3651  Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
3652  ElemPtr};
3653  std::vector<llvm::Type *> ArgTys = {
3654  QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
3655  GenericVoidPtrTy, IntTy, ElemPtr->getType()};
3656 
3657  llvm::FunctionType *FTy = llvm::FunctionType::get(
3658  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3659  auto Call =
 3660  RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
 3661  llvm::ArrayRef<llvm::Value *>(Args)));
3662  if (TmpSize)
3663  EmitLifetimeEnd(TmpSize, TmpPtr);
3664  return Call;
3665  }
3666  // Any calls now have event arguments passed.
3667  if (NumArgs >= 7) {
3668  llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
3669  llvm::Type *EventPtrTy = EventTy->getPointerTo(
3670  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
3671 
3672  llvm::Value *NumEvents =
3673  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
3674  llvm::Value *EventList =
3675  E->getArg(4)->getType()->isArrayType()
3676  ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
3677  : EmitScalarExpr(E->getArg(4));
3678  llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
3679  // Convert to generic address space.
3680  EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
3681  ClkEvent = ClkEvent->getType()->isIntegerTy()
3682  ? Builder.CreateBitOrPointerCast(ClkEvent, EventPtrTy)
3683  : Builder.CreatePointerCast(ClkEvent, EventPtrTy);
3684  auto Info =
3685  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
3686  llvm::Value *Kernel =
3687  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3688  llvm::Value *Block =
3689  Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3690 
3691  std::vector<llvm::Type *> ArgTys = {
3692  QueueTy, Int32Ty, RangeTy, Int32Ty,
3693  EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
3694 
3695  std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
3696  EventList, ClkEvent, Kernel, Block};
3697 
3698  if (NumArgs == 7) {
3699  // Has events but no variadics.
3700  Name = "__enqueue_kernel_basic_events";
3701  llvm::FunctionType *FTy = llvm::FunctionType::get(
3702  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3703  return RValue::get(
 3704  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
 3705  llvm::ArrayRef<llvm::Value *>(Args)));
3706  }
3707  // Has event info and variadics
3708  // Pass the number of variadics to the runtime function too.
3709  Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
3710  ArgTys.push_back(Int32Ty);
3711  Name = "__enqueue_kernel_events_varargs";
3712 
3713  llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
3714  std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
3715  Args.push_back(ElemPtr);
3716  ArgTys.push_back(ElemPtr->getType());
3717 
3718  llvm::FunctionType *FTy = llvm::FunctionType::get(
3719  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3720  auto Call =
 3721  RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
 3722  llvm::ArrayRef<llvm::Value *>(Args)));
3723  if (TmpSize)
3724  EmitLifetimeEnd(TmpSize, TmpPtr);
3725  return Call;
3726  }
3727  LLVM_FALLTHROUGH;
3728  }
3729  // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
3730  // parameter.
3731  case Builtin::BIget_kernel_work_group_size: {
3732  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3733  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3734  auto Info =
3735  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3736  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3737  Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3738  return RValue::get(Builder.CreateCall(
3739  CGM.CreateRuntimeFunction(
3740  llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3741  false),
3742  "__get_kernel_work_group_size_impl"),
3743  {Kernel, Arg}));
3744  }
3745  case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
3746  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3747  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3748  auto Info =
3749  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3750  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3751  Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3752  return RValue::get(Builder.CreateCall(
3753  CGM.CreateRuntimeFunction(
3754  llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3755  false),
3756  "__get_kernel_preferred_work_group_size_multiple_impl"),
3757  {Kernel, Arg}));
3758  }
3759  case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
3760  case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
3761  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3762  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3763  LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
3764  llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
3765  auto Info =
3766  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
3767  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3768  Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3769  const char *Name =
3770  BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
3771  ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
3772  : "__get_kernel_sub_group_count_for_ndrange_impl";
3773  return RValue::get(Builder.CreateCall(
3774  CGM.CreateRuntimeFunction(
3775  llvm::FunctionType::get(
3776  IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
3777  false),
3778  Name),
3779  {NDRange, Kernel, Block}));
3780  }
3781 
3782  case Builtin::BI__builtin_store_half:
3783  case Builtin::BI__builtin_store_halff: {
3784  Value *Val = EmitScalarExpr(E->getArg(0));
3785  Address Address = EmitPointerWithAlignment(E->getArg(1));
3786  Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
3787  return RValue::get(Builder.CreateStore(HalfVal, Address));
3788  }
3789  case Builtin::BI__builtin_load_half: {
3790  Address Address = EmitPointerWithAlignment(E->getArg(0));
3791  Value *HalfVal = Builder.CreateLoad(Address);
3792  return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
3793  }
3794  case Builtin::BI__builtin_load_halff: {
3795  Address Address = EmitPointerWithAlignment(E->getArg(0));
3796  Value *HalfVal = Builder.CreateLoad(Address);
3797  return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
3798  }
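 // For example (value names illustrative), __builtin_store_halff(f, p)
 // truncates the float and stores it:
 //   %h = fptrunc float %f to half
 //   store half %h, half* %p
 // while __builtin_load_half(p) loads a half and extends it to double (the
 // 'f' variant extends to float instead).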
3799  case Builtin::BIprintf:
3800  if (getTarget().getTriple().isNVPTX())
3801  return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
3802  break;
3803  case Builtin::BI__builtin_canonicalize:
3804  case Builtin::BI__builtin_canonicalizef:
3805  case Builtin::BI__builtin_canonicalizel:
3806  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
3807 
3808  case Builtin::BI__builtin_thread_pointer: {
3809  if (!getContext().getTargetInfo().isTLSSupported())
3810  CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
3811  // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
3812  break;
3813  }
3814  case Builtin::BI__builtin_os_log_format:
3815  return emitBuiltinOSLogFormat(*E);
3816 
3817  case Builtin::BI__xray_customevent: {
3818  if (!ShouldXRayInstrumentFunction())
3819  return RValue::getIgnored();
3820 
 3821  if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
 3822  XRayInstrKind::Custom))
 3823  return RValue::getIgnored();
3824 
3825  if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
3826  if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
3827  return RValue::getIgnored();
3828 
3829  Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
3830  auto FTy = F->getFunctionType();
3831  auto Arg0 = E->getArg(0);
3832  auto Arg0Val = EmitScalarExpr(Arg0);
3833  auto Arg0Ty = Arg0->getType();
3834  auto PTy0 = FTy->getParamType(0);
3835  if (PTy0 != Arg0Val->getType()) {
3836  if (Arg0Ty->isArrayType())
3837  Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
3838  else
3839  Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
3840  }
3841  auto Arg1 = EmitScalarExpr(E->getArg(1));
3842  auto PTy1 = FTy->getParamType(1);
3843  if (PTy1 != Arg1->getType())
3844  Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
3845  return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
3846  }
3847 
3848  case Builtin::BI__xray_typedevent: {
3849  // TODO: There should be a way to always emit events even if the current
3850  // function is not instrumented. Losing events in a stream can cripple
3851  // a trace.
3852  if (!ShouldXRayInstrumentFunction())
3853  return RValue::getIgnored();
3854 
 3855  if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
 3856  XRayInstrKind::Typed))
 3857  return RValue::getIgnored();
3858 
3859  if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
3860  if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
3861  return RValue::getIgnored();
3862 
3863  Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
3864  auto FTy = F->getFunctionType();
3865  auto Arg0 = EmitScalarExpr(E->getArg(0));
3866  auto PTy0 = FTy->getParamType(0);
3867  if (PTy0 != Arg0->getType())
3868  Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
3869  auto Arg1 = E->getArg(1);
3870  auto Arg1Val = EmitScalarExpr(Arg1);
3871  auto Arg1Ty = Arg1->getType();
3872  auto PTy1 = FTy->getParamType(1);
3873  if (PTy1 != Arg1Val->getType()) {
3874  if (Arg1Ty->isArrayType())
3875  Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer();
3876  else
3877  Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
3878  }
3879  auto Arg2 = EmitScalarExpr(E->getArg(2));
3880  auto PTy2 = FTy->getParamType(2);
3881  if (PTy2 != Arg2->getType())
3882  Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
3883  return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
3884  }
3885 
3886  case Builtin::BI__builtin_ms_va_start:
3887  case Builtin::BI__builtin_ms_va_end:
3888  return RValue::get(
3889  EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
3890  BuiltinID == Builtin::BI__builtin_ms_va_start));
3891 
3892  case Builtin::BI__builtin_ms_va_copy: {
3893  // Lower this manually. We can't reliably determine whether or not any
3894  // given va_copy() is for a Win64 va_list from the calling convention
3895  // alone, because it's legal to do this from a System V ABI function.
3896  // With opaque pointer types, we won't have enough information in LLVM
3897  // IR to determine this from the argument types, either. Best to do it
3898  // now, while we have enough information.
3899  Address DestAddr = EmitMSVAListRef(E->getArg(0));
3900  Address SrcAddr = EmitMSVAListRef(E->getArg(1));
3901 
3902  llvm::Type *BPP = Int8PtrPtrTy;
3903 
3904  DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
3905  DestAddr.getAlignment());
3906  SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
3907  SrcAddr.getAlignment());
3908 
3909  Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
3910  return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
3911  }
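 // In effect this is a single pointer-sized copy, assuming the Win64 char*
 // va_list representation (value names illustrative):
 //   %ap.val = load i8*, i8** %ap
 //   store i8* %ap.val, i8** %cp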
3912  }
3913 
3914  // If this is an alias for a lib function (e.g. __builtin_sin), emit
3915  // the call using the normal call path, but using the unmangled
3916  // version of the function name.
3917  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
3918  return emitLibraryCall(*this, FD, E,
3919  CGM.getBuiltinLibFunction(FD, BuiltinID));
3920 
3921  // If this is a predefined lib function (e.g. malloc), emit the call
3922  // using exactly the normal call path.
3923  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
3924  return emitLibraryCall(*this, FD, E,
3925  cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
3926 
3927  // Check that a call to a target specific builtin has the correct target
3928  // features.
 3929  // This is placed down here to avoid checking non-target-specific builtins;
 3930  // however, if generic builtins start to require generic target features,
 3931  // then we can move this up to the beginning of the function.
3932  checkTargetFeatures(E, FD);
3933 
3934  if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
3935  LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
3936 
3937  // See if we have a target specific intrinsic.
3938  const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
3939  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
3940  StringRef Prefix =
3941  llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
3942  if (!Prefix.empty()) {
3943  IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
 3944  // NOTE: we don't need to perform a compatibility flag check here, since the
 3945  // intrinsics are declared in Builtins*.def via LANGBUILTIN, which gates the
 3946  // MS builtins via ALL_MS_LANGUAGES so they have already been filtered out.
3947  if (IntrinsicID == Intrinsic::not_intrinsic)
3948  IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
3949  }
3950 
3951  if (IntrinsicID != Intrinsic::not_intrinsic) {
 3952  SmallVector<Value*, 16> Args;
 3953 
3954  // Find out if any arguments are required to be integer constant
3955  // expressions.
3956  unsigned ICEArguments = 0;
 3957  ASTContext::GetBuiltinTypeError Error;
 3958  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3959  assert(Error == ASTContext::GE_None && "Should not codegen an error");
3960 
3961  Function *F = CGM.getIntrinsic(IntrinsicID);
3962  llvm::FunctionType *FTy = F->getFunctionType();
3963 
3964  for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
3965  Value *ArgValue;
3966  // If this is a normal argument, just emit it as a scalar.
3967  if ((ICEArguments & (1 << i)) == 0) {
3968  ArgValue = EmitScalarExpr(E->getArg(i));
3969  } else {
3970  // If this is required to be a constant, constant fold it so that we
3971  // know that the generated intrinsic gets a ConstantInt.
3972  llvm::APSInt Result;
3973  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
3974  assert(IsConst && "Constant arg isn't actually constant?");
3975  (void)IsConst;
3976  ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
3977  }
3978 
3979  // If the intrinsic arg type is different from the builtin arg type
3980  // we need to do a bit cast.
3981  llvm::Type *PTy = FTy->getParamType(i);
3982  if (PTy != ArgValue->getType()) {
3983  // XXX - vector of pointers?
3984  if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
3985  if (PtrTy->getAddressSpace() !=
3986  ArgValue->getType()->getPointerAddressSpace()) {
3987  ArgValue = Builder.CreateAddrSpaceCast(
3988  ArgValue,
3989  ArgValue->getType()->getPointerTo(PtrTy->getAddressSpace()));
3990  }
3991  }
3992 
 3993  assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
 3994  "Must be able to losslessly bit cast to param");
3995  ArgValue = Builder.CreateBitCast(ArgValue, PTy);
3996  }
3997 
3998  Args.push_back(ArgValue);
3999  }
4000 
4001  Value *V = Builder.CreateCall(F, Args);
4002  QualType BuiltinRetType = E->getType();
4003 
4004  llvm::Type *RetTy = VoidTy;
4005  if (!BuiltinRetType->isVoidType())
4006  RetTy = ConvertType(BuiltinRetType);
4007 
4008  if (RetTy != V->getType()) {
4009  // XXX - vector of pointers?
4010  if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
4011  if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
4012  V = Builder.CreateAddrSpaceCast(
4013  V, V->getType()->getPointerTo(PtrTy->getAddressSpace()));
4014  }
4015  }
4016 
4017  assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
4018  "Must be able to losslessly bit cast result type");
4019  V = Builder.CreateBitCast(V, RetTy);
4020  }
4021 
4022  return RValue::get(V);
4023  }
4024 
4025  // See if we have a target specific builtin that needs to be lowered.
4026  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
4027  return RValue::get(V);
4028 
4029  ErrorUnsupported(E, "builtin function");
4030 
4031  // Unknown builtin, for now just dump it out and return undef.
4032  return GetUndefRValue(E->getType());
4033 }
4034 
 4035 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
 4036  unsigned BuiltinID, const CallExpr *E,
4037  llvm::Triple::ArchType Arch) {
4038  switch (Arch) {
4039  case llvm::Triple::arm:
4040  case llvm::Triple::armeb:
4041  case llvm::Triple::thumb:
4042  case llvm::Triple::thumbeb:
4043  return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch);
4044  case llvm::Triple::aarch64:
4045  case llvm::Triple::aarch64_be:
4046  return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
4047  case llvm::Triple::x86:
4048  case llvm::Triple::x86_64:
4049  return CGF->EmitX86BuiltinExpr(BuiltinID, E);
4050  case llvm::Triple::ppc:
4051  case llvm::Triple::ppc64:
4052  case llvm::Triple::ppc64le:
4053  return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
4054  case llvm::Triple::r600:
4055  case llvm::Triple::amdgcn:
4056  return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
4057  case llvm::Triple::systemz:
4058  return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
4059  case llvm::Triple::nvptx:
4060  case llvm::Triple::nvptx64:
4061  return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
4062  case llvm::Triple::wasm32:
4063  case llvm::Triple::wasm64:
4064  return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
4065  case llvm::Triple::hexagon:
4066  return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
4067  default:
4068  return nullptr;
4069  }
4070 }
4071 
 4072 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
 4073  const CallExpr *E) {
4074  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
4075  assert(getContext().getAuxTargetInfo() && "Missing aux target info");
 4076  return EmitTargetArchBuiltinExpr(
 4077  this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
4078  getContext().getAuxTargetInfo()->getTriple().getArch());
4079  }
4080 
4081  return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
4082  getTarget().getTriple().getArch());
4083 }
4084 
4085 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
4086  NeonTypeFlags TypeFlags,
4087  bool HasLegalHalfType=true,
4088  bool V1Ty=false) {
4089  int IsQuad = TypeFlags.isQuad();
4090  switch (TypeFlags.getEltType()) {
4091  case NeonTypeFlags::Int8:
4092  case NeonTypeFlags::Poly8:
4093  return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
4094  case NeonTypeFlags::Int16:
4095  case NeonTypeFlags::Poly16:
4096  return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
 4097  case NeonTypeFlags::Float16:
 4098  if (HasLegalHalfType)
4099  return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
4100  else
4101  return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
4102  case NeonTypeFlags::Int32:
4103  return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
4104  case NeonTypeFlags::Int64:
4105  case NeonTypeFlags::Poly64:
4106  return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
 4107  case NeonTypeFlags::Poly128:
 4108  // FIXME: i128 and f128 don't get full support in Clang and LLVM.
 4109  // A lot of the i128 and f128 API is missing,
 4110  // so we use v16i8 to represent poly128 and rely on pattern matching.
4111  return llvm::VectorType::get(CGF->Int8Ty, 16);
 4112  case NeonTypeFlags::Float32:
 4113  return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
 4114  case NeonTypeFlags::Float64:
 4115  return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
4116  }
4117  llvm_unreachable("Unknown vector element type!");
4118 }
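// For example, a NeonTypeFlags value with element type Int32 yields <2 x i32>
// for a 64-bit ("D") register and <4 x i32> when the quad bit is set; the
// V1Ty flag instead forces a single-element vector for callers that need one.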
4119 
4120 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
4121  NeonTypeFlags IntTypeFlags) {
4122  int IsQuad = IntTypeFlags.isQuad();
4123  switch (IntTypeFlags.getEltType()) {
4124  case NeonTypeFlags::Int16:
4125  return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad));
4126  case NeonTypeFlags::Int32:
4127  return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
4128  case NeonTypeFlags::Int64:
4129  return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
4130  default:
4131  llvm_unreachable("Type can't be converted to floating-point!");
4132  }
4133 }
4134 
 4135 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
 4136  unsigned nElts = V->getType()->getVectorNumElements();
4137  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
4138  return Builder.CreateShuffleVector(V, V, SV, "lane");
4139 }
4140 
4141 Value *CodeGenFunction::EmitNeonCall(llvm::Function *F, SmallVectorImpl<Value*> &Ops,
4142  const char *name,
4143  unsigned shift, bool rightshift) {
4144  unsigned j = 0;
4145  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
4146  ai != ae; ++ai, ++j)
4147  if (shift > 0 && shift == j)
4148  Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
4149  else
4150  Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
4151 
4152  return Builder.CreateCall(F, Ops, name);
4153 }
4154 
4155 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
4156  bool neg) {
4157  int SV = cast<ConstantInt>(V)->getSExtValue();
4158  return ConstantInt::get(Ty, neg ? -SV : SV);
4159 }
4160 
4161 // Right-shift a vector by a constant.
4162 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
4163  llvm::Type *Ty, bool usgn,
4164  const char *name) {
4165  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
4166 
4167  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
4168  int EltSize = VTy->getScalarSizeInBits();
4169 
4170  Vec = Builder.CreateBitCast(Vec, Ty);
4171 
4172  // lshr/ashr are undefined when the shift amount is equal to the vector
4173  // element size.
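 // For example, a full-width unsigned shift (shift amount == element size)
 // folds to an all-zero vector below, while the equivalent signed shift is
 // clamped to element size - 1, preserving the "replicate the sign bit"
 // behaviour of the NEON instruction.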
4174  if (ShiftAmt == EltSize) {
4175  if (usgn) {
4176  // Right-shifting an unsigned value by its size yields 0.
4177  return llvm::ConstantAggregateZero::get(VTy);
4178  } else {
4179  // Right-shifting a signed value by its size is equivalent
4180  // to a shift of size-1.
4181  --ShiftAmt;
4182  Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
4183  }
4184  }
4185 
4186  Shift = EmitNeonShiftVector(Shift, Ty, false);
4187  if (usgn)
4188  return Builder.CreateLShr(Vec, Shift, name);
4189  else
4190  return Builder.CreateAShr(Vec, Shift, name);
4191 }
4192 
4193 enum {
4194  AddRetType = (1 << 0),
4195  Add1ArgType = (1 << 1),
4196  Add2ArgTypes = (1 << 2),
4197 
4198  VectorizeRetType = (1 << 3),
4199  VectorizeArgTypes = (1 << 4),
4200 
4201  InventFloatType = (1 << 5),
4202  UnsignedAlts = (1 << 6),
4203 
4204  Use64BitVectors = (1 << 7),
4205  Use128BitVectors = (1 << 8),
4206 
4207  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
4208  VectorRet = AddRetType | VectorizeRetType,
4209  VectorRetGetArgs01 =
4210  AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
4211  FpCmpzModifiers =
4212  AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
4213 };
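// Worked example of how these modifiers are used: vaddlv_u32 is mapped below
// with AddRetType | Add1ArgType, so LookupNeonLLVMIntrinsic (further down)
// pushes both the call's return type and its argument type as overload types
// when fetching the aarch64_neon_uaddlv intrinsic.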
4214 
4215 namespace {
4216 struct NeonIntrinsicInfo {
4217  const char *NameHint;
4218  unsigned BuiltinID;
4219  unsigned LLVMIntrinsic;
4220  unsigned AltLLVMIntrinsic;
4221  unsigned TypeModifier;
4222 
4223  bool operator<(unsigned RHSBuiltinID) const {
4224  return BuiltinID < RHSBuiltinID;
4225  }
4226  bool operator<(const NeonIntrinsicInfo &TE) const {
4227  return BuiltinID < TE.BuiltinID;
4228  }
4229 };
4230 } // end anonymous namespace
4231 
4232 #define NEONMAP0(NameBase) \
4233  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
4234 
4235 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
4236  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
4237  Intrinsic::LLVMIntrinsic, 0, TypeModifier }
4238 
4239 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
4240  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
4241  Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
4242  TypeModifier }
4243 
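// For instance, NEONMAP1(vabs_v, arm_neon_vabs, 0) expands to
// { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 },
// while NEONMAP0 entries carry no LLVM intrinsic at all and are expanded with
// custom IR in the switches further below.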
4244 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
4245  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
4246  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
4247  NEONMAP1(vabs_v, arm_neon_vabs, 0),
4248  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
4249  NEONMAP0(vaddhn_v),
4250  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
4251  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
4252  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
4253  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
4254  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
4255  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
4256  NEONMAP1(vcage_v, arm_neon_vacge, 0),
4257  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
4258  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
4259  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
4260  NEONMAP1(vcale_v, arm_neon_vacge, 0),
4261  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
4262  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
4263  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
4264  NEONMAP0(vceqz_v),
4265  NEONMAP0(vceqzq_v),
4266  NEONMAP0(vcgez_v),
4267  NEONMAP0(vcgezq_v),
4268  NEONMAP0(vcgtz_v),
4269  NEONMAP0(vcgtzq_v),
4270  NEONMAP0(vclez_v),
4271  NEONMAP0(vclezq_v),
4272  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
4273  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
4274  NEONMAP0(vcltz_v),
4275  NEONMAP0(vcltzq_v),
4276  NEONMAP1(vclz_v, ctlz, Add1ArgType),
4277  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
4278  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
4279  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
4280  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
4281  NEONMAP0(vcvt_f16_v),
4282  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
4283  NEONMAP0(vcvt_f32_v),
4284  NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
4285  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
4286  NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
4287  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
4288  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
4289  NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
4290  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
4291  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
4292  NEONMAP0(vcvt_s16_v),
4293  NEONMAP0(vcvt_s32_v),
4294  NEONMAP0(vcvt_s64_v),
4295  NEONMAP0(vcvt_u16_v),
4296  NEONMAP0(vcvt_u32_v),
4297  NEONMAP0(vcvt_u64_v),
4298  NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
4299  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
4300  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
4301  NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0),
4302  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
4303  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
4304  NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
4305  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
4306  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
4307  NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
4308  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
4309  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
4310  NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
4311  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
4312  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
4313  NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
4314  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
4315  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
4316  NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
4317  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
4318  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
4319  NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
4320  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
4321  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
4322  NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
4323  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
4324  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
4325  NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
4326  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
4327  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
4328  NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
4329  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
4330  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
4331  NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
4332  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
4333  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
4334  NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
4335  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
4336  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
4337  NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
4338  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
4339  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
4340  NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
4341  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
4342  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
4343  NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
4344  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
4345  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
4346  NEONMAP0(vcvtq_f16_v),
4347  NEONMAP0(vcvtq_f32_v),
4348  NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
4349  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
4350  NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
4351  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
4352  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
4353  NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
4354  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
4355  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
4356  NEONMAP0(vcvtq_s16_v),
4357  NEONMAP0(vcvtq_s32_v),
4358  NEONMAP0(vcvtq_s64_v),
4359  NEONMAP0(vcvtq_u16_v),
4360  NEONMAP0(vcvtq_u32_v),
4361  NEONMAP0(vcvtq_u64_v),
4362  NEONMAP2(vdot_v, arm_neon_udot, arm_neon_sdot, 0),
4363  NEONMAP2(vdotq_v, arm_neon_udot, arm_neon_sdot, 0),
4364  NEONMAP0(vext_v),
4365  NEONMAP0(vextq_v),
4366  NEONMAP0(vfma_v),
4367  NEONMAP0(vfmaq_v),
4368  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
4369  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
4370  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
4371  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
4372  NEONMAP0(vld1_dup_v),
4373  NEONMAP1(vld1_v, arm_neon_vld1, 0),
4374  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
4375  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
4376  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
4377  NEONMAP0(vld1q_dup_v),
4378  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
4379  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
4380  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
4381  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
4382  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
4383  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
4384  NEONMAP1(vld2_v, arm_neon_vld2, 0),
4385  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
4386  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
4387  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
4388  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
4389  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
4390  NEONMAP1(vld3_v, arm_neon_vld3, 0),
4391  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
4392  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
4393  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
4394  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
4395  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
4396  NEONMAP1(vld4_v, arm_neon_vld4, 0),
4397  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
4398  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
4399  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
4400  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
4401  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
4402  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
4403  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
4404  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
4405  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
4406  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
4407  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
4408  NEONMAP0(vmovl_v),
4409  NEONMAP0(vmovn_v),
4410  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
4411  NEONMAP0(vmull_v),
4412  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
4413  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
4414  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
4415  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
4416  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
4417  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
4418  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
4419  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
4420  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
4421  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
4422  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
4423  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
4424  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
4425  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
4426  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
4427  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
4428  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
4429  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
4430  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
4431  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
4432  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
4433  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
4434  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
4435  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
4436  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
4437  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
4438  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
4439  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
4440  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
4441  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
4442  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
4443  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
4444  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
4445  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
4446  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
4447  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
4448  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
4449  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
4450  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
4451  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
4452  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
4453  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
4454  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
4455  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
4456  NEONMAP0(vrndi_v),
4457  NEONMAP0(vrndiq_v),
4458  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
4459  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
4460  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
4461  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
4462  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
4463  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
4464  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
4465  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
4466  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
4467  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
4468  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
4469  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
4470  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
4471  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
4472  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
4473  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
4474  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
4475  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
4476  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
4477  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
4478  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
4479  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
4480  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
4481  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
4482  NEONMAP0(vshl_n_v),
4483  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
4484  NEONMAP0(vshll_n_v),
4485  NEONMAP0(vshlq_n_v),
4486  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
4487  NEONMAP0(vshr_n_v),
4488  NEONMAP0(vshrn_n_v),
4489  NEONMAP0(vshrq_n_v),
4490  NEONMAP1(vst1_v, arm_neon_vst1, 0),
4491  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
4492  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
4493  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
4494  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
4495  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
4496  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
4497  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
4498  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
4499  NEONMAP1(vst2_v, arm_neon_vst2, 0),
4500  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
4501  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
4502  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
4503  NEONMAP1(vst3_v, arm_neon_vst3, 0),
4504  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
4505  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
4506  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
4507  NEONMAP1(vst4_v, arm_neon_vst4, 0),
4508  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
4509  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
4510  NEONMAP0(vsubhn_v),
4511  NEONMAP0(vtrn_v),
4512  NEONMAP0(vtrnq_v),
4513  NEONMAP0(vtst_v),
4514  NEONMAP0(vtstq_v),
4515  NEONMAP0(vuzp_v),
4516  NEONMAP0(vuzpq_v),
4517  NEONMAP0(vzip_v),
4518  NEONMAP0(vzipq_v)
4519 };
4520 
4521 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
4522  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
4523  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
4524  NEONMAP0(vaddhn_v),
4525  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
4526  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
4527  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
4528  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
4529  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
4530  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
4531  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
4532  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
4533  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
4534  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
4535  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
4536  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
4537  NEONMAP0(vceqz_v),
4538  NEONMAP0(vceqzq_v),
4539  NEONMAP0(vcgez_v),
4540  NEONMAP0(vcgezq_v),
4541  NEONMAP0(vcgtz_v),
4542  NEONMAP0(vcgtzq_v),
4543  NEONMAP0(vclez_v),
4544  NEONMAP0(vclezq_v),
4545  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
4546  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
4547  NEONMAP0(vcltz_v),
4548  NEONMAP0(vcltzq_v),
4549  NEONMAP1(vclz_v, ctlz, Add1ArgType),
4550  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
4551  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
4552  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
4553  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
4554  NEONMAP0(vcvt_f16_v),
4555  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
4556  NEONMAP0(vcvt_f32_v),
4557  NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4558  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4559  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4560  NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
4561  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
4562  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
4563  NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
4564  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
4565  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
4566  NEONMAP0(vcvtq_f16_v),
4567  NEONMAP0(vcvtq_f32_v),
4568  NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4569  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4570  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4571  NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
4572  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
4573  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
4574  NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
4575  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
4576  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
4577  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
4578  NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
4579  NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
4580  NEONMAP0(vext_v),
4581  NEONMAP0(vextq_v),
4582  NEONMAP0(vfma_v),
4583  NEONMAP0(vfmaq_v),
4584  NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0),
4585  NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0),
4586  NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0),
4587  NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0),
4588  NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0),
4589  NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0),
4590  NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0),
4591  NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0),
4592  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
4593  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
4594  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
4595  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
4596  NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
4597  NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
4598  NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
4599  NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
4600  NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
4601  NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
4602  NEONMAP0(vmovl_v),
4603  NEONMAP0(vmovn_v),
4604  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
4605  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
4606  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
4607  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
4608  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
4609  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
4610  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
4611  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
4612  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
4613  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
4614  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
4615  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
4616  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
4617  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
4618  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
4619  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
4620  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
4621  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
4622  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
4623  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
4624  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
4625  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
4626  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
4627  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
4628  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
4629  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
4630  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
4631  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
4632  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
4633  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
4634  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
4635  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
4636  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
4637  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
4638  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
4639  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
4640  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
4641  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
4642  NEONMAP0(vrndi_v),
4643  NEONMAP0(vrndiq_v),
4644  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
4645  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
4646  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
4647  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
4648  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
4649  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
4650  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
4651  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
4652  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
4653  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
4654  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
4655  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
4656  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
4657  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
4658  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
4659  NEONMAP0(vshl_n_v),
4660  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
4661  NEONMAP0(vshll_n_v),
4662  NEONMAP0(vshlq_n_v),
4663  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
4664  NEONMAP0(vshr_n_v),
4665  NEONMAP0(vshrn_n_v),
4666  NEONMAP0(vshrq_n_v),
4667  NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
4668  NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
4669  NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
4670  NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
4671  NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
4672  NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
4673  NEONMAP0(vsubhn_v),
4674  NEONMAP0(vtst_v),
4675  NEONMAP0(vtstq_v),
4676 };
4677 
4678 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
4679  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
4680  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
4681  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
4682  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
4683  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
4684  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
4685  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
4686  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
4687  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
4688  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4689  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
4690  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
4691  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
4692  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
4693  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4694  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4695  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
4696  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
4697  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
4698  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
4699  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
4700  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
4701  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
4702  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
4703  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
4704  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
4705  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
4706  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
4707  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4708  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4709  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4710  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4711  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4712  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4713  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4714  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4715  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4716  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4717  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4718  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4719  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4720  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4721  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4722  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4723  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4724  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4725  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4726  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4727  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
4728  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4729  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4730  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4731  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4732  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
4733  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
4734  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4735  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4736  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
4737  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
4738  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4739  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4740  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4741  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
4742  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
4743  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
4744  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
4745  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
4746  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
4747  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
4748  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
4749  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
4750  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
4751  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4752  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4753  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4754  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4755  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4756  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4757  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4758  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4759  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
4760  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
4761  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
4762  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
4763  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
4764  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
4765  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
4766  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
4767  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
4768  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
4769  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
4770  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
4771  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
4772  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
4773  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
4774  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
4775  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
4776  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
4777  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
4778  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
4779  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
4780  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
4781  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
4782  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
4783  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
4784  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
4785  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
4786  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
4787  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
4788  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
4789  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
4790  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
4791  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
4792  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
4793  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
4794  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
4795  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
4796  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
4797  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
4798  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
4799  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
4800  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
4801  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
4802  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
4803  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
4804  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
4805  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
4806  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
4807  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
4808  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
4809  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4810  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4811  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4812  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4813  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
4814  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
4815  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4816  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4817  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4818  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4819  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
4820  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
4821  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
4822  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
4823  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
4824  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
4825  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
4826  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
4827  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
4828  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
4829  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
4830  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
4831  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
4832  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
4833  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
4834  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
4835  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
4836  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
4837  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
4838  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
4839  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
4840  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
4841  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
4842  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
4843  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
4844  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
4845  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
4846  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
4847  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
4848  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
4849  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
4850  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
4851  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
4852  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
4853  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
4854  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
4855  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
4856  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
4857  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
4858  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
4859  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
4860  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
4861  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
4862  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
4863  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
4864  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
4865  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
4866  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
4867  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
4868  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
4869  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
4870  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
4871  // FP16 scalar intrinsics go here.
4872  NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
4873  NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
4874  NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
4875  NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
4876  NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
4877  NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4878  NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4879  NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4880  NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4881  NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4882  NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4883  NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4884  NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4885  NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4886  NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4887  NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4888  NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4889  NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4890  NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4891  NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4892  NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4893  NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4894  NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4895  NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4896  NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4897  NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
4898  NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
4899  NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
4900  NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
4901  NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
4902 };
4903 
4904 #undef NEONMAP0
4905 #undef NEONMAP1
4906 #undef NEONMAP2
4907 
4908 static bool NEONSIMDIntrinsicsProvenSorted = false;
4909 
4910 static bool AArch64SIMDIntrinsicsProvenSorted = false;
4911 static bool AArch64SISDIntrinsicsProvenSorted = false;
4912 
4913 
4914 static const NeonIntrinsicInfo *
4915 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
4916  unsigned BuiltinID, bool &MapProvenSorted) {
4917 
4918 #ifndef NDEBUG
4919  if (!MapProvenSorted) {
4920  assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
4921  MapProvenSorted = true;
4922  }
4923 #endif
4924 
4925  const NeonIntrinsicInfo *Builtin =
4926  std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
4927 
4928  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
4929  return Builtin;
4930 
4931  return nullptr;
4932 }
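// Note: the two operator< overloads on NeonIntrinsicInfo are what allow the
// std::lower_bound call above to compare table entries directly against a
// raw builtin ID.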
4933 
4934 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
4935  unsigned Modifier,
4936  llvm::Type *ArgType,
4937  const CallExpr *E) {
4938  int VectorSize = 0;
4939  if (Modifier & Use64BitVectors)
4940  VectorSize = 64;
4941  else if (Modifier & Use128BitVectors)
4942  VectorSize = 128;
4943 
4944  // Return type.
4945  SmallVector<llvm::Type *, 3> Tys;
4946  if (Modifier & AddRetType) {
4947  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
4948  if (Modifier & VectorizeRetType)
4949  Ty = llvm::VectorType::get(
4950  Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
4951 
4952  Tys.push_back(Ty);
4953  }
4954 
4955  // Arguments.
4956  if (Modifier & VectorizeArgTypes) {
4957  int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
4958  ArgType = llvm::VectorType::get(ArgType, Elts);
4959  }
4960 
4961  if (Modifier & (Add1ArgType | Add2ArgTypes))
4962  Tys.push_back(ArgType);
4963 
4964  if (Modifier & Add2ArgTypes)
4965  Tys.push_back(ArgType);
4966 
4967  if (Modifier & InventFloatType)
4968  Tys.push_back(FloatTy);
4969 
4970  return CGM.getIntrinsic(IntrinsicID, Tys);
4971 }
4972 
4973 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
4974  const NeonIntrinsicInfo &SISDInfo,
4975  SmallVectorImpl<Value *> &Ops,
4976  const CallExpr *E) {
4977  unsigned BuiltinID = SISDInfo.BuiltinID;
4978  unsigned int Int = SISDInfo.LLVMIntrinsic;
4979  unsigned Modifier = SISDInfo.TypeModifier;
4980  const char *s = SISDInfo.NameHint;
4981 
4982  switch (BuiltinID) {
4983  case NEON::BI__builtin_neon_vcled_s64:
4984  case NEON::BI__builtin_neon_vcled_u64:
4985  case NEON::BI__builtin_neon_vcles_f32:
4986  case NEON::BI__builtin_neon_vcled_f64:
4987  case NEON::BI__builtin_neon_vcltd_s64:
4988  case NEON::BI__builtin_neon_vcltd_u64:
4989  case NEON::BI__builtin_neon_vclts_f32:
4990  case NEON::BI__builtin_neon_vcltd_f64:
4991  case NEON::BI__builtin_neon_vcales_f32:
4992  case NEON::BI__builtin_neon_vcaled_f64:
4993  case NEON::BI__builtin_neon_vcalts_f32:
4994  case NEON::BI__builtin_neon_vcaltd_f64:
4995  // Only one direction of these comparisons actually exists: cmle is really
4996  // a cmge with swapped operands. The table gives us the right intrinsic,
4997  // but we still need to do the swap ourselves.
4998  std::swap(Ops[0], Ops[1]);
4999  break;
5000  }
5001 
5002  assert(Int && "Generic code assumes a valid intrinsic");
5003 
5004  // Determine the type(s) of this overloaded AArch64 intrinsic.
5005  const Expr *Arg = E->getArg(0);
5006  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
5007  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
5008 
5009  int j = 0;
5010  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
5011  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
5012  ai != ae; ++ai, ++j) {
5013  llvm::Type *ArgTy = ai->getType();
5014  if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
5015  ArgTy->getPrimitiveSizeInBits())
5016  continue;
5017 
5018  assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
5019  // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
5020  // it before inserting.
5021  Ops[j] =
5022  CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
5023  Ops[j] =
5024  CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
5025  }
5026 
5027  Value *Result = CGF.EmitNeonCall(F, Ops, s);
5028  llvm::Type *ResultType = CGF.ConvertType(E->getType());
5029  if (ResultType->getPrimitiveSizeInBits() <
5030  Result->getType()->getPrimitiveSizeInBits())
5031  return CGF.Builder.CreateExtractElement(Result, C0);
5032 
5033  return CGF.Builder.CreateBitCast(Result, ResultType, s);
5034 }
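// In short: a scalar (SISD) builtin whose LLVM intrinsic only exists in a
// vector form gets each scalar operand inserted into lane 0 of an undef
// vector, and the scalar result is pulled back out of the vector result
// above (via extractelement or a bitcast).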
5035 
5036 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
5037  unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
5038  const char *NameHint, unsigned Modifier, const CallExpr *E,
5039  SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
5040  llvm::Triple::ArchType Arch) {
5041  // Get the last argument, which specifies the vector type.
5042  llvm::APSInt NeonTypeConst;
5043  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5044  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
5045  return nullptr;
5046 
5047  // Determine the type of this overloaded NEON intrinsic.
5048  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
5049  bool Usgn = Type.isUnsigned();
5050  bool Quad = Type.isQuad();
5051  const bool HasLegalHalfType = getTarget().hasLegalHalfType();
5052 
5053  llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType);
5054  llvm::Type *Ty = VTy;
5055  if (!Ty)
5056  return nullptr;
5057 
5058  auto getAlignmentValue32 = [&](Address addr) -> Value* {
5059  return Builder.getInt32(addr.getAlignment().getQuantity());
5060  };
5061 
5062  unsigned Int = LLVMIntrinsic;
5063  if ((Modifier & UnsignedAlts) && !Usgn)
5064  Int = AltLLVMIntrinsic;
5065 
5066  switch (BuiltinID) {
5067  default: break;
5068  case NEON::BI__builtin_neon_vabs_v:
5069  case NEON::BI__builtin_neon_vabsq_v:
5070  if (VTy->getElementType()->isFloatingPointTy())
5071  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
5072  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
5073  case NEON::BI__builtin_neon_vaddhn_v: {
5074  llvm::VectorType *SrcTy =
5075  llvm::VectorType::getExtendedElementVectorType(VTy);
5076 
5077  // %sum = add <4 x i32> %lhs, %rhs
5078  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
5079  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
5080  Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
5081 
5082  // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
5083  Constant *ShiftAmt =
5084  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
5085  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
5086 
5087  // %res = trunc <4 x i32> %high to <4 x i16>
5088  return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
5089  }
5090  case NEON::BI__builtin_neon_vcale_v:
5091  case NEON::BI__builtin_neon_vcaleq_v:
5092  case NEON::BI__builtin_neon_vcalt_v:
5093  case NEON::BI__builtin_neon_vcaltq_v:
5094  std::swap(Ops[0], Ops[1]);
5095  LLVM_FALLTHROUGH;
5096  case NEON::BI__builtin_neon_vcage_v:
5097  case NEON::BI__builtin_neon_vcageq_v:
5098  case NEON::BI__builtin_neon_vcagt_v:
5099  case NEON::BI__builtin_neon_vcagtq_v: {
5100  llvm::Type *Ty;
5101  switch (VTy->getScalarSizeInBits()) {
5102  default: llvm_unreachable("unexpected type");
5103  case 32:
5104  Ty = FloatTy;
5105  break;
5106  case 64:
5107  Ty = DoubleTy;
5108  break;
5109  case 16:
5110  Ty = HalfTy;
5111  break;
5112  }
5113  llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements());
5114  llvm::Type *Tys[] = { VTy, VecFlt };
5115  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5116  return EmitNeonCall(F, Ops, NameHint);
5117  }
5118  case NEON::BI__builtin_neon_vceqz_v:
5119  case NEON::BI__builtin_neon_vceqzq_v:
5120  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
5121  ICmpInst::ICMP_EQ, "vceqz");
5122  case NEON::BI__builtin_neon_vcgez_v:
5123  case NEON::BI__builtin_neon_vcgezq_v:
5124  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
5125  ICmpInst::ICMP_SGE, "vcgez");
5126  case NEON::BI__builtin_neon_vclez_v:
5127  case NEON::BI__builtin_neon_vclezq_v:
5128  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
5129  ICmpInst::ICMP_SLE, "vclez");
5130  case NEON::BI__builtin_neon_vcgtz_v:
5131  case NEON::BI__builtin_neon_vcgtzq_v:
5132  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
5133  ICmpInst::ICMP_SGT, "vcgtz");
5134  case NEON::BI__builtin_neon_vcltz_v:
5135  case NEON::BI__builtin_neon_vcltzq_v:
5136  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
5137  ICmpInst::ICMP_SLT, "vcltz");
5138  case NEON::BI__builtin_neon_vclz_v:
5139  case NEON::BI__builtin_neon_vclzq_v:
5140  // We generate a target-independent intrinsic, which needs a second
5141  // argument saying whether clz of zero is undefined; on ARM it isn't.
5142  Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
5143  break;
5144  case NEON::BI__builtin_neon_vcvt_f32_v:
5145  case NEON::BI__builtin_neon_vcvtq_f32_v:
5146  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5147  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
5148  HasLegalHalfType);
5149  return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5150  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5151  case NEON::BI__builtin_neon_vcvt_f16_v:
5152  case NEON::BI__builtin_neon_vcvtq_f16_v:
5153  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5154  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
5155  HasLegalHalfType);
5156  return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5157  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5158  case NEON::BI__builtin_neon_vcvt_n_f16_v:
5159  case NEON::BI__builtin_neon_vcvt_n_f32_v:
5160  case NEON::BI__builtin_neon_vcvt_n_f64_v:
5161  case NEON::BI__builtin_neon_vcvtq_n_f16_v:
5162  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
5163  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
5164  llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
5165  Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
5166  Function *F = CGM.getIntrinsic(Int, Tys);
5167  return EmitNeonCall(F, Ops, "vcvt_n");
5168  }
5169  case NEON::BI__builtin_neon_vcvt_n_s16_v:
5170  case NEON::BI__builtin_neon_vcvt_n_s32_v:
5171  case NEON::BI__builtin_neon_vcvt_n_u16_v:
5172  case NEON::BI__builtin_neon_vcvt_n_u32_v:
5173  case NEON::BI__builtin_neon_vcvt_n_s64_v:
5174  case NEON::BI__builtin_neon_vcvt_n_u64_v:
5175  case NEON::BI__builtin_neon_vcvtq_n_s16_v:
5176  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
5177  case NEON::BI__builtin_neon_vcvtq_n_u16_v:
5178  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
5179  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
5180  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
5181  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5182  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5183  return EmitNeonCall(F, Ops, "vcvt_n");
5184  }
5185  case NEON::BI__builtin_neon_vcvt_s32_v:
5186  case NEON::BI__builtin_neon_vcvt_u32_v:
5187  case NEON::BI__builtin_neon_vcvt_s64_v:
5188  case NEON::BI__builtin_neon_vcvt_u64_v:
5189  case NEON::BI__builtin_neon_vcvt_s16_v:
5190  case NEON::BI__builtin_neon_vcvt_u16_v:
5191  case NEON::BI__builtin_neon_vcvtq_s32_v:
5192  case NEON::BI__builtin_neon_vcvtq_u32_v:
5193  case NEON::BI__builtin_neon_vcvtq_s64_v:
5194  case NEON::BI__builtin_neon_vcvtq_u64_v:
5195  case NEON::BI__builtin_neon_vcvtq_s16_v:
5196  case NEON::BI__builtin_neon_vcvtq_u16_v: {
5197  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
5198  return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
5199  : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
5200  }
5201  case NEON::BI__builtin_neon_vcvta_s16_v:
5202  case NEON::BI__builtin_neon_vcvta_s32_v:
5203  case NEON::BI__builtin_neon_vcvta_s64_v:
5204  case NEON::BI__builtin_neon_vcvta_u16_v:
5205  case NEON::BI__builtin_neon_vcvta_u32_v:
5206  case NEON::BI__builtin_neon_vcvta_u64_v:
5207  case NEON::BI__builtin_neon_vcvtaq_s16_v:
5208  case NEON::BI__builtin_neon_vcvtaq_s32_v:
5209  case NEON::BI__builtin_neon_vcvtaq_s64_v:
5210  case NEON::BI__builtin_neon_vcvtaq_u16_v:
5211  case NEON::BI__builtin_neon_vcvtaq_u32_v:
5212  case NEON::BI__builtin_neon_vcvtaq_u64_v:
5213  case NEON::BI__builtin_neon_vcvtn_s16_v:
5214  case NEON::BI__builtin_neon_vcvtn_s32_v:
5215  case NEON::BI__builtin_neon_vcvtn_s64_v:
5216  case NEON::BI__builtin_neon_vcvtn_u16_v:
5217  case NEON::BI__builtin_neon_vcvtn_u32_v:
5218  case NEON::BI__builtin_neon_vcvtn_u64_v:
5219  case NEON::BI__builtin_neon_vcvtnq_s16_v:
5220  case NEON::BI__builtin_neon_vcvtnq_s32_v:
5221  case NEON::BI__builtin_neon_vcvtnq_s64_v:
5222  case NEON::BI__builtin_neon_vcvtnq_u16_v:
5223  case NEON::BI__builtin_neon_vcvtnq_u32_v:
5224  case NEON::BI__builtin_neon_vcvtnq_u64_v:
5225  case NEON::BI__builtin_neon_vcvtp_s16_v:
5226  case NEON::BI__builtin_neon_vcvtp_s32_v:
5227  case NEON::BI__builtin_neon_vcvtp_s64_v:
5228  case NEON::BI__builtin_neon_vcvtp_u16_v:
5229  case NEON::BI__builtin_neon_vcvtp_u32_v:
5230  case NEON::BI__builtin_neon_vcvtp_u64_v:
5231  case NEON::BI__builtin_neon_vcvtpq_s16_v:
5232  case NEON::BI__builtin_neon_vcvtpq_s32_v:
5233  case NEON::BI__builtin_neon_vcvtpq_s64_v:
5234  case NEON::BI__builtin_neon_vcvtpq_u16_v:
5235  case NEON::BI__builtin_neon_vcvtpq_u32_v:
5236  case NEON::BI__builtin_neon_vcvtpq_u64_v:
5237  case NEON::BI__builtin_neon_vcvtm_s16_v:
5238  case NEON::BI__builtin_neon_vcvtm_s32_v:
5239  case NEON::BI__builtin_neon_vcvtm_s64_v:
5240  case NEON::BI__builtin_neon_vcvtm_u16_v:
5241  case NEON::BI__builtin_neon_vcvtm_u32_v:
5242  case NEON::BI__builtin_neon_vcvtm_u64_v:
5243  case NEON::BI__builtin_neon_vcvtmq_s16_v:
5244  case NEON::BI__builtin_neon_vcvtmq_s32_v:
5245  case NEON::BI__builtin_neon_vcvtmq_s64_v:
5246  case NEON::BI__builtin_neon_vcvtmq_u16_v:
5247  case NEON::BI__builtin_neon_vcvtmq_u32_v:
5248  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
5249  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5250  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
5251  }
5252  case NEON::BI__builtin_neon_vext_v:
5253  case NEON::BI__builtin_neon_vextq_v: {
5254  int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
5255  SmallVector<uint32_t, 16> Indices;
5256  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
5257  Indices.push_back(i+CV);
5258 
5259  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5260  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5261  return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
5262  }
5263  case NEON::BI__builtin_neon_vfma_v:
5264  case NEON::BI__builtin_neon_vfmaq_v: {
5265  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5266  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5267  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5268  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5269 
5270  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
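  // That is, vfma(a, b, c) computes a + b * c, so it maps to llvm.fma(b, c, a).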
5271  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5272  }
5273  case NEON::BI__builtin_neon_vld1_v:
5274  case NEON::BI__builtin_neon_vld1q_v: {
5275  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5276  Ops.push_back(getAlignmentValue32(PtrOp0));
5277  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
5278  }
5279  case NEON::BI__builtin_neon_vld1_x2_v:
5280  case NEON::BI__builtin_neon_vld1q_x2_v:
5281  case NEON::BI__builtin_neon_vld1_x3_v:
5282  case NEON::BI__builtin_neon_vld1q_x3_v:
5283  case NEON::BI__builtin_neon_vld1_x4_v:
5284  case NEON::BI__builtin_neon_vld1q_x4_v: {
5285  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5286  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5287  llvm::Type *Tys[2] = { VTy, PTy };
5288  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5289  Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
5290  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5291  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5292  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5293  }
5294  case NEON::BI__builtin_neon_vld2_v:
5295  case NEON::BI__builtin_neon_vld2q_v:
5296  case NEON::BI__builtin_neon_vld3_v:
5297  case NEON::BI__builtin_neon_vld3q_v:
5298  case NEON::BI__builtin_neon_vld4_v:
5299  case NEON::BI__builtin_neon_vld4q_v:
5300  case NEON::BI__builtin_neon_vld2_dup_v:
5301  case NEON::BI__builtin_neon_vld2q_dup_v:
5302  case NEON::BI__builtin_neon_vld3_dup_v:
5303  case NEON::BI__builtin_neon_vld3q_dup_v:
5304  case NEON::BI__builtin_neon_vld4_dup_v:
5305  case NEON::BI__builtin_neon_vld4q_dup_v: {
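  // These builtins return a struct of 2-4 vectors. The arm_neon.h wrappers
  // pass the address of the result as the first argument, so Ops[0] is the
  // destination through which the aggregate returned by the intrinsic is
  // stored below.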
5306  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5307  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5308  Value *Align = getAlignmentValue32(PtrOp1);
5309  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
5310  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5311  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5312  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5313  }
5314  case NEON::BI__builtin_neon_vld1_dup_v:
5315  case NEON::BI__builtin_neon_vld1q_dup_v: {
5316  Value *V = UndefValue::get(Ty);
5317  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
5318  PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
5319  LoadInst *Ld = Builder.CreateLoad(PtrOp0);
5320  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5321  Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
5322  return EmitNeonSplat(Ops[0], CI);
5323  }
5324  case NEON::BI__builtin_neon_vld2_lane_v:
5325  case NEON::BI__builtin_neon_vld2q_lane_v:
5326  case NEON::BI__builtin_neon_vld3_lane_v:
5327  case NEON::BI__builtin_neon_vld3q_lane_v:
5328  case NEON::BI__builtin_neon_vld4_lane_v:
5329  case NEON::BI__builtin_neon_vld4q_lane_v: {
5330  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5331  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5332  for (unsigned I = 2; I < Ops.size() - 1; ++I)
5333  Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
5334  Ops.push_back(getAlignmentValue32(PtrOp1));
5335  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
5336  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5337  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5338  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5339  }
5340  case NEON::BI__builtin_neon_vmovl_v: {
5341  llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
5342  Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
5343  if (Usgn)
5344  return Builder.CreateZExt(Ops[0], Ty, "vmovl");
5345  return Builder.CreateSExt(Ops[0], Ty, "vmovl");
5346  }
5347  case NEON::BI__builtin_neon_vmovn_v: {
5348  llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
5349  Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
5350  return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
5351  }
5352  case NEON::BI__builtin_neon_vmull_v:
5353  // FIXME: the integer vmull operations could be emitted in terms of pure
5354  // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
5355  // hoisting the exts outside loops. Until global ISel comes along that can
5356  // see through such movement, this leads to bad CodeGen. So we need an
5357  // intrinsic for now.
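  // The pure-IR form referred to above would look roughly like:
  //   %lhs.ext = sext <8 x i8> %lhs to <8 x i16>
  //   %rhs.ext = sext <8 x i8> %rhs to <8 x i16>
  //   %prod    = mul <8 x i16> %lhs.ext, %rhs.ext
  // (with zext for the unsigned variants).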
5358  Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
5359  Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
5360  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
5361  case NEON::BI__builtin_neon_vpadal_v:
5362  case NEON::BI__builtin_neon_vpadalq_v: {
5363  // The source operand type has twice as many elements of half the size.
5364  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
5365  llvm::Type *EltTy =
5366  llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
5367  llvm::Type *NarrowTy =
5368  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
5369  llvm::Type *Tys[2] = { Ty, NarrowTy };
5370  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
5371  }
5372  case NEON::BI__builtin_neon_vpaddl_v:
5373  case NEON::BI__builtin_neon_vpaddlq_v: {
5374  // The source operand type has twice as many elements of half the size.
5375  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
5376  llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
5377  llvm::Type *NarrowTy =
5378  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
5379  llvm::Type *Tys[2] = { Ty, NarrowTy };
5380  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
5381  }
5382  case NEON::BI__builtin_neon_vqdmlal_v:
5383  case NEON::BI__builtin_neon_vqdmlsl_v: {
5384  SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
5385  Ops[1] =
5386  EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
5387  Ops.resize(2);
5388  return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
5389  }
5390  case NEON::BI__builtin_neon_vqshl_n_v:
5391  case NEON::BI__builtin_neon_vqshlq_n_v:
5392  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
5393  1, false);
5394  case NEON::BI__builtin_neon_vqshlu_n_v:
5395  case NEON::BI__builtin_neon_vqshluq_n_v:
5396  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
5397  1, false);
5398  case NEON::BI__builtin_neon_vrecpe_v:
5399  case NEON::BI__builtin_neon_vrecpeq_v:
5400  case NEON::BI__builtin_neon_vrsqrte_v:
5401  case NEON::BI__builtin_neon_vrsqrteq_v:
5402  Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
5403  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
5404  case NEON::BI__builtin_neon_vrndi_v:
5405  case NEON::BI__builtin_neon_vrndiq_v:
5406  Int = Intrinsic::nearbyint;
5407  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
5408  case NEON::BI__builtin_neon_vrshr_n_v:
5409  case NEON::BI__builtin_neon_vrshrq_n_v:
5410  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
5411  1, true);
5412  case NEON::BI__builtin_neon_vshl_n_v:
5413  case NEON::BI__builtin_neon_vshlq_n_v:
5414  Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
5415  return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
5416  "vshl_n");
5417  case NEON::BI__builtin_neon_vshll_n_v: {
5418  llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
5419  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
5420  if (Usgn)
5421  Ops[0] = Builder.CreateZExt(Ops[0], VTy);
5422  else
5423  Ops[0] = Builder.CreateSExt(Ops[0], VTy);
5424  Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
5425  return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
5426  }
5427  case NEON::BI__builtin_neon_vshrn_n_v: {
5428  llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
5429  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
5430  Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
5431  if (Usgn)
5432  Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
5433  else
5434  Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
5435  return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
5436  }
5437  case NEON::BI__builtin_neon_vshr_n_v:
5438  case NEON::BI__builtin_neon_vshrq_n_v:
5439  return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
5440  case NEON::BI__builtin_neon_vst1_v:
5441  case NEON::BI__builtin_neon_vst1q_v:
5442  case NEON::BI__builtin_neon_vst2_v:
5443  case NEON::BI__builtin_neon_vst2q_v:
5444  case NEON::BI__builtin_neon_vst3_v:
5445  case NEON::BI__builtin_neon_vst3q_v:
5446  case NEON::BI__builtin_neon_vst4_v:
5447  case NEON::BI__builtin_neon_vst4q_v:
5448  case NEON::BI__builtin_neon_vst2_lane_v:
5449  case NEON::BI__builtin_neon_vst2q_lane_v:
5450  case NEON::BI__builtin_neon_vst3_lane_v:
5451  case NEON::BI__builtin_neon_vst3q_lane_v:
5452  case NEON::BI__builtin_neon_vst4_lane_v:
5453  case NEON::BI__builtin_neon_vst4q_lane_v: {
5454  llvm::Type *Tys[] = {Int8PtrTy, Ty};
5455  Ops.push_back(getAlignmentValue32(PtrOp0));
5456  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
5457  }
5458  case NEON::BI__builtin_neon_vst1_x2_v:
5459  case NEON::BI__builtin_neon_vst1q_x2_v:
5460  case NEON::BI__builtin_neon_vst1_x3_v:
5461  case NEON::BI__builtin_neon_vst1q_x3_v:
5462  case NEON::BI__builtin_neon_vst1_x4_v:
5463  case NEON::BI__builtin_neon_vst1q_x4_v: {
5464  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5465  // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
5466  // in AArch64 it comes last. We may want to standardize on one or the other.
5467  if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be) {
5468  llvm::Type *Tys[2] = { VTy, PTy };
5469  std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
5470  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
5471  }
5472  llvm::Type *Tys[2] = { PTy, VTy };
5473  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
5474  }
5475  case NEON::BI__builtin_neon_vsubhn_v: {
5476  llvm::VectorType *SrcTy =
5477  llvm::VectorType::getExtendedElementVectorType(VTy);
5478 
5479  // %diff = sub <4 x i32> %lhs, %rhs
5480  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
5481  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
5482  Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
5483 
5484  // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
5485  Constant *ShiftAmt =
5486  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
5487  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
5488 
5489  // %res = trunc <4 x i32> %high to <4 x i16>
5490  return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
5491  }
5492  case NEON::BI__builtin_neon_vtrn_v:
5493  case NEON::BI__builtin_neon_vtrnq_v: {
5494  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5495  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5496  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5497  Value *SV = nullptr;
5498 
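  // vtrn produces two result vectors, returned through the pointer in Ops[0].
  // For a 4-element vector the two shuffle masks built below are <0,4,2,6>
  // (vi == 0) and <1,5,3,7> (vi == 1), the even- and odd-lane transposes.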
5499  for (unsigned vi = 0; vi != 2; ++vi) {
5500  SmallVector<uint32_t, 16> Indices;
5501  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
5502  Indices.push_back(i+vi);
5503  Indices.push_back(i+e+vi);
5504  }
5505  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5506  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
5507  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5508  }
5509  return SV;
5510  }
5511  case NEON::BI__builtin_neon_vtst_v:
5512  case NEON::BI__builtin_neon_vtstq_v: {
5513  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5514  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5515  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5516  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5517  ConstantAggregateZero::get(Ty));
5518  return Builder.CreateSExt(Ops[0], Ty, "vtst");
5519  }
5520  case NEON::BI__builtin_neon_vuzp_v:
5521  case NEON::BI__builtin_neon_vuzpq_v: {
5522  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5523  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5524  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5525  Value *SV = nullptr;
5526 
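  // vuzp likewise returns two vectors through Ops[0]. For a 4-element vector
  // the masks built below are <0,2,4,6> (vi == 0) and <1,3,5,7> (vi == 1),
  // deinterleaving the even and odd lanes of the two inputs.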
5527  for (unsigned vi = 0; vi != 2; ++vi) {
5528  SmallVector<uint32_t, 16> Indices;
5529  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
5530  Indices.push_back(2*i+vi);
5531 
5532  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5533  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
5534  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5535  }
5536  return SV;
5537  }
5538  case NEON::BI__builtin_neon_vzip_v:
5539  case NEON::BI__builtin_neon_vzipq_v: {
5540  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5541  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5542  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5543  Value *SV = nullptr;
5544 
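  // vzip also returns two vectors through Ops[0]. For a 4-element vector the
  // masks built below are <0,4,1,5> (vi == 0) and <2,6,3,7> (vi == 1),
  // interleaving first the low and then the high halves of the inputs.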
5545  for (unsigned vi = 0; vi != 2; ++vi) {
5546  SmallVector<uint32_t, 16> Indices;
5547  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
5548  Indices.push_back((i + vi*e) >> 1);
5549  Indices.push_back(((i + vi*e) >> 1)+e);
5550  }
5551  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5552  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
5553  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5554  }
5555  return SV;
5556  }
5557  case NEON::BI__builtin_neon_vdot_v:
5558  case NEON::BI__builtin_neon_vdotq_v: {
5559  llvm::Type *InputTy =
5560  llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
5561  llvm::Type *Tys[2] = { Ty, InputTy };
5562  Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
5563  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
5564  }
5565  case NEON::BI__builtin_neon_vfmlal_low_v:
5566  case NEON::BI__builtin_neon_vfmlalq_low_v: {
5567  llvm::Type *InputTy =
5568  llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
5569  llvm::Type *Tys[2] = { Ty, InputTy };
5570  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
5571  }
5572  case NEON::BI__builtin_neon_vfmlsl_low_v:
5573  case NEON::BI__builtin_neon_vfmlslq_low_v: {
5574  llvm::Type *InputTy =
5575  llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
5576  llvm::Type *Tys[2] = { Ty, InputTy };
5577  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
5578  }
5579  case NEON::BI__builtin_neon_vfmlal_high_v:
5580  case NEON::BI__builtin_neon_vfmlalq_high_v: {
5581  llvm::Type *InputTy =
5582  llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
5583  llvm::Type *Tys[2] = { Ty, InputTy };
5584  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
5585  }
5586  case NEON::BI__builtin_neon_vfmlsl_high_v:
5587  case NEON::BI__builtin_neon_vfmlslq_high_v: {
5588  llvm::Type *InputTy =
5589  llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
5590  llvm::Type *Tys[2] = { Ty, InputTy };
5591  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
5592  }
5593  }
5594 
5595  assert(Int && "Expected valid intrinsic number");
5596 
5597  // Determine the type(s) of this overloaded AArch64 intrinsic.
5598  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
5599 
5600  Value *Result = EmitNeonCall(F, Ops, NameHint);
5601  llvm::Type *ResultType = ConvertType(E->getType());
5602  // Cast the AArch64 intrinsic's one-element vector result back to the
5603  // scalar type expected by the builtin.
5604  return Builder.CreateBitCast(Result, ResultType, NameHint);
5605 }
5606 
5607 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
5608  Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
5609  const CmpInst::Predicate Ip, const Twine &Name) {
5610  llvm::Type *OTy = Op->getType();
5611 
5612  // FIXME: this is utterly horrific. We should not be looking at previous
5613  // codegen context to find out what needs doing. Unfortunately TableGen
5614  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
5615  // (etc).
5616  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
5617  OTy = BI->getOperand(0)->getType();
5618 
5619  Op = Builder.CreateBitCast(Op, OTy);
5620  if (OTy->getScalarType()->isFloatingPointTy()) {
5621  Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
5622  } else {
5623  Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
5624  }
5625  return Builder.CreateSExt(Op, Ty, Name);
5626 }
5627 
5628 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
5629  Value *ExtOp, Value *IndexOp,
5630  llvm::Type *ResTy, unsigned IntID,
5631  const char *Name) {
5632  SmallVector<Value *, 2> TblOps;
5633  if (ExtOp)
5634  TblOps.push_back(ExtOp);
5635 
5636  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
5637  SmallVector<uint32_t, 16> Indices;
5638  llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
5639  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
5640  Indices.push_back(2*i);
5641  Indices.push_back(2*i+1);
5642  }
5643 
5644  int PairPos = 0, End = Ops.size() - 1;
5645  while (PairPos < End) {
5646  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
5647  Ops[PairPos+1], Indices,
5648  Name));
5649  PairPos += 2;
5650  }
5651 
5652  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
5653  // of the final 128-bit lookup table with zero.
5654  if (PairPos == End) {
5655  Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
5656  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
5657  ZeroTbl, Indices, Name));
5658  }
5659 
5660  Function *TblF;
5661  TblOps.push_back(IndexOp);
5662  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
5663 
5664  return CGF.EmitNeonCall(TblF, TblOps, Name);
5665 }
5666 
5667 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
5668  unsigned Value;
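  // Map each hint builtin to the immediate operand of the ARM HINT
  // instruction: 0 = NOP, 1 = YIELD, 2 = WFE, 3 = WFI, 4 = SEV, 5 = SEVL.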
5669  switch (BuiltinID) {
5670  default:
5671  return nullptr;
5672  case ARM::BI__builtin_arm_nop:
5673  Value = 0;
5674  break;
5675  case ARM::BI__builtin_arm_yield:
5676  case ARM::BI__yield:
5677  Value = 1;
5678  break;
5679  case ARM::BI__builtin_arm_wfe:
5680  case ARM::BI__wfe:
5681  Value = 2;
5682  break;
5683  case ARM::BI__builtin_arm_wfi:
5684  case ARM::BI__wfi:
5685  Value = 3;
5686  break;
5687  case ARM::BI__builtin_arm_sev:
5688  case ARM::BI__sev:
5689  Value = 4;
5690  break;
5691  case ARM::BI__builtin_arm_sevl:
5692  case ARM::BI__sevl:
5693  Value = 5;
5694  break;
5695  }
5696 
5697  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
5698  llvm::ConstantInt::get(Int32Ty, Value));
5699 }
5700 
5701 // Generates the IR for the read/write special register builtin.
5702 // ValueType is the type of the value that is to be written or read;
5703 // RegisterType is the type of the register being written to or read from.
5704 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
5705  const CallExpr *E,
5706  llvm::Type *RegisterType,
5707  llvm::Type *ValueType,
5708  bool IsRead,
5709  StringRef SysReg = "") {
5710  // The read and write register intrinsics only support 32- and 64-bit operations.
5711  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
5712  && "Unsupported size for register.");
5713 
5714  CodeGen::CGBuilderTy &Builder = CGF.Builder;
5715  CodeGen::CodeGenModule &CGM = CGF.CGM;
5716  LLVMContext &Context = CGM.getLLVMContext();
5717 
5718  if (SysReg.empty()) {
5719  const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
5720  SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
5721  }
5722 
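  // llvm.read_register and llvm.write_register identify the target register
  // by name, passed as a metadata string wrapped in a MetadataAsValue.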
5723  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
5724  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5725  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5726 
5727  llvm::Type *Types[] = { RegisterType };
5728 
5729  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
5730  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
5731  && "Can't fit 64-bit value in 32-bit register");
5732 
5733  if (IsRead) {
5734  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
5735  llvm::Value *Call = Builder.CreateCall(F, Metadata);
5736 
5737  if (MixedTypes)
5738  // Read into 64 bit register and then truncate result to 32 bit.
5739  return Builder.CreateTrunc(Call, ValueType);
5740 
5741  if (ValueType->isPointerTy())
5742  // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
5743  return Builder.CreateIntToPtr(Call, ValueType);
5744 
5745  return Call;
5746  }
5747 
5748  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
5749  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
5750  if (MixedTypes) {
5751  // Extend 32 bit write value to 64 bit to pass to write.
5752  ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
5753  return Builder.CreateCall(F, { Metadata, ArgValue });
5754  }
5755 
5756  if (ValueType->isPointerTy()) {
5757  // Have VoidPtrTy ArgValue but want to return an i32/i64.
5758  ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
5759  return Builder.CreateCall(F, { Metadata, ArgValue });
5760  }
5761 
5762  return Builder.CreateCall(F, { Metadata, ArgValue });
5763 }
5764 
5765 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
5766 /// argument that specifies the vector type.
5767 static bool HasExtraNeonArgument(unsigned BuiltinID) {
5768  switch (BuiltinID) {
5769  default: break;
5770  case NEON::BI__builtin_neon_vget_lane_i8:
5771  case NEON::BI__builtin_neon_vget_lane_i16:
5772  case NEON::BI__builtin_neon_vget_lane_i32:
5773  case NEON::BI__builtin_neon_vget_lane_i64:
5774  case NEON::BI__builtin_neon_vget_lane_f32:
5775  case NEON::BI__builtin_neon_vgetq_lane_i8:
5776  case NEON::BI__builtin_neon_vgetq_lane_i16:
5777  case NEON::BI__builtin_neon_vgetq_lane_i32:
5778  case NEON::BI__builtin_neon_vgetq_lane_i64:
5779  case NEON::BI__builtin_neon_vgetq_lane_f32:
5780  case NEON::BI__builtin_neon_vset_lane_i8:
5781  case NEON::BI__builtin_neon_vset_lane_i16:
5782  case NEON::BI__builtin_neon_vset_lane_i32:
5783  case NEON::BI__builtin_neon_vset_lane_i64:
5784  case NEON::BI__builtin_neon_vset_lane_f32:
5785  case NEON::BI__builtin_neon_vsetq_lane_i8:
5786  case NEON::BI__builtin_neon_vsetq_lane_i16:
5787  case NEON::BI__builtin_neon_vsetq_lane_i32:
5788  case NEON::BI__builtin_neon_vsetq_lane_i64:
5789  case NEON::BI__builtin_neon_vsetq_lane_f32:
5790  case NEON::BI__builtin_neon_vsha1h_u32:
5791  case NEON::BI__builtin_neon_vsha1cq_u32:
5792  case NEON::BI__builtin_neon_vsha1pq_u32:
5793  case NEON::BI__builtin_neon_vsha1mq_u32:
5794  case clang::ARM::BI_MoveToCoprocessor:
5795  case clang::ARM::BI_MoveToCoprocessor2:
5796  return false;
5797  }
5798  return true;
5799 }
5800 
5801 Value *CodeGenFunction::EmitISOVolatileLoad(const CallExpr *E) {
5802  Value *Ptr = EmitScalarExpr(E->getArg(0));
5803  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5804  CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
5805  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5806  LoadSize.getQuantity() * 8);
5807  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
5808  llvm::LoadInst *Load =
5809  Builder.CreateAlignedLoad(Ptr, LoadSize);
5810  Load->setVolatile(true);
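  // The __iso_volatile_* builtins only guarantee a volatile access of the
  // given width; they imply no acquire/release ordering, so marking the load
  // volatile is sufficient.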
5811  return Load;
5812 }
5813 
5814 Value *CodeGenFunction::EmitISOVolatileStore(const CallExpr *E) {
5815  Value *Ptr = EmitScalarExpr(E->getArg(0));
5816  Value *Value = EmitScalarExpr(E->getArg(1));
5817  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5818  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
5819  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5820  StoreSize.getQuantity() * 8);
5821  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
5822  llvm::StoreInst *Store =
5823  Builder.CreateAlignedStore(Value, Ptr,
5824  StoreSize);
5825  Store->setVolatile(true);
5826  return Store;
5827 }
5828 
5829 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
5830  const CallExpr *E,
5831  llvm::Triple::ArchType Arch) {
5832  if (auto Hint = GetValueForARMHint(BuiltinID))
5833  return Hint;
5834 
5835  if (BuiltinID == ARM::BI__emit) {
5836  bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
5837  llvm::FunctionType *FTy =
5838  llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
5839 
5840  Expr::EvalResult Result;
5841  if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5842  llvm_unreachable("Sema will ensure that the parameter is constant");
5843 
5844  llvm::APSInt Value = Result.Val.getInt();
5845  uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
5846 
5847  llvm::InlineAsm *Emit =
5848  IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
5849  /*SideEffects=*/true)
5850  : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
5851  /*SideEffects=*/true);
5852 
5853  return Builder.CreateCall(Emit);
5854  }
5855 
5856  if (BuiltinID == ARM::BI__builtin_arm_dbg) {
5857  Value *Option = EmitScalarExpr(E->getArg(0));
5858  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
5859  }
5860 
5861  if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
5862  Value *Address = EmitScalarExpr(E->getArg(0));
5863  Value *RW = EmitScalarExpr(E->getArg(1));
5864  Value *IsData = EmitScalarExpr(E->getArg(2));
5865 
5866  // Locality is not supported on the ARM target.
5867  Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
5868 
5869  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5870  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5871  }
5872 
5873  if (BuiltinID == ARM::BI__builtin_arm_rbit) {
5874  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5875  return Builder.CreateCall(
5876  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5877  }
5878 
5879  if (BuiltinID == ARM::BI__clear_cache) {
5880  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5881  const FunctionDecl *FD = E->getDirectCallee();
5882  Value *Ops[2];
5883  for (unsigned i = 0; i < 2; i++)
5884  Ops[i] = EmitScalarExpr(E->getArg(i));
5885  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5886  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5887  StringRef Name = FD->getName();
5888  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5889  }
5890 
5891  if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
5892  BuiltinID == ARM::BI__builtin_arm_mcrr2) {
5893  Function *F;
5894 
5895  switch (BuiltinID) {
5896  default: llvm_unreachable("unexpected builtin");
5897  case ARM::BI__builtin_arm_mcrr:
5898  F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
5899  break;
5900  case ARM::BI__builtin_arm_mcrr2:
5901  F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
5902  break;
5903  }
5904 
5905  // The MCRR{2} instruction has 5 operands, but
5906  // the intrinsic has only 4 because Rt and Rt2
5907  // are represented as a single unsigned 64-bit
5908  // integer in the intrinsic definition; internally
5909  // they are handled as two separate 32-bit
5910  // integers.
5911 
5912  Value *Coproc = EmitScalarExpr(E->getArg(0));
5913  Value *Opc1 = EmitScalarExpr(E->getArg(1));
5914  Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
5915  Value *CRm = EmitScalarExpr(E->getArg(3));
5916 
5917  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
5918  Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
5919  Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
5920  Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
5921 
5922  return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
5923  }
5924 
5925  if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
5926  BuiltinID == ARM::BI__builtin_arm_mrrc2) {
5927  Function *F;
5928 
5929  switch (BuiltinID) {
5930  default: llvm_unreachable("unexpected builtin");
5931  case ARM::BI__builtin_arm_mrrc:
5932  F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
5933  break;
5934  case ARM::BI__builtin_arm_mrrc2:
5935  F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
5936  break;
5937  }
5938 
5939  Value *Coproc = EmitScalarExpr(E->getArg(0));
5940  Value *Opc1 = EmitScalarExpr(E->getArg(1));
5941  Value *CRm = EmitScalarExpr(E->getArg(2));
5942  Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
5943 
5944  // Returns an unsigned 64 bit integer, represented
5945  // as two 32 bit integers.
5946 
5947  Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
5948  Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
5949  Rt = Builder.CreateZExt(Rt, Int64Ty);
5950  Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
5951 
5952  Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
5953  RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
5954  RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
5955 
5956  return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
5957  }
5958 
5959  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
5960  ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
5961  BuiltinID == ARM::BI__builtin_arm_ldaex) &&
5962  getContext().getTypeSize(E->getType()) == 64) ||
5963  BuiltinID == ARM::BI__ldrexd) {
5964  Function *F;
5965 
5966  switch (BuiltinID) {
5967  default: llvm_unreachable("unexpected builtin");
5968  case ARM::BI__builtin_arm_ldaex:
5969  F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
5970  break;
5971  case ARM::BI__builtin_arm_ldrexd:
5972  case ARM::BI__builtin_arm_ldrex:
5973  case ARM::BI__ldrexd:
5974  F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
5975  break;
5976  }
5977 
5978  Value *LdPtr = EmitScalarExpr(E->getArg(0));
5979  Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5980  "ldrexd");
5981 
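  // The ldrexd/ldaexd intrinsics return the loaded doubleword as two i32
  // halves; reassemble them into a single i64 before bitcasting to the
  // expected result type.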
5982  Value *Val0 = Builder.CreateExtractValue(Val, 1);
5983  Value *Val1 = Builder.CreateExtractValue(Val, 0);
5984  Val0 = Builder.CreateZExt(Val0, Int64Ty);
5985  Val1 = Builder.CreateZExt(Val1, Int64Ty);
5986 
5987  Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
5988  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5989  Val = Builder.CreateOr(Val, Val1);
5990  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5991  }
5992 
5993  if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
5994  BuiltinID == ARM::BI__builtin_arm_ldaex) {
5995  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5996 
5997  QualType Ty = E->getType();
5998  llvm::Type *RealResTy = ConvertType(Ty);
5999  llvm::Type *PtrTy = llvm::IntegerType::get(
6000  getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
6001  LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
6002 
6003  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
6004  ? Intrinsic::arm_ldaex
6005  : Intrinsic::arm_ldrex,
6006  PtrTy);
6007  Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
6008 
6009  if (RealResTy->isPointerTy())
6010  return Builder.CreateIntToPtr(Val, RealResTy);
6011  else {
6012  llvm::Type *IntResTy = llvm::IntegerType::get(
6013  getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
6014  Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
6015  return Builder.CreateBitCast(Val, RealResTy);
6016  }
6017  }
6018 
6019  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
6020  ((BuiltinID == ARM::BI__builtin_arm_stlex ||
6021  BuiltinID == ARM::BI__builtin_arm_strex) &&
6022  getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
6023  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
6024  ? Intrinsic::arm_stlexd
6025  : Intrinsic::arm_strexd);
6026  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
6027 
6028  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6029  Value *Val = EmitScalarExpr(E->getArg(0));
6030  Builder.CreateStore(Val, Tmp);
6031 
6032  Address LdPtr = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
6033  Val = Builder.CreateLoad(LdPtr);
6034 
6035  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
6036  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
6037  Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
6038  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
6039  }
6040 
6041  if (BuiltinID == ARM::BI__builtin_arm_strex ||
6042  BuiltinID == ARM::BI__builtin_arm_stlex) {
6043  Value *StoreVal = EmitScalarExpr(E->getArg(0));
6044  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
6045 
6046  QualType Ty = E->getArg(0)->getType();
6047  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
6048  getContext().getTypeSize(Ty));
6049  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
6050 
6051  if (StoreVal->getType()->isPointerTy())
6052  StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
6053  else {
6054  llvm::Type *IntTy = llvm::IntegerType::get(
6055  getLLVMContext(),
6056  CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
6057  StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
6058  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
6059  }
6060 
6061  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
6062  ? Intrinsic::arm_stlex
6063  : Intrinsic::arm_strex,
6064  StoreAddr->getType());
6065  return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
6066  }
6067 
6068  switch (BuiltinID) {
6069  case ARM::BI__iso_volatile_load8:
6070  case ARM::BI__iso_volatile_load16:
6071  case ARM::BI__iso_volatile_load32:
6072  case ARM::BI__iso_volatile_load64:
6073  return EmitISOVolatileLoad(E);
6074  case ARM::BI__iso_volatile_store8:
6075  case ARM::BI__iso_volatile_store16:
6076  case ARM::BI__iso_volatile_store32:
6077  case ARM::BI__iso_volatile_store64:
6078  return EmitISOVolatileStore(E);
6079  }
6080 
6081  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
6082  Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
6083  return Builder.CreateCall(F);
6084  }
6085 
6086  // CRC32
6087  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
6088  switch (BuiltinID) {
6089  case ARM::BI__builtin_arm_crc32b:
6090  CRCIntrinsicID = Intrinsic::arm_crc32b; break;
6091  case ARM::BI__builtin_arm_crc32cb:
6092  CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
6093  case ARM::BI__builtin_arm_crc32h:
6094  CRCIntrinsicID = Intrinsic::arm_crc32h; break;
6095  case ARM::BI__builtin_arm_crc32ch:
6096  CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
6097  case ARM::BI__builtin_arm_crc32w:
6098  case ARM::BI__builtin_arm_crc32d:
6099  CRCIntrinsicID = Intrinsic::arm_crc32w; break;
6100  case ARM::BI__builtin_arm_crc32cw:
6101  case ARM::BI__builtin_arm_crc32cd:
6102  CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
6103  }
6104 
6105  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
6106  Value *Arg0 = EmitScalarExpr(E->getArg(0));
6107  Value *Arg1 = EmitScalarExpr(E->getArg(1));
6108 
6109  // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
6110  // intrinsics, hence we need different codegen for these cases.
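  // That is, crc32{c,}d(crc, x) == crc32{c,}w(crc32{c,}w(crc, lo32(x)), hi32(x)).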
6111  if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
6112  BuiltinID == ARM::BI__builtin_arm_crc32cd) {
6113  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
6114  Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
6115  Value *Arg1b = Builder.CreateLShr(Arg1, C1);
6116  Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
6117 
6118  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
6119  Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
6120  return Builder.CreateCall(F, {Res, Arg1b});
6121  } else {
6122  Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
6123 
6124  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
6125  return Builder.CreateCall(F, {Arg0, Arg1});
6126  }
6127  }
6128 
6129  if (BuiltinID == ARM::BI__builtin_arm_rsr ||
6130  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
6131  BuiltinID == ARM::BI__builtin_arm_rsrp ||
6132  BuiltinID == ARM::BI__builtin_arm_wsr ||
6133  BuiltinID == ARM::BI__builtin_arm_wsr64 ||
6134  BuiltinID == ARM::BI__builtin_arm_wsrp) {
6135 
6136  bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
6137  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
6138  BuiltinID == ARM::BI__builtin_arm_rsrp;
6139 
6140  bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
6141  BuiltinID == ARM::BI__builtin_arm_wsrp;
6142 
6143  bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
6144  BuiltinID == ARM::BI__builtin_arm_wsr64;
6145 
6146  llvm::Type *ValueType;
6147  llvm::Type *RegisterType;
6148  if (IsPointerBuiltin) {
6149  ValueType = VoidPtrTy;
6150  RegisterType = Int32Ty;
6151  } else if (Is64Bit) {
6152  ValueType = RegisterType = Int64Ty;
6153  } else {
6154  ValueType = RegisterType = Int32Ty;
6155  }
6156 
6157  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
6158  }
6159 
6160  // Find out if any arguments are required to be integer constant
6161  // expressions.
6162  unsigned ICEArguments = 0;
6163  ASTContext::GetBuiltinTypeError Error;
6164  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6165  assert(Error == ASTContext::GE_None && "Should not codegen an error");
6166 
6167  auto getAlignmentValue32 = [&](Address addr) -> Value* {
6168  return Builder.getInt32(addr.getAlignment().getQuantity());
6169  };
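  // The ARM NEON load/store intrinsics take their alignment as an explicit
  // i32 operand; this helper extracts that value from an Address.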
6170 
6171  Address PtrOp0 = Address::invalid();
6172  Address PtrOp1 = Address::invalid();
6173  SmallVector<Value*, 4> Ops;
6174  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
6175  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
6176  for (unsigned i = 0, e = NumArgs; i != e; i++) {
6177  if (i == 0) {
6178  switch (BuiltinID) {
6179  case NEON::BI__builtin_neon_vld1_v:
6180  case NEON::BI__builtin_neon_vld1q_v:
6181  case NEON::BI__builtin_neon_vld1q_lane_v:
6182  case NEON::BI__builtin_neon_vld1_lane_v:
6183  case NEON::BI__builtin_neon_vld1_dup_v:
6184  case NEON::BI__builtin_neon_vld1q_dup_v:
6185  case NEON::BI__builtin_neon_vst1_v:
6186  case NEON::BI__builtin_neon_vst1q_v:
6187  case NEON::BI__builtin_neon_vst1q_lane_v:
6188  case NEON::BI__builtin_neon_vst1_lane_v:
6189  case NEON::BI__builtin_neon_vst2_v:
6190  case NEON::BI__builtin_neon_vst2q_v:
6191  case NEON::BI__builtin_neon_vst2_lane_v:
6192  case NEON::BI__builtin_neon_vst2q_lane_v:
6193  case NEON::BI__builtin_neon_vst3_v:
6194  case NEON::BI__builtin_neon_vst3q_v:
6195  case NEON::BI__builtin_neon_vst3_lane_v:
6196  case NEON::BI__builtin_neon_vst3q_lane_v:
6197  case NEON::BI__builtin_neon_vst4_v:
6198  case NEON::BI__builtin_neon_vst4q_v:
6199  case NEON::BI__builtin_neon_vst4_lane_v:
6200  case NEON::BI__builtin_neon_vst4q_lane_v:
6201  // Get the alignment for the argument in addition to the value;
6202  // we'll use it later.
6203  PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
6204  Ops.push_back(PtrOp0.getPointer());
6205  continue;
6206  }
6207  }
6208  if (i == 1) {
6209  switch (BuiltinID) {
6210  case NEON::BI__builtin_neon_vld2_v:
6211  case NEON::BI__builtin_neon_vld2q_v:
6212  case NEON::BI__builtin_neon_vld3_v:
6213  case NEON::BI__builtin_neon_vld3q_v:
6214  case NEON::BI__builtin_neon_vld4_v:
6215  case NEON::BI__builtin_neon_vld4q_v:
6216  case NEON::BI__builtin_neon_vld2_lane_v:
6217  case NEON::BI__builtin_neon_vld2q_lane_v:
6218  case NEON::BI__builtin_neon_vld3_lane_v:
6219  case NEON::BI__builtin_neon_vld3q_lane_v:
6220  case NEON::BI__builtin_neon_vld4_lane_v:
6221  case NEON::BI__builtin_neon_vld4q_lane_v:
6222  case NEON::BI__builtin_neon_vld2_dup_v:
6223  case NEON::BI__builtin_neon_vld2q_dup_v:
6224  case NEON::BI__builtin_neon_vld3_dup_v:
6225  case NEON::BI__builtin_neon_vld3q_dup_v:
6226  case NEON::BI__builtin_neon_vld4_dup_v:
6227  case NEON::BI__builtin_neon_vld4q_dup_v:
6228  // Get the alignment for the argument in addition to the value;
6229  // we'll use it later.
6230  PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
6231  Ops.push_back(PtrOp1.getPointer());
6232  continue;
6233  }
6234  }
6235 
6236  if ((ICEArguments & (1 << i)) == 0) {
6237  Ops.push_back(EmitScalarExpr(E->getArg(i)));
6238  } else {
6239  // If this is required to be a constant, constant fold it so that we know
6240  // that the generated intrinsic gets a ConstantInt.
6241  llvm::APSInt Result;
6242  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
6243  assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
6244  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
6245  }
6246  }
6247 
6248  switch (BuiltinID) {
6249  default: break;
6250 
6251  case NEON::BI__builtin_neon_vget_lane_i8:
6252  case NEON::BI__builtin_neon_vget_lane_i16:
6253  case NEON::BI__builtin_neon_vget_lane_i32:
6254  case NEON::BI__builtin_neon_vget_lane_i64:
6255  case NEON::BI__builtin_neon_vget_lane_f32:
6256  case NEON::BI__builtin_neon_vgetq_lane_i8:
6257  case NEON::BI__builtin_neon_vgetq_lane_i16:
6258  case NEON::BI__builtin_neon_vgetq_lane_i32:
6259  case NEON::BI__builtin_neon_vgetq_lane_i64:
6260  case NEON::BI__builtin_neon_vgetq_lane_f32:
6261  return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
6262 
6263  case NEON::BI__builtin_neon_vrndns_f32: {
6264  Value *Arg = EmitScalarExpr(E->getArg(0));
6265  llvm::Type *Tys[] = {Arg->getType()};
6266  Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
6267  return Builder.CreateCall(F, {Arg}, "vrndn"); }
6268 
6269  case NEON::BI__builtin_neon_vset_lane_i8:
6270  case NEON::BI__builtin_neon_vset_lane_i16:
6271  case NEON::BI__builtin_neon_vset_lane_i32:
6272  case NEON::BI__builtin_neon_vset_lane_i64:
6273  case NEON::BI__builtin_neon_vset_lane_f32:
6274  case NEON::BI__builtin_neon_vsetq_lane_i8:
6275  case NEON::BI__builtin_neon_vsetq_lane_i16:
6276  case NEON::BI__builtin_neon_vsetq_lane_i32:
6277  case NEON::BI__builtin_neon_vsetq_lane_i64:
6278  case NEON::BI__builtin_neon_vsetq_lane_f32:
6279  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6280 
6281  case NEON::BI__builtin_neon_vsha1h_u32:
6282  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
6283  "vsha1h");
6284  case NEON::BI__builtin_neon_vsha1cq_u32:
6285  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
6286  "vsha1h");
6287  case NEON::BI__builtin_neon_vsha1pq_u32:
6288  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
6289  "vsha1h");
6290  case NEON::BI__builtin_neon_vsha1mq_u32:
6291  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
6292  "vsha1h");
6293 
6294  // The ARM _MoveToCoprocessor builtins put the input register value as
6295  // the first argument, but the LLVM intrinsic expects it as the third one.
6296  case ARM::BI_MoveToCoprocessor:
6297  case ARM::BI_MoveToCoprocessor2: {
6298  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
6299  Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
6300  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
6301  Ops[3], Ops[4], Ops[5]});
6302  }
6303  case ARM::BI_BitScanForward:
6304  case ARM::BI_BitScanForward64:
6305  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
6306  case ARM::BI_BitScanReverse:
6307  case ARM::BI_BitScanReverse64:
6308  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
6309 
6310  case ARM::BI_InterlockedAnd64:
6311  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
6312  case ARM::BI_InterlockedExchange64:
6313  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
6314  case ARM::BI_InterlockedExchangeAdd64:
6315  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
6316  case ARM::BI_InterlockedExchangeSub64:
6317  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
6318  case ARM::BI_InterlockedOr64:
6319  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
6320  case ARM::BI_InterlockedXor64:
6321  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
6322  case ARM::BI_InterlockedDecrement64:
6323  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
6324  case ARM::BI_InterlockedIncrement64:
6325  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
6326  case ARM::BI_InterlockedExchangeAdd8_acq:
6327  case ARM::BI_InterlockedExchangeAdd16_acq:
6328  case ARM::BI_InterlockedExchangeAdd_acq:
6329  case ARM::BI_InterlockedExchangeAdd64_acq:
6330  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
6331  case ARM::BI_InterlockedExchangeAdd8_rel:
6332  case ARM::BI_InterlockedExchangeAdd16_rel:
6333  case ARM::BI_InterlockedExchangeAdd_rel:
6334  case ARM::BI_InterlockedExchangeAdd64_rel:
6335  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
6336  case ARM::BI_InterlockedExchangeAdd8_nf:
6337  case ARM::BI_InterlockedExchangeAdd16_nf:
6338  case ARM::BI_InterlockedExchangeAdd_nf:
6339  case ARM::BI_InterlockedExchangeAdd64_nf:
6340  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
6341  case ARM::BI_InterlockedExchange8_acq:
6342  case ARM::BI_InterlockedExchange16_acq:
6343  case ARM::BI_InterlockedExchange_acq:
6344  case ARM::BI_InterlockedExchange64_acq:
6345  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
6346  case ARM::BI_InterlockedExchange8_rel:
6347  case ARM::BI_InterlockedExchange16_rel:
6348  case ARM::BI_InterlockedExchange_rel:
6349  case ARM::BI_InterlockedExchange64_rel:
6350  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
6351  case ARM::BI_InterlockedExchange8_nf:
6352  case ARM::BI_InterlockedExchange16_nf:
6353  case ARM::BI_InterlockedExchange_nf:
6354  case ARM::BI_InterlockedExchange64_nf:
6355  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
6356  case ARM::BI_InterlockedCompareExchange8_acq:
6357  case ARM::BI_InterlockedCompareExchange16_acq:
6358  case ARM::BI_InterlockedCompareExchange_acq:
6359  case ARM::BI_InterlockedCompareExchange64_acq:
6360  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
6361  case ARM::BI_InterlockedCompareExchange8_rel:
6362  case ARM::BI_InterlockedCompareExchange16_rel:
6363  case ARM::BI_InterlockedCompareExchange_rel:
6364  case ARM::BI_InterlockedCompareExchange64_rel:
6365  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
6366  case ARM::BI_InterlockedCompareExchange8_nf:
6367  case ARM::BI_InterlockedCompareExchange16_nf:
6368  case ARM::BI_InterlockedCompareExchange_nf:
6369  case ARM::BI_InterlockedCompareExchange64_nf:
6370  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
6371  case ARM::BI_InterlockedOr8_acq:
6372  case ARM::BI_InterlockedOr16_acq:
6373  case ARM::BI_InterlockedOr_acq:
6374  case ARM::BI_InterlockedOr64_acq:
6375  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
6376  case ARM::BI_InterlockedOr8_rel:
6377  case ARM::BI_InterlockedOr16_rel:
6378  case ARM::BI_InterlockedOr_rel:
6379  case ARM::BI_InterlockedOr64_rel:
6380  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
6381  case ARM::BI_InterlockedOr8_nf:
6382  case ARM::BI_InterlockedOr16_nf:
6383  case ARM::BI_InterlockedOr_nf:
6384  case ARM::BI_InterlockedOr64_nf:
6385  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
6386  case ARM::BI_InterlockedXor8_acq:
6387  case ARM::BI_InterlockedXor16_acq:
6388  case ARM::BI_InterlockedXor_acq:
6389  case ARM::BI_InterlockedXor64_acq:
6390  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
6391  case ARM::BI_InterlockedXor8_rel:
6392  case ARM::BI_InterlockedXor16_rel:
6393  case ARM::BI_InterlockedXor_rel:
6394  case ARM::BI_InterlockedXor64_rel:
6395  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
6396  case ARM::BI_InterlockedXor8_nf:
6397  case ARM::BI_InterlockedXor16_nf:
6398  case ARM::BI_InterlockedXor_nf:
6399  case ARM::BI_InterlockedXor64_nf:
6400  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
6401  case ARM::BI_InterlockedAnd8_acq:
6402  case ARM::BI_InterlockedAnd16_acq:
6403  case ARM::BI_InterlockedAnd_acq:
6404  case ARM::BI_InterlockedAnd64_acq:
6405  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
6406  case ARM::BI_InterlockedAnd8_rel:
6407  case ARM::BI_InterlockedAnd16_rel:
6408  case ARM::BI_InterlockedAnd_rel:
6409  case ARM::BI_InterlockedAnd64_rel:
6410  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
6411  case ARM::BI_InterlockedAnd8_nf:
6412  case ARM::BI_InterlockedAnd16_nf:
6413  case ARM::BI_InterlockedAnd_nf:
6414  case ARM::BI_InterlockedAnd64_nf:
6415  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
6416  case ARM::BI_InterlockedIncrement16_acq:
6417  case ARM::BI_InterlockedIncrement_acq:
6418  case ARM::BI_InterlockedIncrement64_acq:
6419  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
6420  case ARM::BI_InterlockedIncrement16_rel:
6421  case ARM::BI_InterlockedIncrement_rel:
6422  case ARM::BI_InterlockedIncrement64_rel:
6423  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
6424  case ARM::BI_InterlockedIncrement16_nf:
6425  case ARM::BI_InterlockedIncrement_nf:
6426  case ARM::BI_InterlockedIncrement64_nf:
6427  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
6428  case ARM::BI_InterlockedDecrement16_acq:
6429  case ARM::BI_InterlockedDecrement_acq:
6430  case ARM::BI_InterlockedDecrement64_acq:
6431  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
6432  case ARM::BI_InterlockedDecrement16_rel:
6433  case ARM::BI_InterlockedDecrement_rel:
6434  case ARM::BI_InterlockedDecrement64_rel:
6435  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
6436  case ARM::BI_InterlockedDecrement16_nf:
6437  case ARM::BI_InterlockedDecrement_nf:
6438  case ARM::BI_InterlockedDecrement64_nf:
6439  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
6440  }
6441 
6442  // Get the last argument, which specifies the vector type.
6443  assert(HasExtraArg);
6444  llvm::APSInt Result;
6445  const Expr *Arg = E->getArg(E->getNumArgs()-1);
6446  if (!Arg->isIntegerConstantExpr(Result, getContext()))
6447  return nullptr;
6448 
6449  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
6450  BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
6451  // Determine the overloaded type of this builtin.
6452  llvm::Type *Ty;
6453  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
6454  Ty = FloatTy;
6455  else
6456  Ty = DoubleTy;
6457 
6458  // Determine whether this is an unsigned conversion or not.
6459  bool usgn = Result.getZExtValue() == 1;
6460  unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
6461 
6462  // Call the appropriate intrinsic.
6463  Function *F = CGM.getIntrinsic(Int, Ty);
6464  return Builder.CreateCall(F, Ops, "vcvtr");
6465  }
6466 
6467  // Determine the type of this overloaded NEON intrinsic.
6468  NeonTypeFlags Type(Result.getZExtValue());
6469  bool usgn = Type.isUnsigned();
6470  bool rightShift = false;
6471 
6472  llvm::VectorType *VTy = GetNeonType(this, Type,
6473  getTarget().hasLegalHalfType());
6474  llvm::Type *Ty = VTy;
6475  if (!Ty)
6476  return nullptr;
6477 
6478  // Many NEON builtins have identical semantics and uses in ARM and
6479  // AArch64. Emit these in a single function.
6480  auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
6481  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
6482  IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
6483  if (Builtin)
6484  return EmitCommonNeonBuiltinExpr(
6485  Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6486  Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
6487 
6488  unsigned Int;
6489  switch (BuiltinID) {
6490  default: return nullptr;
6491  case NEON::BI__builtin_neon_vld1q_lane_v:
6492  // Handle 64-bit integer elements as a special case. Use shuffles of
6493  // one-element vectors to avoid poor code for i64 in the backend.
6494  if (VTy->getElementType()->isIntegerTy(64)) {
6495  // Extract the other lane.
6496  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6497  uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
6498  Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
6499  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
6500  // Load the value as a one-element vector.
6501  Ty = llvm::VectorType::get(VTy->getElementType(), 1);
6502  llvm::Type *Tys[] = {Ty, Int8PtrTy};
6503  Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
6504  Value *Align = getAlignmentValue32(PtrOp0);
6505  Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
6506  // Combine them.
6507  uint32_t Indices[] = {1 - Lane, Lane};
6508  SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
6509  return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
6510  }
6511  LLVM_FALLTHROUGH;
6512  case NEON::BI__builtin_neon_vld1_lane_v: {
6513  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6514  PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
6515  Value *Ld = Builder.CreateLoad(PtrOp0);
6516  return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
6517  }
6518  case NEON::BI__builtin_neon_vqrshrn_n_v:
6519  Int =
6520  usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
6521  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
6522  1, true);
6523  case NEON::BI__builtin_neon_vqrshrun_n_v:
6524  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
6525  Ops, "vqrshrun_n", 1, true);
6526  case NEON::BI__builtin_neon_vqshrn_n_v:
6527  Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
6528  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
6529  1, true);
6530  case NEON::BI__builtin_neon_vqshrun_n_v:
6531  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
6532  Ops, "vqshrun_n", 1, true);
6533  case NEON::BI__builtin_neon_vrecpe_v:
6534  case NEON::BI__builtin_neon_vrecpeq_v:
6535  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
6536  Ops, "vrecpe");
6537  case NEON::BI__builtin_neon_vrshrn_n_v:
6538  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
6539  Ops, "vrshrn_n", 1, true);
6540  case NEON::BI__builtin_neon_vrsra_n_v:
6541  case NEON::BI__builtin_neon_vrsraq_n_v:
6542  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6543  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6544  Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
6545  Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
6546  Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
6547  return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
6548  case NEON::BI__builtin_neon_vsri_n_v:
6549  case NEON::BI__builtin_neon_vsriq_n_v:
6550  rightShift = true;
6551  LLVM_FALLTHROUGH;
6552  case NEON::BI__builtin_neon_vsli_n_v:
6553  case NEON::BI__builtin_neon_vsliq_n_v:
6554  Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
6555  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
6556  Ops, "vsli_n");
6557  case NEON::BI__builtin_neon_vsra_n_v:
6558  case NEON::BI__builtin_neon_vsraq_n_v:
6559  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6560  Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6561  return Builder.CreateAdd(Ops[0], Ops[1]);
6562  case NEON::BI__builtin_neon_vst1q_lane_v:
6563  // Handle 64-bit integer elements as a special case. Use a shuffle to get
6564  // a one-element vector and avoid poor code for i64 in the backend.
6565  if (VTy->getElementType()->isIntegerTy(64)) {
6566  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6567  Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
6568  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
6569  Ops[2] = getAlignmentValue32(PtrOp0);
6570  llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
6571  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
6572  Tys), Ops);
6573  }
6574  LLVM_FALLTHROUGH;
6575  case NEON::BI__builtin_neon_vst1_lane_v: {
6576  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6577  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6578  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6579  auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
6580  return St;
6581  }
6582  case NEON::BI__builtin_neon_vtbl1_v:
6583  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
6584  Ops, "vtbl1");
6585  case NEON::BI__builtin_neon_vtbl2_v:
6586  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
6587  Ops, "vtbl2");
6588  case NEON::BI__builtin_neon_vtbl3_v:
6589  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
6590  Ops, "vtbl3");
6591  case NEON::BI__builtin_neon_vtbl4_v:
6592  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
6593  Ops, "vtbl4");
6594  case NEON::BI__builtin_neon_vtbx1_v:
6595  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
6596  Ops, "vtbx1");
6597  case NEON::BI__builtin_neon_vtbx2_v:
6598  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
6599  Ops, "vtbx2");
6600  case NEON::BI__builtin_neon_vtbx3_v:
6601  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
6602  Ops, "vtbx3");
6603  case NEON::BI__builtin_neon_vtbx4_v:
6604  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
6605  Ops, "vtbx4");
6606  }
6607 }
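Note on the vrsra_n case above: the builtin is lowered as two separate operations, a rounding right shift (the arm_neon_vrshiftu/vrshifts intrinsic called with a negated shift amount) followed by an ordinary vector add. The sketch below is a scalar model of that semantics, not the emitted IR; the helper names are hypothetical.

#include <cstdint>

// Rounding right shift: add half of the discarded range before shifting,
// which is what the VRSHR form of the vrshift intrinsic computes.
static uint32_t rounding_shift_right_u32(uint32_t v, unsigned n) {
  uint64_t biased = (uint64_t)v + (1ull << (n - 1));
  return (uint32_t)(biased >> n);
}

// vrsra_n model: the shifted value is simply accumulated with an add,
// mirroring the CreateCall + CreateAdd pair in the vrsra_n case above.
uint32_t vrsra_n_u32_model(uint32_t acc, uint32_t v, unsigned n) {
  return acc + rounding_shift_right_u32(v, n);
}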
6608 
6609 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
6610  const CallExpr *E,
6611                                         SmallVectorImpl<Value *> &Ops,
6612                                         llvm::Triple::ArchType Arch) {
6613  unsigned int Int = 0;
6614  const char *s = nullptr;
6615 
6616  switch (BuiltinID) {
6617  default:
6618  return nullptr;
6619  case NEON::BI__builtin_neon_vtbl1_v:
6620  case NEON::BI__builtin_neon_vqtbl1_v:
6621  case NEON::BI__builtin_neon_vqtbl1q_v:
6622  case NEON::BI__builtin_neon_vtbl2_v:
6623  case NEON::BI__builtin_neon_vqtbl2_v:
6624  case NEON::BI__builtin_neon_vqtbl2q_v:
6625  case NEON::BI__builtin_neon_vtbl3_v:
6626  case NEON::BI__builtin_neon_vqtbl3_v:
6627  case NEON::BI__builtin_neon_vqtbl3q_v:
6628  case NEON::BI__builtin_neon_vtbl4_v:
6629  case NEON::BI__builtin_neon_vqtbl4_v:
6630  case NEON::BI__builtin_neon_vqtbl4q_v:
6631  break;
6632  case NEON::BI__builtin_neon_vtbx1_v:
6633  case NEON::BI__builtin_neon_vqtbx1_v:
6634  case NEON::BI__builtin_neon_vqtbx1q_v:
6635  case NEON::BI__builtin_neon_vtbx2_v:
6636  case NEON::BI__builtin_neon_vqtbx2_v:
6637  case NEON::BI__builtin_neon_vqtbx2q_v:
6638  case NEON::BI__builtin_neon_vtbx3_v:
6639  case NEON::BI__builtin_neon_vqtbx3_v:
6640  case NEON::BI__builtin_neon_vqtbx3q_v:
6641  case NEON::BI__builtin_neon_vtbx4_v:
6642  case NEON::BI__builtin_neon_vqtbx4_v:
6643  case NEON::BI__builtin_neon_vqtbx4q_v:
6644  break;
6645  }
6646 
6647  assert(E->getNumArgs() >= 3);
6648 
6649  // Get the last argument, which specifies the vector type.
6650  llvm::APSInt Result;
6651  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
6652  if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
6653  return nullptr;
6654 
6655  // Determine the type of this overloaded NEON intrinsic.
6656  NeonTypeFlags Type(Result.getZExtValue());
6657  llvm::VectorType *Ty = GetNeonType(&CGF, Type);
6658  if (!Ty)
6659  return nullptr;
6660 
6661  CodeGen::CGBuilderTy &Builder = CGF.Builder;
6662 
6663  // AArch64 scalar builtins are not overloaded; they lack the extra argument
6664  // that specifies the vector type, so each case must be handled individually.
6665  switch (BuiltinID) {
6666  case NEON::BI__builtin_neon_vtbl1_v: {
6667  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
6668  Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
6669  "vtbl1");
6670  }
6671  case NEON::BI__builtin_neon_vtbl2_v: {
6672  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
6673  Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
6674  "vtbl1");
6675  }
6676  case NEON::BI__builtin_neon_vtbl3_v: {
6677  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
6678  Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
6679  "vtbl2");
6680  }
6681  case NEON::BI__builtin_neon_vtbl4_v: {
6682  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
6683  Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
6684  "vtbl2");
6685  }
6686  case NEON::BI__builtin_neon_vtbx1_v: {
6687  Value *TblRes =
6688  packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
6689  Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
6690 
6691  llvm::Constant *EightV = ConstantInt::get(Ty, 8);
6692  Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
6693  CmpRes = Builder.CreateSExt(CmpRes, Ty);
6694 
6695  Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
6696  Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
6697  return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
6698  }
6699  case NEON::BI__builtin_neon_vtbx2_v: {
6700  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
6701  Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
6702  "vtbx1");
6703  }
6704  case NEON::BI__builtin_neon_vtbx3_v: {
6705  Value *TblRes =
6706  packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
6707  Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
6708 
6709  llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
6710  Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
6711  TwentyFourV);
6712  CmpRes = Builder.CreateSExt(CmpRes, Ty);
6713 
6714  Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
6715  Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
6716  return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
6717  }
6718  case NEON::BI__builtin_neon_vtbx4_v: {
6719  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
6720  Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
6721  "vtbx2");
6722  }
6723  case NEON::BI__builtin_neon_vqtbl1_v:
6724  case NEON::BI__builtin_neon_vqtbl1q_v:
6725  Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
6726  case NEON::BI__builtin_neon_vqtbl2_v:
6727  case NEON::BI__builtin_neon_vqtbl2q_v: {
6728  Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
6729  case NEON::BI__builtin_neon_vqtbl3_v:
6730  case NEON::BI__builtin_neon_vqtbl3q_v:
6731  Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
6732  case NEON::BI__builtin_neon_vqtbl4_v:
6733  case NEON::BI__builtin_neon_vqtbl4q_v:
6734  Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
6735  case NEON::BI__builtin_neon_vqtbx1_v:
6736  case NEON::BI__builtin_neon_vqtbx1q_v:
6737  Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
6738  case NEON::BI__builtin_neon_vqtbx2_v:
6739  case NEON::BI__builtin_neon_vqtbx2q_v:
6740  Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
6741  case NEON::BI__builtin_neon_vqtbx3_v:
6742  case NEON::BI__builtin_neon_vqtbx3q_v:
6743  Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
6744  case NEON::BI__builtin_neon_vqtbx4_v:
6745  case NEON::BI__builtin_neon_vqtbx4q_v:
6746  Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
6747  }
6748  }
6749 
6750  if (!Int)
6751  return nullptr;
6752 
6753  Function *F = CGF.CGM.getIntrinsic(Int, Ty);
6754  return CGF.EmitNeonCall(F, Ops, s);
6755 }
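For vtbx1 and vtbx3 there is no single AArch64 instruction with the required out-of-range behaviour, so the function above emits a plain table lookup and then selects between the lookup result and the original destination lanes with a compare/and/or mask sequence. A scalar model of that behaviour, assuming 8-bit lanes and a hypothetical function name:

#include <array>
#include <cstdint>

// Model of the vtbx1 lowering above: indices >= 8 (the CmpRes mask in the
// IR) keep the destination lane, while in-range indices take the table
// element produced by the tbl lookup.
std::array<uint8_t, 8> vtbx1_model(const std::array<uint8_t, 8> &dst,
                                   const std::array<uint8_t, 8> &table,
                                   const std::array<uint8_t, 8> &idx) {
  std::array<uint8_t, 8> out{};
  for (unsigned i = 0; i < 8; ++i)
    out[i] = (idx[i] >= 8) ? dst[i] : table[idx[i]];
  return out;
}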
6756 
6757 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
6758  llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
6759  Op = Builder.CreateBitCast(Op, Int16Ty);
6760  Value *V = UndefValue::get(VTy);
6761  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
6762  Op = Builder.CreateInsertElement(V, Op, CI);
6763  return Op;
6764 }
6765 
6766 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
6767  const CallExpr *E,
6768  llvm::Triple::ArchType Arch) {
6769  unsigned HintID = static_cast<unsigned>(-1);
6770  switch (BuiltinID) {
6771  default: break;
6772  case AArch64::BI__builtin_arm_nop:
6773  HintID = 0;
6774  break;
6775  case AArch64::BI__builtin_arm_yield:
6776  case AArch64::BI__yield:
6777  HintID = 1;
6778  break;
6779  case AArch64::BI__builtin_arm_wfe:
6780  case AArch64::BI__wfe:
6781  HintID = 2;
6782  break;
6783  case AArch64::BI__builtin_arm_wfi:
6784  case AArch64::BI__wfi:
6785  HintID = 3;
6786  break;
6787  case AArch64::BI__builtin_arm_sev:
6788  case AArch64::BI__sev:
6789  HintID = 4;
6790  break;
6791  case AArch64::BI__builtin_arm_sevl:
6792  case AArch64::BI__sevl:
6793  HintID = 5;
6794  break;
6795  }
6796 
6797  if (HintID != static_cast<unsigned>(-1)) {
6798  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
6799  return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
6800  }
6801 
6802  if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
6803  Value *Address = EmitScalarExpr(E->getArg(0));
6804  Value *RW = EmitScalarExpr(E->getArg(1));
6805  Value *CacheLevel = EmitScalarExpr(E->getArg(2));
6806  Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
6807  Value *IsData = EmitScalarExpr(E->getArg(4));
6808 
6809  Value *Locality = nullptr;
6810  if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
6811  // Temporal fetch: convert the cache level to an llvm.prefetch locality.
6812  Locality = llvm::ConstantInt::get(Int32Ty,
6813  -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
6814  } else {
6815  // Streaming fetch.
6816  Locality = llvm::ConstantInt::get(Int32Ty, 0);
6817  }
6818 
6819  // FIXME: We need an AArch64-specific LLVM intrinsic if we want to specify
6820  // PLDL3STRM or PLDL2STRM.
6821  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
6822  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
6823  }
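In the __builtin_arm_prefetch block above, a temporal fetch maps the AArch64 cache level onto the generic llvm.prefetch locality as 3 - CacheLevel, while a streaming fetch always uses locality 0 (hence the FIXME about PLDL2STRM/PLDL3STRM). A minimal model of that mapping, with a hypothetical helper name:

// Locality argument passed to llvm.prefetch: 3 - CacheLevel for temporal
// fetches, 0 for streaming fetches, matching the branch above.
unsigned prefetchLocality(unsigned CacheLevel, bool IsStreaming) {
  return IsStreaming ? 0u : 3u - CacheLevel;
}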
6824 
6825  if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
6826  assert((getContext().getTypeSize(E->getType()) == 32) &&
6827  "rbit of unusual size!");
6828  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
6829  return Builder.CreateCall(
6830  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
6831  }
6832  if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
6833  assert((getContext().getTypeSize(E->getType()) == 64) &&
6834  "rbit of unusual size!");
6835  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
6836  return Builder.CreateCall(
6837  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
6838  }
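__builtin_arm_rbit and __builtin_arm_rbit64 are both emitted as calls to the generic llvm.bitreverse intrinsic at the matching width. A portable scalar sketch of the 32-bit case (a hypothetical helper, not the intrinsic itself):

#include <cstdint>

// Reverse the bit order of a 32-bit value, which is what
// llvm.bitreverse.i32 computes for __builtin_arm_rbit.
uint32_t rbit32_model(uint32_t v) {
  uint32_t r = 0;
  for (unsigned i = 0; i < 32; ++i)
    r |= ((v >> i) & 1u) << (31 - i);
  return r;
}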
6839 
6840  if (BuiltinID == AArch64::BI__clear_cache) {
6841  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
6842  const FunctionDecl *FD = E->getDirectCallee();
6843  Value *Ops[2];
6844  for (unsigned i = 0; i < 2; i++)
6845  Ops[i] = EmitScalarExpr(E->getArg(i));
6846  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
6847  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
6848  StringRef Name = FD->getName();
6849  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
6850  }
6851 
6852  if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
6853  BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
6854  getContext().getTypeSize(E->getType()) == 128) {
6855  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
6856  ? Intrinsic::aarch64_ldaxp
6857  : Intrinsic::aarch64_ldxp);
6858 
6859  Value *LdPtr = EmitScalarExpr(E->getArg(0));
6860  Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
6861  "ldxp");
6862 
6863  Value *Val0 = Builder.CreateExtractValue(Val, 1);
6864  Value *Val1 = Builder.CreateExtractValue(Val, 0);
6865  llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
6866  Val0 = Builder.CreateZExt(Val0, Int128Ty);
6867  Val1 = Builder.CreateZExt(Val1, Int128Ty);
6868 
6869  Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
6870  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
6871  Val = Builder.CreateOr(Val, Val1);
6872  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
6873  } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
6874  BuiltinID == AArch64::BI__builtin_arm_ldaex) {
6875  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
6876 
6877  QualType Ty = E->getType();
6878  llvm::Type *RealResTy = ConvertType(Ty);
6879  llvm::Type *PtrTy = llvm::IntegerType::get(
6880  getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
6881  LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
6882 
6883  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
6884  ? Intrinsic::aarch64_ldaxr
6885  : Intrinsic::aarch64_ldxr,
6886  PtrTy);
6887  Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
6888 
6889  if (RealResTy->isPointerTy())
6890  return Builder.CreateIntToPtr(Val, RealResTy);
6891 
6892  llvm::Type *IntResTy = llvm::IntegerType::get(
6893  getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
6894  Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
6895  return Builder.CreateBitCast(Val, RealResTy);
6896  }
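For a 128-bit __builtin_arm_ldrex/ldaex, the block above calls aarch64_ldxp/ldaxp, which returns the two 64-bit halves as a struct, and then rebuilds the 128-bit value by zero-extending both halves, shifting the half extracted at struct index 1 into the top 64 bits, and OR-ing in the other half. A sketch of that reassembly, assuming the compiler supports __int128:

#include <cstdint>

// Rebuild a 128-bit value from the two halves returned by ldxp/ldaxp,
// mirroring the zext + shl-by-64 + or sequence emitted above.
unsigned __int128 combine_ldxp_model(uint64_t lo_half, uint64_t hi_half) {
  return ((unsigned __int128)hi_half << 64) | lo_half;
}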
6897 
6898  if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
6899  BuiltinID == AArch64::BI__builtin_arm_stlex) &&
6900  getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
6901  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
6902  ? Intrinsic::aarch64_stlxp
6903  : Intrinsic::aarch64_stxp);
6904  llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
6905 
6906  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6907  EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
6908 
6909  Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
6910  llvm::Value *Val = Builder.CreateLoad(Tmp);
6911 
6912  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
6913  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
6914  Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
6915  Int8PtrTy);
6916  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
6917  }
6918 
6919  if (BuiltinID == AArch64::BI__builtin_arm_strex ||
6920  BuiltinID == AArch64::BI__builtin_arm_stlex) {
6921  Value *StoreVal = EmitScalarExpr(E->getArg(0));
6922  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
6923 
6924  QualType Ty = E->getArg(0)->getType();
6925  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
6926  getContext().getTypeSize(Ty));
6927  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
6928 
6929  if (StoreVal->getType()->isPointerTy())
6930  StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
6931  else {
6932  llvm::Type *IntTy = llvm::IntegerType::get(
6933  getLLVMContext(),
6934  CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
6935  StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
6936  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
6937  }
6938 
6939  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
6940  ? Intrinsic::aarch64_stlxr
6941  : Intrinsic::aarch64_stxr,
6942  StoreAddr->getType());
6943  return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
6944  }
6945 
6946  if (BuiltinID == AArch64::BI__getReg) {
6947  Expr::EvalResult Result;
6948  if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
6949  llvm_unreachable("Sema will ensure that the parameter is constant");
6950 
6951  llvm::APSInt Value = Result.Val.getInt();
6952  LLVMContext &Context = CGM.getLLVMContext();
6953  std::string Reg = Value == 31 ? "sp" : "x" + Value.toString(10);
6954 
6955  llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
6956  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
6957  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
6958 
6959  llvm::Value *F =
6960  CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
6961  return Builder.CreateCall(F, Metadata);
6962  }
6963 
6964  if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
6965  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
6966  return Builder.CreateCall(F);
6967  }
6968 
6969  if (BuiltinID == AArch64::BI_ReadWriteBarrier)
6970  return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
6971  llvm::SyncScope::SingleThread);
6972 
6973  // CRC32
6974  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
6975  switch (BuiltinID) {
6976  case AArch64::BI__builtin_arm_crc32b:
6977  CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
6978  case AArch64::BI__builtin_arm_crc32cb:
6979  CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
6980  case AArch64::BI__builtin_arm_crc32h:
6981  CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
6982  case AArch64::BI__builtin_arm_crc32ch:
6983  CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
6984  case AArch64::BI__builtin_arm_crc32w:
6985  CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
6986  case AArch64::BI__builtin_arm_crc32cw:
6987  CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
6988  case AArch64::BI__builtin_arm_crc32d:
6989  CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
6990  case AArch64::BI__builtin_arm_crc32cd:
6991  CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
6992  }
6993 
6994  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
6995  Value *Arg0 = EmitScalarExpr(E->getArg(0));
6996  Value *Arg1 = EmitScalarExpr(E->getArg(1));
6997  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
6998 
6999  llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
7000  Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
7001 
7002  return Builder.CreateCall(F, {Arg0, Arg1});
7003  }
7004 
7005  if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
7006  BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
7007  BuiltinID == AArch64::BI__builtin_arm_rsrp ||
7008  BuiltinID == AArch64::BI__builtin_arm_wsr ||
7009  BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
7010  BuiltinID == AArch64::BI__builtin_arm_wsrp) {
7011 
7012  bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
7013  BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
7014  BuiltinID == AArch64::BI__builtin_arm_rsrp;
7015 
7016  bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
7017  BuiltinID == AArch64::BI__builtin_arm_wsrp;
7018 
7019  bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
7020  BuiltinID != AArch64::BI__builtin_arm_wsr;
7021 
7022  llvm::Type *ValueType;
7023  llvm::Type *RegisterType = Int64Ty;
7024  if (IsPointerBuiltin) {
7025  ValueType = VoidPtrTy;
7026  } else if (Is64Bit) {
7027  ValueType = Int64Ty;
7028  } else {
7029  ValueType = Int32Ty;
7030  }
7031 
7032  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
7033  }
7034 
7035  if (BuiltinID == AArch64::BI_ReadStatusReg ||
7036  BuiltinID == AArch64::BI_WriteStatusReg) {
7037  LLVMContext &Context = CGM.getLLVMContext();
7038 
7039  unsigned SysReg =
7040  E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
7041 
7042  std::string SysRegStr;
7043  llvm::raw_string_ostream(SysRegStr) <<
7044  ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
7045  ((SysReg >> 11) & 7) << ":" <<
7046  ((SysReg >> 7) & 15) << ":" <<
7047  ((SysReg >> 3) & 15) << ":" <<
7048  ( SysReg & 7);
7049 
7050  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
7051  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
7052  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
7053 
7054  llvm::Type *RegisterType = Int64Ty;
7055  llvm::Type *ValueType = Int32Ty;
7056  llvm::Type *Types[] = { RegisterType };
7057 
7058  if (BuiltinID == AArch64::BI_ReadStatusReg) {
7059  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
7060  llvm::Value *Call = Builder.CreateCall(F, Metadata);
7061 
7062  return Builder.CreateTrunc(Call, ValueType);
7063  }
7064 
7065  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
7066  llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
7067  ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
7068 
7069  return Builder.CreateCall(F, { Metadata, ArgValue });
7070  }
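_ReadStatusReg and _WriteStatusReg take a packed MSR/MRS immediate, and the block above unpacks it into the "op0:op1:CRn:CRm:op2" string that the read_register/write_register intrinsics expect as metadata. The same field extraction as a standalone sketch (the function name is hypothetical; the shifts and masks are the ones used above):

#include <sstream>
#include <string>

// Unpack the system-register encoding into "op0:op1:CRn:CRm:op2".
std::string sysRegName(unsigned SysReg) {
  std::ostringstream OS;
  OS << ((1 << 1) | ((SysReg >> 14) & 1)) << ":"  // op0 (always 2 or 3)
     << ((SysReg >> 11) & 7)  << ":"              // op1
     << ((SysReg >> 7)  & 15) << ":"              // CRn
     << ((SysReg >> 3)  & 15) << ":"              // CRm
     << ( SysReg        & 7);                     // op2
  return OS.str();
}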
7071 
7072  if (BuiltinID == AArch64::BI_AddressOfReturnAddress) {
7073  llvm::Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
7074  return Builder.CreateCall(F);
7075  }
7076 
7077  // Find out if any arguments are required to be integer constant
7078  // expressions.
7079  unsigned ICEArguments = 0;
7080  ASTContext::GetBuiltinTypeError Error;
7081  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7082  assert(Error == ASTContext::GE_None && "Should not codegen an error");
7083 
7084  llvm::SmallVector<Value*, 4> Ops;
7085  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
7086  if ((ICEArguments & (1 << i)) == 0) {
7087  Ops.push_back(EmitScalarExpr(E->getArg(i)));
7088  } else {
7089  // If this is required to be a constant, constant fold it so that we know
7090  // that the generated intrinsic gets a ConstantInt.
7091  llvm::APSInt Result;
7092  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7093  assert(IsConst && "Constant arg isn't actually constant?");
7094  (void)IsConst;
7095  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7096  }
7097  }
7098 
7099  auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
7100  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
7101  SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
7102 
7103  if (Builtin) {
7104  Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
7105  Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
7106  assert(Result && "SISD intrinsic should have been handled");
7107  return Result;
7108  }
7109 
7110  llvm::APSInt Result;
7111  const Expr *Arg = E->getArg(E->getNumArgs()-1);
7112  NeonTypeFlags Type(0);
7113  if (Arg->isIntegerConstantExpr(Result, getContext()))
7114  // Determine the type of this overloaded NEON intrinsic.
7115  Type = NeonTypeFlags(Result.getZExtValue());
7116 
7117  bool usgn = Type.isUnsigned();
7118  bool quad = Type.isQuad();
7119 
7120  // Handle non-overloaded intrinsics first.
7121  switch (BuiltinID) {
7122  default: break;
7123  case NEON::BI__builtin_neon_vabsh_f16:
7124  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7125  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
7126  case NEON::BI__builtin_neon_vldrq_p128: {
7127  llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
7128  llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
7129  Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
7130  return Builder.CreateAlignedLoad(Int128Ty, Ptr,
7131  CharUnits::fromQuantity(16));
7132  }
7133  case NEON::BI__builtin_neon_vstrq_p128: {
7134  llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
7135  Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
7136  return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
7137  }
7138  case NEON::BI__builtin_neon_vcvts_u32_f32:
7139  case NEON::BI__builtin_neon_vcvtd_u64_f64:
7140  usgn = true;
7141  LLVM_FALLTHROUGH;
7142  case NEON::BI__builtin_neon_vcvts_s32_f32:
7143  case NEON::BI__builtin_neon_vcvtd_s64_f64: {
7144  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7145  bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
7146  llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
7147  llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
7148  Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
7149  if (usgn)
7150  return Builder.CreateFPToUI(Ops[0], InTy);
7151  return Builder.CreateFPToSI(Ops[0], InTy);
7152  }
7153  case NEON::BI__builtin_neon_vcvts_f32_u32:
7154  case NEON::BI__builtin_neon_vcvtd_f64_u64:
7155  usgn = true;
7156  LLVM_FALLTHROUGH;
7157  case NEON::BI__builtin_neon_vcvts_f32_s32:
7158  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
7159  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7160  bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
7161  llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
7162  llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
7163  Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
7164  if (usgn)
7165  return Builder.CreateUIToFP(Ops[0], FTy);
7166  return Builder.CreateSIToFP(Ops[0], FTy);
7167  }
7168  case NEON::BI__builtin_neon_vcvth_f16_u16:
7169  case NEON::BI__builtin_neon_vcvth_f16_u32:
7170  case NEON::BI__builtin_neon_vcvth_f16_u64:
7171  usgn = true;
7172  LLVM_FALLTHROUGH;
7173  case NEON::BI__builtin_neon_vcvth_f16_s16:
7174  case NEON::BI__builtin_neon_vcvth_f16_s32:
7175  case NEON::BI__builtin_neon_vcvth_f16_s64: {
7176  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7177  llvm::Type *FTy = HalfTy;
7178  llvm::Type *InTy;
7179  if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
7180  InTy = Int64Ty;
7181  else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
7182  InTy = Int32Ty;
7183  else
7184  InTy = Int16Ty;
7185  Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
7186  if (usgn)
7187  return Builder.CreateUIToFP(Ops[0], FTy);
7188  return Builder.CreateSIToFP(Ops[0], FTy);
7189  }
7190  case NEON::BI__builtin_neon_vcvth_u16_f16:
7191  usgn = true;
7192  LLVM_FALLTHROUGH;
7193  case NEON::BI__builtin_neon_vcvth_s16_f16: {
7194  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7195  Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
7196  if (usgn)
7197  return Builder.CreateFPToUI(Ops[0], Int16Ty);
7198  return Builder.CreateFPToSI(Ops[0], Int16Ty);
7199  }
7200  case NEON::BI__builtin_neon_vcvth_u32_f16:
7201  usgn = true;
7202  LLVM_FALLTHROUGH;
7203  case NEON::BI__builtin_neon_vcvth_s32_f16: {
7204  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7205  Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
7206  if (usgn)
7207  return Builder.CreateFPToUI(Ops[0], Int32Ty);
7208  return Builder.CreateFPToSI(Ops[0], Int32Ty);
7209  }
7210  case NEON::BI__builtin_neon_vcvth_u64_f16:
7211  usgn = true;
7212  LLVM_FALLTHROUGH;
7213  case NEON::BI__builtin_neon_vcvth_s64_f16: {
7214  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7215  Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
7216  if (usgn)
7217  return Builder.CreateFPToUI(Ops[0], Int64Ty);
7218  return Builder.CreateFPToSI(Ops[0], Int64Ty);
7219  }
7220  case NEON::BI__builtin_neon_vcvtah_u16_f16:
7221  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
7222  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
7223  case NEON::BI__builtin_neon_vcvtph_u16_f16:
7224  case NEON::BI__builtin_neon_vcvtah_s16_f16:
7225  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
7226  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
7227  case NEON::BI__builtin_neon_vcvtph_s16_f16: {
7228  unsigned Int;
7229  llvm::Type* InTy = Int32Ty;
7230  llvm::Type* FTy = HalfTy;
7231  llvm::Type *Tys[2] = {InTy, FTy};
7232  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7233  switch (BuiltinID) {
7234  default: llvm_unreachable("missing builtin ID in switch!");
7235  case NEON::BI__builtin_neon_vcvtah_u16_f16:
7236  Int = Intrinsic::aarch64_neon_fcvtau; break;
7237  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
7238  Int = Intrinsic::aarch64_neon_fcvtmu; break;
7239  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
7240  Int = Intrinsic::aarch64_neon_fcvtnu; break;
7241  case NEON::BI__builtin_neon_vcvtph_u16_f16:
7242  Int = Intrinsic::aarch64_neon_fcvtpu; break;
7243  case NEON::BI__builtin_neon_vcvtah_s16_f16:
7244  Int = Intrinsic::aarch64_neon_fcvtas; break;
7245  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
7246  Int = Intrinsic::aarch64_neon_fcvtms; break;
7247  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
7248  Int = Intrinsic::aarch64_neon_fcvtns; break;
7249  case NEON::BI__builtin_neon_vcvtph_s16_f16:
7250  Int = Intrinsic::aarch64_neon_fcvtps; break;
7251  }
7252  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
7253  return Builder.CreateTrunc(Ops[0], Int16Ty);
7254  }
7255  case NEON::BI__builtin_neon_vcaleh_f16:
7256  case NEON::BI__builtin_neon_vcalth_f16:
7257  case NEON::BI__builtin_neon_vcageh_f16:
7258  case NEON::BI__builtin_neon_vcagth_f16: {
7259  unsigned Int;
7260  llvm::Type* InTy = Int32Ty;
7261  llvm::Type* FTy = HalfTy;
7262  llvm::Type *Tys[2] = {InTy, FTy};
7263  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7264  switch (BuiltinID) {
7265  default: llvm_unreachable("missing builtin ID in switch!");
7266  case NEON::BI__builtin_neon_vcageh_f16:
7267  Int = Intrinsic::aarch64_neon_facge; break;
7268  case NEON::BI__builtin_neon_vcagth_f16:
7269  Int = Intrinsic::aarch64_neon_facgt; break;
7270  case NEON::BI__builtin_neon_vcaleh_f16:
7271  Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
7272  case NEON::BI__builtin_neon_vcalth_f16:
7273  Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
7274  }
7275  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
7276  return Builder.CreateTrunc(Ops[0], Int16Ty);
7277  }
7278  case NEON::BI__builtin_neon_vcvth_n_s16_f16:
7279  case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
7280  unsigned Int;
7281  llvm::Type* InTy = Int32Ty;
7282  llvm::Type* FTy = HalfTy;
7283  llvm::Type *Tys[2] = {InTy, FTy};
7284  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7285  switch (BuiltinID) {
7286  default: llvm_unreachable("missing builtin ID in switch!");
7287  case NEON::BI__builtin_neon_vcvth_n_s16_f16:
7288  Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
7289  case NEON::BI__builtin_neon_vcvth_n_u16_f16:
7290  Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
7291  }
7292  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
7293  return Builder.CreateTrunc(Ops[0], Int16Ty);
7294  }
7295  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
7296  case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
7297  unsigned Int;
7298  llvm::Type* FTy = HalfTy;
7299  llvm::Type* InTy = Int32Ty;
7300  llvm::Type *Tys[2] = {FTy, InTy};
7301  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7302  switch (BuiltinID) {
7303  default: llvm_unreachable("missing builtin ID in switch!");
7304  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
7305  Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
7306  Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
7307  break;
7308  case NEON::BI__builtin_neon_vcvth_n_f16_u16:
7309  Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
7310  Ops[0] = Builder.CreateZExt(Ops[0], InTy);
7311  break;
7312  }
7313  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
7314  }
7315  case NEON::BI__builtin_neon_vpaddd_s64: {
7316  llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
7317  Value *Vec = EmitScalarExpr(E->getArg(0));
7318  // The vector is v2i64, so make sure it's bitcast to that.
7319  Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
7320  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
7321  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
7322  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
7323  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
7324  // Pairwise addition of a v2i64 into a scalar i64.
7325  return Builder.CreateAdd(Op0, Op1, "vpaddd");
7326  }
7327  case NEON::BI__builtin_neon_vpaddd_f64: {
7328  llvm::Type *Ty =
7329  llvm::VectorType::get(DoubleTy, 2);
7330  Value *Vec = EmitScalarExpr(E->getArg(0));
7331  // The vector is v2f64, so make sure it's bitcast to that.
7332  Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
7333  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
7334  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
7335  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
7336  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
7337  // Pairwise addition of a v2f64 into a scalar f64.
7338  return Builder.CreateFAdd(Op0, Op1, "vpaddd");
7339  }
7340  case NEON::BI__builtin_neon_vpadds_f32: {
7341  llvm::Type *Ty =
7342  llvm::VectorType::get(FloatTy, 2);
7343  Value *Vec = EmitScalarExpr(E->getArg(0));
7344  // The vector is v2f32, so make sure it's bitcast to that.
7345  Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
7346  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
7347  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
7348  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
7349  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
7350  // Pairwise addition of a v2f32 into a scalar f32.
7351  return Builder.CreateFAdd(Op0, Op1, "vpaddd");
7352  }
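The three pairwise-add cases above (vpaddd_s64, vpaddd_f64, vpadds_f32) all reduce a single two-element vector by extracting both lanes and adding them with one scalar add/fadd. A scalar model for the f32 variant (hypothetical name, plain arrays standing in for the vector):

// Model of vpadds_f32 above: the two lanes of the v2f32 input are
// extracted and combined with a single scalar fadd.
float vpadds_f32_model(const float vec[2]) {
  return vec[0] + vec[1];
}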
7353  case NEON::BI__builtin_neon_vceqzd_s64:
7354  case NEON::BI__builtin_neon_vceqzd_f64:
7355  case NEON::BI__builtin_neon_vceqzs_f32:
7356  case NEON::BI__builtin_neon_vceqzh_f16:
7357  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7358  return EmitAArch64CompareBuiltinExpr(
7359  Ops[0], ConvertType(E->getCallReturnType(getContext())),
7360  ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
7361  case NEON::BI__builtin_neon_vcgezd_s64:
7362  case NEON::BI__builtin_neon_vcgezd_f64:
7363  case NEON::BI__builtin_neon_vcgezs_f32:
7364  case NEON::BI__builtin_neon_vcgezh_f16:
7365  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7366  return EmitAArch64CompareBuiltinExpr(
7367  Ops[0], ConvertType(E->getCallReturnType(getContext())),
7368  ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
7369  case NEON::BI__builtin_neon_vclezd_s64:
7370  case NEON::BI__builtin_neon_vclezd_f64:
7371  case NEON::BI__builtin_neon_vclezs_f32:
7372  case NEON::BI__builtin_neon_vclezh_f16:
7373  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7374  return EmitAArch64CompareBuiltinExpr(
7375  Ops[0], ConvertType(E->getCallReturnType(getContext())),
7376  ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
7377  case NEON::BI__builtin_neon_vcgtzd_s64:
7378  case NEON::BI__builtin_neon_vcgtzd_f64:
7379  case NEON::BI__builtin_neon_vcgtzs_f32:
7380  case NEON::BI__builtin_neon_vcgtzh_f16:
7381  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7382  return EmitAArch64CompareBuiltinExpr(
7383  Ops[0], ConvertType(E->getCallReturnType(getContext())),
7384  ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
7385  case NEON::BI__builtin_neon_vcltzd_s64:
7386  case NEON::BI__builtin_neon_vcltzd_f64:
7387  case NEON::BI__builtin_neon_vcltzs_f32:
7388  case NEON::BI__builtin_neon_vcltzh_f16:
7389  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7390  return EmitAArch64CompareBuiltinExpr(
7391  Ops[0], ConvertType(E->getCallReturnType(getContext())),
7392  ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
7393 
7394  case NEON::BI__builtin_neon_vceqzd_u64: {
7395  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7396  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
7397  Ops[0] =
7398  Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
7399  return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
7400  }
7401  case NEON::BI__builtin_neon_vceqd_f64:
7402  case NEON::BI__builtin_neon_vcled_f64:
7403  case NEON::BI__builtin_neon_vcltd_f64:
7404  case NEON::BI__builtin_neon_vcged_f64:
7405  case NEON::BI__builtin_neon_vcgtd_f64: {
7406  llvm::CmpInst::Predicate P;
7407  switch (BuiltinID) {
7408  default: llvm_unreachable("missing builtin ID in switch!");
7409  case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
7410  case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
7411  case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
7412  case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
7413  case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
7414  }
7415  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7416  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7417  Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
7418  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
7419  return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
7420  }
7421  case NEON::BI__builtin_neon_vceqs_f32:
7422  case NEON::BI__builtin_neon_vcles_f32:
7423  case NEON::BI__builtin_neon_vclts_f32:
7424  case NEON::BI__builtin_neon_vcges_f32:
7425  case NEON::BI__builtin_neon_vcgts_f32: {
7426  llvm::CmpInst::Predicate P;
7427  switch (BuiltinID) {
7428  default: llvm_unreachable("missing builtin ID in switch!");
7429  case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
7430  case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
7431  case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
7432  case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
7433  case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
7434  }
7435  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7436  Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
7437  Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
7438  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
7439  return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
7440  }
7441  case NEON::BI__builtin_neon_vceqh_f16:
7442  case NEON::BI__builtin_neon_vcleh_f16:
7443  case NEON::BI__builtin_neon_vclth_f16:
7444  case NEON::BI__builtin_neon_vcgeh_f16:
7445  case NEON::BI__builtin_neon_vcgth_f16: {
7446  llvm::CmpInst::Predicate P;
7447  switch (BuiltinID) {
7448  default: llvm_unreachable("missing builtin ID in switch!");
7449  case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
7450  case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
7451  case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
7452  case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
7453  case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
7454  }
7455  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7456  Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
7457  Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
7458  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
7459  return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
7460  }
7461  case NEON::BI__builtin_neon_vceqd_s64:
7462  case NEON::BI__builtin_neon_vceqd_u64:
7463  case NEON::BI__builtin_neon_vcgtd_s64:
7464  case NEON::BI__builtin_neon_vcgtd_u64:
7465  case NEON::BI__builtin_neon_vcltd_s64:
7466  case NEON::BI__builtin_neon_vcltd_u64:
7467  case NEON::BI__builtin_neon_vcged_u64:
7468  case NEON::BI__builtin_neon_vcged_s64:
7469  case NEON::BI__builtin_neon_vcled_u64:
7470  case NEON::BI__builtin_neon_vcled_s64: {
7471  llvm::CmpInst::Predicate P;
7472  switch (BuiltinID) {
7473  default: llvm_unreachable("missing builtin ID in switch!");
7474  case NEON::BI__builtin_neon_vceqd_s64:
7475  case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
7476  case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
7477  case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
7478  case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
7479  case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
7480  case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
7481  case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
7482  case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
7483  case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
7484  }
7485  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7486  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
7487  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
7488  Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
7489  return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
7490  }
7491  case NEON::BI__builtin_neon_vtstd_s64:
7492  case NEON::BI__builtin_neon_vtstd_u64: {
7493  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7494  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
7495  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
7496  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
7497  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
7498  llvm::Constant::getNullValue(Int64Ty));
7499  return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
7500  }
7501  case NEON::BI__builtin_neon_vset_lane_i8:
7502  case NEON::BI__builtin_neon_vset_lane_i16:
7503  case NEON::BI__builtin_neon_vset_lane_i32:
7504  case NEON::BI__builtin_neon_vset_lane_i64:
7505  case NEON::BI__builtin_neon_vset_lane_f32:
7506  case NEON::BI__builtin_neon_vsetq_lane_i8:
7507  case NEON::BI__builtin_neon_vsetq_lane_i16:
7508  case NEON::BI__builtin_neon_vsetq_lane_i32:
7509  case NEON::BI__builtin_neon_vsetq_lane_i64:
7510  case NEON::BI__builtin_neon_vsetq_lane_f32:
7511  Ops.push_back(EmitScalarExpr(E->getArg(2)));
7512  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
7513  case NEON::BI__builtin_neon_vset_lane_f64:
7514  // The vector type needs a cast for the v1f64 variant.
7515  Ops[1] = Builder.CreateBitCast(Ops[1],
7516  llvm::VectorType::get(DoubleTy, 1));
7517  Ops.push_back(EmitScalarExpr(E->getArg(2)));
7518  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
7519  case NEON::BI__builtin_neon_vsetq_lane_f64:
7520  // The vector type needs a cast for the v2f64 variant.
7521  Ops[1] = Builder.CreateBitCast(Ops[1],
7522  llvm::VectorType::get(DoubleTy, 2));
7523  Ops.push_back(EmitScalarExpr(E->getArg(2)));
7524  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
7525 
7526  case NEON::BI__builtin_neon_vget_lane_i8:
7527  case NEON::BI__builtin_neon_vdupb_lane_i8:
7528  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
7529  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7530  "vget_lane");
7531  case NEON::BI__builtin_neon_vgetq_lane_i8:
7532  case NEON::BI__builtin_neon_vdupb_laneq_i8:
7533  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
7534  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7535  "vgetq_lane");
7536  case NEON::BI__builtin_neon_vget_lane_i16:
7537  case NEON::BI__builtin_neon_vduph_lane_i16:
7538  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
7539  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7540  "vget_lane");
7541  case NEON::BI__builtin_neon_vgetq_lane_i16:
7542  case NEON::BI__builtin_neon_vduph_laneq_i16:
7543  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
7544  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7545  "vgetq_lane");
7546  case NEON::BI__builtin_neon_vget_lane_i32:
7547  case NEON::BI__builtin_neon_vdups_lane_i32:
7548  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
7549  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7550  "vget_lane");
7551  case NEON::BI__builtin_neon_vdups_lane_f32:
7552  Ops[0] = Builder.CreateBitCast(Ops[0],
7553  llvm::VectorType::get(FloatTy, 2));
7554  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7555  "vdups_lane");
7556  case NEON::BI__builtin_neon_vgetq_lane_i32:
7557  case NEON::BI__builtin_neon_vdups_laneq_i32:
7558  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
7559  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7560  "vgetq_lane");
7561  case NEON::BI__builtin_neon_vget_lane_i64:
7562  case NEON::BI__builtin_neon_vdupd_lane_i64:
7563  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
7564  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7565  "vget_lane");
7566  case NEON::BI__builtin_neon_vdupd_lane_f64:
7567  Ops[0] = Builder.CreateBitCast(Ops[0],
7568  llvm::VectorType::get(DoubleTy, 1));
7569  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7570  "vdupd_lane");
7571  case NEON::BI__builtin_neon_vgetq_lane_i64:
7572  case NEON::BI__builtin_neon_vdupd_laneq_i64:
7573  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
7574  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7575  "vgetq_lane");
7576  case NEON::BI__builtin_neon_vget_lane_f32:
7577  Ops[0] = Builder.CreateBitCast(Ops[0],
7578  llvm::VectorType::get(FloatTy, 2));
7579  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7580  "vget_lane");
7581  case NEON::BI__builtin_neon_vget_lane_f64:
7582  Ops[0] = Builder.CreateBitCast(Ops[0],
7583  llvm::VectorType::get(DoubleTy, 1));
7584  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7585  "vget_lane");
7586  case NEON::BI__builtin_neon_vgetq_lane_f32:
7587  case NEON::BI__builtin_neon_vdups_laneq_f32:
7588  Ops[0] = Builder.CreateBitCast(Ops[0],
7589  llvm::VectorType::get(FloatTy, 4));
7590  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7591  "vgetq_lane");
7592  case NEON::BI__builtin_neon_vgetq_lane_f64:
7593  case NEON::BI__builtin_neon_vdupd_laneq_f64:
7594  Ops[0] = Builder.CreateBitCast(Ops[0],
7595  llvm::VectorType::get(DoubleTy, 2));
7596  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7597  "vgetq_lane");
7598  case NEON::BI__builtin_neon_vaddh_f16:
7599  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7600  return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
7601  case NEON::BI__builtin_neon_vsubh_f16:
7602  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7603  return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
7604  case NEON::BI__builtin_neon_vmulh_f16:
7605  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7606  return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
7607  case NEON::BI__builtin_neon_vdivh_f16:
7608  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7609  return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
7610  case NEON::BI__builtin_neon_vfmah_f16: {
7611  Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
7612  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7613  return Builder.CreateCall(F,
7614  {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
7615  }
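The vfmah_f16 case above has to reorder its operands because the NEON builtin takes the accumulator first while llvm.fma takes it last. The equivalent arithmetic as a plain expression (a model only, with half approximated by float):

// vfmah(acc, x, y) computes acc + x * y; llvm.fma(x, y, acc) is called with
// the accumulator moved to the last position, as in the case above.
float vfmah_model(float acc, float x, float y) {
  return x * y + acc;
}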
7616  case NEON::BI__builtin_neon_vfmsh_f16: {
7617  Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
7618  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
7619  Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
7620  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7621  return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
7622  }
7623  case NEON::BI__builtin_neon_vaddd_s64:
7624  case NEON::BI__builtin_neon_vaddd_u64:
7625  return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
7626  case NEON::BI__builtin_neon_vsubd_s64:
7627  case NEON::BI__builtin_neon_vsubd_u64:
7628  return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
7629  case NEON::BI__builtin_neon_vqdmlalh_s16:
7630  case NEON::BI__builtin_neon_vqdmlslh_s16: {
7631  SmallVector<Value *, 2> ProductOps;
7632  ProductOps.push_back(vectorWrapScalar16(Ops[1]));
7633  ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
7634  llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
7635  Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
7636  ProductOps, "vqdmlXl");
7637  Constant *CI = ConstantInt::get(SizeTy, 0);
7638  Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
7639 
7640  unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
7641  ? Intrinsic::aarch64_neon_sqadd
7642  : Intrinsic::aarch64_neon_sqsub;
7643  return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
7644  }
7645  case NEON::BI__builtin_neon_vqshlud_n_s64: {
7646  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7647  Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
7648  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
7649  Ops, "vqshlu_n");
7650  }
7651  case NEON::BI__builtin_neon_vqshld_n_u64:
7652  case NEON::BI__builtin_neon_vqshld_n_s64: {
7653  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
7654  ? Intrinsic::aarch64_neon_uqshl
7655  : Intrinsic::aarch64_neon_sqshl;
7656  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7657  Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
7658  return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
7659  }
7660  case NEON::BI__builtin_neon_vrshrd_n_u64:
7661  case NEON::BI__builtin_neon_vrshrd_n_s64: {
7662  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
7663  ? Intrinsic::aarch64_neon_urshl
7664  : Intrinsic::aarch64_neon_srshl;
7665  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7666  int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
7667  Ops[1] = ConstantInt::get(Int64Ty, -SV);
7668  return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
7669  }
7670  case NEON::BI__builtin_neon_vrsrad_n_u64:
7671  case NEON::BI__builtin_neon_vrsrad_n_s64: {
7672  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
7673  ? Intrinsic::aarch64_neon_urshl
7674  : Intrinsic::aarch64_neon_srshl;
7675  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
7676  Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
7677  Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
7678  {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
7679  return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
7680  }
7681  case NEON::BI__builtin_neon_vshld_n_s64:
7682  case NEON::BI__builtin_neon_vshld_n_u64: {
7683  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7684  return Builder.CreateShl(
7685  Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
7686  }
7687  case NEON::BI__builtin_neon_vshrd_n_s64: {
7688  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7689  return Builder.CreateAShr(
7690  Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
7691  Amt->getZExtValue())),
7692  "shrd_n");
7693  }
7694  case NEON::BI__builtin_neon_vshrd_n_u64: {
7695  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7696  uint64_t ShiftAmt = Amt->getZExtValue();
7697  // Right-shifting an unsigned value by its size yields 0.
7698  if (ShiftAmt == 64)
7699  return ConstantInt::get(Int64Ty, 0);
7700  return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
7701  "shrd_n");
7702  }
7703  case NEON::BI__builtin_neon_vsrad_n_s64: {
7704  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
7705  Ops[1] = Builder.CreateAShr(
7706  Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
7707  Amt->getZExtValue())),
7708  "shrd_n");
7709  return Builder.CreateAdd(Ops[0], Ops[1]);
7710  }
7711  case NEON::BI__builtin_neon_vsrad_n_u64: {
7712  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
7713  uint64_t ShiftAmt = Amt->getZExtValue();
7714  // Right-shifting an unsigned value by its size yields 0.
7715  // As Op + 0 = Op, return Ops[0] directly.
7716  if (ShiftAmt == 64)
7717  return Ops[0];
7718  Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
7719  "shrd_n");
7720  return Builder.CreateAdd(Ops[0], Ops[1]);
7721  }
7722  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
7723  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
7724  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
7725  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
7726  Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
7727  "lane");
7728  SmallVector<Value *, 2> ProductOps;
7729  ProductOps.push_back(vectorWrapScalar16(Ops[1]));
7730  ProductOps.push_back(vectorWrapScalar16(Ops[2]));
7731  llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
7732  Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
7733  ProductOps, "vqdmlXl");
7734  Constant *CI = ConstantInt::get(SizeTy, 0);
7735  Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
7736  Ops.pop_back();
7737 
7738  unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
7739  BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
7740  ? Intrinsic::aarch64_neon_sqadd
7741  : Intrinsic::aarch64_neon_sqsub;
7742  return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
7743  }
7744  case NEON::BI__builtin_neon_vqdmlals_s32:
7745  case NEON::BI__builtin_neon_vqdmlsls_s32: {
7746  SmallVector<Value *, 2> ProductOps;
7747  ProductOps.push_back(Ops[1]);
7748  ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
7749  Ops[1] =
7750  EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
7751  ProductOps, "vqdmlXl");
7752 
7753  unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
7754  ? Intrinsic::aarch64_neon_sqadd
7755  : Intrinsic::aarch64_neon_sqsub;
7756  return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
7757  }
7758  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
7759  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
7760  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
7761  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
7762  Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
7763  "lane");
7764  SmallVector<Value *, 2> ProductOps;
7765  ProductOps.push_back(Ops[1]);
7766  ProductOps.push_back(Ops[2]);
7767  Ops[1] =
7768  EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
7769  ProductOps, "vqdmlXl");
7770  Ops.pop_back();
7771 
7772  unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
7773  BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
7774  ? Intrinsic::aarch64_neon_sqadd
7775  : Intrinsic::aarch64_neon_sqsub;
7776  return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
7777  }
7778  }
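A note on the scalar compare builtins handled in the switch above (vceqd_s64, vcgtd_u64, vtstd_u64, and friends): the i1 result of the compare is sign-extended to the element width, so the builtin returns an all-ones mask on true and zero on false; vtstd additionally ANDs the inputs before comparing against zero. A scalar sketch of that behaviour (hypothetical names):

#include <cstdint>

// All-ones / all-zeros result produced by sign-extending the compare bit.
uint64_t vceqd_model(int64_t a, int64_t b)   { return (a == b) ? ~0ull : 0ull; }
uint64_t vtstd_model(uint64_t a, uint64_t b) { return (a & b)  ? ~0ull : 0ull; }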
7779 
7780  llvm::VectorType *VTy = GetNeonType(this, Type);
7781  llvm::Type *Ty = VTy;
7782  if (!Ty)
7783  return nullptr;
7784 
7785  // Not all intrinsics handled by the common case work for AArch64 yet, so only
7786  // defer to common code if it's been added to our special map.
7787  Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
7788  AArch64SIMDIntrinsicsProvenSorted);
7789 
7790  if (Builtin)
7791  return EmitCommonNeonBuiltinExpr(
7792  Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
7793  Builtin->NameHint, Builtin->TypeModifier, E, Ops,
7794  /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
7795 
7796  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
7797  return V;
7798 
7799  unsigned Int;
7800  switch (BuiltinID) {
7801  default: return nullptr;
7802  case NEON::BI__builtin_neon_vbsl_v:
7803  case NEON::BI__builtin_neon_vbslq_v: {
7804  llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
7805  Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
7806  Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
7807  Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
7808 
7809  Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
7810  Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
7811  Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
7812  return Builder.CreateBitCast(Ops[0], Ty);
7813  }
7814  case NEON::BI__builtin_neon_vfma_lane_v:
7815  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
7816  // The ARM builtins (and instructions) have the addend as the first
7817  // operand, but the 'fma' intrinsics have it last. Swap it around here.
7818  Value *Addend = Ops[0];
7819  Value *Multiplicand = Ops[1];
7820  Value *LaneSource = Ops[2];
7821  Ops[0] = Multiplicand;
7822  Ops[1] = LaneSource;
7823  Ops[2] = Addend;
7824 
7825  // Now adjust things to handle the lane access.
7826  llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
7827  llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
7828  VTy;
7829  llvm::Constant *cst = cast<Constant>(Ops[3]);
7830  Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
7831  Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
7832  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
7833 
7834  Ops.pop_back();
7835  Int = Intrinsic::fma;
7836  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
7837  }
7838  case NEON::BI__builtin_neon_vfma_laneq_v: {
7839  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
7840  // v1f64 fma should be mapped to Neon scalar f64 fma
7841  if (VTy && VTy->getElementType() == DoubleTy) {
7842  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7843  Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
7844  llvm::Type *VTy = GetNeonType(this,
7845  NeonTypeFlags(NeonTypeFlags::Float64, false, true));
7846  Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
7847  Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
7848  Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
7849  Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
7850  return Builder.CreateBitCast(Result, Ty);
7851  }
7852  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
7853  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7854  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7855 
7856  llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
7857  VTy->getNumElements() * 2);
7858  Ops[2] = Builder.CreateBitCast(Ops[2], STy);
7859  Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
7860  cast<ConstantInt>(Ops[3]));
7861  Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
7862 
7863  return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
7864  }
7865  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
7866  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
7867  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7868  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7869 
7870  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7871  Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
7872  return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
7873  }
7874  case NEON::BI__builtin_neon_vfmah_lane_f16:
7875  case NEON::BI__builtin_neon_vfmas_lane_f32:
7876  case NEON::BI__builtin_neon_vfmah_laneq_f16:
7877  case NEON::BI__builtin_neon_vfmas_laneq_f32:
7878  case NEON::BI__builtin_neon_vfmad_lane_f64:
7879  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
7880  Ops.push_back(EmitScalarExpr(E->getArg(3)));
7881  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
7882  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
7883  Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
7884  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
7885  }
7886  case NEON::BI__builtin_neon_vmull_v:
7887  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7888  Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
7889  if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
7890  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
7891  case NEON::BI__builtin_neon_vmax_v:
7892  case NEON::BI__builtin_neon_vmaxq_v:
7893  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7894  Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
7895  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
7896  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
7897  case NEON::BI__builtin_neon_vmaxh_f16: {
7898  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7899  Int = Intrinsic::aarch64_neon_fmax;
7900  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
7901  }
7902  case NEON::BI__builtin_neon_vmin_v:
7903  case NEON::BI__builtin_neon_vminq_v:
7904  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7905  Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
7906  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
7907  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
7908  case NEON::BI__builtin_neon_vminh_f16: {
7909  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7910  Int = Intrinsic::aarch64_neon_fmin;
7911  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
7912  }
7913  case NEON::BI__builtin_neon_vabd_v:
7914  case NEON::BI__builtin_neon_vabdq_v:
7915  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7916  Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
7917  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
7918  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
7919  case NEON::BI__builtin_neon_vpadal_v:
7920  case NEON::BI__builtin_neon_vpadalq_v: {
7921  unsigned ArgElts = VTy->getNumElements();
7922  llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
7923  unsigned BitWidth = EltTy->getBitWidth();
7924  llvm::Type *ArgTy = llvm::VectorType::get(
7925  llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
7926  llvm::Type* Tys[2] = { VTy, ArgTy };
7927  Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
7928  SmallVector<llvm::Value*, 1> TmpOps;
7929  TmpOps.push_back(Ops[1]);
7930  Function *F = CGM.getIntrinsic(Int, Tys);
7931  llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
7932  llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
7933  return Builder.CreateAdd(tmp, addend);
7934  }
7935  case NEON::BI__builtin_neon_vpmin_v:
7936  case NEON::BI__builtin_neon_vpminq_v:
7937  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7938  Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
7939  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
7940  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
7941  case NEON::BI__builtin_neon_vpmax_v:
7942  case NEON::BI__builtin_neon_vpmaxq_v:
7943  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7944  Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
7945  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
7946  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
7947  case NEON::BI__builtin_neon_vminnm_v:
7948  case NEON::BI__builtin_neon_vminnmq_v:
7949  Int = Intrinsic::aarch64_neon_fminnm;
7950  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
7951  case NEON::BI__builtin_neon_vminnmh_f16:
7952  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7953  Int = Intrinsic::aarch64_neon_fminnm;
7954  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
7955  case NEON::BI__builtin_neon_vmaxnm_v:
7956  case NEON::BI__builtin_neon_vmaxnmq_v:
7957  Int = Intrinsic::aarch64_neon_fmaxnm;
7958  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
7959  case NEON::BI__builtin_neon_vmaxnmh_f16:
7960  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7961  Int = Intrinsic::aarch64_neon_fmaxnm;
7962  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
7963  case NEON::BI__builtin_neon_vrecpss_f32: {
7964  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7965  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
7966  Ops, "vrecps");
7967  }
7968  case NEON::BI__builtin_neon_vrecpsd_f64:
7969  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7970  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
7971  Ops, "vrecps");
7972  case NEON::BI__builtin_neon_vrecpsh_f16:
7973  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7974  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
7975  Ops, "vrecps");
7976  case NEON::BI__builtin_neon_vqshrun_n_v:
7977  Int = Intrinsic::aarch64_neon_sqshrun;
7978  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
7979  case NEON::BI__builtin_neon_vqrshrun_n_v:
7980  Int = Intrinsic::aarch64_neon_sqrshrun;
7981  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
7982  case NEON::BI__builtin_neon_vqshrn_n_v:
7983  Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
7984  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
7985  case NEON::BI__builtin_neon_vrshrn_n_v:
7986  Int = Intrinsic::aarch64_neon_rshrn;
7987  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
7988  case NEON::BI__builtin_neon_vqrshrn_n_v:
7989  Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
7990  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
7991  case NEON::BI__builtin_neon_vrndah_f16: {
7992  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7993  Int = Intrinsic::round;
7994  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
7995  }
7996  case NEON::BI__builtin_neon_vrnda_v:
7997  case NEON::BI__builtin_neon_vrndaq_v: {
7998  Int = Intrinsic::round;
7999  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
8000  }
8001  case NEON::BI__builtin_neon_vrndih_f16: {
8002  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8003  Int = Intrinsic::nearbyint;
8004  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
8005  }
8006  case NEON::BI__builtin_neon_vrndmh_f16: {
8007  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8008  Int = Intrinsic::floor;
8009  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
8010  }
8011  case NEON::BI__builtin_neon_vrndm_v:
8012  case NEON::BI__builtin_neon_vrndmq_v: {
8013  Int = Intrinsic::floor;
8014  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
8015  }
8016  case NEON::BI__builtin_neon_vrndnh_f16: {
8017  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8018  Int = Intrinsic::aarch64_neon_frintn;
8019  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
8020  }
8021  case NEON::BI__builtin_neon_vrndn_v:
8022  case NEON::BI__builtin_neon_vrndnq_v: {
8023  Int = Intrinsic::aarch64_neon_frintn;
8024  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
8025  }
8026  case NEON::BI__builtin_neon_vrndns_f32: {
8027  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8028  Int = Intrinsic::aarch64_neon_frintn;
8029  return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
8030  }
8031  case NEON::BI__builtin_neon_vrndph_f16: {
8032  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8033  Int = Intrinsic::ceil;
8034  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
8035  }
8036  case NEON::BI__builtin_neon_vrndp_v:
8037  case NEON::BI__builtin_neon_vrndpq_v: {
8038  Int = Intrinsic::ceil;
8039  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
8040  }
8041  case NEON::BI__builtin_neon_vrndxh_f16: {
8042  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8043  Int = Intrinsic::rint;
8044  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
8045  }
8046  case NEON::BI__builtin_neon_vrndx_v:
8047  case NEON::BI__builtin_neon_vrndxq_v: {
8048  Int = Intrinsic::rint;
8049  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
8050  }
8051  case NEON::BI__builtin_neon_vrndh_f16: {
8052  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8053  Int = Intrinsic::trunc;
8054  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
8055  }
8056  case NEON::BI__builtin_neon_vrnd_v:
8057  case NEON::BI__builtin_neon_vrndq_v: {
8058  Int = Intrinsic::trunc;
8059  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
8060  }
8061  case NEON::BI__builtin_neon_vcvt_f64_v:
8062  case NEON::BI__builtin_neon_vcvtq_f64_v:
8063  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8064  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
8065  return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8066  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8067  case NEON::BI__builtin_neon_vcvt_f64_f32: {
8068  assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
8069  "unexpected vcvt_f64_f32 builtin");
8070  NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
8071  Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
8072 
8073  return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
8074  }
8075  case NEON::BI__builtin_neon_vcvt_f32_f64: {
8076  assert(Type.getEltType() == NeonTypeFlags::Float32 &&
8077  "unexpected vcvt_f32_f64 builtin");
8078  NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
8079  Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
8080 
8081  return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
8082  }
8083  case NEON::BI__builtin_neon_vcvt_s32_v:
8084  case NEON::BI__builtin_neon_vcvt_u32_v:
8085  case NEON::BI__builtin_neon_vcvt_s64_v:
8086  case NEON::BI__builtin_neon_vcvt_u64_v:
8087  case NEON::BI__builtin_neon_vcvt_s16_v:
8088  case NEON::BI__builtin_neon_vcvt_u16_v:
8089  case NEON::BI__builtin_neon_vcvtq_s32_v:
8090  case NEON::BI__builtin_neon_vcvtq_u32_v:
8091  case NEON::BI__builtin_neon_vcvtq_s64_v:
8092  case NEON::BI__builtin_neon_vcvtq_u64_v:
8093  case NEON::BI__builtin_neon_vcvtq_s16_v:
8094  case NEON::BI__builtin_neon_vcvtq_u16_v: {
8095  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
8096  if (usgn)
8097  return Builder.CreateFPToUI(Ops[0], Ty);
8098  return Builder.CreateFPToSI(Ops[0], Ty);
8099  }
8100  case NEON::BI__builtin_neon_vcvta_s16_v:
8101  case NEON::BI__builtin_neon_vcvta_u16_v:
8102  case NEON::BI__builtin_neon_vcvta_s32_v:
8103  case NEON::BI__builtin_neon_vcvtaq_s16_v:
8104  case NEON::BI__builtin_neon_vcvtaq_s32_v:
8105  case NEON::BI__builtin_neon_vcvta_u32_v:
8106  case NEON::BI__builtin_neon_vcvtaq_u16_v:
8107  case NEON::BI__builtin_neon_vcvtaq_u32_v:
8108  case NEON::BI__builtin_neon_vcvta_s64_v:
8109  case NEON::BI__builtin_neon_vcvtaq_s64_v:
8110  case NEON::BI__builtin_neon_vcvta_u64_v:
8111  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
8112  Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
8113  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8114  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
8115  }
8116  case NEON::BI__builtin_neon_vcvtm_s16_v:
8117  case NEON::BI__builtin_neon_vcvtm_s32_v:
8118  case NEON::BI__builtin_neon_vcvtmq_s16_v:
8119  case NEON::BI__builtin_neon_vcvtmq_s32_v:
8120  case NEON::BI__builtin_neon_vcvtm_u16_v:
8121  case NEON::BI__builtin_neon_vcvtm_u32_v:
8122  case NEON::BI__builtin_neon_vcvtmq_u16_v:
8123  case NEON::BI__builtin_neon_vcvtmq_u32_v:
8124  case NEON::BI__builtin_neon_vcvtm_s64_v:
8125  case NEON::BI__builtin_neon_vcvtmq_s64_v:
8126  case NEON::BI__builtin_neon_vcvtm_u64_v:
8127  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
8128  Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
8129  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8130  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
8131  }
8132  case NEON::BI__builtin_neon_vcvtn_s16_v:
8133  case NEON::BI__builtin_neon_vcvtn_s32_v:
8134  case NEON::BI__builtin_neon_vcvtnq_s16_v:
8135  case NEON::BI__builtin_neon_vcvtnq_s32_v:
8136  case NEON::BI__builtin_neon_vcvtn_u16_v:
8137  case NEON::BI__builtin_neon_vcvtn_u32_v:
8138  case NEON::BI__builtin_neon_vcvtnq_u16_v:
8139  case NEON::BI__builtin_neon_vcvtnq_u32_v:
8140  case NEON::BI__builtin_neon_vcvtn_s64_v:
8141  case NEON::BI__builtin_neon_vcvtnq_s64_v:
8142  case NEON::BI__builtin_neon_vcvtn_u64_v:
8143  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
8144  Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
8145  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8146  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
8147  }
8148  case NEON::BI__builtin_neon_vcvtp_s16_v:
8149  case NEON::BI__builtin_neon_vcvtp_s32_v:
8150  case NEON::BI__builtin_neon_vcvtpq_s16_v:
8151  case NEON::BI__builtin_neon_vcvtpq_s32_v:
8152  case NEON::BI__builtin_neon_vcvtp_u16_v:
8153  case NEON::BI__builtin_neon_vcvtp_u32_v:
8154  case NEON::BI__builtin_neon_vcvtpq_u16_v:
8155  case NEON::BI__builtin_neon_vcvtpq_u32_v:
8156  case NEON::BI__builtin_neon_vcvtp_s64_v:
8157  case NEON::BI__builtin_neon_vcvtpq_s64_v:
8158  case NEON::BI__builtin_neon_vcvtp_u64_v:
8159  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
8160  Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
8161  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8162  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
8163  }
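// [Editorial note: background, not part of the original source.] The
// fcvt{a,m,n,p} intrinsic families chosen above are the AArch64
// float-to-integer conversions with explicit rounding: 'a' rounds to nearest
// with ties away from zero, 'm' rounds toward minus infinity, 'n' rounds to
// nearest with ties to even, and 'p' rounds toward plus infinity.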
8164  case NEON::BI__builtin_neon_vmulx_v:
8165  case NEON::BI__builtin_neon_vmulxq_v: {
8166  Int = Intrinsic::aarch64_neon_fmulx;
8167  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
8168  }
8169  case NEON::BI__builtin_neon_vmulxh_lane_f16:
8170  case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
8171  // vmulx_lane should be mapped to Neon scalar mulx after
8172  // extracting the scalar element
8173  Ops.push_back(EmitScalarExpr(E->getArg(2)));
8174  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
8175  Ops.pop_back();
8176  Int = Intrinsic::aarch64_neon_fmulx;
8177  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
8178  }
8179  case NEON::BI__builtin_neon_vmul_lane_v:
8180  case NEON::BI__builtin_neon_vmul_laneq_v: {
8181  // v1f64 vmul_lane should be mapped to Neon scalar mul lane
8182  bool Quad = false;
8183  if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
8184  Quad = true;
8185  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
8186  llvm::Type *VTy = GetNeonType(this,
8187  NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
8188  Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
8189  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
8190  Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
8191  return Builder.CreateBitCast(Result, Ty);
8192  }
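// [Editorial note: not part of the original source.] For these v1f64 forms
// the lane is pulled out with extractelement and multiplied as a scalar
// double; the _laneq_ variant differs only in reading the lane from a
// 2 x double source (Quad == true).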
8193  case NEON::BI__builtin_neon_vnegd_s64:
8194  return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
8195  case NEON::BI__builtin_neon_vnegh_f16:
8196  return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
8197  case NEON::BI__builtin_neon_vpmaxnm_v:
8198  case NEON::BI__builtin_neon_vpmaxnmq_v: {
8199  Int = Intrinsic::aarch64_neon_fmaxnmp;
8200  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
8201  }
8202  case NEON::BI__builtin_neon_vpminnm_v:
8203  case NEON::BI__builtin_neon_vpminnmq_v: {
8204  Int = Intrinsic::aarch64_neon_fminnmp;
8205  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
8206  }
8207  case NEON::BI__builtin_neon_vsqrth_f16: {
8208  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8209  Int = Intrinsic::sqrt;
8210  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
8211  }
8212  case NEON::BI__builtin_neon_vsqrt_v:
8213  case NEON::BI__builtin_neon_vsqrtq_v: {
8214  Int = Intrinsic::sqrt;
8215  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8216  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
8217  }
8218  case NEON::BI__builtin_neon_vrbit_v:
8219  case NEON::BI__builtin_neon_vrbitq_v: {
8220  Int = Intrinsic::aarch64_neon_rbit;
8221  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
8222  }
8223  case NEON::BI__builtin_neon_vaddv_u8:
8224  // FIXME: These are handled by the AArch64 scalar code.
8225  usgn = true;
8226  LLVM_FALLTHROUGH;
8227  case NEON::BI__builtin_neon_vaddv_s8: {
8228  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
8229  Ty = Int32Ty;
8230  VTy = llvm::VectorType::get(Int8Ty, 8);
8231  llvm::Type *Tys[2] = { Ty, VTy };
8232  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8233  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
8234  return Builder.CreateTrunc(Ops[0], Int8Ty);
8235  }
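// [Editorial note: not part of the original source.] The across-vector
// reductions in this run of cases (vaddv, vmaxv, vminv, vaddlv, ...) share
// one pattern: call the matching aarch64.neon reduction intrinsic, which is
// overloaded on a wider scalar result type (Int32Ty here), then truncate
// back to the narrower type the builtin returns.  Variants whose result is
// already the intrinsic's natural width (e.g. vaddlv_u16 below) return the
// call directly.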
8236  case NEON::BI__builtin_neon_vaddv_u16:
8237  usgn = true;
8238  LLVM_FALLTHROUGH;
8239  case NEON::BI__builtin_neon_vaddv_s16: {
8240  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
8241  Ty = Int32Ty;
8242  VTy = llvm::VectorType::get(Int16Ty, 4);
8243  llvm::Type *Tys[2] = { Ty, VTy };
8244  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8245  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
8246  return Builder.CreateTrunc(Ops[0], Int16Ty);
8247  }
8248  case NEON::BI__builtin_neon_vaddvq_u8:
8249  usgn = true;
8250  LLVM_FALLTHROUGH;
8251  case NEON::BI__builtin_neon_vaddvq_s8: {
8252  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
8253  Ty = Int32Ty;
8254  VTy = llvm::VectorType::get(Int8Ty, 16);
8255  llvm::Type *Tys[2] = { Ty, VTy };
8256  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8257  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
8258  return Builder.CreateTrunc(Ops[0], Int8Ty);
8259  }
8260  case NEON::BI__builtin_neon_vaddvq_u16:
8261  usgn = true;
8262  LLVM_FALLTHROUGH;
8263  case NEON::BI__builtin_neon_vaddvq_s16: {
8264  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
8265  Ty = Int32Ty;
8266  VTy = llvm::VectorType::get(Int16Ty, 8);
8267  llvm::Type *Tys[2] = { Ty, VTy };
8268  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8269  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
8270  return Builder.CreateTrunc(Ops[0], Int16Ty);
8271  }
8272  case NEON::BI__builtin_neon_vmaxv_u8: {
8273  Int = Intrinsic::aarch64_neon_umaxv;
8274  Ty = Int32Ty;
8275  VTy = llvm::VectorType::get(Int8Ty, 8);
8276  llvm::Type *Tys[2] = { Ty, VTy };
8277  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8278  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8279  return Builder.CreateTrunc(Ops[0], Int8Ty);
8280  }
8281  case NEON::BI__builtin_neon_vmaxv_u16: {
8282  Int = Intrinsic::aarch64_neon_umaxv;
8283  Ty = Int32Ty;
8284  VTy = llvm::VectorType::get(Int16Ty, 4);
8285  llvm::Type *Tys[2] = { Ty, VTy };
8286  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8287  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8288  return Builder.CreateTrunc(Ops[0], Int16Ty);
8289  }
8290  case NEON::BI__builtin_neon_vmaxvq_u8: {
8291  Int = Intrinsic::aarch64_neon_umaxv;
8292  Ty = Int32Ty;
8293  VTy = llvm::VectorType::get(Int8Ty, 16);
8294  llvm::Type *Tys[2] = { Ty, VTy };
8295  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8296  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8297  return Builder.CreateTrunc(Ops[0], Int8Ty);
8298  }
8299  case NEON::BI__builtin_neon_vmaxvq_u16: {
8300  Int = Intrinsic::aarch64_neon_umaxv;
8301  Ty = Int32Ty;
8302  VTy = llvm::VectorType::get(Int16Ty, 8);
8303  llvm::Type *Tys[2] = { Ty, VTy };
8304  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8305  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8306  return Builder.CreateTrunc(Ops[0], Int16Ty);
8307  }
8308  case NEON::BI__builtin_neon_vmaxv_s8: {
8309  Int = Intrinsic::aarch64_neon_smaxv;
8310  Ty = Int32Ty;
8311  VTy = llvm::VectorType::get(Int8Ty, 8);
8312  llvm::Type *Tys[2] = { Ty, VTy };
8313  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8314  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8315  return Builder.CreateTrunc(Ops[0], Int8Ty);
8316  }
8317  case NEON::BI__builtin_neon_vmaxv_s16: {
8318  Int = Intrinsic::aarch64_neon_smaxv;
8319  Ty = Int32Ty;
8320  VTy = llvm::VectorType::get(Int16Ty, 4);
8321  llvm::Type *Tys[2] = { Ty, VTy };
8322  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8323  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8324  return Builder.CreateTrunc(Ops[0], Int16Ty);
8325  }
8326  case NEON::BI__builtin_neon_vmaxvq_s8: {
8327  Int = Intrinsic::aarch64_neon_smaxv;
8328  Ty = Int32Ty;
8329  VTy = llvm::VectorType::get(Int8Ty, 16);
8330  llvm::Type *Tys[2] = { Ty, VTy };
8331  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8332  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8333  return Builder.CreateTrunc(Ops[0], Int8Ty);
8334  }
8335  case NEON::BI__builtin_neon_vmaxvq_s16: {
8336  Int = Intrinsic::aarch64_neon_smaxv;
8337  Ty = Int32Ty;
8338  VTy = llvm::VectorType::get(Int16Ty, 8);
8339  llvm::Type *Tys[2] = { Ty, VTy };
8340  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8341  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8342  return Builder.CreateTrunc(Ops[0], Int16Ty);
8343  }
8344  case NEON::BI__builtin_neon_vmaxv_f16: {
8345  Int = Intrinsic::aarch64_neon_fmaxv;
8346  Ty = HalfTy;
8347  VTy = llvm::VectorType::get(HalfTy, 4);
8348  llvm::Type *Tys[2] = { Ty, VTy };
8349  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8350  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8351  return Builder.CreateTrunc(Ops[0], HalfTy);
8352  }
8353  case NEON::BI__builtin_neon_vmaxvq_f16: {
8354  Int = Intrinsic::aarch64_neon_fmaxv;
8355  Ty = HalfTy;
8356  VTy = llvm::VectorType::get(HalfTy, 8);
8357  llvm::Type *Tys[2] = { Ty, VTy };
8358  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8359  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8360  return Builder.CreateTrunc(Ops[0], HalfTy);
8361  }
8362  case NEON::BI__builtin_neon_vminv_u8: {
8363  Int = Intrinsic::aarch64_neon_uminv;
8364  Ty = Int32Ty;
8365  VTy = llvm::VectorType::get(Int8Ty, 8);
8366  llvm::Type *Tys[2] = { Ty, VTy };
8367  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8368  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8369  return Builder.CreateTrunc(Ops[0], Int8Ty);
8370  }
8371  case NEON::BI__builtin_neon_vminv_u16: {
8372  Int = Intrinsic::aarch64_neon_uminv;
8373  Ty = Int32Ty;
8374  VTy = llvm::VectorType::get(Int16Ty, 4);
8375  llvm::Type *Tys[2] = { Ty, VTy };
8376  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8377  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8378  return Builder.CreateTrunc(Ops[0], Int16Ty);
8379  }
8380  case NEON::BI__builtin_neon_vminvq_u8: {
8381  Int = Intrinsic::aarch64_neon_uminv;
8382  Ty = Int32Ty;
8383  VTy = llvm::VectorType::get(Int8Ty, 16);
8384  llvm::Type *Tys[2] = { Ty, VTy };
8385  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8386  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8387  return Builder.CreateTrunc(Ops[0], Int8Ty);
8388  }
8389  case NEON::BI__builtin_neon_vminvq_u16: {
8390  Int = Intrinsic::aarch64_neon_uminv;
8391  Ty = Int32Ty;
8392  VTy = llvm::VectorType::get(Int16Ty, 8);
8393  llvm::Type *Tys[2] = { Ty, VTy };
8394  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8395  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8396  return Builder.CreateTrunc(Ops[0], Int16Ty);
8397  }
8398  case NEON::BI__builtin_neon_vminv_s8: {
8399  Int = Intrinsic::aarch64_neon_sminv;
8400  Ty = Int32Ty;
8401  VTy = llvm::VectorType::get(Int8Ty, 8);
8402  llvm::Type *Tys[2] = { Ty, VTy };
8403  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8404  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8405  return Builder.CreateTrunc(Ops[0], Int8Ty);
8406  }
8407  case NEON::BI__builtin_neon_vminv_s16: {
8408  Int = Intrinsic::aarch64_neon_sminv;
8409  Ty = Int32Ty;
8410  VTy = llvm::VectorType::get(Int16Ty, 4);
8411  llvm::Type *Tys[2] = { Ty, VTy };
8412  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8413  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8414  return Builder.CreateTrunc(Ops[0], Int16Ty);
8415  }
8416  case NEON::BI__builtin_neon_vminvq_s8: {
8417  Int = Intrinsic::aarch64_neon_sminv;
8418  Ty = Int32Ty;
8419  VTy = llvm::VectorType::get(Int8Ty, 16);
8420  llvm::Type *Tys[2] = { Ty, VTy };
8421  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8422  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8423  return Builder.CreateTrunc(Ops[0], Int8Ty);
8424  }
8425  case NEON::BI__builtin_neon_vminvq_s16: {
8426  Int = Intrinsic::aarch64_neon_sminv;
8427  Ty = Int32Ty;
8428  VTy = llvm::VectorType::get(Int16Ty, 8);
8429  llvm::Type *Tys[2] = { Ty, VTy };
8430  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8431  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8432  return Builder.CreateTrunc(Ops[0], Int16Ty);
8433  }
8434  case NEON::BI__builtin_neon_vminv_f16: {
8435  Int = Intrinsic::aarch64_neon_fminv;
8436  Ty = HalfTy;
8437  VTy = llvm::VectorType::get(HalfTy, 4);
8438  llvm::Type *Tys[2] = { Ty, VTy };
8439  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8440  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8441  return Builder.CreateTrunc(Ops[0], HalfTy);
8442  }
8443  case NEON::BI__builtin_neon_vminvq_f16: {
8444  Int = Intrinsic::aarch64_neon_fminv;
8445  Ty = HalfTy;
8446  VTy = llvm::VectorType::get(HalfTy, 8);
8447  llvm::Type *Tys[2] = { Ty, VTy };
8448  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8449  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8450  return Builder.CreateTrunc(Ops[0], HalfTy);
8451  }
8452  case NEON::BI__builtin_neon_vmaxnmv_f16: {
8453  Int = Intrinsic::aarch64_neon_fmaxnmv;
8454  Ty = HalfTy;
8455  VTy = llvm::VectorType::get(HalfTy, 4);
8456  llvm::Type *Tys[2] = { Ty, VTy };
8457  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8458  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
8459  return Builder.CreateTrunc(Ops[0], HalfTy);
8460  }
8461  case NEON::BI__builtin_neon_vmaxnmvq_f16: {
8462  Int = Intrinsic::aarch64_neon_fmaxnmv;
8463  Ty = HalfTy;
8464  VTy = llvm::VectorType::get(HalfTy, 8);
8465  llvm::Type *Tys[2] = { Ty, VTy };
8466  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8467  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
8468  return Builder.CreateTrunc(Ops[0], HalfTy);
8469  }
8470  case NEON::BI__builtin_neon_vminnmv_f16: {
8471  Int = Intrinsic::aarch64_neon_fminnmv;
8472  Ty = HalfTy;
8473  VTy = llvm::VectorType::get(HalfTy, 4);
8474  llvm::Type *Tys[2] = { Ty, VTy };
8475  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8476  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
8477  return Builder.CreateTrunc(Ops[0], HalfTy);
8478  }
8479  case NEON::BI__builtin_neon_vminnmvq_f16: {
8480  Int = Intrinsic::aarch64_neon_fminnmv;
8481  Ty = HalfTy;
8482  VTy = llvm::VectorType::get(HalfTy, 8);
8483  llvm::Type *Tys[2] = { Ty, VTy };
8484  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8485  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
8486  return Builder.CreateTrunc(Ops[0], HalfTy);
8487  }
8488  case NEON::BI__builtin_neon_vmul_n_f64: {
8489  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
8490  Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
8491  return Builder.CreateFMul(Ops[0], RHS);
8492  }
8493  case NEON::BI__builtin_neon_vaddlv_u8: {
8494  Int = Intrinsic::aarch64_neon_uaddlv;
8495  Ty = Int32Ty;
8496  VTy = llvm::VectorType::get(Int8Ty, 8);
8497  llvm::Type *Tys[2] = { Ty, VTy };
8498  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8499  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8500  return Builder.CreateTrunc(Ops[0], Int16Ty);
8501  }
8502  case NEON::BI__builtin_neon_vaddlv_u16: {
8503  Int = Intrinsic::aarch64_neon_uaddlv;
8504  Ty = Int32Ty;
8505  VTy = llvm::VectorType::get(Int16Ty, 4);
8506  llvm::Type *Tys[2] = { Ty, VTy };
8507  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8508  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8509  }
8510  case NEON::BI__builtin_neon_vaddlvq_u8: {
8511  Int = Intrinsic::aarch64_neon_uaddlv;
8512  Ty = Int32Ty;
8513  VTy = llvm::VectorType::get(Int8Ty, 16);
8514  llvm::Type *Tys[2] = { Ty, VTy };
8515  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8516  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8517  return Builder.CreateTrunc(Ops[0], Int16Ty);
8518  }
8519  case NEON::BI__builtin_neon_vaddlvq_u16: {
8520  Int = Intrinsic::aarch64_neon_uaddlv;
8521  Ty = Int32Ty;
8522  VTy = llvm::VectorType::get(Int16Ty, 8);
8523  llvm::Type *Tys[2] = { Ty, VTy };
8524  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8525  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8526  }
8527  case NEON::BI__builtin_neon_vaddlv_s8: {
8528  Int = Intrinsic::aarch64_neon_saddlv;
8529  Ty = Int32Ty;
8530  VTy = llvm::VectorType::get(Int8Ty, 8);
8531  llvm::Type *Tys[2] = { Ty, VTy };
8532  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8533  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8534  return Builder.CreateTrunc(Ops[0], Int16Ty);
8535  }
8536  case NEON::BI__builtin_neon_vaddlv_s16: {
8537  Int = Intrinsic::aarch64_neon_saddlv;
8538  Ty = Int32Ty;
8539  VTy = llvm::VectorType::get(Int16Ty, 4);
8540  llvm::Type *Tys[2] = { Ty, VTy };
8541  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8542  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8543  }
8544  case NEON::BI__builtin_neon_vaddlvq_s8: {
8545  Int = Intrinsic::aarch64_neon_saddlv;
8546  Ty = Int32Ty;
8547  VTy = llvm::VectorType::get(Int8Ty, 16);
8548  llvm::Type *Tys[2] = { Ty, VTy };
8549  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8550  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8551  return Builder.CreateTrunc(Ops[0], Int16Ty);
8552  }
8553  case NEON::BI__builtin_neon_vaddlvq_s16: {
8554  Int = Intrinsic::aarch64_neon_saddlv;
8555  Ty = Int32Ty;
8556  VTy = llvm::VectorType::get(Int16Ty, 8);
8557  llvm::Type *Tys[2] = { Ty, VTy };
8558  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8559  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8560  }
8561  case NEON::BI__builtin_neon_vsri_n_v:
8562  case NEON::BI__builtin_neon_vsriq_n_v: {
8563  Int = Intrinsic::aarch64_neon_vsri;
8564  llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
8565  return EmitNeonCall(Intrin, Ops, "vsri_n");
8566  }
8567  case NEON::BI__builtin_neon_vsli_n_v:
8568  case NEON::BI__builtin_neon_vsliq_n_v: {
8569  Int = Intrinsic::aarch64_neon_vsli;
8570  llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
8571  return EmitNeonCall(Intrin, Ops, "vsli_n");
8572  }
8573  case NEON::BI__builtin_neon_vsra_n_v:
8574  case NEON::BI__builtin_neon_vsraq_n_v:
8575  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8576  Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
8577  return Builder.CreateAdd(Ops[0], Ops[1]);
8578  case NEON::BI__builtin_neon_vrsra_n_v:
8579  case NEON::BI__builtin_neon_vrsraq_n_v: {
8580  Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
8581  SmallVector<llvm::Value*,2> TmpOps;
8582  TmpOps.push_back(Ops[1]);
8583  TmpOps.push_back(Ops[2]);
8584  Function* F = CGM.getIntrinsic(Int, Ty);
8585  llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
8586  Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8587  return Builder.CreateAdd(Ops[0], tmp);
8588  }
8589  case NEON::BI__builtin_neon_vld1_v:
8590  case NEON::BI__builtin_neon_vld1q_v: {
8591  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
8592  auto Alignment = CharUnits::fromQuantity(
8593  BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
8594  return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
8595  }
8596  case NEON::BI__builtin_neon_vst1_v:
8597  case NEON::BI__builtin_neon_vst1q_v:
8598  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
8599  Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
8600  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8601  case NEON::BI__builtin_neon_vld1_lane_v:
8602  case NEON::BI__builtin_neon_vld1q_lane_v: {
8603  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8604  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
8605  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8606  auto Alignment = CharUnits::fromQuantity(
8607  BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
8608  Ops[0] =
8609  Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
8610  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
8611  }
8612  case NEON::BI__builtin_neon_vld1_dup_v:
8613  case NEON::BI__builtin_neon_vld1q_dup_v: {
8614  Value *V = UndefValue::get(Ty);
8615  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
8616  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8617  auto Alignment = CharUnits::fromQuantity(
8618  BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
8619  Ops[0] =
8620  Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
8621  llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
8622  Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
8623  return EmitNeonSplat(Ops[0], CI);
8624  }
8625  case NEON::BI__builtin_neon_vst1_lane_v:
8626  case NEON::BI__builtin_neon_vst1q_lane_v:
8627  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8628  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
8629  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
8630  return Builder.CreateDefaultAlignedStore(Ops[1],
8631  Builder.CreateBitCast(Ops[0], Ty));
8632  case NEON::BI__builtin_neon_vld2_v:
8633  case NEON::BI__builtin_neon_vld2q_v: {
8634  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
8635  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8636  llvm::Type *Tys[2] = { VTy, PTy };
8637  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
8638  Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
8639  Ops[0] = Builder.CreateBitCast(Ops[0],
8640  llvm::PointerType::getUnqual(Ops[1]->getType()));
8641  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8642  }
8643  case NEON::BI__builtin_neon_vld3_v:
8644  case NEON::BI__builtin_neon_vld3q_v: {
8645  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
8646  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8647  llvm::Type *Tys[2] = { VTy, PTy };
8648  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
8649  Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
8650  Ops[0] = Builder.CreateBitCast(Ops[0],
8651  llvm::PointerType::getUnqual(Ops[1]->getType()));
8652  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8653  }
8654  case NEON::BI__builtin_neon_vld4_v:
8655  case NEON::BI__builtin_neon_vld4q_v: {
8656  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
8657  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8658  llvm::Type *Tys[2] = { VTy, PTy };
8659  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
8660  Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
8661  Ops[0] = Builder.CreateBitCast(Ops[0],
8662  llvm::PointerType::getUnqual(Ops[1]->getType()));
8663  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8664  }
8665  case NEON::BI__builtin_neon_vld2_dup_v:
8666  case NEON::BI__builtin_neon_vld2q_dup_v: {
8667  llvm::Type *PTy =
8668  llvm::PointerType::getUnqual(VTy->getElementType());
8669  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8670  llvm::Type *Tys[2] = { VTy, PTy };
8671  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
8672  Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
8673  Ops[0] = Builder.CreateBitCast(Ops[0],
8674  llvm::PointerType::getUnqual(Ops[1]->getType()));
8675  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8676  }
8677  case NEON::BI__builtin_neon_vld3_dup_v:
8678  case NEON::BI__builtin_neon_vld3q_dup_v: {
8679  llvm::Type *PTy =
8680  llvm::PointerType::getUnqual(VTy->getElementType());
8681  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8682  llvm::Type *Tys[2] = { VTy, PTy };
8683  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
8684  Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
8685  Ops[0] = Builder.CreateBitCast(Ops[0],
8686  llvm::PointerType::getUnqual(Ops[1]->getType()));
8687  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8688  }
8689  case NEON::BI__builtin_neon_vld4_dup_v:
8690  case NEON::BI__builtin_neon_vld4q_dup_v: {
8691  llvm::Type *PTy =
8692  llvm::PointerType::getUnqual(VTy->getElementType());
8693  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8694  llvm::Type *Tys[2] = { VTy, PTy };
8695  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
8696  Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
8697  Ops[0] = Builder.CreateBitCast(Ops[0],
8698  llvm::PointerType::getUnqual(Ops[1]->getType()));
8699  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8700  }
8701  case NEON::BI__builtin_neon_vld2_lane_v:
8702  case NEON::BI__builtin_neon_vld2q_lane_v: {
8703  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
8704  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
8705  Ops.push_back(Ops[1]);
8706  Ops.erase(Ops.begin()+1);
8707  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8708  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8709  Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8710  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
8711  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
8712  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8713  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8714  }
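// [Editorial note: not part of the original source.] The push_back/erase
// pair above rotates the source pointer from Ops[1] to the end of the
// operand list, because the ld2lane/ld3lane/ld4lane intrinsics take the
// pass-through vectors and the lane index first and the pointer last.  The
// vst2..vst4 cases further below use the same rotation to move the pointer
// after the data vectors.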
8715  case NEON::BI__builtin_neon_vld3_lane_v:
8716  case NEON::BI__builtin_neon_vld3q_lane_v: {
8717  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
8718  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
8719  Ops.push_back(Ops[1]);
8720  Ops.erase(Ops.begin()+1);
8721  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8722  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8723  Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
8724  Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
8725  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
8726  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
8727  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8728  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8729  }
8730  case NEON::BI__builtin_neon_vld4_lane_v:
8731  case NEON::BI__builtin_neon_vld4q_lane_v: {
8732  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
8733  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
8734  Ops.push_back(Ops[1]);
8735  Ops.erase(Ops.begin()+1);
8736  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8737  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8738  Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
8739  Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
8740  Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
8741  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
8742  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
8743  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8744  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8745  }
8746  case NEON::BI__builtin_neon_vst2_v:
8747  case NEON::BI__builtin_neon_vst2q_v: {
8748  Ops.push_back(Ops[0]);
8749  Ops.erase(Ops.begin());
8750  llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
8751  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
8752  Ops, "");
8753  }
8754  case NEON::BI__builtin_neon_vst2_lane_v:
8755  case NEON::BI__builtin_neon_vst2q_lane_v: {
8756  Ops.push_back(Ops[0]);
8757  Ops.erase(Ops.begin());
8758  Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8759  llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
8760  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
8761  Ops, "");
8762  }
8763  case NEON::BI__builtin_neon_vst3_v:
8764  case NEON::BI__builtin_neon_vst3q_v: {
8765  Ops.push_back(Ops[0]);
8766  Ops.erase(Ops.begin());
8767  llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
8768  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
8769  Ops, "");
8770  }
8771  case NEON::BI__builtin_neon_vst3_lane_v:
8772  case NEON::BI__builtin_neon_vst3q_lane_v: {
8773  Ops.push_back(Ops[0]);
8774  Ops.erase(Ops.begin());
8775  Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8776  llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
8777  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
8778  Ops, "");
8779  }
8780  case NEON::BI__builtin_neon_vst4_v:
8781  case NEON::BI__builtin_neon_vst4q_v: {
8782  Ops.push_back(Ops[0]);
8783  Ops.erase(Ops.begin());
8784  llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
8785  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
8786  Ops, "");
8787  }
8788  case NEON::BI__builtin_neon_vst4_lane_v:
8789  case NEON::BI__builtin_neon_vst4q_lane_v: {
8790  Ops.push_back(Ops[0]);
8791  Ops.erase(Ops.begin());
8792  Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
8793  llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
8794  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
8795  Ops, "");
8796  }
8797  case NEON::BI__builtin_neon_vtrn_v:
8798  case NEON::BI__builtin_neon_vtrnq_v: {
8799  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
8800  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8801  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8802  Value *SV = nullptr;
8803 
8804  for (unsigned vi = 0; vi != 2; ++vi) {
8805  SmallVector<uint32_t, 16> Indices;
8806  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8807  Indices.push_back(i+vi);
8808  Indices.push_back(i+e+vi);
8809  }
8810  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8811  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8812  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8813  }
8814  return SV;
8815  }
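// [Editorial note: not part of the original source.] The two shuffle masks
// built above implement TRN1/TRN2: for vi == 0 the indices are
// {0, e, 2, e+2, ...} (even lanes of both inputs interleaved) and for
// vi == 1 they are {1, e+1, 3, e+3, ...} (odd lanes); each result is stored
// to the corresponding half of the destination that Ops[0] points at.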
8816  case NEON::BI__builtin_neon_vuzp_v:
8817  case NEON::BI__builtin_neon_vuzpq_v: {
8818  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
8819  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8820  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8821  Value *SV = nullptr;
8822 
8823  for (unsigned vi = 0; vi != 2; ++vi) {
8824  SmallVector<uint32_t, 16> Indices;
8825  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8826  Indices.push_back(2*i+vi);
8827 
8828  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8829  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8830  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8831  }
8832  return SV;
8833  }
8834  case NEON::BI__builtin_neon_vzip_v:
8835  case NEON::BI__builtin_neon_vzipq_v: {
8836  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
8837  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8838  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8839  Value *SV = nullptr;
8840 
8841  for (unsigned vi = 0; vi != 2; ++vi) {
8842  SmallVector<uint32_t, 16> Indices;
8843  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8844  Indices.push_back((i + vi*e) >> 1);
8845  Indices.push_back(((i + vi*e) >> 1)+e);
8846  }
8847  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8848  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8849  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8850  }
8851  return SV;
8852  }
8853  case NEON::BI__builtin_neon_vqtbl1q_v: {
8854  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
8855  Ops, "vtbl1");
8856  }
8857  case NEON::BI__builtin_neon_vqtbl2q_v: {
8858  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
8859  Ops, "vtbl2");
8860  }
8861  case NEON::BI__builtin_neon_vqtbl3q_v: {
8862  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
8863  Ops, "vtbl3");
8864  }
8865  case NEON::BI__builtin_neon_vqtbl4q_v: {
8866  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
8867  Ops, "vtbl4");
8868  }
8869  case NEON::BI__builtin_neon_vqtbx1q_v: {
8870  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
8871  Ops, "vtbx1");
8872  }
8873  case NEON::BI__builtin_neon_vqtbx2q_v: {
8874  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
8875  Ops, "vtbx2");
8876  }
8877  case NEON::BI__builtin_neon_vqtbx3q_v: {
8878  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
8879  Ops, "vtbx3");
8880  }
8881  case NEON::BI__builtin_neon_vqtbx4q_v: {
8882  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
8883  Ops, "vtbx4");
8884  }
8885  case NEON::BI__builtin_neon_vsqadd_v:
8886  case NEON::BI__builtin_neon_vsqaddq_v: {
8887  Int = Intrinsic::aarch64_neon_usqadd;
8888  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
8889  }
8890  case NEON::BI__builtin_neon_vuqadd_v:
8891  case NEON::BI__builtin_neon_vuqaddq_v: {
8892  Int = Intrinsic::aarch64_neon_suqadd;
8893  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
8894  }
8895  case AArch64::BI__iso_volatile_load8:
8896  case AArch64::BI__iso_volatile_load16:
8897  case AArch64::BI__iso_volatile_load32:
8898  case AArch64::BI__iso_volatile_load64:
8899  return EmitISOVolatileLoad(E);
8900  case AArch64::BI__iso_volatile_store8:
8901  case AArch64::BI__iso_volatile_store16:
8902  case AArch64::BI__iso_volatile_store32:
8903  case AArch64::BI__iso_volatile_store64:
8904  return EmitISOVolatileStore(E);
8905  case AArch64::BI_BitScanForward:
8906  case AArch64::BI_BitScanForward64:
8907  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
8908  case AArch64::BI_BitScanReverse:
8909  case AArch64::BI_BitScanReverse64:
8910  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
8911  case AArch64::BI_InterlockedAnd64:
8912  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
8913  case AArch64::BI_InterlockedExchange64:
8914  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
8915  case AArch64::BI_InterlockedExchangeAdd64:
8916  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
8917  case AArch64::BI_InterlockedExchangeSub64:
8918  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
8919  case AArch64::BI_InterlockedOr64:
8920  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
8921  case AArch64::BI_InterlockedXor64:
8922  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
8923  case AArch64::BI_InterlockedDecrement64:
8924  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
8925  case AArch64::BI_InterlockedIncrement64:
8926  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
8927  case AArch64::BI_InterlockedExchangeAdd8_acq:
8928  case AArch64::BI_InterlockedExchangeAdd16_acq:
8929  case AArch64::BI_InterlockedExchangeAdd_acq:
8930  case AArch64::BI_InterlockedExchangeAdd64_acq:
8931  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
8932  case AArch64::BI_InterlockedExchangeAdd8_rel:
8933  case AArch64::BI_InterlockedExchangeAdd16_rel:
8934  case AArch64::BI_InterlockedExchangeAdd_rel:
8935  case AArch64::BI_InterlockedExchangeAdd64_rel:
8936  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
8937  case AArch64::BI_InterlockedExchangeAdd8_nf:
8938  case AArch64::BI_InterlockedExchangeAdd16_nf:
8939  case AArch64::BI_InterlockedExchangeAdd_nf:
8940  case AArch64::BI_InterlockedExchangeAdd64_nf:
8941  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
8942  case AArch64::BI_InterlockedExchange8_acq:
8943  case AArch64::BI_InterlockedExchange16_acq:
8944  case AArch64::BI_InterlockedExchange_acq:
8945  case AArch64::BI_InterlockedExchange64_acq:
8946  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
8947  case AArch64::BI_InterlockedExchange8_rel:
8948  case AArch64::BI_InterlockedExchange16_rel:
8949  case AArch64::BI_InterlockedExchange_rel:
8950  case AArch64::BI_InterlockedExchange64_rel:
8951  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
8952  case AArch64::BI_InterlockedExchange8_nf:
8953  case AArch64::BI_InterlockedExchange16_nf:
8954  case AArch64::BI_InterlockedExchange_nf:
8955  case AArch64::BI_InterlockedExchange64_nf:
8956  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
8957  case AArch64::BI_InterlockedCompareExchange8_acq:
8958  case AArch64::BI_InterlockedCompareExchange16_acq:
8959  case AArch64::BI_InterlockedCompareExchange_acq:
8960  case AArch64::BI_InterlockedCompareExchange64_acq:
8961  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
8962  case AArch64::BI_InterlockedCompareExchange8_rel:
8963  case AArch64::BI_InterlockedCompareExchange16_rel:
8964  case AArch64::BI_InterlockedCompareExchange_rel:
8965  case AArch64::BI_InterlockedCompareExchange64_rel:
8966  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
8967  case AArch64::BI_InterlockedCompareExchange8_nf:
8968  case AArch64::BI_InterlockedCompareExchange16_nf:
8969  case AArch64::BI_InterlockedCompareExchange_nf:
8970  case AArch64::BI_InterlockedCompareExchange64_nf:
8971  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
8972  case AArch64::BI_InterlockedOr8_acq:
8973  case AArch64::BI_InterlockedOr16_acq:
8974  case AArch64::BI_InterlockedOr_acq:
8975  case AArch64::BI_InterlockedOr64_acq:
8976  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
8977  case AArch64::BI_InterlockedOr8_rel:
8978  case AArch64::BI_InterlockedOr16_rel:
8979  case AArch64::BI_InterlockedOr_rel:
8980  case AArch64::BI_InterlockedOr64_rel:
8981  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
8982  case AArch64::BI_InterlockedOr8_nf:
8983  case AArch64::BI_InterlockedOr16_nf:
8984  case AArch64::BI_InterlockedOr_nf:
8985  case AArch64::BI_InterlockedOr64_nf:
8986  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
8987  case AArch64::BI_InterlockedXor8_acq:
8988  case AArch64::BI_InterlockedXor16_acq:
8989  case AArch64::BI_InterlockedXor_acq:
8990  case AArch64::BI_InterlockedXor64_acq:
8991  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
8992  case AArch64::BI_InterlockedXor8_rel:
8993  case AArch64::BI_InterlockedXor16_rel:
8994  case AArch64::BI_InterlockedXor_rel:
8995  case AArch64::BI_InterlockedXor64_rel:
8996  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
8997  case AArch64::BI_InterlockedXor8_nf:
8998  case AArch64::BI_InterlockedXor16_nf:
8999  case AArch64::BI_InterlockedXor_nf:
9000  case AArch64::BI_InterlockedXor64_nf:
9001  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
9002  case AArch64::BI_InterlockedAnd8_acq:
9003  case AArch64::BI_InterlockedAnd16_acq:
9004  case AArch64::BI_InterlockedAnd_acq:
9005  case AArch64::BI_InterlockedAnd64_acq:
9006  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
9007  case AArch64::BI_InterlockedAnd8_rel:
9008  case AArch64::BI_InterlockedAnd16_rel:
9009  case AArch64::BI_InterlockedAnd_rel:
9010  case AArch64::BI_InterlockedAnd64_rel:
9011  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
9012  case AArch64::BI_InterlockedAnd8_nf:
9013  case AArch64::BI_InterlockedAnd16_nf:
9014  case AArch64::BI_InterlockedAnd_nf:
9015  case AArch64::BI_InterlockedAnd64_nf:
9016  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
9017  case AArch64::BI_InterlockedIncrement16_acq:
9018  case AArch64::BI_InterlockedIncrement_acq:
9019  case AArch64::BI_InterlockedIncrement64_acq:
9020  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
9021  case AArch64::BI_InterlockedIncrement16_rel:
9022  case AArch64::BI_InterlockedIncrement_rel:
9023  case AArch64::BI_InterlockedIncrement64_rel:
9024  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
9025  case AArch64::BI_InterlockedIncrement16_nf:
9026  case AArch64::BI_InterlockedIncrement_nf:
9027  case AArch64::BI_InterlockedIncrement64_nf:
9028  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
9029  case AArch64::BI_InterlockedDecrement16_acq:
9030  case AArch64::BI_InterlockedDecrement_acq:
9031  case AArch64::BI_InterlockedDecrement64_acq:
9032  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
9033  case AArch64::BI_InterlockedDecrement16_rel:
9034  case AArch64::BI_InterlockedDecrement_rel:
9035  case AArch64::BI_InterlockedDecrement64_rel:
9036  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
9037  case AArch64::BI_InterlockedDecrement16_nf:
9038  case AArch64::BI_InterlockedDecrement_nf:
9039  case AArch64::BI_InterlockedDecrement64_nf:
9040  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
9041 
9042  case AArch64::BI_InterlockedAdd: {
9043  Value *Arg0 = EmitScalarExpr(E->getArg(0));
9044  Value *Arg1 = EmitScalarExpr(E->getArg(1));
9045  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
9046  AtomicRMWInst::Add, Arg0, Arg1,
9047  llvm::AtomicOrdering::SequentiallyConsistent);
9048  return Builder.CreateAdd(RMWI, Arg1);
9049  }
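// [Editorial note: not part of the original source.] atomicrmw add yields
// the value the memory held before the addition, so the extra CreateAdd
// reproduces _InterlockedAdd's contract of returning the updated value.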
9050  }
9051 }
9052 
9053 llvm::Value *CodeGenFunction::
9054 BuildVector(ArrayRef<llvm::Value*> Ops) {
9055  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
9056  "Not a power-of-two sized vector!");
9057  bool AllConstants = true;
9058  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
9059  AllConstants &= isa<Constant>(Ops[i]);
9060 
9061  // If this is a constant vector, create a ConstantVector.
9062  if (AllConstants) {
9063  SmallVector<llvm::Constant*, 16> CstOps;
9064  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
9065  CstOps.push_back(cast<Constant>(Ops[i]));
9066  return llvm::ConstantVector::get(CstOps);
9067  }
9068 
9069  // Otherwise, insertelement the values to build the vector.
9070  Value *Result =
9071  llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
9072 
9073  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
9074  Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
9075 
9076  return Result;
9077 }
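// [Editorial note: not part of the original source.] In short: if every
// operand is already a Constant, the vector is materialized directly as a
// ConstantVector; otherwise it is built lane by lane with insertelement
// starting from undef.  The power-of-two assert documents what this
// helper's callers are expected to pass.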
9078 
9079 // Convert the mask from an integer type to a vector of i1.
9080 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
9081  unsigned NumElts) {
9082 
9083  llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
9084  cast<IntegerType>(Mask->getType())->getBitWidth());
9085  Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
9086 
9087  // If we have less than 8 elements, then the starting mask was an i8 and
9088  // we need to extract down to the right number of elements.
9089  if (NumElts < 8) {
9090  uint32_t Indices[4];
9091  for (unsigned i = 0; i != NumElts; ++i)
9092  Indices[i] = i;
9093  MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
9094  makeArrayRef(Indices, NumElts),
9095  "extract");
9096  }
9097  return MaskVec;
9098 }
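// [Editorial note: not part of the original source.] Example of the helper
// above: an i8 mask used with a 4-element operation is bitcast to <8 x i1>
// and then narrowed to <4 x i1> by the shufflevector, keeping the low four
// mask bits; with 8 or more elements the bitcast alone already has the
// right width.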
9099 
9100 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
9101  ArrayRef<Value *> Ops,
9102  unsigned Align) {
9103  // Cast the pointer to right type.
9104  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
9105  llvm::PointerType::getUnqual(Ops[1]->getType()));
9106 
9107  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9108  Ops[1]->getType()->getVectorNumElements());
9109 
9110  return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Align, MaskVec);
9111 }
9112 
9113 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
9114  ArrayRef<Value *> Ops, unsigned Align) {
9115  // Cast the pointer to right type.
9116  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
9117  llvm::PointerType::getUnqual(Ops[1]->getType()));
9118 
9119  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9120  Ops[1]->getType()->getVectorNumElements());
9121 
9122  return CGF.Builder.CreateMaskedLoad(Ptr, Align, MaskVec, Ops[1]);
9123 }
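// [Editorial note: not part of the original source.] The two helpers above
// lower X86 masked store/load builtins onto the generic llvm.masked.store /
// llvm.masked.load intrinsics: the pointer operand is recast to a pointer to
// the vector type and the integer mask is converted to the <N x i1> form
// those intrinsics expect (via getMaskVecValue).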
9124 
9125 static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
9126  ArrayRef<Value *> Ops) {
9127  llvm::Type *ResultTy = Ops[1]->getType();
9128  llvm::Type *PtrTy = ResultTy->getVectorElementType();
9129 
9130  // Cast the pointer to element type.
9131  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
9132  llvm::PointerType::getUnqual(PtrTy));
9133 
9134  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9135  ResultTy->getVectorNumElements());
9136 
9137  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
9138  ResultTy);
9139  return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
9140 }
9141 
9142 static Value *EmitX86CompressStore(CodeGenFunction &CGF,
9143  ArrayRef<Value *> Ops) {
9144  llvm::Type *ResultTy = Ops[1]->getType();
9145  llvm::Type *PtrTy = ResultTy->getVectorElementType();
9146 
9147  // Cast the pointer to element type.
9148  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
9149  llvm::PointerType::getUnqual(PtrTy));
9150 
9151  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9152  ResultTy->getVectorNumElements());
9153 
9154  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
9155  ResultTy);
9156  return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
9157 }
9158 
9159 static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
9160  ArrayRef<Value *> Ops,
9161  bool InvertLHS = false) {
9162  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
9163  Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
9164  Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
9165 
9166  if (InvertLHS)
9167  LHS = CGF.Builder.CreateNot(LHS);
9168 
9169  return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
9170  Ops[0]->getType());
9171 }
9172 
9173 static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
9174  Value *Amt, bool IsRight) {
9175  llvm::Type *Ty = Op0->getType();
9176 
9177  // Amount may be scalar immediate, in which case create a splat vector.
9178  // Funnel shift amounts are treated as modulo the element width, and the types
9179  // are all powers of two, so we only care about the lowest log2 bits anyway.
9180  if (Amt->getType() != Ty) {
9181  unsigned NumElts = Ty->getVectorNumElements();
9182  Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
9183  Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
9184  }
9185 
9186  unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
9187  Value *F = CGF.CGM.getIntrinsic(IID, Ty);
9188  return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
9189 }
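
// ---- Editorial sketch, not part of CGBuiltin.cpp ----
// Scalar model of llvm.fshl/llvm.fshr on one 32-bit lane, assuming the
// modulo-width shift-amount behaviour the comment above relies on.
static unsigned Fshl32(unsigned Hi, unsigned Lo, unsigned Amt) {
  Amt &= 31;                       // funnel shift amounts are modulo the width
  return Amt ? (Hi << Amt) | (Lo >> (32 - Amt)) : Hi;
}
static unsigned Fshr32(unsigned Hi, unsigned Lo, unsigned Amt) {
  Amt &= 31;
  return Amt ? (Lo >> Amt) | (Hi << (32 - Amt)) : Lo;
}
// -----------------------------------------------------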
9190 
9191 static Value *EmitX86Select(CodeGenFunction &CGF,
9192  Value *Mask, Value *Op0, Value *Op1) {
9193 
9194  // If the mask is all ones just return first argument.
9195  if (const auto *C = dyn_cast<Constant>(Mask))
9196  if (C->isAllOnesValue())
9197  return Op0;
9198 
9199  Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
9200 
9201  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
9202 }
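
// ---- Editorial sketch, not part of CGBuiltin.cpp ----
// Scalar model of the masked select emitted above (the usual AVX-512
// merge-masking pattern): bit i of the mask picks Op0[i], otherwise Op1[i].
static void MaskSelect4(unsigned char Mask, const int Op0[4], const int Op1[4],
                        int Out[4]) {
  for (unsigned i = 0; i != 4; ++i)
    Out[i] = ((Mask >> i) & 1) ? Op0[i] : Op1[i];
}
// -----------------------------------------------------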
9203 
9204 static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
9205  Value *Mask, Value *Op0, Value *Op1) {
9206  // If the mask is all ones just return first argument.
9207  if (const auto *C = dyn_cast<Constant>(Mask))
9208  if (C->isAllOnesValue())
9209  return Op0;
9210 
9211  llvm::VectorType *MaskTy =
9212  llvm::VectorType::get(CGF.Builder.getInt1Ty(),
9213  Mask->getType()->getIntegerBitWidth());
9214  Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
9215  Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
9216  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
9217 }
9218 
9219 static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
9220  unsigned NumElts, Value *MaskIn) {
9221  if (MaskIn) {
9222  const auto *C = dyn_cast<Constant>(MaskIn);
9223  if (!C || !C->isAllOnesValue())
9224  Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
9225  }
9226 
9227  if (NumElts < 8) {
9228  uint32_t Indices[8];
9229  for (unsigned i = 0; i != NumElts; ++i)
9230  Indices[i] = i;
9231  for (unsigned i = NumElts; i != 8; ++i)
9232  Indices[i] = i % NumElts + NumElts;
9233  Cmp = CGF.Builder.CreateShuffleVector(
9234  Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
9235  }
9236 
9237  return CGF.Builder.CreateBitCast(Cmp,
9238  IntegerType::get(CGF.getLLVMContext(),
9239  std::max(NumElts, 8U)));
9240 }
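
// ---- Editorial sketch, not part of CGBuiltin.cpp ----
// Scalar model of the result widening above for NumElts == 4: the <4 x i1>
// compare result is padded with elements of a zero vector up to 8 lanes so it
// can be bitcast to an i8 mask, so the upper bits always stay zero.
static unsigned char CompareResultToMask4(const bool Cmp[4]) {
  unsigned char Mask = 0;
  for (unsigned i = 0; i != 4; ++i)
    Mask |= (unsigned char)(Cmp[i] ? 1u : 0u) << i;
  return Mask; // bits 4..7 remain zero, mirroring the null-vector shuffle pad
}
// -----------------------------------------------------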
9241 
9242 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
9243  bool Signed, ArrayRef<Value *> Ops) {
9244  assert((Ops.size() == 2 || Ops.size() == 4) &&
9245  "Unexpected number of arguments");
9246  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
9247  Value *Cmp;
9248 
9249  if (CC == 3) {
9250  Cmp = Constant::getNullValue(
9251  llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
9252  } else if (CC == 7) {
9253  Cmp = Constant::getAllOnesValue(
9254  llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
9255  } else {
9256  ICmpInst::Predicate Pred;
9257  switch (CC) {
9258  default: llvm_unreachable("Unknown condition code");
9259  case 0: Pred = ICmpInst::ICMP_EQ; break;
9260  case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
9261  case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
9262  case 4: Pred = ICmpInst::ICMP_NE; break;
9263  case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
9264  case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
9265  }
9266  Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
9267  }
9268 
9269  Value *MaskIn = nullptr;
9270  if (Ops.size() == 4)
9271  MaskIn = Ops[3];
9272 
9273  return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
9274 }
9275 
9276 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
9277  Value *Zero = Constant::getNullValue(In->getType());
9278  return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
9279 }
9280 
9281 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
9282 
9283  llvm::Type *Ty = Ops[0]->getType();
9284  Value *Zero = llvm::Constant::getNullValue(Ty);
9285  Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]);
9286  Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero);
9287  Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub);
9288  return Res;
9289 }
9290 
9291 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
9292  ArrayRef<Value *> Ops) {
9293  Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
9294  Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
9295 
9296  assert(Ops.size() == 2);
9297  return Res;
9298 }
9299 
9300 // Lowers X86 FMA intrinsics to IR.
9301 static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
9302  unsigned BuiltinID, bool IsAddSub) {
9303 
9304  bool Subtract = false;
9305  Intrinsic::ID IID = Intrinsic::not_intrinsic;
9306  switch (BuiltinID) {
9307  default: break;
9308  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
9309  Subtract = true;
9310  LLVM_FALLTHROUGH;
9311  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
9312  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
9313  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
9314  IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
9315  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
9316  Subtract = true;
9317  LLVM_FALLTHROUGH;
9318  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
9319  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
9320  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
9321  IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
9322  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
9323  Subtract = true;
9324  LLVM_FALLTHROUGH;
9325  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
9326  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
9327  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
9328  IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
9329  break;
9330  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
9331  Subtract = true;
9332  LLVM_FALLTHROUGH;
9333  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
9334  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
9335  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
9336  IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
9337  break;
9338  }
9339 
9340  Value *A = Ops[0];
9341  Value *B = Ops[1];
9342  Value *C = Ops[2];
9343 
9344  if (Subtract)
9345  C = CGF.Builder.CreateFNeg(C);
9346 
9347  Value *Res;
9348 
9349  // The plain llvm.fma path below only handles _MM_FROUND_CUR_DIRECTION/4 (no rounding); any other rounding mode goes through the target-specific intrinsic.
9350  if (IID != Intrinsic::not_intrinsic &&
9351  cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4) {
9352  Function *Intr = CGF.CGM.getIntrinsic(IID);
9353  Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
9354  } else {
9355  llvm::Type *Ty = A->getType();
9356  Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
9357  Res = CGF.Builder.CreateCall(FMA, {A, B, C} );
9358 
9359  if (IsAddSub) {
9360  // Negate even elts in C using a mask.
9361  unsigned NumElts = Ty->getVectorNumElements();
9362  SmallVector<uint32_t, 16> Indices(NumElts);
9363  for (unsigned i = 0; i != NumElts; ++i)
9364  Indices[i] = i + (i % 2) * NumElts;
9365 
9366  Value *NegC = CGF.Builder.CreateFNeg(C);
9367  Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} );
9368  Res = CGF.Builder.CreateShuffleVector(FMSub, Res, Indices);
9369  }
9370  }
9371 
9372  // Handle any required masking.
9373  Value *MaskFalseVal = nullptr;
9374  switch (BuiltinID) {
9375  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
9376  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
9377  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
9378  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
9379  MaskFalseVal = Ops[0];
9380  break;
9381  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
9382  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
9383  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
9384  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
9385  MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
9386  break;
9387  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
9388  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
9389  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
9390  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
9391  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
9392  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
9393  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
9394  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
9395  MaskFalseVal = Ops[2];
9396  break;
9397  }
9398 
9399  if (MaskFalseVal)
9400  return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
9401 
9402  return Res;
9403 }
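
// ---- Editorial sketch, not part of CGBuiltin.cpp ----
// Scalar model of the IsAddSub blend built above: the shuffle takes the
// fms result for even lanes and the fma result for odd lanes, which is the
// usual vfmaddsub behaviour. This model ignores that the real fused
// multiply-add rounds only once.
static void FmAddSub(const double *A, const double *B, const double *C,
                     double *Out, unsigned NumElts) {
  for (unsigned i = 0; i != NumElts; ++i)
    Out[i] = (i & 1) ? A[i] * B[i] + C[i]   // odd lanes: fmadd
                     : A[i] * B[i] - C[i];  // even lanes: fmsub
}
// -----------------------------------------------------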
9404 
9405 static Value *
9406 EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops,
9407  Value *Upper, bool ZeroMask = false, unsigned PTIdx = 0,
9408  bool NegAcc = false) {
9409  unsigned Rnd = 4;
9410  if (Ops.size() > 4)
9411  Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
9412 
9413  if (NegAcc)
9414  Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
9415 
9416  Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
9417  Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
9418  Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
9419  Value *Res;
9420  if (Rnd != 4) {
9421  Intrinsic::ID IID = Ops[0]->getType()->getPrimitiveSizeInBits() == 32 ?
9422  Intrinsic::x86_avx512_vfmadd_f32 :
9423  Intrinsic::x86_avx512_vfmadd_f64;
9424  Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
9425  {Ops[0], Ops[1], Ops[2], Ops[4]});
9426  } else {
9427  Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
9428  Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
9429  }
9430  // If we have more than 3 arguments, we need to do masking.
9431  if (Ops.size() > 3) {
9432  Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
9433  : Ops[PTIdx];
9434 
9435  // If we negated the accumulator and it is also the PassThru value, we need
9436  // to bypass the negate. Conveniently, Upper should be the same thing in
9437  // this case.
9438  if (NegAcc && PTIdx == 2)
9439  PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
9440 
9441  Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
9442  }
9443  return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
9444 }
9445 
9446 static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
9447  ArrayRef<Value *> Ops) {
9448  llvm::Type *Ty = Ops[0]->getType();
9449  // Arguments have a vXi32 type so cast to vXi64.
9450  Ty = llvm::VectorType::get(CGF.Int64Ty,
9451  Ty->getPrimitiveSizeInBits() / 64);
9452  Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
9453  Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
9454 
9455  if (IsSigned) {
9456  // Shift left then arithmetic shift right.
9457  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
9458  LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
9459  LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
9460  RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
9461  RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
9462  } else {
9463  // Clear the upper bits.
9464  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
9465  LHS = CGF.Builder.CreateAnd(LHS, Mask);
9466  RHS = CGF.Builder.CreateAnd(RHS, Mask);
9467  }
9468 
9469  return CGF.Builder.CreateMul(LHS, RHS);
9470 }
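
// ---- Editorial sketch, not part of CGBuiltin.cpp ----
// Scalar model of one 64-bit lane of pmuldq/pmuludq as emitted above: only the
// low 32 bits of each lane participate, sign- or zero-extended to 64 bits
// before the multiply. Assumes 32-bit int/unsigned, 64-bit long long and
// two's-complement narrowing.
static unsigned long long MulDqLane(unsigned long long A, unsigned long long B,
                                    bool IsSigned) {
  if (IsSigned) {
    long long SA = (int)(unsigned)A; // sign-extend low 32 bits (shl+ashr above)
    long long SB = (int)(unsigned)B;
    return (unsigned long long)(SA * SB);
  }
  return (A & 0xffffffffu) * (B & 0xffffffffu); // the 0xffffffff mask above
}
// -----------------------------------------------------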
9471 
9472 // Emit a masked pternlog intrinsic. This only exists because the header has to
9473 // use a macro and we aren't able to pass the input argument to a pternlog
9474 // builtin and a select builtin without evaluating it twice.
9475 static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
9476  ArrayRef<Value *> Ops) {
9477  llvm::Type *Ty = Ops[0]->getType();
9478 
9479  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
9480  unsigned EltWidth = Ty->getScalarSizeInBits();
9481  Intrinsic::ID IID;
9482  if (VecWidth == 128 && EltWidth == 32)
9483  IID = Intrinsic::x86_avx512_pternlog_d_128;
9484  else if (VecWidth == 256 && EltWidth == 32)
9485  IID = Intrinsic::x86_avx512_pternlog_d_256;
9486  else if (VecWidth == 512 && EltWidth == 32)
9487  IID = Intrinsic::x86_avx512_pternlog_d_512;
9488  else if (VecWidth == 128 && EltWidth == 64)
9489  IID = Intrinsic::x86_avx512_pternlog_q_128;
9490  else if (VecWidth == 256 && EltWidth == 64)
9491  IID = Intrinsic::x86_avx512_pternlog_q_256;
9492  else if (VecWidth == 512 && EltWidth == 64)
9493  IID = Intrinsic::x86_avx512_pternlog_q_512;
9494  else
9495  llvm_unreachable("Unexpected intrinsic");
9496 
9497  Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
9498  Ops.drop_back());
9499  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
9500  return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
9501 }
9502 
9503 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
9504  llvm::Type *DstTy) {
9505  unsigned NumberOfElements = DstTy->getVectorNumElements();
9506  Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
9507  return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
9508 }
9509 
9510 // Emit addition or subtraction with signed/unsigned saturation.
9511 static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF,
9512  ArrayRef<Value *> Ops, bool IsSigned,
9513  bool IsAddition) {
9514  Intrinsic::ID IID =
9515  IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
9516  : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
9517  llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType());
9518  return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]});
9519 }
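
// ---- Editorial sketch, not part of CGBuiltin.cpp ----
// Scalar model of one i8 lane of the signed saturating-add path: the result is
// clamped to the representable range instead of wrapping.
static signed char SAddSat8(signed char A, signed char B) {
  int Sum = A + B;                // widen so the intermediate cannot overflow
  if (Sum > 127)  return 127;
  if (Sum < -128) return -128;
  return (signed char)Sum;
}
// -----------------------------------------------------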
9520 
9521 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
9522  const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
9523  StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
9524  return EmitX86CpuIs(CPUStr);
9525 }
9526 
9527 Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
9528 
9529  llvm::Type *Int32Ty = Builder.getInt32Ty();
9530 
9531  // Matching the struct layout from the compiler-rt/libgcc structure that is
9532  // filled in:
9533  // unsigned int __cpu_vendor;
9534  // unsigned int __cpu_type;
9535  // unsigned int __cpu_subtype;
9536  // unsigned int __cpu_features[1];
9537  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
9538  llvm::ArrayType::get(Int32Ty, 1));
9539 
9540  // Grab the global __cpu_model.
9541  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
9542  cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
9543 
9544  // Calculate the index needed to access the correct field based on the
9545  // range. Also adjust the expected value.
9546  unsigned Index;
9547  unsigned Value;
9548  std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
9549 #define X86_VENDOR(ENUM, STRING) \
9550  .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
9551 #define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS) \
9552  .Cases(STR, ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
9553 #define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) \
9554  .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
9555 #define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) \
9556  .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
9557 #include "llvm/Support/X86TargetParser.def"
9558  .Default({0, 0});
9559  assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
9560 
9561  // Grab the appropriate field from __cpu_model.
9562  llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
9563  ConstantInt::get(Int32Ty, Index)};
9564  llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
9565  CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4));
9566 
9567  // Check the value of the field against the requested value.
9568  return Builder.CreateICmpEQ(CpuValue,
9569  llvm::ConstantInt::get(Int32Ty, Value));
9570 }
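
// ---- Editorial sketch, not part of CGBuiltin.cpp ----
// The C-level check the IR above performs, assuming the libgcc/compiler-rt
// __cpu_model layout quoted in the comment. Index 0/1/2 selects the vendor,
// type or subtype field; Value is the enum constant for the requested CPU.
// ProcessorModel and CpuIs are illustrative names, not runtime symbols.
struct ProcessorModel {
  unsigned Vendor, Type, Subtype, Features[1];
};
static bool CpuIs(const ProcessorModel &M, unsigned Index, unsigned Value) {
  unsigned Field = Index == 0 ? M.Vendor : Index == 1 ? M.Type : M.Subtype;
  return Field == Value;
}
// -----------------------------------------------------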
9571 
9572 Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
9573  const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
9574  StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
9575  return EmitX86CpuSupports(FeatureStr);
9576 }
9577 
9578 uint64_t
9579 CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
9580  // Processor features and mapping to processor feature value.
9581  uint64_t FeaturesMask = 0;
9582  for (const StringRef &FeatureStr : FeatureStrs) {
9583  unsigned Feature =
9584  StringSwitch<unsigned>(FeatureStr)
9585 #define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL)
9586 #include "llvm/Support/X86TargetParser.def"
9587  ;
9588  FeaturesMask |= (1ULL << Feature);
9589  }
9590  return FeaturesMask;
9591 }
9592 
9593 Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
9594  return EmitX86CpuSupports(GetX86CpuSupportsMask(FeatureStrs));
9595 }
9596 
9597 llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
9598  uint32_t Features1 = Lo_32(FeaturesMask);
9599  uint32_t Features2 = Hi_32(FeaturesMask);
9600 
9601  Value *Result = Builder.getTrue();
9602 
9603  if (Features1 != 0) {
9604  // Matching the struct layout from the compiler-rt/libgcc structure that is
9605  // filled in:
9606  // unsigned int __cpu_vendor;
9607  // unsigned int __cpu_type;
9608  // unsigned int __cpu_subtype;
9609  // unsigned int __cpu_features[1];
9610  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
9611  llvm::ArrayType::get(Int32Ty, 1));
9612 
9613  // Grab the global __cpu_model.
9614  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
9615  cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
9616 
9617  // Grab the first (0th) element of the __cpu_features field from the
9618  // __cpu_model global of struct type STy.
9619  Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
9620  Builder.getInt32(0)};
9621  Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
9622  Value *Features =
9623  Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
9624 
9625  // Check the value of the bit corresponding to the feature requested.
9626  Value *Mask = Builder.getInt32(Features1);
9627  Value *Bitset = Builder.CreateAnd(Features, Mask);
9628  Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
9629  Result = Builder.CreateAnd(Result, Cmp);
9630  }
9631 
9632  if (Features2 != 0) {
9633  llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty,
9634  "__cpu_features2");
9635  cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
9636 
9637  Value *Features =
9638  Builder.CreateAlignedLoad(CpuFeatures2, CharUnits::fromQuantity(4));
9639 
9640  // Check the value of the bit corresponding to the feature requested.
9641  Value *Mask = Builder.getInt32(Features2);
9642  Value *Bitset = Builder.CreateAnd(Features, Mask);
9643  Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
9644  Result = Builder.CreateAnd(Result, Cmp);
9645  }
9646 
9647  return Result;
9648 }
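
// ---- Editorial sketch, not part of CGBuiltin.cpp ----
// C-level equivalent of the feature test emitted above: the low 32 bits of the
// mask are checked against __cpu_model.__cpu_features[0] and the high 32 bits
// against the separate __cpu_features2 word; every requested bit must be set.
// The two words are taken as parameters here to keep the sketch self-contained.
static bool CpuSupports(unsigned Features0, unsigned Features2Word,
                        unsigned long long FeaturesMask) {
  unsigned Lo = (unsigned)FeaturesMask;
  unsigned Hi = (unsigned)(FeaturesMask >> 32);
  bool Ok = true;
  if (Lo) Ok &= (Features0 & Lo) == Lo;
  if (Hi) Ok &= (Features2Word & Hi) == Hi;
  return Ok;
}
// -----------------------------------------------------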
9649 
9650 Value *CodeGenFunction::EmitX86CpuInit() {
9651  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
9652  /*Variadic*/ false);
9653  llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
9654  cast<llvm::GlobalValue>(Func)->setDSOLocal(true);
9655  cast<llvm::GlobalValue>(Func)->setDLLStorageClass(
9656  llvm::GlobalValue::DefaultStorageClass);
9657  return Builder.CreateCall(Func);
9658 }
9659 
9660 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
9661  const CallExpr *E) {
9662  if (BuiltinID == X86::BI__builtin_cpu_is)
9663  return EmitX86CpuIs(E);
9664  if (BuiltinID == X86::BI__builtin_cpu_supports)
9665  return EmitX86CpuSupports(E);
9666  if (BuiltinID == X86::BI__builtin_cpu_init)
9667  return EmitX86CpuInit();
9668 
9669  SmallVector<Value*, 4> Ops;
9670 
9671  // Find out if any arguments are required to be integer constant expressions.
9672  unsigned ICEArguments = 0;
9673  ASTContext::GetBuiltinTypeError Error;
9674  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
9675  assert(Error == ASTContext::GE_None && "Should not codegen an error");
9676 
9677  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
9678  // If this is a normal argument, just emit it as a scalar.
9679  if ((ICEArguments & (1 << i)) == 0) {
9680  Ops.push_back(EmitScalarExpr(E->getArg(i)));
9681  continue;
9682  }
9683 
9684  // If this is required to be a constant, constant fold it so that we know
9685  // that the generated intrinsic gets a ConstantInt.
9686  llvm::APSInt Result;
9687  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
9688  assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
9689  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
9690  }
9691 
9692  // These exist so that the builtin that takes an immediate can be bounds
9693  // checked by clang to avoid passing bad immediates to the backend. Since
9694  // AVX has a larger immediate than SSE we would need separate builtins to
9695  // do the different bounds checking. Rather than create a clang-specific
9696  // SSE-only builtin, this implements eight separate builtins to match the
9697  // gcc implementation.
9698  auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
9699  Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
9700  llvm::Function *F = CGM.getIntrinsic(ID);
9701  return Builder.CreateCall(F, Ops);
9702  };
9703 
9704  // For the vector forms of FP comparisons, translate the builtins directly to
9705  // IR.
9706  // TODO: The builtins could be removed if the SSE header files used vector
9707  // extension comparisons directly (vector ordered/unordered may need
9708  // additional support via __builtin_isnan()).
9709  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
9710  Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
9711  llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
9712  llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
9713  Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
9714  return Builder.CreateBitCast(Sext, FPVecTy);
9715  };
9716 
9717  switch (BuiltinID) {
9718  default: return nullptr;
9719  case X86::BI_mm_prefetch: {
9720  Value *Address = Ops[0];
9721  ConstantInt *C = cast<ConstantInt>(Ops[1]);
9722  Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
9723  Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
9724  Value *Data = ConstantInt::get(Int32Ty, 1);
9725  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
9726  return Builder.CreateCall(F, {Address, RW, Locality, Data});
9727  }
9728  case X86::BI_mm_clflush: {
9729  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
9730  Ops[0]);
9731  }
9732  case X86::BI_mm_lfence: {
9733  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
9734  }
9735  case X86::BI_mm_mfence: {
9736  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
9737  }
9738  case X86::BI_mm_sfence: {
9739  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
9740  }
9741  case X86::BI_mm_pause: {
9742  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
9743  }
9744  case X86::BI__rdtsc: {
9745  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
9746  }
9747  case X86::BI__builtin_ia32_rdtscp: {
9748  Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
9749  Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
9750  Ops[0]);
9751  return Builder.CreateExtractValue(Call, 0);
9752  }
9753  case X86::BI__builtin_ia32_lzcnt_u16:
9754  case X86::BI__builtin_ia32_lzcnt_u32:
9755  case X86::BI__builtin_ia32_lzcnt_u64: {
9756  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
9757  return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
9758  }
9759  case X86::BI__builtin_ia32_tzcnt_u16:
9760  case X86::BI__builtin_ia32_tzcnt_u32:
9761  case X86::BI__builtin_ia32_tzcnt_u64: {
9762  Value *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
9763  return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
9764  }
9765  case X86::BI__builtin_ia32_undef128:
9766  case X86::BI__builtin_ia32_undef256:
9767  case X86::BI__builtin_ia32_undef512:
9768  // The x86 definition of "undef" is not the same as the LLVM definition
9769  // (PR32176). We leave optimizing away an unnecessary zero constant to the
9770  // IR optimizer and backend.
9771  // TODO: If we had a "freeze" IR instruction to generate a fixed undef
9772  // value, we should use that here instead of a zero.
9773  return llvm::Constant::getNullValue(ConvertType(E->getType()));
9774  case X86::BI__builtin_ia32_vec_init_v8qi:
9775  case X86::BI__builtin_ia32_vec_init_v4hi:
9776  case X86::BI__builtin_ia32_vec_init_v2si:
9777  return Builder.CreateBitCast(BuildVector(Ops),
9778  llvm::Type::getX86_MMXTy(getLLVMContext()));
9779  case X86::BI__builtin_ia32_vec_ext_v2si:
9780  case X86::BI__builtin_ia32_vec_ext_v16qi:
9781  case X86::BI__builtin_ia32_vec_ext_v8hi:
9782  case X86::BI__builtin_ia32_vec_ext_v4si:
9783  case X86::BI__builtin_ia32_vec_ext_v4sf:
9784  case X86::BI__builtin_ia32_vec_ext_v2di:
9785  case X86::BI__builtin_ia32_vec_ext_v32qi:
9786  case X86::BI__builtin_ia32_vec_ext_v16hi:
9787  case X86::BI__builtin_ia32_vec_ext_v8si:
9788  case X86::BI__builtin_ia32_vec_ext_v4di: {
9789  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
9790  uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
9791  Index &= NumElts - 1;
9792  // These builtins exist so we can ensure the index is an ICE and in range.
9793  // Otherwise we could just do this in the header file.
9794  return Builder.CreateExtractElement(Ops[0], Index);
9795  }
9796  case X86::BI__builtin_ia32_vec_set_v16qi:
9797  case X86::BI__builtin_ia32_vec_set_v8hi:
9798  case X86::BI__builtin_ia32_vec_set_v4si:
9799  case X86::BI__builtin_ia32_vec_set_v2di:
9800  case X86::BI__builtin_ia32_vec_set_v32qi:
9801  case X86::BI__builtin_ia32_vec_set_v16hi:
9802  case X86::BI__builtin_ia32_vec_set_v8si:
9803  case X86::BI__builtin_ia32_vec_set_v4di: {
9804  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
9805  unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
9806  Index &= NumElts - 1;
9807  // These builtins exist so we can ensure the index is an ICE and in range.
9808  // Otherwise we could just do this in the header file.
9809  return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
9810  }
9811  case X86::BI_mm_setcsr:
9812  case X86::BI__builtin_ia32_ldmxcsr: {
9813  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
9814  Builder.CreateStore(Ops[0], Tmp);
9815  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
9816  Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
9817  }
9818  case X86::BI_mm_getcsr:
9819  case X86::BI__builtin_ia32_stmxcsr: {
9820  Address Tmp = CreateMemTemp(E->getType());
9821  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
9822  Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
9823  return Builder.CreateLoad(Tmp, "stmxcsr");
9824  }
9825  case X86::BI__builtin_ia32_xsave:
9826  case X86::BI__builtin_ia32_xsave64:
9827  case X86::BI__builtin_ia32_xrstor:
9828  case X86::BI__builtin_ia32_xrstor64:
9829  case X86::BI__builtin_ia32_xsaveopt:
9830  case X86::BI__builtin_ia32_xsaveopt64:
9831  case X86::BI__builtin_ia32_xrstors:
9832  case X86::BI__builtin_ia32_xrstors64:
9833  case X86::BI__builtin_ia32_xsavec:
9834  case X86::BI__builtin_ia32_xsavec64:
9835  case X86::BI__builtin_ia32_xsaves:
9836  case X86::BI__builtin_ia32_xsaves64: {
9837  Intrinsic::ID ID;
9838 #define INTRINSIC_X86_XSAVE_ID(NAME) \
9839  case X86::BI__builtin_ia32_##NAME: \
9840  ID = Intrinsic::x86_##NAME; \
9841  break
9842  switch (BuiltinID) {
9843  default: llvm_unreachable("Unsupported intrinsic!");
9844  INTRINSIC_X86_XSAVE_ID(xsave);
9845  INTRINSIC_X86_XSAVE_ID(xsave64);
9846  INTRINSIC_X86_XSAVE_ID(xrstor);
9847  INTRINSIC_X86_XSAVE_ID(xrstor64);
9848  INTRINSIC_X86_XSAVE_ID(xsaveopt);
9849  INTRINSIC_X86_XSAVE_ID(xsaveopt64);
9850  INTRINSIC_X86_XSAVE_ID(xrstors);
9851  INTRINSIC_X86_XSAVE_ID(xrstors64);
9852  INTRINSIC_X86_XSAVE_ID(xsavec);
9853  INTRINSIC_X86_XSAVE_ID(xsavec64);
9854  INTRINSIC_X86_XSAVE_ID(xsaves);
9855  INTRINSIC_X86_XSAVE_ID(xsaves64);
9856  }
9857 #undef INTRINSIC_X86_XSAVE_ID
9858  Value *Mhi = Builder.CreateTrunc(
9859  Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
9860  Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
9861  Ops[1] = Mhi;
9862  Ops.push_back(Mlo);
9863  return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
9864  }
9865  case X86::BI__builtin_ia32_storedqudi128_mask:
9866  case X86::BI__builtin_ia32_storedqusi128_mask:
9867  case X86::BI__builtin_ia32_storedquhi128_mask:
9868  case X86::BI__builtin_ia32_storedquqi128_mask:
9869  case X86::BI__builtin_ia32_storeupd128_mask:
9870  case X86::BI__builtin_ia32_storeups128_mask:
9871  case X86::BI__builtin_ia32_storedqudi256_mask:
9872  case X86::BI__builtin_ia32_storedqusi256_mask:
9873  case X86::BI__builtin_ia32_storedquhi256_mask:
9874  case X86::BI__builtin_ia32_storedquqi256_mask:
9875  case X86::BI__builtin_ia32_storeupd256_mask:
9876  case X86::BI__builtin_ia32_storeups256_mask:
9877  case X86::BI__builtin_ia32_storedqudi512_mask:
9878  case X86::BI__builtin_ia32_storedqusi512_mask:
9879  case X86::BI__builtin_ia32_storedquhi512_mask:
9880  case X86::BI__builtin_ia32_storedquqi512_mask:
9881  case X86::BI__builtin_ia32_storeupd512_mask:
9882  case X86::BI__builtin_ia32_storeups512_mask:
9883  return EmitX86MaskedStore(*this, Ops, 1);
9884 
9885  case X86::BI__builtin_ia32_storess128_mask:
9886  case X86::BI__builtin_ia32_storesd128_mask: {
9887  return EmitX86MaskedStore(*this, Ops, 1);
9888  }
9889  case X86::BI__builtin_ia32_vpopcntb_128:
9890  case X86::BI__builtin_ia32_vpopcntd_128:
9891  case X86::BI__builtin_ia32_vpopcntq_128:
9892  case X86::BI__builtin_ia32_vpopcntw_128:
9893  case X86::BI__builtin_ia32_vpopcntb_256:
9894  case X86::BI__builtin_ia32_vpopcntd_256:
9895  case X86::BI__builtin_ia32_vpopcntq_256:
9896  case X86::BI__builtin_ia32_vpopcntw_256:
9897  case X86::BI__builtin_ia32_vpopcntb_512:
9898  case X86::BI__builtin_ia32_vpopcntd_512:
9899  case X86::BI__builtin_ia32_vpopcntq_512:
9900  case X86::BI__builtin_ia32_vpopcntw_512: {
9901  llvm::Type *ResultType = ConvertType(E->getType());
9902  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
9903  return Builder.CreateCall(F, Ops);
9904  }
9905  case X86::BI__builtin_ia32_cvtmask2b128:
9906  case X86::BI__builtin_ia32_cvtmask2b256:
9907  case X86::BI__builtin_ia32_cvtmask2b512:
9908  case X86::BI__builtin_ia32_cvtmask2w128:
9909  case X86::BI__builtin_ia32_cvtmask2w256:
9910  case X86::BI__builtin_ia32_cvtmask2w512:
9911  case X86::BI__builtin_ia32_cvtmask2d128:
9912  case X86::BI__builtin_ia32_cvtmask2d256:
9913  case X86::BI__builtin_ia32_cvtmask2d512:
9914  case X86::BI__builtin_ia32_cvtmask2q128:
9915  case X86::BI__builtin_ia32_cvtmask2q256:
9916  case X86::BI__builtin_ia32_cvtmask2q512:
9917  return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
9918 
9919  case X86::BI__builtin_ia32_cvtb2mask128:
9920  case X86::BI__builtin_ia32_cvtb2mask256:
9921  case X86::BI__builtin_ia32_cvtb2mask512:
9922  case X86::BI__builtin_ia32_cvtw2mask128:
9923  case X86::BI__builtin_ia32_cvtw2mask256:
9924  case X86::BI__builtin_ia32_cvtw2mask512:
9925  case X86::BI__builtin_ia32_cvtd2mask128:
9926  case X86::BI__builtin_ia32_cvtd2mask256:
9927  case X86::BI__builtin_ia32_cvtd2mask512:
9928  case X86::BI__builtin_ia32_cvtq2mask128:
9929  case X86::BI__builtin_ia32_cvtq2mask256:
9930  case X86::BI__builtin_ia32_cvtq2mask512:
9931  return EmitX86ConvertToMask(*this, Ops[0]);
9932 
9933  case X86::BI__builtin_ia32_vfmaddss3:
9934  case X86::BI__builtin_ia32_vfmaddsd3:
9935  case X86::BI__builtin_ia32_vfmaddss3_mask:
9936  case X86::BI__builtin_ia32_vfmaddsd3_mask:
9937  return EmitScalarFMAExpr(*this, Ops, Ops[0]);
9938  case X86::BI__builtin_ia32_vfmaddss:
9939  case X86::BI__builtin_ia32_vfmaddsd:
9940  return EmitScalarFMAExpr(*this, Ops,
9941  Constant::getNullValue(Ops[0]->getType()));
9942  case X86::BI__builtin_ia32_vfmaddss3_maskz:
9943  case X86::BI__builtin_ia32_vfmaddsd3_maskz:
9944  return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true);
9945  case X86::BI__builtin_ia32_vfmaddss3_mask3:
9946  case X86::BI__builtin_ia32_vfmaddsd3_mask3:
9947  return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2);
9948  case X86::BI__builtin_ia32_vfmsubss3_mask3:
9949  case X86::BI__builtin_ia32_vfmsubsd3_mask3:
9950  return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2,
9951  /*NegAcc*/true);
9952  case X86::BI__builtin_ia32_vfmaddps:
9953  case X86::BI__builtin_ia32_vfmaddpd:
9954  case X86::BI__builtin_ia32_vfmaddps256:
9955  case X86::BI__builtin_ia32_vfmaddpd256:
9956  case X86::BI__builtin_ia32_vfmaddps512_mask:
9957  case X86::BI__builtin_ia32_vfmaddps512_maskz:
9958  case X86::BI__builtin_ia32_vfmaddps512_mask3:
9959  case X86::BI__builtin_ia32_vfmsubps512_mask3:
9960  case X86::BI__builtin_ia32_vfmaddpd512_mask:
9961  case X86::BI__builtin_ia32_vfmaddpd512_maskz:
9962  case X86::BI__builtin_ia32_vfmaddpd512_mask3:
9963  case X86::BI__builtin_ia32_vfmsubpd512_mask3:
9964  return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);
9965  case X86::BI__builtin_ia32_vfmaddsubps:
9966  case X86::BI__builtin_ia32_vfmaddsubpd:
9967  case X86::BI__builtin_ia32_vfmaddsubps256:
9968  case X86::BI__builtin_ia32_vfmaddsubpd256:
9969  case X86::BI__builtin_ia32_vfmaddsubps512_mask:
9970  case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
9971  case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
9972  case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
9973  case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
9974  case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
9975  case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
9976  case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
9977  return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true);
9978 
9979  case X86::BI__builtin_ia32_movdqa32store128_mask:
9980  case X86::BI__builtin_ia32_movdqa64store128_mask:
9981  case X86::BI__builtin_ia32_storeaps128_mask:
9982  case X86::BI__builtin_ia32_storeapd128_mask:
9983  case X86::BI__builtin_ia32_movdqa32store256_mask:
9984  case X86::BI__builtin_ia32_movdqa64store256_mask:
9985  case X86::BI__builtin_ia32_storeaps256_mask:
9986  case X86::BI__builtin_ia32_storeapd256_mask:
9987  case X86::BI__builtin_ia32_movdqa32store512_mask:
9988  case X86::BI__builtin_ia32_movdqa64store512_mask:
9989  case X86::BI__builtin_ia32_storeaps512_mask:
9990  case X86::BI__builtin_ia32_storeapd512_mask: {
9991  unsigned Align =
9992  getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
9993  return EmitX86MaskedStore(*this, Ops, Align);
9994  }
9995  case X86::BI__builtin_ia32_loadups128_mask:
9996  case X86::BI__builtin_ia32_loadups256_mask:
9997  case X86::BI__builtin_ia32_loadups512_mask:
9998  case X86::BI__builtin_ia32_loadupd128_mask:
9999  case X86::BI__builtin_ia32_loadupd256_mask:
10000  case X86::BI__builtin_ia32_loadupd512_mask:
10001  case X86::BI__builtin_ia32_loaddquqi128_mask:
10002  case X86::BI__builtin_ia32_loaddquqi256_mask:
10003  case X86::BI__builtin_ia32_loaddquqi512_mask:
10004  case X86::BI__builtin_ia32_loaddquhi128_mask:
10005  case X86::BI__builtin_ia32_loaddquhi256_mask:
10006  case X86::BI__builtin_ia32_loaddquhi512_mask:
10007  case X86::BI__builtin_ia32_loaddqusi128_mask:
10008  case X86::BI__builtin_ia32_loaddqusi256_mask:
10009  case X86::BI__builtin_ia32_loaddqusi512_mask:
10010  case X86::BI__builtin_ia32_loaddqudi128_mask:
10011  case X86::BI__builtin_ia32_loaddqudi256_mask:
10012  case X86::BI__builtin_ia32_loaddqudi512_mask:
10013  return EmitX86MaskedLoad(*this, Ops, 1);
10014 
10015  case X86::BI__builtin_ia32_loadss128_mask:
10016  case X86::BI__builtin_ia32_loadsd128_mask:
10017  return EmitX86MaskedLoad(*this, Ops, 1);
10018 
10019  case X86::BI__builtin_ia32_loadaps128_mask:
10020  case X86::BI__builtin_ia32_loadaps256_mask:
10021  case X86::BI__builtin_ia32_loadaps512_mask:
10022  case X86::BI__builtin_ia32_loadapd128_mask:
10023  case X86::BI__builtin_ia32_loadapd256_mask:
10024  case X86::BI__builtin_ia32_loadapd512_mask:
10025  case X86::BI__builtin_ia32_movdqa32load128_mask:
10026  case X86::BI__builtin_ia32_movdqa32load256_mask:
10027  case X86::BI__builtin_ia32_movdqa32load512_mask:
10028  case X86::BI__builtin_ia32_movdqa64load128_mask:
10029  case X86::BI__builtin_ia32_movdqa64load256_mask:
10030  case X86::BI__builtin_ia32_movdqa64load512_mask: {
10031  unsigned Align =
10032  getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
10033  return EmitX86MaskedLoad(*this, Ops, Align);
10034  }
10035 
10036  case X86::BI__builtin_ia32_expandloaddf128_mask:
10037  case X86::BI__builtin_ia32_expandloaddf256_mask:
10038  case X86::BI__builtin_ia32_expandloaddf512_mask:
10039  case X86::BI__builtin_ia32_expandloadsf128_mask:
10040  case X86::BI__builtin_ia32_expandloadsf256_mask:
10041  case X86::BI__builtin_ia32_expandloadsf512_mask:
10042  case X86::BI__builtin_ia32_expandloaddi128_mask:
10043  case X86::BI__builtin_ia32_expandloaddi256_mask:
10044  case X86::BI__builtin_ia32_expandloaddi512_mask:
10045  case X86::BI__builtin_ia32_expandloadsi128_mask:
10046  case X86::BI__builtin_ia32_expandloadsi256_mask:
10047  case X86::BI__builtin_ia32_expandloadsi512_mask:
10048  case X86::BI__builtin_ia32_expandloadhi128_mask:
10049  case X86::BI__builtin_ia32_expandloadhi256_mask:
10050  case X86::BI__builtin_ia32_expandloadhi512_mask:
10051  case X86::BI__builtin_ia32_expandloadqi128_mask:
10052  case X86::BI__builtin_ia32_expandloadqi256_mask:
10053  case X86::BI__builtin_ia32_expandloadqi512_mask:
10054  return EmitX86ExpandLoad(*this, Ops);
10055 
10056  case X86::BI__builtin_ia32_compressstoredf128_mask:
10057  case X86::BI__builtin_ia32_compressstoredf256_mask:
10058  case X86::BI__builtin_ia32_compressstoredf512_mask:
10059  case X86::BI__builtin_ia32_compressstoresf128_mask:
10060  case X86::BI__builtin_ia32_compressstoresf256_mask:
10061  case X86::BI__builtin_ia32_compressstoresf512_mask:
10062  case X86::BI__builtin_ia32_compressstoredi128_mask:
10063  case X86::BI__builtin_ia32_compressstoredi256_mask:
10064  case X86::BI__builtin_ia32_compressstoredi512_mask:
10065  case X86::BI__builtin_ia32_compressstoresi128_mask:
10066  case X86::BI__builtin_ia32_compressstoresi256_mask:
10067  case X86::BI__builtin_ia32_compressstoresi512_mask:
10068  case X86::BI__builtin_ia32_compressstorehi128_mask:
10069  case X86::BI__builtin_ia32_compressstorehi256_mask:
10070  case X86::BI__builtin_ia32_compressstorehi512_mask:
10071  case X86::BI__builtin_ia32_compressstoreqi128_mask:
10072  case X86::BI__builtin_ia32_compressstoreqi256_mask:
10073  case X86::BI__builtin_ia32_compressstoreqi512_mask:
10074  return EmitX86CompressStore(*this, Ops);
10075 
10076  case X86::BI__builtin_ia32_storehps:
10077  case X86::BI__builtin_ia32_storelps: {
10078  llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
10079  llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
10080 
10081  // cast val to v2i64
10082  Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
10083 
10084  // extract (0, 1)
10085  unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
10086  Ops[1] = Builder.CreateExtractElement(Ops[1], Index, "extract");
10087 
10088  // cast pointer to i64 & store
10089  Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
10090  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
10091  }
10092  case X86::BI__builtin_ia32_vextractf128_pd256:
10093  case X86::BI__builtin_ia32_vextractf128_ps256:
10094  case X86::BI__builtin_ia32_vextractf128_si256:
10095  case X86::BI__builtin_ia32_extract128i256:
10096  case X86::BI__builtin_ia32_extractf64x4_mask:
10097  case X86::BI__builtin_ia32_extractf32x4_mask:
10098  case X86::BI__builtin_ia32_extracti64x4_mask:
10099  case X86::BI__builtin_ia32_extracti32x4_mask:
10100  case X86::BI__builtin_ia32_extractf32x8_mask:
10101  case X86::BI__builtin_ia32_extracti32x8_mask:
10102  case X86::BI__builtin_ia32_extractf32x4_256_mask:
10103  case X86::BI__builtin_ia32_extracti32x4_256_mask:
10104  case X86::BI__builtin_ia32_extractf64x2_256_mask:
10105  case X86::BI__builtin_ia32_extracti64x2_256_mask:
10106  case X86::BI__builtin_ia32_extractf64x2_512_mask:
10107  case X86::BI__builtin_ia32_extracti64x2_512_mask: {
10108  llvm::Type *DstTy = ConvertType(E->getType());
10109  unsigned NumElts = DstTy->getVectorNumElements();
10110  unsigned SrcNumElts = Ops[0]->getType()->getVectorNumElements();
10111  unsigned SubVectors = SrcNumElts / NumElts;
10112  unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
10113  assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
10114  Index &= SubVectors - 1; // Remove any extra bits.
10115  Index *= NumElts;
10116 
10117  uint32_t Indices[16];
10118  for (unsigned i = 0; i != NumElts; ++i)
10119  Indices[i] = i + Index;
10120 
10121  Value *Res = Builder.CreateShuffleVector(Ops[0],
10122  UndefValue::get(Ops[0]->getType()),
10123  makeArrayRef(Indices, NumElts),
10124  "extract");
10125 
10126  if (Ops.size() == 4)
10127  Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
10128 
10129  return Res;
10130  }
10131  case X86::BI__builtin_ia32_vinsertf128_pd256:
10132  case X86::BI__builtin_ia32_vinsertf128_ps256:
10133  case X86::BI__builtin_ia32_vinsertf128_si256:
10134  case X86::BI__builtin_ia32_insert128i256:
10135  case X86::BI__builtin_ia32_insertf64x4:
10136  case X86::BI__builtin_ia32_insertf32x4:
10137  case X86::BI__builtin_ia32_inserti64x4:
10138  case X86::BI__builtin_ia32_inserti32x4:
10139  case X86::BI__builtin_ia32_insertf32x8:
10140  case X86::BI__builtin_ia32_inserti32x8:
10141  case X86::BI__builtin_ia32_insertf32x4_256:
10142  case X86::BI__builtin_ia32_inserti32x4_256:
10143  case X86::BI__builtin_ia32_insertf64x2_256:
10144  case X86::BI__builtin_ia32_inserti64x2_256:
10145  case X86::BI__builtin_ia32_insertf64x2_512:
10146  case X86::BI__builtin_ia32_inserti64x2_512: {
10147  unsigned DstNumElts = Ops[0]->getType()->getVectorNumElements();
10148  unsigned SrcNumElts = Ops[1]->getType()->getVectorNumElements();
10149  unsigned SubVectors = DstNumElts / SrcNumElts;
10150  unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
10151  assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
10152  Index &= SubVectors - 1; // Remove any extra bits.
10153  Index *= SrcNumElts;
10154 
10155  uint32_t Indices[16];
10156  for (unsigned i = 0; i != DstNumElts; ++i)
10157  Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
10158 
10159  Value *Op1 = Builder.CreateShuffleVector(Ops[1],
10160  UndefValue::get(Ops[1]->getType()),
10161  makeArrayRef(Indices, DstNumElts),
10162  "widen");
10163 
10164  for (unsigned i = 0; i != DstNumElts; ++i) {
10165  if (i >= Index && i < (Index + SrcNumElts))
10166  Indices[i] = (i - Index) + DstNumElts;
10167  else
10168  Indices[i] = i;
10169  }
10170 
10171  return Builder.CreateShuffleVector(Ops[0], Op1,
10172  makeArrayRef(Indices, DstNumElts),
10173  "insert");
10174  }
10175  case X86::BI__builtin_ia32_pmovqd512_mask:
10176  case X86::BI__builtin_ia32_pmovwb512_mask: {
10177  Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
10178  return EmitX86Select(*this, Ops[2], Res, Ops[1]);
10179  }
10180  case X86::BI__builtin_ia32_pmovdb512_mask:
10181  case X86::BI__builtin_ia32_pmovdw512_mask:
10182  case X86::BI__builtin_ia32_pmovqw512_mask: {
10183  if (const auto *C = dyn_cast<Constant>(Ops[2]))
10184  if (C->isAllOnesValue())
10185  return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
10186 
10187  Intrinsic::ID IID;
10188  switch (BuiltinID) {
10189  default: llvm_unreachable("Unsupported intrinsic!");
10190  case X86::BI__builtin_ia32_pmovdb512_mask:
10191  IID = Intrinsic::x86_avx512_mask_pmov_db_512;
10192  break;
10193  case X86::BI__builtin_ia32_pmovdw512_mask:
10194  IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
10195  break;
10196  case X86::BI__builtin_ia32_pmovqw512_mask:
10197  IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
10198  break;
10199  }
10200 
10201  Function *Intr = CGM.getIntrinsic(IID);
10202  return Builder.CreateCall(Intr, Ops);
10203  }
10204  case X86::BI__builtin_ia32_pblendw128:
10205  case X86::BI__builtin_ia32_blendpd:
10206  case X86::BI__builtin_ia32_blendps:
10207  case X86::BI__builtin_ia32_blendpd256:
10208  case X86::BI__builtin_ia32_blendps256:
10209  case X86::BI__builtin_ia32_pblendw256:
10210  case X86::BI__builtin_ia32_pblendd128:
10211  case X86::BI__builtin_ia32_pblendd256: {
10212  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10213  unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
10214 
10215  uint32_t Indices[16];
10216  // If there are more than 8 elements, the immediate is used twice so make
10217  // sure we handle that.
10218  for (unsigned i = 0; i != NumElts; ++i)
10219  Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
10220 
10221  return Builder.CreateShuffleVector(Ops[0], Ops[1],
10222  makeArrayRef(Indices, NumElts),
10223  "blend");
10224  }
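  // [Editorial note] For the blend case above: with 4 elements and Imm = 0b0101
  // the indices become {4, 1, 6, 3}, i.e. lanes 0 and 2 are taken from Ops[1]
  // and lanes 1 and 3 from Ops[0].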
10225  case X86::BI__builtin_ia32_pshuflw:
10226  case X86::BI__builtin_ia32_pshuflw256:
10227  case X86::BI__builtin_ia32_pshuflw512: {
10228  uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10229  llvm::Type *Ty = Ops[0]->getType();
10230  unsigned NumElts = Ty->getVectorNumElements();
10231 
10232  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
10233  Imm = (Imm & 0xff) * 0x01010101;
10234 
10235  uint32_t Indices[32];
10236  for (unsigned l = 0; l != NumElts; l += 8) {
10237  for (unsigned i = 0; i != 4; ++i) {
10238  Indices[l + i] = l + (Imm & 3);
10239  Imm >>= 2;
10240  }
10241  for (unsigned i = 4; i != 8; ++i)
10242  Indices[l + i] = l + i;
10243  }
10244 
10245  return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
10246  makeArrayRef(Indices, NumElts),
10247  "pshuflw");
10248  }
10249  case X86::BI__builtin_ia32_pshufhw:
10250  case X86::BI__builtin_ia32_pshufhw256:
10251  case X86::BI__builtin_ia32_pshufhw512: {
10252  uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10253  llvm::Type *Ty = Ops[0]->getType();
10254  unsigned NumElts = Ty->getVectorNumElements();
10255 
10256  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
10257  Imm = (Imm & 0xff) * 0x01010101;
10258 
10259  uint32_t Indices[32];
10260  for (unsigned l = 0; l != NumElts; l += 8) {
10261  for (unsigned i = 0; i != 4; ++i)
10262  Indices[l + i] = l + i;
10263  for (unsigned i = 4; i != 8; ++i) {
10264  Indices[l + i] = l + 4 + (Imm & 3);
10265  Imm >>= 2;
10266  }
10267  }
10268 
10269  return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
10270  makeArrayRef(Indices, NumElts),
10271  "pshufhw");
10272  }
10273  case X86::BI__builtin_ia32_pshufd:
10274  case X86::BI__builtin_ia32_pshufd256:
10275  case X86::BI__builtin_ia32_pshufd512:
10276  case X86::BI__builtin_ia32_vpermilpd:
10277  case X86::BI__builtin_ia32_vpermilps:
10278  case X86::BI__builtin_ia32_vpermilpd256:
10279  case X86::BI__builtin_ia32_vpermilps256:
10280  case X86::BI__builtin_ia32_vpermilpd512:
10281  case X86::BI__builtin_ia32_vpermilps512: {
10282  uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10283  llvm::Type *Ty = Ops[0]->getType();
10284  unsigned NumElts = Ty->getVectorNumElements();
10285  unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
10286  unsigned NumLaneElts = NumElts / NumLanes;
10287 
10288  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
10289  Imm = (Imm & 0xff) * 0x01010101;
10290 
10291  uint32_t Indices[16];
10292  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
10293  for (unsigned i = 0; i != NumLaneElts; ++i) {
10294  Indices[i + l] = (Imm % NumLaneElts) + l;
10295  Imm /= NumLaneElts;
10296  }
10297  }
10298 
10299  return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
10300  makeArrayRef(Indices, NumElts),
10301  "permil");
10302  }
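  // [Editorial note] For the pshufd/vpermilps case above: Imm = 0x1B decodes
  // per 128-bit lane to indices {3, 2, 1, 0}, i.e. it reverses the four 32-bit
  // elements of each lane.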
10303  case X86::BI__builtin_ia32_shufpd:
10304  case X86::BI__builtin_ia32_shufpd256:
10305  case X86::BI__builtin_ia32_shufpd512:
10306  case X86::BI__builtin_ia32_shufps:
10307  case X86::BI__builtin_ia32_shufps256:
10308  case X86::BI__builtin_ia32_shufps512: {
10309  uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
10310  llvm::Type *Ty = Ops[0]->getType();
10311  unsigned NumElts = Ty->getVectorNumElements();
10312  unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
10313  unsigned NumLaneElts = NumElts / NumLanes;
10314 
10315  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
10316  Imm = (Imm & 0xff) * 0x01010101;
10317 
10318  uint32_t Indices[16];
10319  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
10320  for (unsigned i = 0; i != NumLaneElts; ++i) {
10321  unsigned Index = Imm % NumLaneElts;
10322  Imm /= NumLaneElts;
10323  if (i >= (NumLaneElts / 2))
10324  Index += NumElts;
10325  Indices[l + i] = l + Index;
10326  }
10327  }
10328 
10329  return Builder.CreateShuffleVector(Ops[0], Ops[1],
10330  makeArrayRef(Indices, NumElts),
10331  "shufp");
10332  }
10333  case X86::BI__builtin_ia32_permdi256:
10334  case X86::BI__builtin_ia32_permdf256:
10335  case X86::BI__builtin_ia32_permdi512:
10336  case X86::BI__builtin_ia32_permdf512: {
10337  unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10338  llvm::Type *Ty = Ops[0]->getType();
10339  unsigned NumElts = Ty->getVectorNumElements();
10340 
10341  // These intrinsics operate on 256-bit lanes of four 64-bit elements.
10342  uint32_t Indices[8];
10343  for (unsigned l = 0; l != NumElts; l += 4)
10344  for (unsigned i = 0; i != 4; ++i)
10345  Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
10346 
10347  return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
10348  makeArrayRef(Indices, NumElts),
10349  "perm");
10350  }
10351  case X86::BI__builtin_ia32_palignr128:
10352  case X86::BI__builtin_ia32_palignr256:
10353  case X86::BI__builtin_ia32_palignr512: {
10354  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
10355 
10356  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10357  assert(NumElts % 16 == 0);
10358 
10359  // If palignr is shifting the pair of vectors more than the size of two
10360  // lanes, emit zero.
10361  if (ShiftVal >= 32)
10362  return llvm::Constant::getNullValue(ConvertType(E->getType()));
10363 
10364  // If palignr is shifting the pair of input vectors more than one lane,
10365  // but less than two lanes, convert to shifting in zeroes.
10366  if (ShiftVal > 16) {
10367  ShiftVal -= 16;
10368  Ops[1] = Ops[0];
10369  Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
10370  }
10371 
10372  uint32_t Indices[64];
10373  // 256/512-bit palignr operates on 128-bit lanes, so we need to handle that
10374  for (unsigned l = 0; l != NumElts; l += 16) {
10375  for (unsigned i = 0; i != 16; ++i) {
10376  unsigned Idx = ShiftVal + i;
10377  if (Idx >= 16)
10378  Idx += NumElts - 16; // End of lane, switch operand.
10379  Indices[l + i] = Idx + l;
10380  }
10381  }
10382 
10383  return Builder.CreateShuffleVector(Ops[1], Ops[0],
10384  makeArrayRef(Indices, NumElts),
10385  "palignr");
10386  }
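  // [Editorial note] For the palignr case above: a 128-bit palignr with
  // ShiftVal = 4 yields byte indices {4..19} into the Ops[1]:Ops[0]
  // concatenation, i.e. bytes 4..15 of Ops[1] followed by bytes 0..3 of Ops[0].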
10387  case X86::BI__builtin_ia32_alignd128:
10388  case X86::BI__builtin_ia32_alignd256:
10389  case X86::BI__builtin_ia32_alignd512:
10390  case X86::BI__builtin_ia32_alignq128:
10391  case X86::BI__builtin_ia32_alignq256:
10392  case X86::BI__builtin_ia32_alignq512: {
10393  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10394  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
10395 
10396  // Mask the shift amount to width of two vectors.
10397  ShiftVal &= (2 * NumElts) - 1;
10398 
10399  uint32_t Indices[16];
10400  for (unsigned i = 0; i != NumElts; ++i)
10401  Indices[i] = i + ShiftVal;
10402 
10403  return Builder.CreateShuffleVector(Ops[1], Ops[0],
10404  makeArrayRef(Indices, NumElts),
10405  "valign");
10406  }
10407  case X86::BI__builtin_ia32_shuf_f32x4_256:
10408  case X86::BI__builtin_ia32_shuf_f64x2_256:
10409  case X86::BI__builtin_ia32_shuf_i32x4_256:
10410  case X86::BI__builtin_ia32_shuf_i64x2_256:
10411  case X86::BI__builtin_ia32_shuf_f32x4:
10412  case X86::BI__builtin_ia32_shuf_f64x2:
10413  case X86::BI__builtin_ia32_shuf_i32x4:
10414  case X86::BI__builtin_ia32_shuf_i64x2: {
10415  unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
10416  llvm::Type *Ty = Ops[0]->getType();
10417  unsigned NumElts = Ty->getVectorNumElements();
10418  unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
10419  unsigned NumLaneElts = NumElts / NumLanes;
10420 
10421  uint32_t Indices[16];
10422  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
10423  unsigned Index = (Imm % NumLanes) * NumLaneElts;
10424  Imm /= NumLanes; // Discard the bits we just used.
10425  if (l >= (NumElts / 2))
10426  Index += NumElts; // Switch to other source.
10427  for (unsigned i = 0; i != NumLaneElts; ++i) {
10428  Indices[l + i] = Index + i;
10429  }
10430  }
10431 
10432  return Builder.CreateShuffleVector(Ops[0], Ops[1],
10433  makeArrayRef(Indices, NumElts),
10434  "shuf");
10435  }
10436 
10437  case X86::BI__builtin_ia32_vperm2f128_pd256:
10438  case X86::BI__builtin_ia32_vperm2f128_ps256:
10439  case X86::BI__builtin_ia32_vperm2f128_si256:
10440  case X86::BI__builtin_ia32_permti256: {
10441  unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
10442  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10443 
10444  // This takes a very simple approach since there are two lanes and a
10445  // shuffle can have 2 inputs. So we reserve the first input for the first
10446  // lane and the second input for the second lane. This may result in
10447  // duplicate sources, but this can be dealt with in the backend.
10448 
10449  Value *OutOps[2];
10450  uint32_t Indices[8];
10451  for (unsigned l = 0; l != 2; ++l) {
10452  // Determine the source for this lane.
10453  if (Imm & (1 << ((l * 4) + 3)))
10454  OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
10455  else if (Imm & (1 << ((l * 4) + 1)))
10456  OutOps[l] = Ops[1];
10457  else
10458  OutOps[l] = Ops[0];
10459 
10460  for (unsigned i = 0; i != NumElts/2; ++i) {
10461  // Start with ith element of the source for this lane.
10462  unsigned Idx = (l * NumElts) + i;
10463  // If bit 0 of the immediate half is set, switch to the high half of
10464  // the source.
10465  if (Imm & (1 << (l * 4)))
10466  Idx += NumElts/2;
10467  Indices[(l * (NumElts/2)) + i] = Idx;
10468  }
10469  }
10470 
10471  return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
10472  makeArrayRef(Indices, NumElts),
10473  "vperm");
10474  }
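  // [Editorial note] For the vperm2f128/permti case above: Imm = 0x31 selects
  // the high 128-bit lane of Ops[0] for the low half of the result (low nibble
  // 0x1) and the high lane of Ops[1] for the high half (high nibble 0x3).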
10475 
10476  case X86::BI__builtin_ia32_pslldqi128_byteshift:
10477  case X86::BI__builtin_ia32_pslldqi256_byteshift:
10478  case X86::BI__builtin_ia32_pslldqi512_byteshift: {
10479  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
10480  llvm::Type *ResultType = Ops[0]->getType();
10481  // Builtin type is vXi64 so multiply by 8 to get bytes.
10482  unsigned NumElts = ResultType->getVectorNumElements() * 8;
10483 
10484  // If pslldq is shifting the vector more than 15 bytes, emit zero.
10485  if (ShiftVal >= 16)
10486  return llvm::Constant::getNullValue(ResultType);
10487 
10488  uint32_t Indices[64];
10489  // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
10490  for (unsigned l = 0; l != NumElts; l += 16) {
10491  for (unsigned i = 0; i != 16; ++i) {
10492  unsigned Idx = NumElts + i - ShiftVal;
10493  if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
10494  Indices[l + i] = Idx + l;
10495  }
10496  }
10497 
10498  llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
10499  Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
10500  Value *Zero = llvm::Constant::getNullValue(VecTy);
10501  Value *SV = Builder.CreateShuffleVector(Zero, Cast,
10502  makeArrayRef(Indices, NumElts),
10503  "pslldq");
10504  return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
10505  }
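  // [Editorial note] For the pslldq case above: a 128-bit shift by 3 bytes
  // produces indices {13..28}; the first three results come from the zero
  // vector and the rest are bytes 0..12 of the source, i.e. a byte-wise left
  // shift with zero fill.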
10506  case X86::BI__builtin_ia32_psrldqi128_byteshift:
10507  case X86::BI__builtin_ia32_psrldqi256_byteshift:
10508  case X86::BI__builtin_ia32_psrldqi512_byteshift: {
10509  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
10510  llvm::Type *ResultType = Ops[0]->getType();
10511  // Builtin type is vXi64 so multiply by 8 to get bytes.
10512  unsigned NumElts = ResultType->getVectorNumElements() * 8;
10513 
10514  // If psrldq is shifting the vector more than 15 bytes, emit zero.
10515  if (ShiftVal >= 16)
10516  return llvm::Constant::getNullValue(ResultType);
10517 
10518  uint32_t Indices[64];
10519  // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
10520  for (unsigned l = 0; l != NumElts; l += 16) {
10521  for (unsigned i = 0; i != 16; ++i) {
10522  unsigned Idx = i + ShiftVal;
10523  if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
10524  Indices[l + i] = Idx + l;
10525  }
10526  }
10527 
10528  llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
10529  Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
10530  Value *Zero = llvm::Constant::getNullValue(VecTy);
10531  Value *SV = Builder.CreateShuffleVector(Cast, Zero,
10532  makeArrayRef(Indices, NumElts),
10533  "psrldq");
10534  return Builder.CreateBitCast(SV, ResultType, "cast");
10535  }
10536  case X86::BI__builtin_ia32_kshiftliqi:
10537  case X86::BI__builtin_ia32_kshiftlihi:
10538  case X86::BI__builtin_ia32_kshiftlisi:
10539  case X86::BI__builtin_ia32_kshiftlidi: {
10540  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
10541  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10542 
10543  if (ShiftVal >= NumElts)
10544  return llvm::Constant::getNullValue(Ops[0]->getType());
10545 
10546  Value *In = getMaskVecValue(*this, Ops[0], NumElts);
10547 
10548  uint32_t Indices[64];
10549  for (unsigned i = 0; i != NumElts; ++i)
10550  Indices[i] = NumElts + i - ShiftVal;
10551 
10552  Value *Zero = llvm::Constant::getNullValue(In->getType());
10553  Value *SV = Builder.CreateShuffleVector(Zero, In,
10554  makeArrayRef(Indices, NumElts),
10555  "kshiftl");
10556  return Builder.CreateBitCast(SV, Ops[0]->getType());
10557  }
10558  case X86::BI__builtin_ia32_kshiftriqi:
10559  case X86::BI__builtin_ia32_kshiftrihi:
10560  case X86::BI__builtin_ia32_kshiftrisi:
10561  case X86::BI__builtin_ia32_kshiftridi: {
10562  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
10563  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10564 
10565  if (ShiftVal >= NumElts)
10566  return llvm::Constant::getNullValue(Ops[0]->getType());
10567 
10568  Value *In = getMaskVecValue(*this, Ops[0], NumElts);
10569 
10570  uint32_t Indices[64];
10571  for (unsigned i = 0; i != NumElts; ++i)
10572  Indices[i] = i + ShiftVal;
10573 
10574  Value *Zero = llvm::Constant::getNullValue(In->getType());
10575  Value *SV = Builder.CreateShuffleVector(In, Zero,
10576  makeArrayRef(Indices, NumElts),
10577  "kshiftr");
10578  return Builder.CreateBitCast(SV, Ops[0]->getType());
10579  }
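Viewed as plain integers, the two mask shifts above reduce to ordinary shifts with zero fill. A small sketch for the 8-bit (qi) case:

#include <stdint.h>
uint8_t kshiftl_model(uint8_t Mask, unsigned N) {   // like __builtin_ia32_kshiftliqi
  return N >= 8 ? 0 : (uint8_t)(Mask << N);
}
uint8_t kshiftr_model(uint8_t Mask, unsigned N) {   // like __builtin_ia32_kshiftriqi
  return N >= 8 ? 0 : (uint8_t)(Mask >> N);
}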
10580  case X86::BI__builtin_ia32_movnti:
10581  case X86::BI__builtin_ia32_movnti64:
10582  case X86::BI__builtin_ia32_movntsd:
10583  case X86::BI__builtin_ia32_movntss: {
10584  llvm::MDNode *Node = llvm::MDNode::get(
10585  getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
10586 
10587  Value *Ptr = Ops[0];
10588  Value *Src = Ops[1];
10589 
10590  // Extract the 0'th element of the source vector.
10591  if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
10592  BuiltinID == X86::BI__builtin_ia32_movntss)
10593  Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
10594 
10595  // Convert the type of the pointer to a pointer to the stored type.
10596  Value *BC = Builder.CreateBitCast(
10597  Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
10598 
10599  // Unaligned nontemporal store of the scalar value.
10600  StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
10601  SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
10602  SI->setAlignment(1);
10603  return SI;
10604  }
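A hedged usage sketch: these builtins normally sit behind the _mm_stream_* header wrappers (exact header details may vary by version), and the code above turns them into an ordinary store tagged with !nontemporal metadata and alignment 1:

#include <emmintrin.h>
void stream_store(int *p, int x) {
  _mm_stream_si32(p, x);   // typically expands to __builtin_ia32_movnti(p, x)
}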
10605  // Rotate is a special case of funnel shift - 1st 2 args are the same.
10606  case X86::BI__builtin_ia32_vprotb:
10607  case X86::BI__builtin_ia32_vprotw:
10608  case X86::BI__builtin_ia32_vprotd:
10609  case X86::BI__builtin_ia32_vprotq:
10610  case X86::BI__builtin_ia32_vprotbi:
10611  case X86::BI__builtin_ia32_vprotwi:
10612  case X86::BI__builtin_ia32_vprotdi:
10613  case X86::BI__builtin_ia32_vprotqi:
10614  case X86::BI__builtin_ia32_prold128:
10615  case X86::BI__builtin_ia32_prold256:
10616  case X86::BI__builtin_ia32_prold512:
10617  case X86::BI__builtin_ia32_prolq128:
10618  case X86::BI__builtin_ia32_prolq256:
10619  case X86::BI__builtin_ia32_prolq512:
10620  case X86::BI__builtin_ia32_prolvd128:
10621  case X86::BI__builtin_ia32_prolvd256:
10622  case X86::BI__builtin_ia32_prolvd512:
10623  case X86::BI__builtin_ia32_prolvq128:
10624  case X86::BI__builtin_ia32_prolvq256:
10625  case X86::BI__builtin_ia32_prolvq512:
10626  return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
10627  case X86::BI__builtin_ia32_prord128:
10628  case X86::BI__builtin_ia32_prord256:
10629  case X86::BI__builtin_ia32_prord512:
10630  case X86::BI__builtin_ia32_prorq128:
10631  case X86::BI__builtin_ia32_prorq256:
10632  case X86::BI__builtin_ia32_prorq512:
10633  case X86::BI__builtin_ia32_prorvd128:
10634  case X86::BI__builtin_ia32_prorvd256:
10635  case X86::BI__builtin_ia32_prorvd512:
10636  case X86::BI__builtin_ia32_prorvq128:
10637  case X86::BI__builtin_ia32_prorvq256:
10638  case X86::BI__builtin_ia32_prorvq512:
10639  return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
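The identity used here, sketched for 32-bit lanes: a rotate is a funnel shift whose two data operands are the same value.

#include <stdint.h>
uint32_t rotl32_model(uint32_t x, unsigned n) {   // behaves like fshl(x, x, n)
  n &= 31;
  return (x << n) | (x >> ((32 - n) & 31));
}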
10640  case X86::BI__builtin_ia32_selectb_128:
10641  case X86::BI__builtin_ia32_selectb_256:
10642  case X86::BI__builtin_ia32_selectb_512:
10643  case X86::BI__builtin_ia32_selectw_128:
10644  case X86::BI__builtin_ia32_selectw_256:
10645  case X86::BI__builtin_ia32_selectw_512:
10646  case X86::BI__builtin_ia32_selectd_128:
10647  case X86::BI__builtin_ia32_selectd_256:
10648  case X86::BI__builtin_ia32_selectd_512:
10649  case X86::BI__builtin_ia32_selectq_128:
10650  case X86::BI__builtin_ia32_selectq_256:
10651  case X86::BI__builtin_ia32_selectq_512:
10652  case X86::BI__builtin_ia32_selectps_128:
10653  case X86::BI__builtin_ia32_selectps_256:
10654  case X86::BI__builtin_ia32_selectps_512:
10655  case X86::BI__builtin_ia32_selectpd_128:
10656  case X86::BI__builtin_ia32_selectpd_256:
10657  case X86::BI__builtin_ia32_selectpd_512:
10658  return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
10659  case X86::BI__builtin_ia32_selectss_128:
10660  case X86::BI__builtin_ia32_selectsd_128: {
10661  Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
10662  Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
10663  A = EmitX86ScalarSelect(*this, Ops[0], A, B);
10664  return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
10665  }
10666  case X86::BI__builtin_ia32_cmpb128_mask:
10667  case X86::BI__builtin_ia32_cmpb256_mask:
10668  case X86::BI__builtin_ia32_cmpb512_mask:
10669  case X86::BI__builtin_ia32_cmpw128_mask:
10670  case X86::BI__builtin_ia32_cmpw256_mask:
10671  case X86::BI__builtin_ia32_cmpw512_mask:
10672  case X86::BI__builtin_ia32_cmpd128_mask:
10673  case X86::BI__builtin_ia32_cmpd256_mask:
10674  case X86::BI__builtin_ia32_cmpd512_mask:
10675  case X86::BI__builtin_ia32_cmpq128_mask:
10676  case X86::BI__builtin_ia32_cmpq256_mask:
10677  case X86::BI__builtin_ia32_cmpq512_mask: {
10678  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
10679  return EmitX86MaskedCompare(*this, CC, true, Ops);
10680  }
10681  case X86::BI__builtin_ia32_ucmpb128_mask:
10682  case X86::BI__builtin_ia32_ucmpb256_mask:
10683  case X86::BI__builtin_ia32_ucmpb512_mask:
10684  case X86::BI__builtin_ia32_ucmpw128_mask:
10685  case X86::BI__builtin_ia32_ucmpw256_mask:
10686  case X86::BI__builtin_ia32_ucmpw512_mask:
10687  case X86::BI__builtin_ia32_ucmpd128_mask:
10688  case X86::BI__builtin_ia32_ucmpd256_mask:
10689  case X86::BI__builtin_ia32_ucmpd512_mask:
10690  case X86::BI__builtin_ia32_ucmpq128_mask:
10691  case X86::BI__builtin_ia32_ucmpq256_mask:
10692  case X86::BI__builtin_ia32_ucmpq512_mask: {
10693  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
10694  return EmitX86MaskedCompare(*this, CC, false, Ops);
10695  }
10696 
10697  case X86::BI__builtin_ia32_kortestcqi:
10698  case X86::BI__builtin_ia32_kortestchi:
10699  case X86::BI__builtin_ia32_kortestcsi:
10700  case X86::BI__builtin_ia32_kortestcdi: {
10701  Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
10702  Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
10703  Value *Cmp = Builder.CreateICmpEQ(Or, C);
10704  return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
10705  }
10706  case X86::BI__builtin_ia32_kortestzqi:
10707  case X86::BI__builtin_ia32_kortestzhi:
10708  case X86::BI__builtin_ia32_kortestzsi:
10709  case X86::BI__builtin_ia32_kortestzdi: {
10710  Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
10711  Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
10712  Value *Cmp = Builder.CreateICmpEQ(Or, C);
10713  return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
10714  }
10715 
10716  case X86::BI__builtin_ia32_ktestcqi:
10717  case X86::BI__builtin_ia32_ktestzqi:
10718  case X86::BI__builtin_ia32_ktestchi:
10719  case X86::BI__builtin_ia32_ktestzhi:
10720  case X86::BI__builtin_ia32_ktestcsi:
10721  case X86::BI__builtin_ia32_ktestzsi:
10722  case X86::BI__builtin_ia32_ktestcdi:
10723  case X86::BI__builtin_ia32_ktestzdi: {
10724  Intrinsic::ID IID;
10725  switch (BuiltinID) {
10726  default: llvm_unreachable("Unsupported intrinsic!");
10727  case X86::BI__builtin_ia32_ktestcqi:
10728  IID = Intrinsic::x86_avx512_ktestc_b;
10729  break;
10730  case X86::BI__builtin_ia32_ktestzqi:
10731  IID = Intrinsic::x86_avx512_ktestz_b;
10732  break;
10733  case X86::BI__builtin_ia32_ktestchi:
10734  IID = Intrinsic::x86_avx512_ktestc_w;
10735  break;
10736  case X86::BI__builtin_ia32_ktestzhi:
10737  IID = Intrinsic::x86_avx512_ktestz_w;
10738  break;
10739  case X86::BI__builtin_ia32_ktestcsi:
10740  IID = Intrinsic::x86_avx512_ktestc_d;
10741  break;
10742  case X86::BI__builtin_ia32_ktestzsi:
10743  IID = Intrinsic::x86_avx512_ktestz_d;
10744  break;
10745  case X86::BI__builtin_ia32_ktestcdi:
10746  IID = Intrinsic::x86_avx512_ktestc_q;
10747  break;
10748  case X86::BI__builtin_ia32_ktestzdi:
10749  IID = Intrinsic::x86_avx512_ktestz_q;
10750  break;
10751  }
10752 
10753  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10754  Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
10755  Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
10756  Function *Intr = CGM.getIntrinsic(IID);
10757  return Builder.CreateCall(Intr, {LHS, RHS});
10758  }
10759 
10760  case X86::BI__builtin_ia32_kaddqi:
10761  case X86::BI__builtin_ia32_kaddhi:
10762  case X86::BI__builtin_ia32_kaddsi:
10763  case X86::BI__builtin_ia32_kadddi: {
10764  Intrinsic::ID IID;
10765  switch (BuiltinID) {
10766  default: llvm_unreachable("Unsupported intrinsic!");
10767  case X86::BI__builtin_ia32_kaddqi:
10768  IID = Intrinsic::x86_avx512_kadd_b;
10769  break;
10770  case X86::BI__builtin_ia32_kaddhi:
10771  IID = Intrinsic::x86_avx512_kadd_w;
10772  break;
10773  case X86::BI__builtin_ia32_kaddsi:
10774  IID = Intrinsic::x86_avx512_kadd_d;
10775  break;
10776  case X86::BI__builtin_ia32_kadddi:
10777  IID = Intrinsic::x86_avx512_kadd_q;
10778  break;
10779  }
10780 
10781  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10782  Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
10783  Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
10784  Function *Intr = CGM.getIntrinsic(IID);
10785  Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
10786  return Builder.CreateBitCast(Res, Ops[0]->getType());
10787  }
10788  case X86::BI__builtin_ia32_kandqi:
10789  case X86::BI__builtin_ia32_kandhi:
10790  case X86::BI__builtin_ia32_kandsi:
10791  case X86::BI__builtin_ia32_kanddi:
10792  return EmitX86MaskLogic(*this, Instruction::And, Ops);
10793  case X86::BI__builtin_ia32_kandnqi:
10794  case X86::BI__builtin_ia32_kandnhi:
10795  case X86::BI__builtin_ia32_kandnsi:
10796  case X86::BI__builtin_ia32_kandndi:
10797  return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
10798  case X86::BI__builtin_ia32_korqi:
10799  case X86::BI__builtin_ia32_korhi:
10800  case X86::BI__builtin_ia32_korsi:
10801  case X86::BI__builtin_ia32_kordi:
10802  return EmitX86MaskLogic(*this, Instruction::Or, Ops);
10803  case X86::BI__builtin_ia32_kxnorqi:
10804  case X86::BI__builtin_ia32_kxnorhi:
10805  case X86::BI__builtin_ia32_kxnorsi:
10806  case X86::BI__builtin_ia32_kxnordi:
10807  return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
10808  case X86::BI__builtin_ia32_kxorqi:
10809  case X86::BI__builtin_ia32_kxorhi:
10810  case X86::BI__builtin_ia32_kxorsi:
10811  case X86::BI__builtin_ia32_kxordi:
10812  return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
10813  case X86::BI__builtin_ia32_knotqi:
10814  case X86::BI__builtin_ia32_knothi:
10815  case X86::BI__builtin_ia32_knotsi:
10816  case X86::BI__builtin_ia32_knotdi: {
10817  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10818  Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
10819  return Builder.CreateBitCast(Builder.CreateNot(Res),
10820  Ops[0]->getType());
10821  }
10822  case X86::BI__builtin_ia32_kmovb:
10823  case X86::BI__builtin_ia32_kmovw:
10824  case X86::BI__builtin_ia32_kmovd:
10825  case X86::BI__builtin_ia32_kmovq: {
10826  // Bitcast to vXi1 type and then back to integer. This gets the mask
10827  // register type into the IR, but might be optimized out depending on
10828  // what's around it.
10829  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10830  Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
10831  return Builder.CreateBitCast(Res, Ops[0]->getType());
10832  }
10833 
10834  case X86::BI__builtin_ia32_kunpckdi:
10835  case X86::BI__builtin_ia32_kunpcksi:
10836  case X86::BI__builtin_ia32_kunpckhi: {
10837  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10838  Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
10839  Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
10840  uint32_t Indices[64];
10841  for (unsigned i = 0; i != NumElts; ++i)
10842  Indices[i] = i;
10843 
10844  // First extract half of each vector. This gives better codegen than
10845  // doing it in a single shuffle.
10846  LHS = Builder.CreateShuffleVector(LHS, LHS,
10847  makeArrayRef(Indices, NumElts / 2));
10848  RHS = Builder.CreateShuffleVector(RHS, RHS,
10849  makeArrayRef(Indices, NumElts / 2));
10850  // Concat the vectors.
10851  // NOTE: Operands are swapped to match the intrinsic definition.
10852  Value *Res = Builder.CreateShuffleVector(RHS, LHS,
10853  makeArrayRef(Indices, NumElts));
10854  return Builder.CreateBitCast(Res, Ops[0]->getType());
10855  }
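In scalar terms, the shuffle pair above concatenates the low halves of the two masks, with the first operand landing in the upper half of the result. A sketch for the 16-bit (kunpckhi) case:

#include <stdint.h>
uint16_t kunpck_model(uint16_t a, uint16_t b) {
  // Result low byte comes from b, high byte from a (note the swapped shuffle
  // operands in the code above).
  return (uint16_t)(((a & 0xff) << 8) | (b & 0xff));
}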
10856 
10857  case X86::BI__builtin_ia32_vplzcntd_128:
10858  case X86::BI__builtin_ia32_vplzcntd_256:
10859  case X86::BI__builtin_ia32_vplzcntd_512:
10860  case X86::BI__builtin_ia32_vplzcntq_128:
10861  case X86::BI__builtin_ia32_vplzcntq_256:
10862  case X86::BI__builtin_ia32_vplzcntq_512: {
10863  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
10864  return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
10865  }
10866  case X86::BI__builtin_ia32_sqrtss:
10867  case X86::BI__builtin_ia32_sqrtsd: {
10868  Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
10869  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
10870  A = Builder.CreateCall(F, {A});
10871  return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
10872  }
10873  case X86::BI__builtin_ia32_sqrtsd_round_mask:
10874  case X86::BI__builtin_ia32_sqrtss_round_mask: {
10875  unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
10876  // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
10877  // otherwise keep the intrinsic.
10878  if (CC != 4) {
10879  Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtsd_round_mask ?
10880  Intrinsic::x86_avx512_mask_sqrt_sd :
10881  Intrinsic::x86_avx512_mask_sqrt_ss;
10882  return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
10883  }
10884  Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
10885  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
10886  A = Builder.CreateCall(F, A);
10887  Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
10888  A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
10889  return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
10890  }
10891  case X86::BI__builtin_ia32_sqrtpd256:
10892  case X86::BI__builtin_ia32_sqrtpd:
10893  case X86::BI__builtin_ia32_sqrtps256:
10894  case X86::BI__builtin_ia32_sqrtps:
10895  case X86::BI__builtin_ia32_sqrtps512:
10896  case X86::BI__builtin_ia32_sqrtpd512: {
10897  if (Ops.size() == 2) {
10898  unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10899  // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
10900  // otherwise keep the intrinsic.
10901  if (CC != 4) {
10902  Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512 ?
10903  Intrinsic::x86_avx512_sqrt_ps_512 :
10904  Intrinsic::x86_avx512_sqrt_pd_512;
10905  return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
10906  }
10907  }
10908  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
10909  return Builder.CreateCall(F, Ops[0]);
10910  }
10911  case X86::BI__builtin_ia32_pabsb128:
10912  case X86::BI__builtin_ia32_pabsw128:
10913  case X86::BI__builtin_ia32_pabsd128:
10914  case X86::BI__builtin_ia32_pabsb256:
10915  case X86::BI__builtin_ia32_pabsw256:
10916  case X86::BI__builtin_ia32_pabsd256:
10917  case X86::BI__builtin_ia32_pabsq128:
10918  case X86::BI__builtin_ia32_pabsq256:
10919  case X86::BI__builtin_ia32_pabsb512:
10920  case X86::BI__builtin_ia32_pabsw512:
10921  case X86::BI__builtin_ia32_pabsd512:
10922  case X86::BI__builtin_ia32_pabsq512:
10923  return EmitX86Abs(*this, Ops);
10924 
10925  case X86::BI__builtin_ia32_pmaxsb128:
10926  case X86::BI__builtin_ia32_pmaxsw128:
10927  case X86::BI__builtin_ia32_pmaxsd128:
10928  case X86::BI__builtin_ia32_pmaxsq128:
10929  case X86::BI__builtin_ia32_pmaxsb256:
10930  case X86::BI__builtin_ia32_pmaxsw256:
10931  case X86::BI__builtin_ia32_pmaxsd256:
10932  case X86::BI__builtin_ia32_pmaxsq256:
10933  case X86::BI__builtin_ia32_pmaxsb512:
10934  case X86::BI__builtin_ia32_pmaxsw512:
10935  case X86::BI__builtin_ia32_pmaxsd512:
10936  case X86::BI__builtin_ia32_pmaxsq512:
10937  return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
10938  case X86::BI__builtin_ia32_pmaxub128:
10939  case X86::BI__builtin_ia32_pmaxuw128:
10940  case X86::BI__builtin_ia32_pmaxud128:
10941  case X86::BI__builtin_ia32_pmaxuq128:
10942  case X86::BI__builtin_ia32_pmaxub256:
10943  case X86::BI__builtin_ia32_pmaxuw256:
10944  case X86::BI__builtin_ia32_pmaxud256:
10945  case X86::BI__builtin_ia32_pmaxuq256:
10946  case X86::BI__builtin_ia32_pmaxub512:
10947  case X86::BI__builtin_ia32_pmaxuw512:
10948  case X86::BI__builtin_ia32_pmaxud512:
10949  case X86::BI__builtin_ia32_pmaxuq512:
10950  return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
10951  case X86::BI__builtin_ia32_pminsb128:
10952  case X86::BI__builtin_ia32_pminsw128:
10953  case X86::BI__builtin_ia32_pminsd128:
10954  case X86::BI__builtin_ia32_pminsq128:
10955  case X86::BI__builtin_ia32_pminsb256:
10956  case X86::BI__builtin_ia32_pminsw256:
10957  case X86::BI__builtin_ia32_pminsd256:
10958  case X86::BI__builtin_ia32_pminsq256:
10959  case X86::BI__builtin_ia32_pminsb512:
10960  case X86::BI__builtin_ia32_pminsw512:
10961  case X86::BI__builtin_ia32_pminsd512:
10962  case X86::BI__builtin_ia32_pminsq512:
10963  return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
10964  case X86::BI__builtin_ia32_pminub128:
10965  case X86::BI__builtin_ia32_pminuw128:
10966  case X86::BI__builtin_ia32_pminud128:
10967  case X86::BI__builtin_ia32_pminuq128:
10968  case X86::BI__builtin_ia32_pminub256:
10969  case X86::BI__builtin_ia32_pminuw256:
10970  case X86::BI__builtin_ia32_pminud256:
10971  case X86::BI__builtin_ia32_pminuq256:
10972  case X86::BI__builtin_ia32_pminub512:
10973  case X86::BI__builtin_ia32_pminuw512:
10974  case X86::BI__builtin_ia32_pminud512:
10975  case X86::BI__builtin_ia32_pminuq512:
10976  return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
10977 
10978  case X86::BI__builtin_ia32_pmuludq128:
10979  case X86::BI__builtin_ia32_pmuludq256:
10980  case X86::BI__builtin_ia32_pmuludq512:
10981  return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
10982 
10983  case X86::BI__builtin_ia32_pmuldq128:
10984  case X86::BI__builtin_ia32_pmuldq256:
10985  case X86::BI__builtin_ia32_pmuldq512:
10986  return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
10987 
10988  case X86::BI__builtin_ia32_pternlogd512_mask:
10989  case X86::BI__builtin_ia32_pternlogq512_mask:
10990  case X86::BI__builtin_ia32_pternlogd128_mask:
10991  case X86::BI__builtin_ia32_pternlogd256_mask:
10992  case X86::BI__builtin_ia32_pternlogq128_mask:
10993  case X86::BI__builtin_ia32_pternlogq256_mask:
10994  return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
10995 
10996  case X86::BI__builtin_ia32_pternlogd512_maskz:
10997  case X86::BI__builtin_ia32_pternlogq512_maskz:
10998  case X86::BI__builtin_ia32_pternlogd128_maskz:
10999  case X86::BI__builtin_ia32_pternlogd256_maskz:
11000  case X86::BI__builtin_ia32_pternlogq128_maskz:
11001  case X86::BI__builtin_ia32_pternlogq256_maskz:
11002  return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
11003 
11004  case X86::BI__builtin_ia32_vpshldd128:
11005  case X86::BI__builtin_ia32_vpshldd256:
11006  case X86::BI__builtin_ia32_vpshldd512:
11007  case X86::BI__builtin_ia32_vpshldq128:
11008  case X86::BI__builtin_ia32_vpshldq256:
11009  case X86::BI__builtin_ia32_vpshldq512:
11010  case X86::BI__builtin_ia32_vpshldw128:
11011  case X86::BI__builtin_ia32_vpshldw256:
11012  case X86::BI__builtin_ia32_vpshldw512:
11013  return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
11014 
11015  case X86::BI__builtin_ia32_vpshrdd128:
11016  case X86::BI__builtin_ia32_vpshrdd256:
11017  case X86::BI__builtin_ia32_vpshrdd512:
11018  case X86::BI__builtin_ia32_vpshrdq128:
11019  case X86::BI__builtin_ia32_vpshrdq256:
11020  case X86::BI__builtin_ia32_vpshrdq512:
11021  case X86::BI__builtin_ia32_vpshrdw128:
11022  case X86::BI__builtin_ia32_vpshrdw256:
11023  case X86::BI__builtin_ia32_vpshrdw512:
11024  // Ops 0 and 1 are swapped.
11025  return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
11026 
11027  case X86::BI__builtin_ia32_vpshldvd128:
11028  case X86::BI__builtin_ia32_vpshldvd256:
11029  case X86::BI__builtin_ia32_vpshldvd512:
11030  case X86::BI__builtin_ia32_vpshldvq128:
11031  case X86::BI__builtin_ia32_vpshldvq256:
11032  case X86::BI__builtin_ia32_vpshldvq512:
11033  case X86::BI__builtin_ia32_vpshldvw128:
11034  case X86::BI__builtin_ia32_vpshldvw256:
11035  case X86::BI__builtin_ia32_vpshldvw512:
11036  return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
11037 
11038  case X86::BI__builtin_ia32_vpshrdvd128:
11039  case X86::BI__builtin_ia32_vpshrdvd256:
11040  case X86::BI__builtin_ia32_vpshrdvd512:
11041  case X86::BI__builtin_ia32_vpshrdvq128:
11042  case X86::BI__builtin_ia32_vpshrdvq256:
11043  case X86::BI__builtin_ia32_vpshrdvq512:
11044  case X86::BI__builtin_ia32_vpshrdvw128:
11045  case X86::BI__builtin_ia32_vpshrdvw256:
11046  case X86::BI__builtin_ia32_vpshrdvw512:
11047  // Ops 0 and 1 are swapped.
11048  return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
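For reference, a scalar sketch of the funnel shifts these lower to, written for 32-bit elements and following the operand order used in the code above (function names are made up):

#include <stdint.h>
uint32_t vpshldd_model(uint32_t a, uint32_t b, unsigned n) {  // fshl(a, b, n)
  n &= 31;
  return n ? (a << n) | (b >> (32 - n)) : a;
}
uint32_t vpshrdd_model(uint32_t a, uint32_t b, unsigned n) {  // fshr(b, a, n),
  n &= 31;                                                    // operands swapped as noted above
  return n ? (a >> n) | (b << (32 - n)) : a;
}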
11049 
11050  // 3DNow!
11051  case X86::BI__builtin_ia32_pswapdsf:
11052  case X86::BI__builtin_ia32_pswapdsi: {
11053  llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
11054  Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
11055  llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
11056  return Builder.CreateCall(F, Ops, "pswapd");
11057  }
11058  case X86::BI__builtin_ia32_rdrand16_step:
11059  case X86::BI__builtin_ia32_rdrand32_step:
11060  case X86::BI__builtin_ia32_rdrand64_step:
11061  case X86::BI__builtin_ia32_rdseed16_step:
11062  case X86::BI__builtin_ia32_rdseed32_step:
11063  case X86::BI__builtin_ia32_rdseed64_step: {
11064  Intrinsic::ID ID;
11065  switch (BuiltinID) {
11066  default: llvm_unreachable("Unsupported intrinsic!");
11067  case X86::BI__builtin_ia32_rdrand16_step:
11068  ID = Intrinsic::x86_rdrand_16;
11069  break;
11070  case X86::BI__builtin_ia32_rdrand32_step:
11071  ID = Intrinsic::x86_rdrand_32;
11072  break;
11073  case X86::BI__builtin_ia32_rdrand64_step:
11074  ID = Intrinsic::x86_rdrand_64;
11075  break;
11076  case X86::BI__builtin_ia32_rdseed16_step:
11077  ID = Intrinsic::x86_rdseed_16;
11078  break;
11079  case X86::BI__builtin_ia32_rdseed32_step:
11080  ID = Intrinsic::x86_rdseed_32;
11081  break;
11082  case X86::BI__builtin_ia32_rdseed64_step:
11083  ID = Intrinsic::x86_rdseed_64;
11084  break;
11085  }
11086 
11087  Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
11088  Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
11089  Ops[0]);
11090  return Builder.CreateExtractValue(Call, 1);
11091  }
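A hedged usage sketch of the *_step builtins handled above (requires a target with the rdrnd feature): the random value is stored through the pointer and the carry flag is returned, 1 on success and 0 when no entropy was available.

unsigned int get_random32(void) {
  unsigned int r;
  while (!__builtin_ia32_rdrand32_step(&r))
    ;                       // retry until the instruction reports success
  return r;
}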
11092  case X86::BI__builtin_ia32_addcarryx_u32:
11093  case X86::BI__builtin_ia32_addcarryx_u64:
11094  case X86::BI__builtin_ia32_subborrow_u32:
11095  case X86::BI__builtin_ia32_subborrow_u64: {
11096  Intrinsic::ID IID;
11097  switch (BuiltinID) {
11098  default: llvm_unreachable("Unsupported intrinsic!");
11099  case X86::BI__builtin_ia32_addcarryx_u32:
11100  IID = Intrinsic::x86_addcarry_32;
11101  break;
11102  case X86::BI__builtin_ia32_addcarryx_u64:
11103  IID = Intrinsic::x86_addcarry_64;
11104  break;
11105  case X86::BI__builtin_ia32_subborrow_u32:
11106  IID = Intrinsic::x86_subborrow_32;
11107  break;
11108  case X86::BI__builtin_ia32_subborrow_u64:
11109  IID = Intrinsic::x86_subborrow_64;
11110  break;
11111  }
11112 
11113  Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
11114  { Ops[0], Ops[1], Ops[2] });
11115  Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
11116  Ops[3]);
11117  return Builder.CreateExtractValue(Call, 0);
11118  }
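A hedged usage sketch of the carry-chain lowering above (feature requirements may vary by compiler version): element 1 of the intrinsic result (the sum) is stored through the last argument and element 0 (the carry-out byte) is returned, so the builtins chain naturally:

unsigned char add64_via_u32(unsigned a_lo, unsigned a_hi,
                            unsigned b_lo, unsigned b_hi, unsigned out[2]) {
  unsigned char c = __builtin_ia32_addcarryx_u32(0, a_lo, b_lo, &out[0]);
  return __builtin_ia32_addcarryx_u32(c, a_hi, b_hi, &out[1]);  // carry-out
}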
11119 
11120  case X86::BI__builtin_ia32_fpclassps128_mask:
11121  case X86::BI__builtin_ia32_fpclassps256_mask:
11122  case X86::BI__builtin_ia32_fpclassps512_mask:
11123  case X86::BI__builtin_ia32_fpclasspd128_mask:
11124  case X86::BI__builtin_ia32_fpclasspd256_mask:
11125  case X86::BI__builtin_ia32_fpclasspd512_mask: {
11126  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
11127  Value *MaskIn = Ops[2];
11128  Ops.erase(&Ops[2]);
11129 
11130  Intrinsic::ID ID;
11131  switch (BuiltinID) {
11132  default: llvm_unreachable("Unsupported intrinsic!");
11133  case X86::BI__builtin_ia32_fpclassps128_mask:
11134  ID = Intrinsic::x86_avx512_fpclass_ps_128;
11135  break;
11136  case X86::BI__builtin_ia32_fpclassps256_mask:
11137  ID = Intrinsic::x86_avx512_fpclass_ps_256;
11138  break;
11139  case X86::BI__builtin_ia32_fpclassps512_mask:
11140  ID = Intrinsic::x86_avx512_fpclass_ps_512;
11141  break;
11142  case X86::BI__builtin_ia32_fpclasspd128_mask:
11143  ID = Intrinsic::x86_avx512_fpclass_pd_128;
11144  break;
11145  case X86::BI__builtin_ia32_fpclasspd256_mask:
11146  ID = Intrinsic::x86_avx512_fpclass_pd_256;
11147  break;
11148  case X86::BI__builtin_ia32_fpclasspd512_mask:
11149  ID = Intrinsic::x86_avx512_fpclass_pd_512;
11150  break;
11151  }
11152 
11153  Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
11154  return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
11155  }
11156 
11157  case X86::BI__builtin_ia32_vpmultishiftqb128:
11158  case X86::BI__builtin_ia32_vpmultishiftqb256:
11159  case X86::BI__builtin_ia32_vpmultishiftqb512: {
11160  Intrinsic::ID ID;
11161  switch (BuiltinID) {
11162  default: llvm_unreachable("Unsupported intrinsic!");
11163  case X86::BI__builtin_ia32_vpmultishiftqb128:
11164  ID = Intrinsic::x86_avx512_pmultishift_qb_128;
11165  break;
11166  case X86::BI__builtin_ia32_vpmultishiftqb256:
11167  ID = Intrinsic::x86_avx512_pmultishift_qb_256;
11168  break;
11169  case X86::BI__builtin_ia32_vpmultishiftqb512:
11170  ID = Intrinsic::x86_avx512_pmultishift_qb_512;
11171  break;
11172  }
11173 
11174  return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
11175  }
11176 
11177  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
11178  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
11179  case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
11180  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
11181  Value *MaskIn = Ops[2];
11182  Ops.erase(&Ops[2]);
11183 
11184  Intrinsic::ID ID;
11185  switch (BuiltinID) {
11186  default: llvm_unreachable("Unsupported intrinsic!");
11187  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
11188  ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
11189  break;
11190  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
11191  ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
11192  break;
11193  case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
11194  ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
11195  break;
11196  }
11197 
11198  Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
11199  return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
11200  }
11201 
11202  // packed comparison intrinsics
11203  case X86::BI__builtin_ia32_cmpeqps:
11204  case X86::BI__builtin_ia32_cmpeqpd:
11205  return getVectorFCmpIR(CmpInst::FCMP_OEQ);
11206  case X86::BI__builtin_ia32_cmpltps:
11207  case X86::BI__builtin_ia32_cmpltpd:
11208  return getVectorFCmpIR(CmpInst::FCMP_OLT);
11209  case X86::BI__builtin_ia32_cmpleps:
11210  case X86::BI__builtin_ia32_cmplepd:
11211  return getVectorFCmpIR(CmpInst::FCMP_OLE);
11212  case X86::BI__builtin_ia32_cmpunordps:
11213  case X86::BI__builtin_ia32_cmpunordpd:
11214  return getVectorFCmpIR(CmpInst::FCMP_UNO);
11215  case X86::BI__builtin_ia32_cmpneqps:
11216  case X86::BI__builtin_ia32_cmpneqpd:
11217  return getVectorFCmpIR(CmpInst::FCMP_UNE);
11218  case X86::BI__builtin_ia32_cmpnltps:
11219  case X86::BI__builtin_ia32_cmpnltpd:
11220  return getVectorFCmpIR(CmpInst::FCMP_UGE);
11221  case X86::BI__builtin_ia32_cmpnleps:
11222  case X86::BI__builtin_ia32_cmpnlepd:
11223  return getVectorFCmpIR(CmpInst::FCMP_UGT);
11224  case X86::BI__builtin_ia32_cmpordps:
11225  case X86::BI__builtin_ia32_cmpordpd:
11226  return getVectorFCmpIR(CmpInst::FCMP_ORD);
11227  case X86::BI__builtin_ia32_cmpps:
11228  case X86::BI__builtin_ia32_cmpps256:
11229  case X86::BI__builtin_ia32_cmppd:
11230  case X86::BI__builtin_ia32_cmppd256:
11231  case X86::BI__builtin_ia32_cmpps128_mask:
11232  case X86::BI__builtin_ia32_cmpps256_mask:
11233  case X86::BI__builtin_ia32_cmpps512_mask:
11234  case X86::BI__builtin_ia32_cmppd128_mask:
11235  case X86::BI__builtin_ia32_cmppd256_mask:
11236  case X86::BI__builtin_ia32_cmppd512_mask: {
11237  // Lowering vector comparisons to fcmp instructions, while ignoring the
11238  // requested signalling behaviour and
11239  // the requested rounding mode.
11240  // This is only possible as long as FENV_ACCESS is not implemented.
11241  // See also: https://reviews.llvm.org/D45616
11242 
11243  // The third argument is the comparison condition, an integer in the
11244  // range [0, 31].
11245  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
11246 
11247  // Lowering to IR fcmp instruction.
11248  // Ignoring requested signaling behaviour,
11249  // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
11250  FCmpInst::Predicate Pred;
11251  switch (CC) {
11252  case 0x00: Pred = FCmpInst::FCMP_OEQ; break;
11253  case 0x01: Pred = FCmpInst::FCMP_OLT; break;
11254  case 0x02: Pred = FCmpInst::FCMP_OLE; break;
11255  case 0x03: Pred = FCmpInst::FCMP_UNO; break;
11256  case 0x04: Pred = FCmpInst::FCMP_UNE; break;
11257  case 0x05: Pred = FCmpInst::FCMP_UGE; break;
11258  case 0x06: Pred = FCmpInst::FCMP_UGT; break;
11259  case 0x07: Pred = FCmpInst::FCMP_ORD; break;
11260  case 0x08: Pred = FCmpInst::FCMP_UEQ; break;
11261  case 0x09: Pred = FCmpInst::FCMP_ULT; break;
11262  case 0x0a: Pred = FCmpInst::FCMP_ULE; break;
11263  case 0x0b: Pred = FCmpInst::FCMP_FALSE; break;
11264  case 0x0c: Pred = FCmpInst::FCMP_ONE; break;
11265  case 0x0d: Pred = FCmpInst::FCMP_OGE; break;
11266  case 0x0e: Pred = FCmpInst::FCMP_OGT; break;
11267  case 0x0f: Pred = FCmpInst::FCMP_TRUE; break;
11268  case 0x10: Pred = FCmpInst::FCMP_OEQ; break;
11269  case 0x11: Pred = FCmpInst::FCMP_OLT; break;
11270  case 0x12: Pred = FCmpInst::FCMP_OLE; break;
11271  case 0x13: Pred = FCmpInst::FCMP_UNO; break;
11272  case 0x14: Pred = FCmpInst::FCMP_UNE; break;
11273  case 0x15: Pred = FCmpInst::FCMP_UGE; break;
11274  case 0x16: Pred = FCmpInst::FCMP_UGT; break;
11275  case 0x17: Pred = FCmpInst::FCMP_ORD; break;
11276  case 0x18: Pred = FCmpInst::FCMP_UEQ; break;
11277  case 0x19: Pred = FCmpInst::FCMP_ULT; break;
11278  case 0x1a: Pred = FCmpInst::FCMP_ULE; break;
11279  case 0x1b: Pred = FCmpInst::FCMP_FALSE; break;
11280  case 0x1c: Pred = FCmpInst::FCMP_ONE; break;
11281  case 0x1d: Pred = FCmpInst::FCMP_OGE; break;
11282  case 0x1e: Pred = FCmpInst::FCMP_OGT; break;
11283  case 0x1f: Pred = FCmpInst::FCMP_TRUE; break;
11284  default: llvm_unreachable("Unhandled CC");
11285  }
11286 
11287  // Builtins without the _mask suffix return a vector of integers
11288  // of the same width as the input vectors
11289  switch (BuiltinID) {
11290  case X86::BI__builtin_ia32_cmpps512_mask:
11291  case X86::BI__builtin_ia32_cmppd512_mask:
11292  case X86::BI__builtin_ia32_cmpps128_mask:
11293  case X86::BI__builtin_ia32_cmpps256_mask:
11294  case X86::BI__builtin_ia32_cmppd128_mask:
11295  case X86::BI__builtin_ia32_cmppd256_mask: {
11296  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
11297  Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
11298  return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
11299  }
11300  default:
11301  return getVectorFCmpIR(Pred);
11302  }
11303  }
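A concrete instance of the mapping above (illustrative, assuming AVX is enabled): both _CMP_GT_OS (0x0e) and _CMP_GT_OQ (0x1e) land on FCMP_OGT, so for the non-_mask builtins the result is a plain fcmp whose i1 lanes are sign-extended back to full-width elements.

#include <immintrin.h>
__m128 greater(__m128 a, __m128 b) {
  return _mm_cmp_ps(a, b, _CMP_GT_OQ);   // emitted as 'fcmp ogt' + sign-extend
}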
11304 
11305  // SSE scalar comparison intrinsics
11306  case X86::BI__builtin_ia32_cmpeqss:
11307  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
11308  case X86::BI__builtin_ia32_cmpltss:
11309  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
11310  case X86::BI__builtin_ia32_cmpless:
11311  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
11312  case X86::BI__builtin_ia32_cmpunordss:
11313  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
11314  case X86::BI__builtin_ia32_cmpneqss:
11315  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
11316  case X86::BI__builtin_ia32_cmpnltss:
11317  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
11318  case X86::BI__builtin_ia32_cmpnless:
11319  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
11320  case X86::BI__builtin_ia32_cmpordss:
11321  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
11322  case X86::BI__builtin_ia32_cmpeqsd:
11323  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
11324  case X86::BI__builtin_ia32_cmpltsd:
11325  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
11326  case X86::BI__builtin_ia32_cmplesd:
11327  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
11328  case X86::BI__builtin_ia32_cmpunordsd:
11329  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
11330  case X86::BI__builtin_ia32_cmpneqsd:
11331  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
11332  case X86::BI__builtin_ia32_cmpnltsd:
11333  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
11334  case X86::BI__builtin_ia32_cmpnlesd:
11335  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
11336  case X86::BI__builtin_ia32_cmpordsd:
11337  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
11338 
11339  case X86::BI__emul:
11340  case X86::BI__emulu: {
11341  llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
11342  bool isSigned = (BuiltinID == X86::BI__emul);
11343  Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
11344  Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
11345  return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
11346  }
11347  case X86::BI__mulh:
11348  case X86::BI__umulh:
11349  case X86::BI_mul128:
11350  case X86::BI_umul128: {
11351  llvm::Type *ResType = ConvertType(E->getType());
11352  llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11353 
11354  bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
11355  Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
11356  Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
11357 
11358  Value *MulResult, *HigherBits;
11359  if (IsSigned) {
11360  MulResult = Builder.CreateNSWMul(LHS, RHS);
11361  HigherBits = Builder.CreateAShr(MulResult, 64);
11362  } else {
11363  MulResult = Builder.CreateNUWMul(LHS, RHS);
11364  HigherBits = Builder.CreateLShr(MulResult, 64);
11365  }
11366  HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11367 
11368  if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
11369  return HigherBits;
11370 
11371  Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
11372  Builder.CreateStore(HigherBits, HighBitsAddress);
11373  return Builder.CreateIntCast(MulResult, ResType, IsSigned);
11374  }
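A hedged usage sketch (MS extensions mode, x86-64 only): _umul128 returns the low 64 bits of the product and stores the high 64 bits through its third argument, mirroring the split computed above.

#include <intrin.h>
unsigned long long mul_wide(unsigned long long x, unsigned long long y,
                            unsigned long long *hi) {
  return _umul128(x, y, hi);   // low 64 bits; *hi receives the high 64 bits
}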
11375 
11376  case X86::BI__faststorefence: {
11377  return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11378  llvm::SyncScope::System);
11379  }
11380  case X86::BI__shiftleft128:
11381  case X86::BI__shiftright128: {
11382  // FIXME: Once fshl/fshr no longer add an unneeded and and cmov, do this:
11383  // llvm::Function *F = CGM.getIntrinsic(
11384  // BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
11385  // Int64Ty);
11386  // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
11387  // return Builder.CreateCall(F, Ops);
11388  llvm::Type *Int128Ty = Builder.getInt128Ty();
11389  Value *Val = Builder.CreateOr(
11390  Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64),
11391  Builder.CreateZExt(Ops[0], Int128Ty));
11392  Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty),
11393  llvm::ConstantInt::get(Int128Ty, 0x3f));
11394  Value *Res;
11395  if (BuiltinID == X86::BI__shiftleft128)
11396  Res = Builder.CreateLShr(Builder.CreateShl(Val, Amt), 64);
11397  else
11398  Res = Builder.CreateLShr(Val, Amt);
11399  return Builder.CreateTrunc(Res, Int64Ty);
11400  }
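A scalar model of the __int128 arithmetic above (a sketch, assuming a compiler that provides unsigned __int128): the shift amount is masked to 6 bits, the two halves are glued into one 128-bit value, and the relevant 64-bit half of the shifted value is returned.

unsigned long long shiftleft128_model(unsigned long long lo,
                                      unsigned long long hi,
                                      unsigned char amt) {
  unsigned __int128 v = ((unsigned __int128)hi << 64) | lo;
  return (unsigned long long)((v << (amt & 63)) >> 64);   // high 64 bits
}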
11401  case X86::BI_ReadWriteBarrier:
11402  case X86::BI_ReadBarrier:
11403  case X86::BI_WriteBarrier: {
11404  return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11405  llvm::SyncScope::SingleThread);
11406  }
11407  case X86::BI_BitScanForward:
11408  case X86::BI_BitScanForward64:
11409  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
11410  case X86::BI_BitScanReverse:
11411  case X86::BI_BitScanReverse64:
11412  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
11413 
11414  case X86::BI_InterlockedAnd64:
11415  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
11416  case X86::BI_InterlockedExchange64:
11417  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
11418  case X86::BI_InterlockedExchangeAdd64:
11419  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
11420  case X86::BI_InterlockedExchangeSub64:
11421  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
11422  case X86::BI_InterlockedOr64:
11423  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
11424  case X86::BI_InterlockedXor64:
11425  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
11426  case X86::BI_InterlockedDecrement64:
11427  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
11428  case X86::BI_InterlockedIncrement64:
11429  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
11430  case X86::BI_InterlockedCompareExchange128: {
11431  // InterlockedCompareExchange128 doesn't directly refer to 128bit ints;
11432  // instead it takes pointers to 64bit ints for Destination and
11433  // ComparandResult, and exchange is taken as two 64bit ints (high & low).
11434  // The previous value is written to ComparandResult, and success is
11435  // returned.
11436 
11437  llvm::Type *Int128Ty = Builder.getInt128Ty();
11438  llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
11439 
11440  Value *Destination =
11441  Builder.CreateBitCast(Ops[0], Int128PtrTy);
11442  Value *ExchangeHigh128 = Builder.CreateZExt(Ops[1], Int128Ty);
11443  Value *ExchangeLow128 = Builder.CreateZExt(Ops[2], Int128Ty);
11444  Address ComparandResult(Builder.CreateBitCast(Ops[3], Int128PtrTy),
11445  getContext().toCharUnitsFromBits(128));
11446 
11447  Value *Exchange = Builder.CreateOr(
11448  Builder.CreateShl(ExchangeHigh128, 64, "", false, false),
11449  ExchangeLow128);
11450 
11451  Value *Comparand = Builder.CreateLoad(ComparandResult);
11452 
11453  AtomicCmpXchgInst *CXI =
11454  Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
11455  AtomicOrdering::SequentiallyConsistent,
11456  AtomicOrdering::SequentiallyConsistent);
11457  CXI->setVolatile(true);
11458 
11459  // Write the result back to the inout pointer.
11460  Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult);
11461 
11462  // Get the success boolean and zero extend it to i8.
11463  Value *Success = Builder.CreateExtractValue(CXI, 1);
11464  return Builder.CreateZExt(Success, ConvertType(E->getType()));
11465  }
11466 
11467  case X86::BI_AddressOfReturnAddress: {
11468  Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
11469  return Builder.CreateCall(F);
11470  }
11471  case X86::BI__stosb: {
11472  // We treat __stosb as a volatile memset - it may not generate "rep stosb"
11473  // instruction, but it will create a memset that won't be optimized away.
11474  return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
11475  }
11476  case X86::BI__ud2:
11477  // llvm.trap makes a ud2a instruction on x86.
11478  return EmitTrapCall(Intrinsic::trap);
11479  case X86::BI__int2c: {
11480  // This syscall signals a driver assertion failure in x86 NT kernels.
11481  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
11482  llvm::InlineAsm *IA =
11483  llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
11484  llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
11485  getLLVMContext(), llvm::AttributeList::FunctionIndex,
11486  llvm::Attribute::NoReturn);
11487  CallSite CS = Builder.CreateCall(IA);
11488  CS.setAttributes(NoReturnAttr);
11489  return CS.getInstruction();
11490  }
11491  case X86::BI__readfsbyte:
11492  case X86::BI__readfsword:
11493  case X86::BI__readfsdword:
11494  case X86::BI__readfsqword: {
11495  llvm::Type *IntTy = ConvertType(E->getType());
11496  Value *Ptr =
11497  Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257));
11498  LoadInst *Load = Builder.CreateAlignedLoad(
11499  IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
11500  Load->setVolatile(true);
11501  return Load;
11502  }
11503  case X86::BI__readgsbyte:
11504  case X86::BI__readgsword:
11505  case X86::BI__readgsdword:
11506  case X86::BI__readgsqword: {
11507  llvm::Type *IntTy = ConvertType(E->getType());
11508  Value *Ptr =
11509  Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 256));
11510  LoadInst *Load = Builder.CreateAlignedLoad(
11511  IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
11512  Load->setVolatile(true);
11513  return Load;
11514  }
11515  case X86::BI__builtin_ia32_paddsb512:
11516  case X86::BI__builtin_ia32_paddsw512:
11517  case X86::BI__builtin_ia32_paddsb256:
11518  case X86::BI__builtin_ia32_paddsw256:
11519  case X86::BI__builtin_ia32_paddsb128:
11520  case X86::BI__builtin_ia32_paddsw128:
11521  return EmitX86AddSubSatExpr(*this, Ops, true, true);
11522  case X86::BI__builtin_ia32_paddusb512:
11523  case X86::BI__builtin_ia32_paddusw512:
11524  case X86::BI__builtin_ia32_paddusb256:
11525  case X86::BI__builtin_ia32_paddusw256:
11526  case X86::BI__builtin_ia32_paddusb128:
11527  case X86::BI__builtin_ia32_paddusw128:
11528  return EmitX86AddSubSatExpr(*this, Ops, false, true);
11529  case X86::BI__builtin_ia32_psubsb512:
11530  case X86::BI__builtin_ia32_psubsw512:
11531  case X86::BI__builtin_ia32_psubsb256:
11532  case X86::BI__builtin_ia32_psubsw256:
11533  case X86::BI__builtin_ia32_psubsb128:
11534  case X86::BI__builtin_ia32_psubsw128:
11535  return EmitX86AddSubSatExpr(*this, Ops, true, false);
11536  case X86::BI__builtin_ia32_psubusb512:
11537  case X86::BI__builtin_ia32_psubusw512:
11538  case X86::BI__builtin_ia32_psubusb256:
11539  case X86::BI__builtin_ia32_psubusw256:
11540  case X86::BI__builtin_ia32_psubusb128:
11541  case X86::BI__builtin_ia32_psubusw128:
11542  return EmitX86AddSubSatExpr(*this, Ops, false, false);
11543  }
11544 }
11545 
11546 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
11547  const CallExpr *E) {
11548  SmallVector<Value*, 4> Ops;
11549 
11550  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
11551  Ops.push_back(EmitScalarExpr(E->getArg(i)));
11552 
11553  Intrinsic::ID ID = Intrinsic::not_intrinsic;
11554 
11555  switch (BuiltinID) {
11556  default: return nullptr;
11557 
11558  // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
11559  // call __builtin_readcyclecounter.
11560  case PPC::BI__builtin_ppc_get_timebase:
11561  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
11562 
11563  // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
11564  case PPC::BI__builtin_altivec_lvx:
11565  case PPC::BI__builtin_altivec_lvxl:
11566  case PPC::BI__builtin_altivec_lvebx:
11567  case PPC::BI__builtin_altivec_lvehx:
11568  case PPC::BI__builtin_altivec_lvewx:
11569  case PPC::BI__builtin_altivec_lvsl:
11570  case PPC::BI__builtin_altivec_lvsr:
11571  case PPC::BI__builtin_vsx_lxvd2x:
11572  case PPC::BI__builtin_vsx_lxvw4x:
11573  case PPC::BI__builtin_vsx_lxvd2x_be:
11574  case PPC::BI__builtin_vsx_lxvw4x_be:
11575  case PPC::BI__builtin_vsx_lxvl:
11576  case PPC::BI__builtin_vsx_lxvll:
11577  {
11578  if(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
11579  BuiltinID == PPC::BI__builtin_vsx_lxvll){
11580  Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
11581  }else {
11582  Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
11583  Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
11584  Ops.pop_back();
11585  }
11586 
11587  switch (BuiltinID) {
11588  default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
11589  case PPC::BI__builtin_altivec_lvx:
11590  ID = Intrinsic::ppc_altivec_lvx;
11591  break;
11592  case PPC::BI__builtin_altivec_lvxl:
11593  ID = Intrinsic::ppc_altivec_lvxl;
11594  break;
11595  case PPC::BI__builtin_altivec_lvebx:
11596  ID = Intrinsic::ppc_altivec_lvebx;
11597  break;
11598  case PPC::BI__builtin_altivec_lvehx:
11599  ID = Intrinsic::ppc_altivec_lvehx;
11600  break;
11601  case PPC::BI__builtin_altivec_lvewx:
11602  ID = Intrinsic::ppc_altivec_lvewx;
11603  break;
11604  case PPC::BI__builtin_altivec_lvsl:
11605  ID = Intrinsic::ppc_altivec_lvsl;
11606  break;
11607  case PPC::BI__builtin_altivec_lvsr:
11608  ID = Intrinsic::ppc_altivec_lvsr;
11609  break;
11610  case PPC::BI__builtin_vsx_lxvd2x:
11611  ID = Intrinsic::ppc_vsx_lxvd2x;
11612  break;
11613  case PPC::BI__builtin_vsx_lxvw4x:
11614  ID = Intrinsic::ppc_vsx_lxvw4x;
11615  break;
11616  case PPC::BI__builtin_vsx_lxvd2x_be:
11617  ID = Intrinsic::ppc_vsx_lxvd2x_be;
11618  break;
11619  case PPC::BI__builtin_vsx_lxvw4x_be:
11620  ID = Intrinsic::ppc_vsx_lxvw4x_be;
11621  break;
11622  case PPC::BI__builtin_vsx_lxvl:
11623  ID = Intrinsic::ppc_vsx_lxvl;
11624  break;
11625  case PPC::BI__builtin_vsx_lxvll:
11626  ID = Intrinsic::ppc_vsx_lxvll;
11627  break;
11628  }
11629  llvm::Function *F = CGM.getIntrinsic(ID);
11630  return Builder.CreateCall(F, Ops, "");
11631  }
11632 
11633  // vec_st, vec_xst_be
11634  case PPC::BI__builtin_altivec_stvx:
11635  case PPC::BI__builtin_altivec_stvxl:
11636  case PPC::BI__builtin_altivec_stvebx:
11637  case PPC::BI__builtin_altivec_stvehx:
11638  case PPC::BI__builtin_altivec_stvewx:
11639  case PPC::BI__builtin_vsx_stxvd2x:
11640  case PPC::BI__builtin_vsx_stxvw4x:
11641  case PPC::BI__builtin_vsx_stxvd2x_be:
11642  case PPC::BI__builtin_vsx_stxvw4x_be:
11643  case PPC::BI__builtin_vsx_stxvl:
11644  case PPC::BI__builtin_vsx_stxvll:
11645  {
11646  if(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
11647  BuiltinID == PPC::BI__builtin_vsx_stxvll ){
11648  Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
11649  }else {
11650  Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
11651  Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
11652  Ops.pop_back();
11653  }
11654 
11655  switch (BuiltinID) {
11656  default: llvm_unreachable("Unsupported st intrinsic!");
11657  case PPC::BI__builtin_altivec_stvx:
11658  ID = Intrinsic::ppc_altivec_stvx;
11659  break;
11660  case PPC::BI__builtin_altivec_stvxl:
11661  ID = Intrinsic::ppc_altivec_stvxl;
11662  break;
11663  case PPC::BI__builtin_altivec_stvebx:
11664  ID = Intrinsic::ppc_altivec_stvebx;
11665  break;
11666  case PPC::BI__builtin_altivec_stvehx:
11667  ID = Intrinsic::ppc_altivec_stvehx;
11668  break;
11669  case PPC::BI__builtin_altivec_stvewx:
11670  ID = Intrinsic::ppc_altivec_stvewx;
11671  break;
11672  case PPC::BI__builtin_vsx_stxvd2x:
11673  ID = Intrinsic::ppc_vsx_stxvd2x;
11674  break;
11675  case PPC::BI__builtin_vsx_stxvw4x:
11676  ID = Intrinsic::ppc_vsx_stxvw4x;
11677  break;
11678  case PPC::BI__builtin_vsx_stxvd2x_be:
11679  ID = Intrinsic::ppc_vsx_stxvd2x_be;
11680  break;
11681  case PPC::BI__builtin_vsx_stxvw4x_be:
11682  ID = Intrinsic::ppc_vsx_stxvw4x_be;
11683  break;
11684  case PPC::BI__builtin_vsx_stxvl:
11685  ID = Intrinsic::ppc_vsx_stxvl;
11686  break;
11687  case PPC::BI__builtin_vsx_stxvll:
11688  ID = Intrinsic::ppc_vsx_stxvll;
11689  break;
11690  }
11691  llvm::Function *F = CGM.getIntrinsic(ID);
11692  return Builder.CreateCall(F, Ops, "");
11693  }
11694  // Square root
11695  case PPC::BI__builtin_vsx_xvsqrtsp:
11696  case PPC::BI__builtin_vsx_xvsqrtdp: {
11697  llvm::Type *ResultType = ConvertType(E->getType());
11698  Value *X = EmitScalarExpr(E->getArg(0));
11699  ID = Intrinsic::sqrt;
11700  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
11701  return Builder.CreateCall(F, X);
11702  }
11703  // Count leading zeros
11704  case PPC::BI__builtin_altivec_vclzb:
11705  case PPC::BI__builtin_altivec_vclzh:
11706  case PPC::BI__builtin_altivec_vclzw:
11707  case PPC::BI__builtin_altivec_vclzd: {
11708  llvm::Type *ResultType = ConvertType(E->getType());
11709  Value *X = EmitScalarExpr(E->getArg(0));
11710  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
11711  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
11712  return Builder.CreateCall(F, {X, Undef});
11713  }
11714  case PPC::BI__builtin_altivec_vctzb:
11715  case PPC::BI__builtin_altivec_vctzh:
11716  case PPC::BI__builtin_altivec_vctzw:
11717  case PPC::BI__builtin_altivec_vctzd: {
11718  llvm::Type *ResultType = ConvertType(E->getType());
11719  Value *X = EmitScalarExpr(E->getArg(0));
11720  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
11721  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
11722  return Builder.CreateCall(F, {X, Undef});
11723  }
11724  case PPC::BI__builtin_altivec_vpopcntb:
11725  case PPC::BI__builtin_altivec_vpopcnth:
11726  case PPC::BI__builtin_altivec_vpopcntw:
11727  case PPC::BI__builtin_altivec_vpopcntd: {
11728  llvm::Type *ResultType = ConvertType(E->getType());
11729  Value *X = EmitScalarExpr(E->getArg(0));
11730  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
11731  return Builder.CreateCall(F, X);
11732  }
11733  // Copy sign
11734  case PPC::BI__builtin_vsx_xvcpsgnsp:
11735  case PPC::BI__builtin_vsx_xvcpsgndp: {
11736  llvm::Type *ResultType = ConvertType(E->getType());
11737  Value *X = EmitScalarExpr(E->getArg(0));
11738  Value *Y = EmitScalarExpr(E->getArg(1));
11739  ID = Intrinsic::copysign;
11740  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
11741  return Builder.CreateCall(F, {X, Y});
11742  }
11743  // Rounding/truncation
11744  case PPC::BI__builtin_vsx_xvrspip:
11745  case PPC::BI__builtin_vsx_xvrdpip:
11746  case PPC::BI__builtin_vsx_xvrdpim:
11747  case PPC::BI__builtin_vsx_xvrspim:
11748  case PPC::BI__builtin_vsx_xvrdpi:
11749  case PPC::BI__builtin_vsx_xvrspi:
11750  case PPC::BI__builtin_vsx_xvrdpic:
11751  case PPC::BI__builtin_vsx_xvrspic:
11752  case PPC::BI__builtin_vsx_xvrdpiz:
11753  case PPC::BI__builtin_vsx_xvrspiz: {
11754  llvm::Type *ResultType = ConvertType(E->getType());
11755  Value *X = EmitScalarExpr(E->getArg(0));
11756  if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
11757  BuiltinID == PPC::BI__builtin_vsx_xvrspim)
11758  ID = Intrinsic::floor;
11759  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
11760  BuiltinID == PPC::BI__builtin_vsx_xvrspi)
11761  ID = Intrinsic::round;
11762  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
11763  BuiltinID == PPC::BI__builtin_vsx_xvrspic)
11764  ID = Intrinsic::nearbyint;
11765  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
11766  BuiltinID == PPC::BI__builtin_vsx_xvrspip)
11767  ID = Intrinsic::ceil;
11768  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
11769  BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
11770  ID = Intrinsic::trunc;
11771  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
11772  return Builder.CreateCall(F, X);
11773  }
11774 
11775  // Absolute value
11776  case PPC::BI__builtin_vsx_xvabsdp:
11777  case PPC::BI__builtin_vsx_xvabssp: {
11778  llvm::Type *ResultType = ConvertType(E->getType());
11779  Value *X = EmitScalarExpr(E->getArg(0));
11780  llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
11781  return Builder.CreateCall(F, X);
11782  }
11783 
11784  // FMA variations
11785  case PPC::BI__builtin_vsx_xvmaddadp:
11786  case PPC::BI__builtin_vsx_xvmaddasp:
11787  case PPC::BI__builtin_vsx_xvnmaddadp:
11788  case PPC::BI__builtin_vsx_xvnmaddasp:
11789  case PPC::BI__builtin_vsx_xvmsubadp:
11790  case PPC::BI__builtin_vsx_xvmsubasp:
11791  case PPC::BI__builtin_vsx_xvnmsubadp:
11792  case PPC::BI__builtin_vsx_xvnmsubasp: {
11793  llvm::Type *ResultType = ConvertType(E->getType());
11794  Value *X = EmitScalarExpr(E->getArg(0));
11795  Value *Y = EmitScalarExpr(E->getArg(1));
11796  Value *Z = EmitScalarExpr(E->getArg(2));
11797  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
11798  llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
11799  switch (BuiltinID) {
11800  case PPC::BI__builtin_vsx_xvmaddadp:
11801  case PPC::BI__builtin_vsx_xvmaddasp:
11802  return Builder.CreateCall(F, {X, Y, Z});
11803  case PPC::BI__builtin_vsx_xvnmaddadp:
11804  case PPC::BI__builtin_vsx_xvnmaddasp:
11805  return Builder.CreateFSub(Zero,
11806  Builder.CreateCall(F, {X, Y, Z}), "sub");
11807  case PPC::BI__builtin_vsx_xvmsubadp:
11808  case PPC::BI__builtin_vsx_xvmsubasp:
11809  return Builder.CreateCall(F,
11810  {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
11811  case PPC::BI__builtin_vsx_xvnmsubadp:
11812  case PPC::BI__builtin_vsx_xvnmsubasp:
11813  Value *FsubRes =
11814  Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
11815  return Builder.CreateFSub(Zero, FsubRes, "sub");
11816  }
11817  llvm_unreachable("Unknown FMA operation");
11818  return nullptr; // Suppress no-return warning
11819  }
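For reference, scalar analogues of the four forms handled above, using fma() from <math.h>; the vector builtins apply the same formula lane-wise (up to sign-of-zero details, since the code negates via a subtraction from zero):

#include <math.h>
double madd_model (double x, double y, double z) { return  fma(x, y,  z); }
double nmadd_model(double x, double y, double z) { return -fma(x, y,  z); }
double msub_model (double x, double y, double z) { return  fma(x, y, -z); }
double nmsub_model(double x, double y, double z) { return -fma(x, y, -z); }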
11820 
11821  case PPC::BI__builtin_vsx_insertword: {
11822  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
11823 
11824  // Third argument is a compile time constant int. It must be clamped
11825  // to the range [0, 12].
11826  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
11827  assert(ArgCI &&
11828  "Third arg to xxinsertw intrinsic must be constant integer");
11829  const int64_t MaxIndex = 12;
11830  int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
11831 
11832  // The builtin semantics don't exactly match the xxinsertw instruction's
11833  // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
11834  // word from the first argument, and inserts it in the second argument. The
11835  // instruction extracts the word from its second input register and inserts
11836  // it into its first input register, so swap the first and second arguments.
11837  std::swap(Ops[0], Ops[1]);
11838 
11839  // Need to cast the second argument from a vector of unsigned int to a
11840  // vector of long long.
11841  Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
11842 
11843  if (getTarget().isLittleEndian()) {
11844  // Create a shuffle mask of (1, 0)
11845  Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
11846  ConstantInt::get(Int32Ty, 0)
11847  };
11848  Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
11849 
11850  // Reverse the double words in the vector we will extract from.
11851  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
11852  Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
11853 
11854  // Reverse the index.
11855  Index = MaxIndex - Index;
11856  }
11857 
11858  // Intrinsic expects the first arg to be a vector of int.
11859  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
11860  Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
11861  return Builder.CreateCall(F, Ops);
11862  }
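  // Worked example for the little-endian path above (illustrative): a constant
  // index of 4 becomes 12 - 4 = 8, and the (1, 0) shuffle reverses the two
  // doublewords of Ops[0] before the llvm.ppc.vsx.xxinsertw call is emitted.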
11863 
11864  case PPC::BI__builtin_vsx_extractuword: {
11865  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
11866 
11867  // Intrinsic expects the first argument to be a vector of doublewords.
11868  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
11869 
11870  // The second argument is a compile time constant int that needs to
11871  // be clamped to the range [0, 12].
11872  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
11873  assert(ArgCI &&
11874  "Second Arg to xxextractuw intrinsic must be a constant integer!");
11875  const int64_t MaxIndex = 12;
11876  int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
11877 
11878  if (getTarget().isLittleEndian()) {
11879  // Reverse the index.
11880  Index = MaxIndex - Index;
11881  Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
11882 
11883  // Emit the call, then reverse the double words of the results vector.
11884  Value *Call = Builder.CreateCall(F, Ops);
11885 
11886  // Create a shuffle mask of (1, 0)
11887  Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
11888  ConstantInt::get(Int32Ty, 0)
11889  };
11890  Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
11891 
11892  Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
11893  return ShuffleCall;
11894  } else {
11895  Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
11896  return Builder.CreateCall(F, Ops);
11897  }
11898  }
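  // Worked example (illustrative): on little-endian, an index of 4 is rewritten
  // to 12 - 4 = 8, llvm.ppc.vsx.xxextractuw is called, and the two doublewords
  // of its result are swapped back by the (1, 0) shuffle; on big-endian the
  // index is passed through unchanged.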
11899 
11900  case PPC::BI__builtin_vsx_xxpermdi: {
11901  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
11902  assert(ArgCI && "Third arg must be constant integer!");
11903 
11904  unsigned Index = ArgCI->getZExtValue();
11905  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
11906  Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
11907 
11908  // Account for endianness by treating this as just a shuffle. So we use the
11909  // same indices for both LE and BE in order to produce expected results in
11910  // both cases.
11911  unsigned ElemIdx0 = (Index & 2) >> 1;
11912  unsigned ElemIdx1 = 2 + (Index & 1);
11913 
11914  Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
11915  ConstantInt::get(Int32Ty, ElemIdx1)};
11916  Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
11917 
11918  Value *ShuffleCall =
11919  Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
11920  QualType BIRetType = E->getType();
11921  auto RetTy = ConvertType(BIRetType);
11922  return Builder.CreateBitCast(ShuffleCall, RetTy);
11923  }
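  // Worked example (illustrative): for Index = 2 (binary 10), ElemIdx0 =
  // (2 & 2) >> 1 = 1 and ElemIdx1 = 2 + (2 & 1) = 2, so the shuffle takes
  // doubleword 1 of Ops[0] followed by doubleword 0 of Ops[1].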
11924 
11925  case PPC::BI__builtin_vsx_xxsldwi: {
11926  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
11927  assert(ArgCI && "Third argument must be a compile time constant");
11928  unsigned Index = ArgCI->getZExtValue() & 0x3;
11929  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
11930  Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
11931 
11932  // Create a shuffle mask
11933  unsigned ElemIdx0;
11934  unsigned ElemIdx1;
11935  unsigned ElemIdx2;
11936  unsigned ElemIdx3;
11937  if (getTarget().isLittleEndian()) {
11938  // Little endian element N comes from element 8+N-Index of the
11939  // concatenated wide vector (of course, using modulo arithmetic on
11940  // the total number of elements).
11941  ElemIdx0 = (8 - Index) % 8;
11942  ElemIdx1 = (9 - Index) % 8;
11943  ElemIdx2 = (10 - Index) % 8;
11944  ElemIdx3 = (11 - Index) % 8;
11945  } else {
11946  // Big endian ElemIdx<N> = Index + N
11947  ElemIdx0 = Index;
11948  ElemIdx1 = Index + 1;
11949  ElemIdx2 = Index + 2;
11950  ElemIdx3 = Index + 3;
11951  }
11952 
11953  Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
11954  ConstantInt::get(Int32Ty, ElemIdx1),
11955  ConstantInt::get(Int32Ty, ElemIdx2),
11956  ConstantInt::get(Int32Ty, ElemIdx3)};
11957 
11958  Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
11959  Value *ShuffleCall =
11960  Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
11961  QualType BIRetType = E->getType();
11962  auto RetTy = ConvertType(BIRetType);
11963  return Builder.CreateBitCast(ShuffleCall, RetTy);
11964  }
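  // Worked example (illustrative): for Index = 1 the big-endian mask is
  // {1, 2, 3, 4}, while the little-endian mask is {(8-1)%8, (9-1)%8,
  // (10-1)%8, (11-1)%8} = {7, 0, 1, 2}.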
11965 
11966  case PPC::BI__builtin_pack_vector_int128: {
11967  bool isLittleEndian = getTarget().isLittleEndian();
11968  Value *UndefValue =
11969  llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), 2));
11970  Value *Res = Builder.CreateInsertElement(
11971  UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0));
11972  Res = Builder.CreateInsertElement(Res, Ops[1],
11973  (uint64_t)(isLittleEndian ? 0 : 1));
11974  return Builder.CreateBitCast(Res, ConvertType(E->getType()));
11975  }
11976 
11977  case PPC::BI__builtin_unpack_vector_int128: {
11978  ConstantInt *Index = cast<ConstantInt>(Ops[1]);
11979  Value *Unpacked = Builder.CreateBitCast(
11980  Ops[0], llvm::VectorType::get(ConvertType(E->getType()), 2));
11981 
11982  if (getTarget().isLittleEndian())
11983  Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
11984 
11985  return Builder.CreateExtractElement(Unpacked, Index);
11986  }
11987  }
11988 }
11989 
11990 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
11991                                               const CallExpr *E) {
11992  switch (BuiltinID) {
11993  case AMDGPU::BI__builtin_amdgcn_div_scale:
11994  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
11995  // Translate from the intrinsic's struct return to the builtin's out
11996  // argument.
11997 
11998  Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
11999 
12000  llvm::Value *X = EmitScalarExpr(E->getArg(0));
12001  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
12002  llvm::Value *Z = EmitScalarExpr(E->getArg(2));
12003 
12004  llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
12005  X->getType());
12006 
12007  llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
12008 
12009  llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
12010  llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
12011 
12012  llvm::Type *RealFlagType
12013  = FlagOutPtr.getPointer()->getType()->getPointerElementType();
12014 
12015  llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
12016  Builder.CreateStore(FlagExt, FlagOutPtr);
12017  return Result;
12018  }
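  // Illustrative usage sketch (argument names are placeholders, not from this
  // file): a call such as
  //   bool flag; float r = __builtin_amdgcn_div_scalef(num, den, sel, &flag);
  // lands here; element 0 of the intrinsic's {value, i1} aggregate becomes r,
  // and element 1 is zero-extended and stored through &flag.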
12019  case AMDGPU::BI__builtin_amdgcn_div_fmas:
12020  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
12021  llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
12022  llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
12023  llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
12024  llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
12025 
12026  llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
12027  Src0->getType());
12028  llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
12029  return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
12030  }
12031 
12032  case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
12033  return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
12034  case AMDGPU::BI__builtin_amdgcn_mov_dpp:
12035  case AMDGPU::BI__builtin_amdgcn_update_dpp: {
12036    llvm::SmallVector<llvm::Value *, 6> Args;
12037    for (unsigned I = 0; I != E->getNumArgs(); ++I)
12038  Args.push_back(EmitScalarExpr(E->getArg(I)));
12039  assert(Args.size() == 5 || Args.size() == 6);
12040  if (Args.size() == 5)
12041  Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType()));
12042  Value *F =
12043  CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
12044  return Builder.CreateCall(F, Args);
12045  }
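  // Note: __builtin_amdgcn_mov_dpp supplies five operands (it carries no 'old'
  // value to preserve), so an undef old value is prepended above to reuse the
  // six-operand llvm.amdgcn.update.dpp intrinsic; __builtin_amdgcn_update_dpp
  // already passes all six operands through unchanged.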
12046  case AMDGPU::BI__builtin_amdgcn_div_fixup:
12047  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
12048  case AMDGPU::BI__builtin_amdgcn_div_fixuph:
12049  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
12050  case AMDGPU::BI__builtin_amdgcn_trig_preop:
12051  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
12052  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
12053  case AMDGPU::BI__builtin_amdgcn_rcp:
12054  case AMDGPU::BI__builtin_amdgcn_rcpf:
12055  case AMDGPU::BI__builtin_amdgcn_rcph:
12056  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
12057  case AMDGPU::BI__builtin_amdgcn_rsq:
12058  case AMDGPU::BI__builtin_amdgcn_rsqf:
12059  case AMDGPU::BI__builtin_amdgcn_rsqh:
12060  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
12061  case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
12062  case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
12063  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
12064  case AMDGPU::BI__builtin_amdgcn_sinf:
12065  case AMDGPU::BI__builtin_amdgcn_sinh:
12066  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
12067  case AMDGPU::BI__builtin_amdgcn_cosf:
12068  case AMDGPU::BI__builtin_amdgcn_cosh:
12069  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
12070  case AMDGPU::BI__builtin_amdgcn_log_clampf:
12071  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
12072  case AMDGPU::BI__builtin_amdgcn_ldexp:
12073  case AMDGPU::BI__builtin_amdgcn_ldexpf:
12074  case AMDGPU::BI__builtin_amdgcn_ldexph:
12075  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
12076  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
12077  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
12078  case AMDGPU::BI__builtin_amdgcn_frexp_manth:
12079  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
12080  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
12081  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
12082  Value *Src0 = EmitScalarExpr(E->getArg(0));
12083  Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
12084  { Builder.getInt32Ty(), Src0->getType() });
12085  return Builder.CreateCall(F, Src0);
12086  }
12087  case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
12088  Value *Src0 = EmitScalarExpr(E->getArg(0));
12089  Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
12090  { Builder.getInt16Ty(), Src0->getType() });
12091  return Builder.CreateCall(F, Src0);
12092  }
12093  case AMDGPU::BI__builtin_amdgcn_fract:
12094  case AMDGPU::BI__builtin_amdgcn_fractf:
12095  case AMDGPU::BI__builtin_amdgcn_fracth:
12096  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
12097  case AMDGPU::BI__builtin_amdgcn_lerp:
12098  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
12099  case AMDGPU::BI__builtin_amdgcn_uicmp:
12100  case AMDGPU::BI__builtin_amdgcn_uicmpl:
12101  case AMDGPU::BI__builtin_amdgcn_sicmp:
12102  case AMDGPU::BI__builtin_amdgcn_sicmpl:
12103  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
12104  case AMDGPU::BI__builtin_amdgcn_fcmp:
12105  case AMDGPU::BI__builtin_amdgcn_fcmpf:
12106  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
12107  case AMDGPU::BI__builtin_amdgcn_class:
12108  case AMDGPU::BI__builtin_amdgcn_classf:
12109  case AMDGPU::BI__builtin_amdgcn_classh:
12110  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
12111  case AMDGPU::BI__builtin_amdgcn_fmed3f:
12112  case AMDGPU::BI__builtin_amdgcn_fmed3h:
12113  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
12114  case AMDGPU::BI__builtin_amdgcn_read_exec: {
12115  CallInst *CI = cast<CallInst>(
12116  EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
12117  CI->setConvergent();
12118  return CI;
12119  }
12120  case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
12121  case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
12122  StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
12123  "exec_lo" : "exec_hi";
12124  CallInst *CI = cast<CallInst>(
12125  EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName));
12126  CI->setConvergent();
12127  return CI;
12128  }
12129  // amdgcn workitem
12130  case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
12131  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
12132  case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
12133  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
12134  case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
12135  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
12136 
12137  // r600 intrinsics
12138  case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
12139  case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
12140  return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
12141  case AMDGPU::BI__builtin_r600_read_tidig_x:
12142  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
12143  case AMDGPU::BI__builtin_r600_read_tidig_y:
12144  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
12145  case AMDGPU::BI__builtin_r600_read_tidig_z:
12146  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
12147  default:
12148  return nullptr;
12149  }
12150 }
12151 
12152 /// Handle a SystemZ function in which the final argument is a pointer
12153 /// to an int that receives the post-instruction CC value. At the LLVM level
12154 /// this is represented as a function that returns a {result, cc} pair.
12155 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
12156                                           unsigned IntrinsicID,
12157  const CallExpr *E) {
12158  unsigned NumArgs = E->getNumArgs() - 1;
12159  SmallVector<Value *, 8> Args(NumArgs);
12160  for (unsigned I = 0; I < NumArgs; ++I)
12161  Args[I] = CGF.EmitScalarExpr(E->getArg(I));
12162  Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
12163  Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
12164  Value *Call = CGF.Builder.CreateCall(F, Args);
12165  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
12166  CGF.Builder.CreateStore(CC, CCPtr);
12167  return CGF.Builder.CreateExtractValue(Call, 0);
12168 }
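  // Illustrative sketch (argument names are placeholders): a builtin such as
  // __builtin_s390_vceqbs(a, b, &cc) reaches this helper with NumArgs == 2;
  // element 0 of the returned {result, cc} pair is the builtin's result and
  // element 1 is stored through the trailing int pointer.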
12169 
12170 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
12171                                                const CallExpr *E) {
12172  switch (BuiltinID) {
12173  case SystemZ::BI__builtin_tbegin: {
12174  Value *TDB = EmitScalarExpr(E->getArg(0));
12175  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
12176  Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
12177  return Builder.CreateCall(F, {TDB, Control});
12178  }
12179  case SystemZ::BI__builtin_tbegin_nofloat: {
12180  Value *TDB = EmitScalarExpr(E->getArg(0));
12181  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
12182  Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
12183  return Builder.CreateCall(F, {TDB, Control});
12184  }
12185  case SystemZ::BI__builtin_tbeginc: {
12186  Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
12187  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
12188  Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
12189  return Builder.CreateCall(F, {TDB, Control});
12190  }
12191  case SystemZ::BI__builtin_tabort: {
12192  Value *Data = EmitScalarExpr(E->getArg(0));
12193  Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
12194  return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
12195  }
12196  case SystemZ::BI__builtin_non_tx_store: {
12197  Value *Address = EmitScalarExpr(E->getArg(0));
12198  Value *Data = EmitScalarExpr(E->getArg(1));
12199  Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
12200  return Builder.CreateCall(F, {Data, Address});
12201  }
12202 
12203  // Vector builtins. Note that most vector builtins are mapped automatically
12204  // to target-specific LLVM intrinsics. The ones handled specially here can
12205  // be represented via standard LLVM IR, which is preferable to enable common
12206  // LLVM optimizations.
12207 
12208  case SystemZ::BI__builtin_s390_vpopctb:
12209  case SystemZ::BI__builtin_s390_vpopcth:
12210  case SystemZ::BI__builtin_s390_vpopctf:
12211  case SystemZ::BI__builtin_s390_vpopctg: {
12212  llvm::Type *ResultType = ConvertType(E->getType());
12213  Value *X = EmitScalarExpr(E->getArg(0));
12214  Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
12215  return Builder.CreateCall(F, X);
12216  }
12217 
12218  case SystemZ::BI__builtin_s390_vclzb:
12219  case SystemZ::BI__builtin_s390_vclzh:
12220  case SystemZ::BI__builtin_s390_vclzf:
12221  case SystemZ::BI__builtin_s390_vclzg: {
12222  llvm::Type *ResultType = ConvertType(E->getType());
12223  Value *X = EmitScalarExpr(E->getArg(0));
12224  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
12225  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
12226  return Builder.CreateCall(F, {X, Undef});
12227  }
12228 
12229  case SystemZ::BI__builtin_s390_vctzb:
12230  case SystemZ::BI__builtin_s390_vctzh:
12231  case SystemZ::BI__builtin_s390_vctzf:
12232  case SystemZ::BI__builtin_s390_vctzg: {
12233  llvm::Type *ResultType = ConvertType(E->getType());
12234  Value *X = EmitScalarExpr(E->getArg(0));
12235  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
12236  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
12237  return Builder.CreateCall(F, {X, Undef});
12238  }
12239 
12240  case SystemZ::BI__builtin_s390_vfsqsb:
12241  case SystemZ::BI__builtin_s390_vfsqdb: {
12242  llvm::Type *ResultType = ConvertType(E->getType());
12243  Value *X = EmitScalarExpr(E->getArg(0));
12244  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
12245  return Builder.CreateCall(F, X);
12246  }
12247  case SystemZ::BI__builtin_s390_vfmasb:
12248  case SystemZ::BI__builtin_s390_vfmadb: {
12249  llvm::Type *ResultType = ConvertType(E->getType());
12250  Value *X = EmitScalarExpr(E->getArg(0));
12251  Value *Y = EmitScalarExpr(E->getArg(1));
12252  Value *Z = EmitScalarExpr(E->getArg(2));
12253  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12254  return Builder.CreateCall(F, {X, Y, Z});
12255  }
12256  case SystemZ::BI__builtin_s390_vfmssb:
12257  case SystemZ::BI__builtin_s390_vfmsdb: {
12258  llvm::Type *ResultType = ConvertType(E->getType());
12259  Value *X = EmitScalarExpr(E->getArg(0));
12260  Value *Y = EmitScalarExpr(E->getArg(1));
12261  Value *Z = EmitScalarExpr(E->getArg(2));
12262  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12263  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12264  return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
12265  }
12266  case SystemZ::BI__builtin_s390_vfnmasb:
12267  case SystemZ::BI__builtin_s390_vfnmadb: {
12268  llvm::Type *ResultType = ConvertType(E->getType());
12269  Value *X = EmitScalarExpr(E->getArg(0));
12270  Value *Y = EmitScalarExpr(E->getArg(1));
12271  Value *Z = EmitScalarExpr(E->getArg(2));
12272  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12273  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12274  return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
12275  }
12276  case SystemZ::BI__builtin_s390_vfnmssb:
12277  case SystemZ::BI__builtin_s390_vfnmsdb: {
12278  llvm::Type *ResultType = ConvertType(E->getType());
12279  Value *X = EmitScalarExpr(E->getArg(0));
12280  Value *Y = EmitScalarExpr(E->getArg(1));
12281  Value *Z = EmitScalarExpr(E->getArg(2));
12282  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12283  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12284  Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
12285  return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
12286  }
12287  case SystemZ::BI__builtin_s390_vflpsb:
12288  case SystemZ::BI__builtin_s390_vflpdb: {
12289  llvm::Type *ResultType = ConvertType(E->getType());
12290  Value *X = EmitScalarExpr(E->getArg(0));
12291  Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
12292  return Builder.CreateCall(F, X);
12293  }
12294  case SystemZ::BI__builtin_s390_vflnsb:
12295  case SystemZ::BI__builtin_s390_vflndb: {
12296  llvm::Type *ResultType = ConvertType(E->getType());
12297  Value *X = EmitScalarExpr(E->getArg(0));
12298  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12299  Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
12300  return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
12301  }
12302  case SystemZ::BI__builtin_s390_vfisb:
12303  case SystemZ::BI__builtin_s390_vfidb: {
12304  llvm::Type *ResultType = ConvertType(E->getType());
12305  Value *X = EmitScalarExpr(E->getArg(0));
12306  // Constant-fold the M4 and M5 mask arguments.
12307  llvm::APSInt M4, M5;
12308  bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
12309  bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
12310  assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
12311  (void)IsConstM4; (void)IsConstM5;
12312  // Check whether this instance can be represented via an LLVM standard
12313  // intrinsic. We only support some combinations of M4 and M5.
12314  Intrinsic::ID ID = Intrinsic::not_intrinsic;
12315  switch (M4.getZExtValue()) {
12316  default: break;
12317  case 0: // IEEE-inexact exception allowed
12318  switch (M5.getZExtValue()) {
12319  default: break;
12320  case 0: ID = Intrinsic::rint; break;
12321  }
12322  break;
12323  case 4: // IEEE-inexact exception suppressed
12324  switch (M5.getZExtValue()) {
12325  default: break;
12326  case 0: ID = Intrinsic::nearbyint; break;
12327  case 1: ID = Intrinsic::round; break;
12328  case 5: ID = Intrinsic::trunc; break;
12329  case 6: ID = Intrinsic::ceil; break;
12330  case 7: ID = Intrinsic::floor; break;
12331  }
12332  break;
12333  }
12334  if (ID != Intrinsic::not_intrinsic) {
12335  Function *F = CGM.getIntrinsic(ID, ResultType);
12336  return Builder.CreateCall(F, X);
12337  }
12338  switch (BuiltinID) {
12339  case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
12340  case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
12341  default: llvm_unreachable("Unknown BuiltinID");
12342  }
12343  Function *F = CGM.getIntrinsic(ID);
12344  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
12345  Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
12346  return Builder.CreateCall(F, {X, M4Value, M5Value});
12347  }
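  // Worked example (illustrative): M4 = 4, M5 = 0 (inexact suppressed, round
  // to current mode) maps to llvm.nearbyint above, whereas an unsupported
  // combination such as M4 = 0, M5 = 1 falls through to the target-specific
  // llvm.s390.vfisb / llvm.s390.vfidb call with the masks passed along.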
12348  case SystemZ::BI__builtin_s390_vfmaxsb:
12349  case SystemZ::BI__builtin_s390_vfmaxdb: {
12350  llvm::Type *ResultType = ConvertType(E->getType());
12351  Value *X = EmitScalarExpr(E->getArg(0));
12352  Value *Y = EmitScalarExpr(E->getArg(1));
12353  // Constant-fold the M4 mask argument.
12354  llvm::APSInt M4;
12355  bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
12356  assert(IsConstM4 && "Constant arg isn't actually constant?");
12357  (void)IsConstM4;
12358  // Check whether this instance can be represented via an LLVM standard
12359  // intrinsic. We only support some values of M4.
12360  Intrinsic::ID ID = Intrinsic::not_intrinsic;
12361  switch (M4.getZExtValue()) {
12362  default: break;
12363  case 4: ID = Intrinsic::maxnum; break;
12364  }
12365  if (ID != Intrinsic::not_intrinsic) {
12366  Function *F = CGM.getIntrinsic(ID, ResultType);
12367  return Builder.CreateCall(F, {X, Y});
12368  }
12369  switch (BuiltinID) {
12370  case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
12371  case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
12372  default: llvm_unreachable("Unknown BuiltinID");
12373  }
12374  Function *F = CGM.getIntrinsic(ID);
12375  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
12376  return Builder.CreateCall(F, {X, Y, M4Value});
12377  }
12378  case SystemZ::BI__builtin_s390_vfminsb:
12379  case SystemZ::BI__builtin_s390_vfmindb: {
12380  llvm::Type *ResultType = ConvertType(E->getType());
12381  Value *X = EmitScalarExpr(E->getArg(0));
12382  Value *Y = EmitScalarExpr(E->getArg(1));
12383  // Constant-fold the M4 mask argument.
12384  llvm::APSInt M4;
12385  bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
12386  assert(IsConstM4 && "Constant arg isn't actually constant?");
12387  (void)IsConstM4;
12388  // Check whether this instance can be represented via an LLVM standard
12389  // intrinsic. We only support some values of M4.
12390  Intrinsic::ID ID = Intrinsic::not_intrinsic;
12391  switch (M4.getZExtValue()) {
12392  default: break;
12393  case 4: ID = Intrinsic::minnum; break;
12394  }
12395  if (ID != Intrinsic::not_intrinsic) {
12396  Function *F = CGM.getIntrinsic(ID, ResultType);
12397  return Builder.CreateCall(F, {X, Y});
12398  }
12399  switch (BuiltinID) {
12400  case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
12401  case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
12402  default: llvm_unreachable("Unknown BuiltinID");
12403  }
12404  Function *F = CGM.getIntrinsic(ID);
12405  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
12406  return Builder.CreateCall(F, {X, Y, M4Value});
12407  }
12408 
12409  // Vector intrinsics that output the post-instruction CC value.
12410 
12411 #define INTRINSIC_WITH_CC(NAME) \
12412  case SystemZ::BI__builtin_##NAME: \
12413  return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
12414 
12415  INTRINSIC_WITH_CC(s390_vpkshs);
12416  INTRINSIC_WITH_CC(s390_vpksfs);
12417  INTRINSIC_WITH_CC(s390_vpksgs);
12418 
12419  INTRINSIC_WITH_CC(s390_vpklshs);
12420  INTRINSIC_WITH_CC(s390_vpklsfs);
12421  INTRINSIC_WITH_CC(s390_vpklsgs);
12422 
12423  INTRINSIC_WITH_CC(s390_vceqbs);
12424  INTRINSIC_WITH_CC(s390_vceqhs);
12425  INTRINSIC_WITH_CC(s390_vceqfs);
12426  INTRINSIC_WITH_CC(s390_vceqgs);
12427 
12428  INTRINSIC_WITH_CC(s390_vchbs);
12429  INTRINSIC_WITH_CC(s390_vchhs);
12430  INTRINSIC_WITH_CC(s390_vchfs);
12431  INTRINSIC_WITH_CC(s390_vchgs);
12432 
12433  INTRINSIC_WITH_CC(s390_vchlbs);
12434  INTRINSIC_WITH_CC(s390_vchlhs);
12435  INTRINSIC_WITH_CC(s390_vchlfs);
12436  INTRINSIC_WITH_CC(s390_vchlgs);
12437 
12438  INTRINSIC_WITH_CC(s390_vfaebs);
12439  INTRINSIC_WITH_CC(s390_vfaehs);
12440  INTRINSIC_WITH_CC(s390_vfaefs);
12441 
12442  INTRINSIC_WITH_CC(s390_vfaezbs);
12443  INTRINSIC_WITH_CC(s390_vfaezhs);
12444  INTRINSIC_WITH_CC(s390_vfaezfs);
12445 
12446  INTRINSIC_WITH_CC(s390_vfeebs);
12447  INTRINSIC_WITH_CC(s390_vfeehs);
12448  INTRINSIC_WITH_CC(s390_vfeefs);
12449 
12450  INTRINSIC_WITH_CC(s390_vfeezbs);
12451  INTRINSIC_WITH_CC(s390_vfeezhs);
12452  INTRINSIC_WITH_CC(s390_vfeezfs);
12453 
12454  INTRINSIC_WITH_CC(s390_vfenebs);
12455  INTRINSIC_WITH_CC(s390_vfenehs);
12456  INTRINSIC_WITH_CC(s390_vfenefs);
12457 
12458  INTRINSIC_WITH_CC(s390_vfenezbs);
12459  INTRINSIC_WITH_CC(s390_vfenezhs);
12460  INTRINSIC_WITH_CC(s390_vfenezfs);
12461 
12462  INTRINSIC_WITH_CC(s390_vistrbs);
12463  INTRINSIC_WITH_CC(s390_vistrhs);
12464  INTRINSIC_WITH_CC(s390_vistrfs);
12465 
12466  INTRINSIC_WITH_CC(s390_vstrcbs);
12467  INTRINSIC_WITH_CC(s390_vstrchs);
12468  INTRINSIC_WITH_CC(s390_vstrcfs);
12469 
12470  INTRINSIC_WITH_CC(s390_vstrczbs);
12471  INTRINSIC_WITH_CC(s390_vstrczhs);
12472  INTRINSIC_WITH_CC(s390_vstrczfs);
12473 
12474  INTRINSIC_WITH_CC(s390_vfcesbs);
12475  INTRINSIC_WITH_CC(s390_vfcedbs);
12476  INTRINSIC_WITH_CC(s390_vfchsbs);
12477  INTRINSIC_WITH_CC(s390_vfchdbs);
12478  INTRINSIC_WITH_CC(s390_vfchesbs);
12479  INTRINSIC_WITH_CC(s390_vfchedbs);
12480 
12481  INTRINSIC_WITH_CC(s390_vftcisb);
12482  INTRINSIC_WITH_CC(s390_vftcidb);
12483 
12484 #undef INTRINSIC_WITH_CC
12485 
12486  default:
12487  return nullptr;
12488  }
12489 }
12490 
12491 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
12492                                               const CallExpr *E) {
12493  auto MakeLdg = [&](unsigned IntrinsicID) {
12494  Value *Ptr = EmitScalarExpr(E->getArg(0));
12495  clang::CharUnits Align =
12496  getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
12497  return Builder.CreateCall(
12498  CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
12499  Ptr->getType()}),
12500  {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
12501  };
12502  auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
12503  Value *Ptr = EmitScalarExpr(E->getArg(0));
12504  return Builder.CreateCall(
12505  CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
12506  Ptr->getType()}),
12507  {Ptr, EmitScalarExpr(E->getArg(1))});
12508  };
12509  switch (BuiltinID) {
12510  case NVPTX::BI__nvvm_atom_add_gen_i:
12511  case NVPTX::BI__nvvm_atom_add_gen_l:
12512  case NVPTX::BI__nvvm_atom_add_gen_ll:
12513  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
12514 
12515  case NVPTX::BI__nvvm_atom_sub_gen_i:
12516  case NVPTX::BI__nvvm_atom_sub_gen_l:
12517  case NVPTX::BI__nvvm_atom_sub_gen_ll:
12518  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
12519 
12520  case NVPTX::BI__nvvm_atom_and_gen_i:
12521  case NVPTX::BI__nvvm_atom_and_gen_l:
12522  case NVPTX::BI__nvvm_atom_and_gen_ll:
12523    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
12524 
12525  case NVPTX::BI__nvvm_atom_or_gen_i:
12526  case NVPTX::BI__nvvm_atom_or_gen_l:
12527  case NVPTX::BI__nvvm_atom_or_gen_ll:
12528  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
12529 
12530  case NVPTX::BI__nvvm_atom_xor_gen_i:
12531  case NVPTX::BI__nvvm_atom_xor_gen_l:
12532  case NVPTX::BI__nvvm_atom_xor_gen_ll:
12533  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
12534 
12535  case NVPTX::BI__nvvm_atom_xchg_gen_i:
12536  case NVPTX::BI__nvvm_atom_xchg_gen_l:
12537  case NVPTX::BI__nvvm_atom_xchg_gen_ll:
12538  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
12539 
12540  case NVPTX::BI__nvvm_atom_max_gen_i:
12541  case NVPTX::BI__nvvm_atom_max_gen_l:
12542  case NVPTX::BI__nvvm_atom_max_gen_ll:
12543  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
12544 
12545  case NVPTX::BI__nvvm_atom_max_gen_ui:
12546  case NVPTX::BI__nvvm_atom_max_gen_ul:
12547  case NVPTX::BI__nvvm_atom_max_gen_ull:
12548  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
12549 
12550  case NVPTX::BI__nvvm_atom_min_gen_i:
12551  case NVPTX::BI__nvvm_atom_min_gen_l:
12552  case NVPTX::BI__nvvm_atom_min_gen_ll:
12553  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
12554 
12555  case NVPTX::BI__nvvm_atom_min_gen_ui:
12556  case NVPTX::BI__nvvm_atom_min_gen_ul:
12557  case NVPTX::BI__nvvm_atom_min_gen_ull:
12558  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
12559 
12560  case NVPTX::BI__nvvm_atom_cas_gen_i:
12561  case NVPTX::BI__nvvm_atom_cas_gen_l:
12562  case NVPTX::BI__nvvm_atom_cas_gen_ll:
12563  // __nvvm_atom_cas_gen_* should return the old value rather than the
12564  // success flag.
12565  return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
12566 
12567  case NVPTX::BI__nvvm_atom_add_gen_f: {
12568  Value *Ptr = EmitScalarExpr(E->getArg(0));
12569  Value *Val = EmitScalarExpr(E->getArg(1));
12570  // atomicrmw only deals with integer arguments, so we need to use
12571  // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
12572  Value *FnALAF32 =
12573  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
12574  return Builder.CreateCall(FnALAF32, {Ptr, Val});
12575  }
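  // A rough sketch of what this emits (illustrative; the exact overload suffix
  // depends on the pointer's address space):
  //   %r = call float @llvm.nvvm.atomic.load.add.f32.p0f32(float* %ptr, float %val)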
12576 
12577  case NVPTX::BI__nvvm_atom_add_gen_d: {
12578  Value *Ptr = EmitScalarExpr(E->getArg(0));
12579  Value *Val = EmitScalarExpr(E->getArg(1));
12580  // atomicrmw only deals with integer arguments, so we need to use
12581  // LLVM's nvvm_atomic_load_add_f64 intrinsic.
12582  Value *FnALAF64 =
12583  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType());
12584  return Builder.CreateCall(FnALAF64, {Ptr, Val});
12585  }
12586 
12587  case NVPTX::BI__nvvm_atom_inc_gen_ui: {
12588  Value *Ptr = EmitScalarExpr(E->getArg(0));
12589  Value *Val = EmitScalarExpr(E->getArg(1));
12590  Value *FnALI32 =
12591  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
12592  return Builder.CreateCall(FnALI32, {Ptr, Val});
12593  }
12594 
12595  case NVPTX::BI__nvvm_atom_dec_gen_ui: {
12596  Value *Ptr = EmitScalarExpr(E->getArg(0));
12597  Value *Val = EmitScalarExpr(E->getArg(1));
12598  Value *FnALD32 =
12599  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
12600  return Builder.CreateCall(FnALD32, {Ptr, Val});
12601  }
12602 
12603  case NVPTX::BI__nvvm_ldg_c:
12604  case NVPTX::BI__nvvm_ldg_c2:
12605  case NVPTX::BI__nvvm_ldg_c4:
12606  case NVPTX::BI__nvvm_ldg_s:
12607  case NVPTX::BI__nvvm_ldg_s2:
12608  case NVPTX::BI__nvvm_ldg_s4:
12609  case NVPTX::BI__nvvm_ldg_i:
12610  case NVPTX::BI__nvvm_ldg_i2:
12611  case NVPTX::BI__nvvm_ldg_i4:
12612  case NVPTX::BI__nvvm_ldg_l:
12613  case NVPTX::BI__nvvm_ldg_ll:
12614  case NVPTX::BI__nvvm_ldg_ll2:
12615  case NVPTX::BI__nvvm_ldg_uc:
12616  case NVPTX::BI__nvvm_ldg_uc2:
12617  case NVPTX::BI__nvvm_ldg_uc4:
12618  case NVPTX::BI__nvvm_ldg_us:
12619  case NVPTX::BI__nvvm_ldg_us2:
12620  case NVPTX::BI__nvvm_ldg_us4:
12621  case NVPTX::BI__nvvm_ldg_ui:
12622  case NVPTX::BI__nvvm_ldg_ui2:
12623  case NVPTX::BI__nvvm_ldg_ui4:
12624  case NVPTX::BI__nvvm_ldg_ul:
12625  case NVPTX::BI__nvvm_ldg_ull:
12626  case NVPTX::BI__nvvm_ldg_ull2:
12627  // PTX Interoperability section 2.2: "For a vector with an even number of
12628  // elements, its alignment is set to number of elements times the alignment
12629  // of its member: n*alignof(t)."
12630  return MakeLdg(Intrinsic::nvvm_ldg_global_i);
12631  case NVPTX::BI__nvvm_ldg_f:
12632  case NVPTX::BI__nvvm_ldg_f2:
12633  case NVPTX::BI__nvvm_ldg_f4:
12634  case NVPTX::BI__nvvm_ldg_d:
12635  case NVPTX::BI__nvvm_ldg_d2:
12636  return MakeLdg(Intrinsic::nvvm_ldg_global_f);
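  // Worked example for the alignment rule quoted above (illustrative): a call
  // such as __nvvm_ldg_i2(p) loads a vector of two ints, so MakeLdg passes an
  // alignment of 2 * alignof(int) = 8 bytes to llvm.nvvm.ldg.global.i.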
12637 
12638  case NVPTX::BI__nvvm_atom_cta_add_gen_i:
12639  case NVPTX::BI__nvvm_atom_cta_add_gen_l:
12640  case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
12641  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
12642  case NVPTX::BI__nvvm_atom_sys_add_gen_i:
12643  case NVPTX::BI__nvvm_atom_sys_add_gen_l:
12644  case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
12645  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
12646  case NVPTX::BI__nvvm_atom_cta_add_gen_f:
12647  case NVPTX::BI__nvvm_atom_cta_add_gen_d:
12648  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
12649  case NVPTX::BI__nvvm_atom_sys_add_gen_f:
12650  case NVPTX::BI__nvvm_atom_sys_add_gen_d:
12651  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
12652  case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
12653  case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
12654  case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
12655  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
12656  case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
12657  case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
12658  case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
12659  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
12660  case NVPTX::BI__nvvm_atom_cta_max_gen_i:
12661  case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
12662  case NVPTX::BI__nvvm_atom_cta_max_gen_l:
12663  case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
12664  case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
12665  case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
12666  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
12667  case NVPTX::BI__nvvm_atom_sys_max_gen_i:
12668  case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
12669  case NVPTX::BI__nvvm_atom_sys_max_gen_l:
12670  case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
12671  case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
12672  case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
12673  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
12674  case NVPTX::BI__nvvm_atom_cta_min_gen_i:
12675  case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
12676  case NVPTX::BI__nvvm_atom_cta_min_gen_l:
12677  case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
12678  case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
12679  case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
12680  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
12681  case NVPTX::BI__nvvm_atom_sys_min_gen_i:
12682  case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
12683  case NVPTX::BI__nvvm_atom_sys_min_gen_l:
12684  case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
12685  case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
12686  case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
12687  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
12688  case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
12689  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
12690  case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
12691  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
12692  case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
12693  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
12694  case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
12695  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
12696  case NVPTX::BI__nvvm_atom_cta_and_gen_i:
12697  case NVPTX::BI__nvvm_atom_cta_and_gen_l:
12698  case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
12699  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
12700  case NVPTX::BI__nvvm_atom_sys_and_gen_i:
12701  case NVPTX::BI__nvvm_atom_sys_and_gen_l:
12702  case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
12703  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
12704  case NVPTX::BI__nvvm_atom_cta_or_gen_i:
12705  case NVPTX::BI__nvvm_atom_cta_or_gen_l:
12706  case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
12707  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
12708  case NVPTX::BI__nvvm_atom_sys_or_gen_i:
12709  case NVPTX::BI__nvvm_atom_sys_or_gen_l:
12710  case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
12711  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
12712  case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
12713  case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
12714  case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
12715  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
12716  case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
12717  case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
12718  case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
12719  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
12720  case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
12721  case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
12722  case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
12723  Value *Ptr = EmitScalarExpr(E->getArg(0));
12724  return Builder.CreateCall(
12725  CGM.getIntrinsic(
12726  Intrinsic::nvvm_atomic_cas_gen_i_cta,
12727  {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
12728  {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
12729  }
12730  case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
12731  case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
12732  case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
12733  Value *Ptr = EmitScalarExpr(E->getArg(0));
12734  return Builder.CreateCall(
12735  CGM.getIntrinsic(
12736  Intrinsic::nvvm_atomic_cas_gen_i_sys,
12737  {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
12738  {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
12739  }
12740  case NVPTX::BI__nvvm_match_all_sync_i32p:
12741  case NVPTX::BI__nvvm_match_all_sync_i64p: {
12742  Value *Mask = EmitScalarExpr(E->getArg(0));
12743  Value *Val = EmitScalarExpr(E->getArg(1));
12744  Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
12745  Value *ResultPair = Builder.CreateCall(
12746  CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
12747  ? Intrinsic::nvvm_match_all_sync_i32p
12748  : Intrinsic::nvvm_match_all_sync_i64p),
12749  {Mask, Val});
12750  Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
12751  PredOutPtr.getElementType());
12752  Builder.CreateStore(Pred, PredOutPtr);
12753  return Builder.CreateExtractValue(ResultPair, 0);
12754  }
12755  case NVPTX::BI__hmma_m16n16k16_ld_a:
12756  case NVPTX::BI__hmma_m16n16k16_ld_b:
12757  case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
12758  case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
12759  case NVPTX::BI__hmma_m32n8k16_ld_a:
12760  case NVPTX::BI__hmma_m32n8k16_ld_b:
12761  case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
12762  case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
12763  case NVPTX::BI__hmma_m8n32k16_ld_a:
12764  case NVPTX::BI__hmma_m8n32k16_ld_b:
12765  case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
12766  case NVPTX::BI__hmma_m8n32k16_ld_c_f32: {
12767  Address Dst = EmitPointerWithAlignment(E->getArg(0));
12768  Value *Src = EmitScalarExpr(E->getArg(1));
12769  Value *Ldm = EmitScalarExpr(E->getArg(2));
12770  llvm::APSInt isColMajorArg;
12771  if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
12772  return nullptr;
12773  bool isColMajor = isColMajorArg.getSExtValue();
12774  unsigned IID;
12775  unsigned NumResults;
12776  switch (BuiltinID) {
12777  case NVPTX::BI__hmma_m16n16k16_ld_a:
12778  IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride
12779  : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride;
12780  NumResults = 8;
12781  break;
12782  case NVPTX::BI__hmma_m16n16k16_ld_b:
12783  IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride
12784  : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride;
12785  NumResults = 8;
12786  break;
12787  case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
12788  IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride
12789  : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride;
12790  NumResults = 4;
12791  break;
12792  case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
12793  IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride
12794  : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride;
12795  NumResults = 8;
12796  break;
12797  case NVPTX::BI__hmma_m32n8k16_ld_a:
12798  IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride
12799  : Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride;
12800  NumResults = 8;
12801  break;
12802  case NVPTX::BI__hmma_m32n8k16_ld_b:
12803  IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride
12804  : Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride;
12805  NumResults = 8;
12806  break;
12807  case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
12808  IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride
12809  : Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride;
12810  NumResults = 4;
12811  break;
12812  case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
12813  IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride
12814  : Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride;
12815  NumResults = 8;
12816  break;
12817  case NVPTX::BI__hmma_m8n32k16_ld_a:
12818  IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride
12819  : Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride;
12820  NumResults = 8;
12821  break;
12822  case NVPTX::BI__hmma_m8n32k16_ld_b:
12823  IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride
12824  : Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride;
12825  NumResults = 8;
12826  break;
12827  case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
12828  IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride
12829  : Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride;
12830  NumResults = 4;
12831  break;
12832  case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
12833  IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride
12834  : Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride;
12835  NumResults = 8;
12836  break;
12837  default:
12838  llvm_unreachable("Unexpected builtin ID.");
12839  }
12840  Value *Result =
12841  Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
12842 
12843  // Save returned values.
12844  for (unsigned i = 0; i < NumResults; ++i) {
12845  Builder.CreateAlignedStore(
12846  Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
12847  Dst.getElementType()),
12848  Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
12849          CharUnits::fromQuantity(4));
12850    }
12851  return Result;
12852  }
12853 
12854  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
12855  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
12856  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
12857  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
12858  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
12859  case NVPTX::BI__hmma_m8n32k16_st_c_f32: {
12860  Value *Dst = EmitScalarExpr(E->getArg(0));
12861  Address Src = EmitPointerWithAlignment(E->getArg(1));
12862  Value *Ldm = EmitScalarExpr(E->getArg(2));
12863  llvm::APSInt isColMajorArg;
12864  if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
12865  return nullptr;
12866  bool isColMajor = isColMajorArg.getSExtValue();
12867  unsigned IID;
12868  unsigned NumResults = 8;
12869  // PTX Instructions (and LLVM intrinsics) are defined for slice _d_, yet
12870  // for some reason nvcc builtins use _c_.
12871  switch (BuiltinID) {
12872  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
12873  IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride
12874  : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride;
12875  NumResults = 4;
12876  break;
12877  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
12878  IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride
12879  : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride;
12880  break;
12881  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
12882  IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride
12883  : Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride;
12884  NumResults = 4;
12885  break;
12886  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
12887  IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride
12888  : Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride;
12889  break;
12890  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
12891  IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride
12892  : Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride;
12893  NumResults = 4;
12894  break;
12895  case NVPTX::BI__hmma_m8n32k16_st_c_f32:
12896  IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride
12897  : Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride;
12898  break;
12899  default:
12900  llvm_unreachable("Unexpected builtin ID.");
12901  }
12902  Function *Intrinsic = CGM.getIntrinsic(IID, Dst->getType());
12903  llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
12904  SmallVector<Value *, 10> Values = {Dst};
12905  for (unsigned i = 0; i < NumResults; ++i) {
12906  Value *V = Builder.CreateAlignedLoad(
12907  Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)),
12908          CharUnits::fromQuantity(4));
12909      Values.push_back(Builder.CreateBitCast(V, ParamType));
12910  }
12911  Values.push_back(Ldm);
12912  Value *Result = Builder.CreateCall(Intrinsic, Values);
12913  return Result;
12914  }
12915 
12916  // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
12917  // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
12918  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
12919  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
12920  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
12921  case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
12922  case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
12923  case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
12924  case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
12925  case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
12926  case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
12927  case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
12928  case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
12929  case NVPTX::BI__hmma_m8n32k16_mma_f16f32: {
12930  Address Dst = EmitPointerWithAlignment(E->getArg(0));
12931  Address SrcA = EmitPointerWithAlignment(E->getArg(1));
12932  Address SrcB = EmitPointerWithAlignment(E->getArg(2));
12933  Address SrcC = EmitPointerWithAlignment(E->getArg(3));
12934  llvm::APSInt LayoutArg;
12935  if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext()))
12936  return nullptr;
12937  int Layout = LayoutArg.getSExtValue();
12938  if (Layout < 0 || Layout > 3)
12939  return nullptr;
12940  llvm::APSInt SatfArg;
12941  if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext()))
12942  return nullptr;
12943  bool Satf = SatfArg.getSExtValue();
12944 
12945  // clang-format off
12946 #define MMA_VARIANTS(geom, type) {{ \
12947  Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
12948  Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
12949  Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
12950  Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
12951  Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
12952  Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
12953  Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \
12954  Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \
12955  }}
12956  // clang-format on
12957 
12958  auto getMMAIntrinsic = [Layout, Satf](std::array<unsigned, 8> Variants) {
12959  unsigned Index = Layout * 2 + Satf;
12960  assert(Index < 8);
12961  return Variants[Index];
12962  };
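    // Worked example (illustrative): Layout = 2 with Satf = 1 gives Index =
    // 2 * 2 + 1 = 5, selecting the col_row ..._satfinite entry of
    // MMA_VARIANTS; Layout = 0 with Satf = 0 selects the plain row_row
    // variant.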
12963  unsigned IID;
12964  unsigned NumEltsC;
12965  unsigned NumEltsD;
12966  switch (BuiltinID) {
12967  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
12968  IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f16));
12969  NumEltsC = 4;
12970  NumEltsD = 4;
12971  break;
12972  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
12973  IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f16));
12974  NumEltsC = 4;
12975  NumEltsD = 8;
12976  break;
12977  case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
12978  IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f32));
12979  NumEltsC = 8;
12980  NumEltsD = 4;
12981  break;
12982  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
12983  IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f32));
12984  NumEltsC = 8;
12985  NumEltsD = 8;
12986  break;
12987  case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
12988  IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f16));
12989  NumEltsC = 4;
12990  NumEltsD = 4;
12991  break;
12992  case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
12993  IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f16));
12994  NumEltsC = 4;
12995  NumEltsD = 8;
12996  break;
12997  case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
12998  IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f32));
12999  NumEltsC = 8;
13000  NumEltsD = 4;
13001  break;
13002  case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
13003  IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f32));
13004  NumEltsC = 8;
13005  NumEltsD = 8;
13006  break;
13007  case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
13008  IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f16));
13009  NumEltsC = 4;
13010  NumEltsD = 4;
13011  break;
13012  case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
13013  IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f16));
13014  NumEltsC = 4;
13015  NumEltsD = 8;
13016  break;
13017  case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
13018  IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f32));
13019  NumEltsC = 8;
13020  NumEltsD = 4;
13021  break;
13022  case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
13023  IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f32));
13024  NumEltsC = 8;
13025  NumEltsD = 8;
13026  break;
13027  default:
13028  llvm_unreachable("Unexpected builtin ID.");
13029  }
13030 #undef MMA_VARIANTS
13031 
13032  SmallVector<Value *, 24> Values;
13033  Function *Intrinsic = CGM.getIntrinsic(IID);
13034  llvm::Type *ABType = Intrinsic->getFunctionType()->getParamType(0);
13035  // Load A
13036  for (unsigned i = 0; i < 8; ++i) {
13037  Value *V = Builder.CreateAlignedLoad(
13038  Builder.CreateGEP(SrcA.getPointer(),
13039  llvm::ConstantInt::get(IntTy, i)),
13040          CharUnits::fromQuantity(4));
13041      Values.push_back(Builder.CreateBitCast(V, ABType));
13042  }
13043  // Load B
13044  for (unsigned i = 0; i < 8; ++i) {
13045  Value *V = Builder.CreateAlignedLoad(
13046  Builder.CreateGEP(SrcB.getPointer(),
13047  llvm::ConstantInt::get(IntTy, i)),
13048          CharUnits::fromQuantity(4));
13049      Values.push_back(Builder.CreateBitCast(V, ABType));
13050  }
13051  // Load C
13052  llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(16);
13053  for (unsigned i = 0; i < NumEltsC; ++i) {
13054  Value *V = Builder.CreateAlignedLoad(
13055  Builder.CreateGEP(SrcC.getPointer(),
13056  llvm::ConstantInt::get(IntTy, i)),
13057          CharUnits::fromQuantity(4));
13058      Values.push_back(Builder.CreateBitCast(V, CType));
13059  }
13060  Value *Result = Builder.CreateCall(Intrinsic, Values);
13061  llvm::Type *DType = Dst.getElementType();
13062  for (unsigned i = 0; i < NumEltsD; ++i)
13063  Builder.CreateAlignedStore(
13064  Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
13065  Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
13066          CharUnits::fromQuantity(4));
13067    return Result;
13068  }
13069  default:
13070  return nullptr;
13071  }
13072 }
13073 
13074 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
13075                                                     const CallExpr *E) {
13076  switch (BuiltinID) {
13077  case WebAssembly::BI__builtin_wasm_memory_size: {
13078  llvm::Type *ResultType = ConvertType(E->getType());
13079  Value *I = EmitScalarExpr(E->getArg(0));
13080  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
13081  return Builder.CreateCall(Callee, I);
13082  }
13083  case WebAssembly::BI__builtin_wasm_memory_grow: {
13084  llvm::Type *ResultType = ConvertType(E->getType());
13085  Value *Args[] = {
13086  EmitScalarExpr(E->getArg(0)),
13087  EmitScalarExpr(E->getArg(1))
13088  };
13089  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
13090  return Builder.CreateCall(Callee, Args);
13091  }
13092  case WebAssembly::BI__builtin_wasm_throw: {
13093  Value *Tag = EmitScalarExpr(E->getArg(0));
13094  Value *Obj = EmitScalarExpr(E->getArg(1));
13095  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
13096  return Builder.CreateCall(Callee, {Tag, Obj});
13097  }
13098  case WebAssembly::BI__builtin_wasm_rethrow: {
13099  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
13100  return Builder.CreateCall(Callee);
13101  }
13102  case WebAssembly::BI__builtin_wasm_atomic_wait_i32: {
13103  Value *Addr = EmitScalarExpr(E->getArg(0));
13104  Value *Expected = EmitScalarExpr(E->getArg(1));
13105  Value *Timeout = EmitScalarExpr(E->getArg(2));
13106  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32);
13107  return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
13108  }
13109  case WebAssembly::BI__builtin_wasm_atomic_wait_i64: {
13110  Value *Addr = EmitScalarExpr(E->getArg(0));
13111  Value *Expected = EmitScalarExpr(E->getArg(1));
13112  Value *Timeout = EmitScalarExpr(E->getArg(2));
13113  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64);
13114  return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
13115  }
13116  case WebAssembly::BI__builtin_wasm_atomic_notify: {
13117  Value *Addr = EmitScalarExpr(E->getArg(0));
13118  Value *Count = EmitScalarExpr(E->getArg(1));
13119  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify);
13120  return Builder.CreateCall(Callee, {Addr, Count});
13121  }
13122  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
13123  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
13124  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
13125  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
13126  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4:
13127  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: {
13128  Value *Src = EmitScalarExpr(E->getArg(0));
13129  llvm::Type *ResT = ConvertType(E->getType());
13130  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed,
13131  {ResT, Src->getType()});
13132  return Builder.CreateCall(Callee, {Src});
13133  }
13134  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
13135  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
13136  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
13137  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
13138  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4:
13139  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: {
13140  Value *Src = EmitScalarExpr(E->getArg(0));
13141  llvm::Type *ResT = ConvertType(E->getType());
13142  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned,
13143  {ResT, Src->getType()});
13144  return Builder.CreateCall(Callee, {Src});
13145  }
13146  case WebAssembly::BI__builtin_wasm_min_f32:
13147  case WebAssembly::BI__builtin_wasm_min_f64:
13148  case WebAssembly::BI__builtin_wasm_min_f32x4:
13149  case WebAssembly::BI__builtin_wasm_min_f64x2: {
13150  Value *LHS = EmitScalarExpr(E->getArg(0));
13151  Value *RHS = EmitScalarExpr(E->getArg(1));
13152  Value *Callee = CGM.getIntrinsic(Intrinsic::minimum,
13153  ConvertType(E->getType()));
13154  return Builder.CreateCall(Callee, {LHS, RHS});
13155  }
13156  case WebAssembly::BI__builtin_wasm_max_f32:
13157  case WebAssembly::BI__builtin_wasm_max_f64:
13158  case WebAssembly::BI__builtin_wasm_max_f32x4:
13159  case WebAssembly::BI__builtin_wasm_max_f64x2: {
13160  Value *LHS = EmitScalarExpr(E->getArg(0));
13161  Value *RHS = EmitScalarExpr(E->getArg(1));
13162  Value *Callee = CGM.getIntrinsic(Intrinsic::maximum,
13163  ConvertType(E->getType()));
13164  return Builder.CreateCall(Callee, {LHS, RHS});
13165  }
13166  case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
13167  case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
13168  case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
13169  case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
13170  case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
13171  case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
13172  case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
13173  case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: {
13174  llvm::APSInt LaneConst;
13175  if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
13176  llvm_unreachable("Constant arg isn't actually constant?");
13177  Value *Vec = EmitScalarExpr(E->getArg(0));
13178  Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
13179  Value *Extract = Builder.CreateExtractElement(Vec, Lane);
13180  switch (BuiltinID) {
13181  case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
13182  case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
13183  return Builder.CreateSExt(Extract, ConvertType(E->getType()));
13184  case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
13185  case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
13186  return Builder.CreateZExt(Extract, ConvertType(E->getType()));
13187  case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
13188  case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
13189  case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
13190  case WebAssembly::BI__builtin_wasm_extract_lane_f64x2:
13191  return Extract;
13192  default:
13193  llvm_unreachable("unexpected builtin ID");
13194  }
13195  }
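  // Note: the lane index must be an integer constant expression (hence the
  // isIntegerConstantExpr check above), and the narrow i8x16/i16x8 lanes are
  // widened to the builtin's i32 result with a sign or zero extension chosen
  // by the _s/_u suffix. A hypothetical call (assumed builtin signature):
  //   int b = __builtin_wasm_extract_lane_s_i8x16(vec, 3);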
13196  case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
13197  case WebAssembly::BI__builtin_wasm_replace_lane_i16x8:
13198  case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
13199  case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
13200  case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
13201  case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: {
13202  llvm::APSInt LaneConst;
13203  if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
13204  llvm_unreachable("Constant arg isn't actually constant?");
13205  Value *Vec = EmitScalarExpr(E->getArg(0));
13206  Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
13207  Value *Val = EmitScalarExpr(E->getArg(2));
13208  switch (BuiltinID) {
13209  case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
13210  case WebAssembly::BI__builtin_wasm_replace_lane_i16x8: {
13211  llvm::Type *ElemType = ConvertType(E->getType())->getVectorElementType();
13212  Value *Trunc = Builder.CreateTrunc(Val, ElemType);
13213  return Builder.CreateInsertElement(Vec, Trunc, Lane);
13214  }
13215  case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
13216  case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
13217  case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
13218  case WebAssembly::BI__builtin_wasm_replace_lane_f64x2:
13219  return Builder.CreateInsertElement(Vec, Val, Lane);
13220  default:
13221  llvm_unreachable("unexpected builtin ID");
13222  }
13223  }
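  // Note: replace_lane is the inverse operation; for the i8x16 and i16x8
  // forms the i32 value operand is truncated to the element type before the
  // insertelement, mirroring the widening performed by extract_lane above.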
13224  case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
13225  case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
13226  case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
13227  case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
13228  case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
13229  case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
13230  case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
13231  case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8: {
13232  unsigned IntNo;
13233  switch (BuiltinID) {
13234  case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
13235  case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
13236  IntNo = Intrinsic::sadd_sat;
13237  break;
13238  case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
13239  case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
13240  IntNo = Intrinsic::uadd_sat;
13241  break;
13242  case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
13243  case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
13244  IntNo = Intrinsic::wasm_sub_saturate_signed;
13245  break;
13246  case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
13247  case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8:
13248  IntNo = Intrinsic::wasm_sub_saturate_unsigned;
13249  break;
13250  default:
13251  llvm_unreachable("unexpected builtin ID");
13252  }
13253  Value *LHS = EmitScalarExpr(E->getArg(0));
13254  Value *RHS = EmitScalarExpr(E->getArg(1));
13255  Value *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
13256  return Builder.CreateCall(Callee, {LHS, RHS});
13257  }
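  // Note: the saturating adds map onto the target-independent llvm.sadd.sat /
  // llvm.uadd.sat intrinsics, while the saturating subtracts still use
  // WebAssembly-specific intrinsics here; all four end up as plain two-operand
  // calls overloaded on the vector type, as emitted just above.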
13258  case WebAssembly::BI__builtin_wasm_bitselect: {
13259  Value *V1 = EmitScalarExpr(E->getArg(0));
13260  Value *V2 = EmitScalarExpr(E->getArg(1));
13261  Value *C = EmitScalarExpr(E->getArg(2));
13262  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect,
13263  ConvertType(E->getType()));
13264  return Builder.CreateCall(Callee, {V1, V2, C});
13265  }
13266  case WebAssembly::BI__builtin_wasm_any_true_i8x16:
13267  case WebAssembly::BI__builtin_wasm_any_true_i16x8:
13268  case WebAssembly::BI__builtin_wasm_any_true_i32x4:
13269  case WebAssembly::BI__builtin_wasm_any_true_i64x2:
13270  case WebAssembly::BI__builtin_wasm_all_true_i8x16:
13271  case WebAssembly::BI__builtin_wasm_all_true_i16x8:
13272  case WebAssembly::BI__builtin_wasm_all_true_i32x4:
13273  case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
13274  unsigned IntNo;
13275  switch (BuiltinID) {
13276  case WebAssembly::BI__builtin_wasm_any_true_i8x16:
13277  case WebAssembly::BI__builtin_wasm_any_true_i16x8:
13278  case WebAssembly::BI__builtin_wasm_any_true_i32x4:
13279  case WebAssembly::BI__builtin_wasm_any_true_i64x2:
13280  IntNo = Intrinsic::wasm_anytrue;
13281  break;
13282  case WebAssembly::BI__builtin_wasm_all_true_i8x16:
13283  case WebAssembly::BI__builtin_wasm_all_true_i16x8:
13284  case WebAssembly::BI__builtin_wasm_all_true_i32x4:
13285  case WebAssembly::BI__builtin_wasm_all_true_i64x2:
13286  IntNo = Intrinsic::wasm_alltrue;
13287  break;
13288  default:
13289  llvm_unreachable("unexpected builtin ID");
13290  }
13291  Value *Vec = EmitScalarExpr(E->getArg(0));
13292  Value *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
13293  return Builder.CreateCall(Callee, {Vec});
13294  }
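  // Note: anytrue/alltrue reduce a whole vector to a scalar 0-or-1 i32, so
  // the intrinsic is overloaded only on the operand's vector type. A
  // hypothetical use (assumed builtin signature):
  //   if (__builtin_wasm_all_true_i8x16(mask)) { /* every lane nonzero */ }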
13295  case WebAssembly::BI__builtin_wasm_abs_f32x4:
13296  case WebAssembly::BI__builtin_wasm_abs_f64x2: {
13297  Value *Vec = EmitScalarExpr(E->getArg(0));
13298  Value *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
13299  return Builder.CreateCall(Callee, {Vec});
13300  }
13301  case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
13302  case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
13303  Value *Vec = EmitScalarExpr(E->getArg(0));
13304  Value *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
13305  return Builder.CreateCall(Callee, {Vec});
13306  }
13307 
13308  default:
13309  return nullptr;
13310  }
13311 }
13312 
13313 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
13314                                                const CallExpr *E) {
13315  SmallVector<llvm::Value *, 4> Ops;
13316  Intrinsic::ID ID = Intrinsic::not_intrinsic;
13317 
13318  auto MakeCircLd = [&](unsigned IntID, bool HasImm) {
13319  // The base pointer is passed by address, so it needs to be loaded.
13320  Address BP = EmitPointerWithAlignment(E->getArg(0));
13321  BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
13322  BP.getAlignment());
13323  llvm::Value *Base = Builder.CreateLoad(BP);
13324  // Operands are Base, Increment, Modifier, Start.
13325  if (HasImm)
13326  Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
13327  EmitScalarExpr(E->getArg(3)) };
13328  else
13329  Ops = { Base, EmitScalarExpr(E->getArg(1)),
13330  EmitScalarExpr(E->getArg(2)) };
13331 
13332  llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
13333  llvm::Value *NewBase = Builder.CreateExtractValue(Result, 1);
13334  llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
13335  NewBase->getType()->getPointerTo());
13336  Address Dest = EmitPointerWithAlignment(E->getArg(0));
13337  // The intrinsic generates two results. The new value for the base pointer
13338  // needs to be stored.
13339  Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
13340  return Builder.CreateExtractValue(Result, 0);
13341  };
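  // Note (describing the pattern above): the circular-load intrinsics return
  // a pair { loaded value, updated base pointer }; extractvalue 0 is the
  // datum handed back to the caller, and extractvalue 1 is written back
  // through the pointer-to-pointer first argument, which is why that argument
  // is both loaded (Base) and re-emitted as the store destination.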
13342 
13343  auto MakeCircSt = [&](unsigned IntID, bool HasImm) {
13344  // The base pointer is passed by address, so it needs to be loaded.
13345  Address BP = EmitPointerWithAlignment(E->getArg(0));
13346  BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
13347  BP.getAlignment());
13348  llvm::Value *Base = Builder.CreateLoad(BP);
13349  // Operands are Base, Increment, Modifier, Value, Start.
13350  if (HasImm)
13351  Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
13352  EmitScalarExpr(E->getArg(3)), EmitScalarExpr(E->getArg(4)) };
13353  else
13354  Ops = { Base, EmitScalarExpr(E->getArg(1)),
13355  EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)) };
13356 
13357  llvm::Value *NewBase = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
13358  llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
13359  NewBase->getType()->getPointerTo());
13360  Address Dest = EmitPointerWithAlignment(E->getArg(0));
13361  // The intrinsic generates one result, which is the new value for the base
13362  // pointer. It needs to be stored.
13363  return Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
13364  };
13365 
13366  // Handle the conversion of bit-reverse load intrinsics to bit code.
13367  // The intrinsic call emitted here only reads from memory; the write to
13368  // memory is handled by the separate store instruction emitted below.
13369  auto MakeBrevLd = [&](unsigned IntID, llvm::Type *DestTy) {
13370  // The intrinsic produces two results: the updated base pointer, which
13371  // the lambda returns, and the loaded value, which is stored through the
13372  // destination argument that the caller passes by address.
13373  llvm::Value *BaseAddress =
13374  Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
13375 
13376  // An argument expression such as &(*pt++) has side effects, so it must
13377  // only be emitted once; EmitPointerWithAlignment and EmitScalarExpr each
13378  // evaluate the expression anew on every call.
13379  Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
13380  DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy),
13381  DestAddr.getAlignment());
13382  llvm::Value *DestAddress = DestAddr.getPointer();
13383 
13384  // Operands are Base, Dest, Modifier.
13385  // The intrinsic format in LLVM IR is defined as
13386  // { ValueType, i8* } (i8*, i32).
13387  Ops = {BaseAddress, EmitScalarExpr(E->getArg(2))};
13388 
13389  llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
13390  // The value needs to be stored as the variable is passed by reference.
13391  llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
13392 
13393  // The stored value needs to be truncated to fit the destination type.
13394  // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
13395  // to be handled with stores of the respective destination type.
13396  DestVal = Builder.CreateTrunc(DestVal, DestTy);
13397 
13398  llvm::Value *DestForStore =
13399  Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo());
13400  Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment());
13401  // The updated value of the base pointer is returned.
13402  return Builder.CreateExtractValue(Result, 1);
13403  };
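  // Note: the bit-reverse loads follow the same pattern with the roles
  // swapped relative to MakeCircLd: extractvalue 0 is the loaded datum, which
  // is truncated and stored through the second argument, while extractvalue 1
  // is the post-incremented base pointer that the builtin returns.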
13404 
13405  switch (BuiltinID) {
13406  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
13407  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: {
13408  Address Dest = EmitPointerWithAlignment(E->getArg(2));
13409  unsigned Size;
13410  if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) {
13411  Size = 512;
13412  ID = Intrinsic::hexagon_V6_vaddcarry;
13413  } else {
13414  Size = 1024;
13415  ID = Intrinsic::hexagon_V6_vaddcarry_128B;
13416  }
13417  Dest = Builder.CreateBitCast(Dest,
13418  llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
13419  LoadInst *QLd = Builder.CreateLoad(Dest);
13420  Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
13421  llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
13422  llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
13423  llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
13424  Vprd->getType()->getPointerTo(0));
13425  Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
13426  return Builder.CreateExtractValue(Result, 0);
13427  }
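  // Note (assumed HVX background): the _128B suffix selects the 128-byte HVX
  // vector mode, hence the 1024-bit (versus 512-bit) i1 vector used for the
  // carry predicate; the predicate is loaded from and stored back to the
  // memory addressed by the third argument, while extractvalue 0 is the
  // vector sum returned to the caller.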
13428  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
13429  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
13430  Address Dest = EmitPointerWithAlignment(E->getArg(2));
13431  unsigned Size;
13432  if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) {
13433  Size = 512;
13434  ID = Intrinsic::hexagon_V6_vsubcarry;
13435  } else {
13436  Size = 1024;
13437  ID = Intrinsic::hexagon_V6_vsubcarry_128B;
13438  }
13439  Dest = Builder.CreateBitCast(Dest,
13440  llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
13441  LoadInst *QLd = Builder.CreateLoad(Dest);
13442  Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
13443  llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
13444  llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
13445  llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
13446  Vprd->getType()->getPointerTo(0));
13447  Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
13448  return Builder.CreateExtractValue(Result, 0);
13449  }
13450  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
13451  return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pci, /*HasImm*/true);
13452  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
13453  return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pci, /*HasImm*/true);
13454  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
13455  return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pci, /*HasImm*/true);
13456  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
13457  return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pci, /*HasImm*/true);
13458  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
13459  return MakeCircLd(Intrinsic::hexagon_L2_loadri_pci, /*HasImm*/true);
13460  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
13461  return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pci, /*HasImm*/true);
13462  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
13463  return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pcr, /*HasImm*/false);
13464  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
13465  return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pcr, /*HasImm*/false);
13466  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
13467  return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pcr, /*HasImm*/false);
13468  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
13469  return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pcr, /*HasImm*/false);
13470  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
13471  return MakeCircLd(Intrinsic::hexagon_L2_loadri_pcr, /*HasImm*/false);
13472  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
13473  return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pcr, /*HasImm*/false);
13474  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
13475  return MakeCircSt(Intrinsic::hexagon_S2_storerb_pci, /*HasImm*/true);
13476  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
13477  return MakeCircSt(Intrinsic::hexagon_S2_storerh_pci, /*HasImm*/true);
13478  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
13479  return MakeCircSt(Intrinsic::hexagon_S2_storerf_pci, /*HasImm*/true);
13480  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
13481  return MakeCircSt(Intrinsic::hexagon_S2_storeri_pci, /*HasImm*/true);
13482  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
13483  return MakeCircSt(Intrinsic::hexagon_S2_storerd_pci, /*HasImm*/true);
13484  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
13485  return MakeCircSt(Intrinsic::hexagon_S2_storerb_pcr, /*HasImm*/false);
13486  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
13487  return MakeCircSt(Intrinsic::hexagon_S2_storerh_pcr, /*HasImm*/false);
13488  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
13489  return MakeCircSt(Intrinsic::hexagon_S2_storerf_pcr, /*HasImm*/false);
13490  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
13491  return MakeCircSt(Intrinsic::hexagon_S2_storeri_pcr, /*HasImm*/false);
13492  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
13493  return MakeCircSt(Intrinsic::hexagon_S2_storerd_pcr, /*HasImm*/false);
13494  case Hexagon::BI__builtin_brev_ldub:
13495  return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
13496  case Hexagon::BI__builtin_brev_ldb:
13497  return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
13498  case Hexagon::BI__builtin_brev_lduh:
13499  return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
13500  case Hexagon::BI__builtin_brev_ldh:
13501  return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
13502  case Hexagon::BI__builtin_brev_ldw:
13503  return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
13504  case Hexagon::BI__builtin_brev_ldd:
13505  return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
13506  default:
13507  break;
13508  } // switch
13509 
13510  return nullptr;
13511 }