40 #ifndef __CLANG_CUDA_RUNTIME_WRAPPER_H__ 41 #define __CLANG_CUDA_RUNTIME_WRAPPER_H__ 43 #if defined(__CUDA__) && defined(__clang__) 56 #pragma push_macro("__THROW") 57 #pragma push_macro("__CUDA_ARCH__") 63 #if !defined(CUDA_VERSION) 64 #error "cuda.h did not define CUDA_VERSION" 65 #elif CUDA_VERSION < 7000 || CUDA_VERSION > 10000 66 #error "Unsupported CUDA version!" 69 #pragma push_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__") 70 #if CUDA_VERSION >= 10000 71 #define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ 77 #define __CUDA_ARCH__ 350 84 #define __DEVICE_LAUNCH_PARAMETERS_H__ 89 #define __DEVICE_FUNCTIONS_H__ 90 #define __MATH_FUNCTIONS_H__ 91 #define __COMMON_FUNCTIONS_H__ 94 #define __DEVICE_FUNCTIONS_DECLS_H__ 97 #if CUDA_VERSION < 9000 100 #define __CUDA_LIBDEVICE__ 104 #include "driver_types.h" 105 #include "host_config.h" 106 #include "host_defines.h" 111 #pragma push_macro("nv_weak") 114 #undef __CUDA_LIBDEVICE__ 116 #include "cuda_runtime.h" 118 #pragma pop_macro("nv_weak") 124 #define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n) 125 #define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n) 127 #if CUDA_VERSION < 9000 128 #include "crt/device_runtime.h" 130 #include "crt/host_runtime.h" 134 #undef __cxa_vec_ctor 135 #undef __cxa_vec_cctor 136 #undef __cxa_vec_dtor 138 #undef __cxa_vec_new2 139 #undef __cxa_vec_new3 140 #undef __cxa_vec_delete2 141 #undef __cxa_vec_delete 142 #undef __cxa_vec_delete3 143 #undef __cxa_pure_virtual 161 #if CUDA_VERSION >= 9000 176 #if defined(CU_DEVICE_INVALID) 177 #if !defined(__USE_FAST_MATH__) 178 #define __USE_FAST_MATH__ 0 181 #if !defined(__CUDA_PREC_DIV) 182 #define __CUDA_PREC_DIV 0 188 #pragma push_macro("__host__") 189 #define __host__ UNEXPECTED_HOST_ATTRIBUTE 195 #pragma push_macro("__forceinline__") 196 #define __forceinline__ __device__ __inline__ __attribute__((always_inline)) 197 #if CUDA_VERSION < 9000 198 #include "device_functions.hpp" 207 #pragma push_macro("__USE_FAST_MATH__") 208 #if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__) 209 #define __USE_FAST_MATH__ 1 212 #if CUDA_VERSION >= 9000 215 #if CUDA_VERSION >= 9020 218 #include "crt/math_functions.hpp" 220 #include "math_functions.hpp" 223 #pragma pop_macro("__USE_FAST_MATH__") 225 #if CUDA_VERSION < 9000 226 #include "math_functions_dbl_ptx3.hpp" 228 #pragma pop_macro("__forceinline__") 232 #undef __MATH_FUNCTIONS_HPP__ 234 #if CUDA_VERSION < 9000 235 #include "math_functions.hpp" 252 #if CUDA_VERSION < 9000 266 #if CUDA_VERSION >= 9000 269 #include "device_atomic_functions.h" 271 #undef __DEVICE_FUNCTIONS_HPP__ 272 #include "device_atomic_functions.hpp" 273 #if CUDA_VERSION >= 9000 274 #include "crt/device_functions.hpp" 275 #include "crt/device_double_functions.hpp" 277 #include "device_functions.hpp" 279 #include "device_double_functions.h" 282 #include "sm_20_atomic_functions.hpp" 283 #include "sm_20_intrinsics.hpp" 284 #include "sm_32_atomic_functions.hpp" 294 #if CUDA_VERSION >= 8000 295 #pragma push_macro("__CUDA_ARCH__") 297 #include "sm_60_atomic_functions.hpp" 298 #include "sm_61_intrinsics.hpp" 299 #pragma pop_macro("__CUDA_ARCH__") 302 #undef __MATH_FUNCTIONS_HPP__ 308 #pragma push_macro("signbit") 309 #pragma push_macro("__GNUC__") 311 #define signbit __ignored_cuda_signbit 316 #pragma push_macro("_GLIBCXX_MATH_H") 317 #pragma push_macro("_LIBCPP_VERSION") 318 #if CUDA_VERSION >= 9000 319 #undef _GLIBCXX_MATH_H 321 #ifdef _LIBCPP_VERSION 322 #define _LIBCPP_VERSION 3700 326 #if CUDA_VERSION >= 9000 327 #include "crt/math_functions.hpp" 329 #include "math_functions.hpp" 331 #pragma pop_macro("_GLIBCXX_MATH_H") 332 #pragma pop_macro("_LIBCPP_VERSION") 333 #pragma pop_macro("__GNUC__") 334 #pragma pop_macro("signbit") 336 #pragma pop_macro("__host__") 338 #include "texture_indirect_functions.h" 341 #pragma pop_macro("__CUDA_ARCH__") 342 #pragma pop_macro("__THROW") 354 __device__
int vprintf(
const char *,
const char *);
355 __device__
void free(
void *) __attribute((nothrow));
356 __device__
void *malloc(
size_t) __attribute((nothrow))
__attribute__((malloc));
357 __device__
void __assertfail(
const char *__message,
const char *__file,
358 unsigned __line,
const char *__function,
363 __device__
static inline void __assert_fail(
const char *__message,
364 const char *__file,
unsigned __line,
365 const char *__function) {
366 __assertfail(__message, __file, __line, __function,
sizeof(
char));
371 __device__
int printf(
const char *, ...);
376 __device__
static inline void free(
void *__ptr) { ::free(__ptr); }
377 __device__
static inline void *malloc(
size_t __size) {
378 return ::malloc(__size);
385 __device__
inline __cuda_builtin_threadIdx_t::operator uint3()
const {
393 __device__
inline __cuda_builtin_blockIdx_t::operator uint3()
const {
401 __device__
inline __cuda_builtin_blockDim_t::operator dim3()
const {
402 return dim3(x, y, z);
405 __device__
inline __cuda_builtin_gridDim_t::operator dim3()
const {
406 return dim3(x, y, z);
419 #pragma push_macro("dim3") 420 #pragma push_macro("uint3") 421 #define dim3 __cuda_builtin_blockDim_t 422 #define uint3 __cuda_builtin_threadIdx_t 423 #include "curand_mtgp32_kernel.h" 424 #pragma pop_macro("dim3") 425 #pragma pop_macro("uint3") 426 #pragma pop_macro("__USE_FAST_MATH__") 427 #pragma pop_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__") 430 #endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__ __DEVICE__ bool signbit(float __x)
Test for sign bit.
vector signed char unaligned_vec_schar __attribute__((aligned(1)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
__DEVICE__ float normcdff(float __a)
__DEVICE__ void sincospif(float __a, float *__sptr, float *__cptr)
__DEVICE__ double cospi(double __a)
__DEVICE__ double sinpi(double __a)
__DEVICE__ double normcdfinv(double __a)
__DEVICE__ float rsqrtf(float __a)
__DEVICE__ double rsqrt(double __a)
static __inline__ void int __a
__DEVICE__ float cospif(float __a)
__DEVICE__ float erfcinvf(float __a)
__DEVICE__ float rcbrtf(float __a)
__DEVICE__ void sincospi(double __a, double *__sptr, double *__cptr)
__DEVICE__ double erfcinv(double __a)
static __inline__ vector float vector float __b
__DEVICE__ void __brkpt()
__DEVICE__ int __signbitd(double __a)
int printf(__constant const char *st,...) __attribute__((format(printf
__DEVICE__ double erfcx(double __a)
__DEVICE__ float sinpif(float __a)
__DEVICE__ float normcdfinvf(float __a)
__DEVICE__ double normcdf(double __a)
__DEVICE__ double rcbrt(double __a)
__DEVICE__ float erfcxf(float __a)
static __inline__ vector float vector float vector float __c