Commit: 12834fe0f00daf766e515102a3ff071423b66abe Author: Sergey Sharybin Date: Tue Aug 8 13:58:07 2017 +0200 Branches: master https://developer.blender.org/rB12834fe0f00daf766e515102a3ff071423b66abe
Update CUDA wrangler to latest version Brings new declarations from toolkit version 8.0, also fixes some pointers used in function declarations. =================================================================== M extern/cuew/README.blender M extern/cuew/include/cuew.h M extern/cuew/src/cuew.c =================================================================== diff --git a/extern/cuew/README.blender b/extern/cuew/README.blender index 7b77935d750..ef36c110e3f 100644 --- a/extern/cuew/README.blender +++ b/extern/cuew/README.blender @@ -1,5 +1,5 @@ Project: Cuda Wrangler URL: https://github.com/CudaWrangler/cuew License: Apache 2.0 -Upstream version: 63d2a0f +Upstream version: 3dd0b01 Local modifications: None diff --git a/extern/cuew/include/cuew.h b/extern/cuew/include/cuew.h index 4cce29d38ab..c90ab39601a 100644 --- a/extern/cuew/include/cuew.h +++ b/extern/cuew/include/cuew.h @@ -27,7 +27,7 @@ extern "C" { #define CUEW_VERSION_MAJOR 1 #define CUEW_VERSION_MINOR 2 -#define CUDA_VERSION 7050 +#define CUDA_VERSION 8000 #define CU_IPC_HANDLE_SIZE 64 #define CU_STREAM_LEGACY ((CUstream)0x1) #define CU_STREAM_PER_THREAD ((CUstream)0x2) @@ -51,6 +51,8 @@ extern "C" { #define CU_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01) #define CU_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02) #define CU_PARAM_TR_DEFAULT -1 +#define CU_DEVICE_CPU ((CUdevice)-1) +#define CU_DEVICE_INVALID ((CUdevice)-2) /* Functions which changed 3.1 -> 3.2 for 64 bit stuff, * the cuda library has both the old ones for compatibility and new @@ -120,6 +122,45 @@ typedef unsigned long long CUdeviceptr; typedef unsigned int CUdeviceptr; #endif + +#ifdef _WIN32 +# define CUDAAPI __stdcall +# define CUDA_CB __stdcall +#else +# define CUDAAPI +# define CUDA_CB +#endif + +typedef signed char int8_t; +typedef short int int16_t; +typedef int int32_t; +typedef long int int64_t; +typedef unsigned char uint8_t; +typedef unsigned short int uint16_t; +typedef unsigned int uint32_t; +typedef unsigned long int uint64_t; +typedef signed char int_least8_t; +typedef short int int_least16_t; +typedef int int_least32_t; +typedef long int int_least64_t; +typedef unsigned char uint_least8_t; +typedef unsigned short int uint_least16_t; +typedef unsigned int uint_least32_t; +typedef unsigned long int uint_least64_t; +typedef signed char int_fast8_t; +typedef long int int_fast16_t; +typedef long int int_fast32_t; +typedef long int int_fast64_t; +typedef unsigned char uint_fast8_t; +typedef unsigned long int uint_fast16_t; +typedef unsigned long int uint_fast32_t; +typedef unsigned long int uint_fast64_t; +typedef long int intptr_t; +typedef unsigned long int uintptr_t; +typedef long int intmax_t; +typedef unsigned long int uintmax_t; +typedef uint32_t cuuint32_t; +typedef uint64_t cuuint64_t; typedef int CUdevice; typedef struct CUctx_st* CUcontext; typedef struct CUmod_st* CUmodule; @@ -180,6 +221,53 @@ typedef enum CUevent_flags_enum { CU_EVENT_INTERPROCESS = 0x4, } CUevent_flags; +typedef enum CUstreamWaitValue_flags_enum { + CU_STREAM_WAIT_VALUE_GEQ = 0x0, + CU_STREAM_WAIT_VALUE_EQ = 0x1, + CU_STREAM_WAIT_VALUE_AND = 0x2, + CU_STREAM_WAIT_VALUE_FLUSH = (1 << 30), +} CUstreamWaitValue_flags; + +typedef enum CUstreamWriteValue_flags_enum { + CU_STREAM_WRITE_VALUE_DEFAULT = 0x0, + CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER = 0x1, +} CUstreamWriteValue_flags; + +typedef enum CUstreamBatchMemOpType_enum { + CU_STREAM_MEM_OP_WAIT_VALUE_32 = 1, + CU_STREAM_MEM_OP_WRITE_VALUE_32 = 2, + CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 3, +} CUstreamBatchMemOpType; + +typedef union CUstreamBatchMemOpParams_union { + CUstreamBatchMemOpType operation; + struct CUstreamMemOpWaitValueParams_st { + CUstreamBatchMemOpType operation; + CUdeviceptr address; + union { + cuuint32_t value; + cuuint64_t pad; + }; + unsigned int flags; + CUdeviceptr alias; + } waitValue; + struct CUstreamMemOpWriteValueParams_st { + CUstreamBatchMemOpType operation; + CUdeviceptr address; + union { + cuuint32_t value; + cuuint64_t pad; + }; + unsigned int flags; + CUdeviceptr alias; + } writeValue; + struct CUstreamMemOpFlushRemoteWritesParams_st { + CUstreamBatchMemOpType operation; + unsigned int flags; + } flushRemoteWrites; + cuuint64_t pad[6]; +} CUstreamBatchMemOpParams; + typedef enum CUoccupancy_flags_enum { CU_OCCUPANCY_DEFAULT = 0x0, CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1, @@ -299,6 +387,12 @@ typedef enum CUdevice_attribute_enum { CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83, CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84, CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85, + CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86, + CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87, + CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88, + CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89, + CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90, + CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91, CU_DEVICE_ATTRIBUTE_MAX, } CUdevice_attribute; @@ -360,11 +454,26 @@ typedef enum CUmemorytype_enum { typedef enum CUcomputemode_enum { CU_COMPUTEMODE_DEFAULT = 0, - CU_COMPUTEMODE_EXCLUSIVE = 1, CU_COMPUTEMODE_PROHIBITED = 2, CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3, } CUcomputemode; +typedef enum CUmem_advise_enum { + CU_MEM_ADVISE_SET_READ_MOSTLY = 1, + CU_MEM_ADVISE_UNSET_READ_MOSTLY = 2, + CU_MEM_ADVISE_SET_PREFERRED_LOCATION = 3, + CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION = 4, + CU_MEM_ADVISE_SET_ACCESSED_BY = 5, + CU_MEM_ADVISE_UNSET_ACCESSED_BY = 6, +} CUmem_advise; + +typedef enum CUmem_range_attribute_enum { + CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY = 1, + CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION = 2, + CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY = 3, + CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION = 4, +} CUmem_range_attribute; + typedef enum CUjit_option_enum { CU_JIT_MAX_REGISTERS = 0, CU_JIT_THREADS_PER_BLOCK, @@ -381,6 +490,8 @@ typedef enum CUjit_option_enum { CU_JIT_LOG_VERBOSE, CU_JIT_GENERATE_LINE_INFO, CU_JIT_CACHE_MODE, + CU_JIT_NEW_SM3X_OPT, + CU_JIT_FAST_COMPILE, CU_JIT_NUM_OPTIONS, } CUjit_option; @@ -397,6 +508,10 @@ typedef enum CUjit_target_enum { CU_TARGET_COMPUTE_37 = 37, CU_TARGET_COMPUTE_50 = 50, CU_TARGET_COMPUTE_52 = 52, + CU_TARGET_COMPUTE_53 = 53, + CU_TARGET_COMPUTE_60 = 60, + CU_TARGET_COMPUTE_61 = 61, + CU_TARGET_COMPUTE_62 = 62, } CUjit_target; typedef enum CUjit_fallback_enum { @@ -490,6 +605,7 @@ typedef enum cudaError_enum { CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217, CUDA_ERROR_INVALID_PTX = 218, CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219, + CUDA_ERROR_NVLINK_UNCORRECTABLE = 220, CUDA_ERROR_INVALID_SOURCE = 300, CUDA_ERROR_FILE_NOT_FOUND = 301, CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, @@ -521,8 +637,14 @@ typedef enum cudaError_enum { CUDA_ERROR_UNKNOWN = 999, } CUresult; -typedef void* CUstreamCallback; -typedef size_t* CUoccupancyB2DSize; +typedef enum CUdevice_P2PAttribute_enum { + CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 0x01, + CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 0x02, + CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 0x03, +} CUdevice_P2PAttribute; + +typedef void (CUDA_CB *CUstreamCallback)(CUstream hStream, CUresult status, void* userData); +typedef size_t (CUDA_CB *CUoccupancyB2DSize)(int blockSize); typedef struct CUDA_MEMCPY2D_st { size_t srcXInBytes; @@ -654,7 +776,8 @@ typedef struct CUDA_TEXTURE_DESC_st { float mipmapLevelBias; float minMipmapLevelClamp; float maxMipmapLevelClamp; - int reserved[16]; + float borderColor[4]; + int reserved[12]; } CUDA_TEXTURE_DESC; typedef enum CUresourceViewFormat_enum { @@ -736,21 +859,16 @@ typedef enum { NVRTC_ERROR_INVALID_OPTION = 5, NVRTC_ERROR_COMPILATION = 6, NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7, + NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8, + NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9, + NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10, + NVRTC_ERROR_INTERNAL_ERROR = 11, } nvrtcResult; typedef struct _nvrtcProgram* nvrtcProgram; - -#ifdef _WIN32 -# define CUDAAPI __stdcall -# define CUDA_CB __stdcall -#else -# define CUDAAPI -# define CUDA_CB -#endif - /* Function types. */ -typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char* pStr); -typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char* pStr); +typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pStr); +typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pStr); typedef CUresult CUDAAPI tcuInit(unsigned int Flags); typedef CUresult CUDAAPI tcuDriverGetVersion(int* driverVersion); typedef CUresult CUDAAPI tcuDeviceGet(CUdevice* device, int ordinal); @@ -786,26 +904,26 @@ typedef CUresult CUDAAPI tcuCtxAttach(CUcontext* pctx, unsigned int flags); typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx); typedef CUresult CUDAAPI tcuModuleLoad(CUmodule* module, const char* fname); typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule* module, const void* image); -typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule* module, const void* image, unsigned int numOptions, CUjit_option* options, void* optionValues); +typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule* module, const void* image, unsigned int numOptions, CUjit_option* options, void** optionValues); typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule* module, const void* fatCubin); typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod); typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name); typedef CUresult CUDAAPI tcuModuleGetGlobal_v2(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name); typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name); typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name); -typedef CUresult CUDAAPI tcuLinkCreate_v2(unsigned int numOptions, CUjit_option* options, void* optionValues, CUlinkState* stateOut); -typedef CUresult CUDAAPI tcuLinkAddData_v2(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned int numOptions, CUjit_option* options, void* optionValues); -typedef CUresult CUDAAPI tcuLinkAddFile_v2(CUlinkState state, CUjitInputType type, const char* path, unsigned int numOptions, CUjit_option* options, void* optionValues); -typedef CUresult CUDAAPI tcuLinkComplete(CUlinkState state, void* cubinOut, size_t* sizeOut @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs