Devsh-Graphics-Programming · devshgraphicsprogramming · Jul 8, 2023 · Jul 9, 2023 · Jul 9, 2023 · Jul 9, 2023
diff --git a/3rdparty/jitify b/3rdparty/jitify
diff --git a/examples_tests b/examples_tests
diff --git a/include/nbl/video/CCUDADevice.h b/include/nbl/video/CCUDADevice.h
@@ -6,7 +6,8 @@
 
 
 #include "nbl/video/IPhysicalDevice.h"
-
+#include "nbl/video/CCUDASharedMemory.h"
+#include "nbl/video/CCUDASharedSemaphore.h"
 
 #ifdef _NBL_COMPILE_WITH_CUDA_
 
@@ -23,10 +24,20 @@
 namespace nbl::video
 {
 class CCUDAHandler;
+class CCUDASharedMemory;
+class CCUDASharedSemaphore;
 
 class CCUDADevice : public core::IReferenceCounted
 {
     public:
+#ifdef _WIN32 // handle kinds used when sharing CUDA allocations; both branches declare the same names with the same types
+		static constexpr IDeviceMemoryAllocation::E_EXTERNAL_HANDLE_TYPE EXTERNAL_MEMORY_HANDLE_TYPE = IDeviceMemoryAllocation::EHT_OPAQUE_WIN32;
+		static constexpr CUmemAllocationHandleType ALLOCATION_HANDLE_TYPE = CU_MEM_HANDLE_TYPE_WIN32;
+#else
+		static constexpr IDeviceMemoryAllocation::E_EXTERNAL_HANDLE_TYPE EXTERNAL_MEMORY_HANDLE_TYPE = IDeviceMemoryAllocation::EHT_OPAQUE_FD;
+		static constexpr CUmemAllocationHandleType ALLOCATION_HANDLE_TYPE = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, ALLOCATION_TYPE = ALLOCATION_HANDLE_TYPE; // ALLOCATION_TYPE: alias kept for code written against the older POSIX-only name
+#endif
+
 		enum E_VIRTUAL_ARCHITECTURE
 		{
 			EVA_30,
@@ -72,127 +83,45 @@ class CCUDADevice : public core::IReferenceCounted
 		// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#vulkan-interoperability
 		// Watch out, use Driver API (`cu` functions) NOT the Runtime API (`cuda` functions)
 		// Also maybe separate this out into its own `CCUDA` class instead of nesting it here?
-#if 0
-		template<typename ObjType>
-		struct GraphicsAPIObjLink
-		{
-				GraphicsAPIObjLink() : obj(nullptr), cudaHandle(nullptr), acquired(false)
-				{
-					asImage = {nullptr};
-				}
-				GraphicsAPIObjLink(core::smart_refctd_ptr<ObjType>&& _obj) : GraphicsAPIObjLink()
-				{
-					obj = std::move(_obj);
-				}
-				GraphicsAPIObjLink(GraphicsAPIObjLink&& other) : GraphicsAPIObjLink()
-				{
-					operator=(std::move(other));
-				}
-
-				GraphicsAPIObjLink(const GraphicsAPIObjLink& other) = delete;
-				GraphicsAPIObjLink& operator=(const GraphicsAPIObjLink& other) = delete;
-				GraphicsAPIObjLink& operator=(GraphicsAPIObjLink&& other)
-				{
-					std::swap(obj,other.obj);
-					std::swap(cudaHandle,other.cudaHandle);
-					std::swap(acquired,other.acquired);
-					std::swap(asImage,other.asImage);
-					return *this;
-				}
-
-				~GraphicsAPIObjLink()
-				{
-					assert(!acquired); // you've fucked up, there's no way for us to fix it, you need to release the objects on a proper stream
-					if (obj)
-						CCUDAHandler::cuda.pcuGraphicsUnregisterResource(cudaHandle);
-				}
-
-				//
-				auto* getObject() const {return obj.get();}
-
-			private:
-				core::smart_refctd_ptr<ObjType> obj;
-				CUgraphicsResource cudaHandle;
-				bool acquired;
-
-				friend class CCUDAHandler;
-			public:
-				union
-				{
-					struct
-					{
-						CUdeviceptr pointer;
-					} asBuffer;
-					struct
-					{
-						CUmipmappedArray mipmappedArray;
-						CUarray array;
-					} asImage;
-				};
-		};
 
-		//
-		static CUresult registerBuffer(GraphicsAPIObjLink<video::IGPUBuffer>* link, uint32_t flags = CU_GRAPHICS_REGISTER_FLAGS_NONE);
-		static CUresult registerImage(GraphicsAPIObjLink<video::IGPUImage>* link, uint32_t flags = CU_GRAPHICS_REGISTER_FLAGS_NONE);
+		CUdevice getInternalObject() const { return m_handle; } // raw CUDA driver device handle stored in m_handle
+		const CCUDAHandler* getHandler() const { return m_handler.get();  } // non-owning pointer; the device itself holds the reference in m_handler
+		CUresult importGPUSemaphore(core::smart_refctd_ptr<CCUDASharedSemaphore>* outPtr, ISemaphore* sem); // NOTE(review): presumably writes the new object to *outPtr on success - defined out of line, confirm there
+		CUresult createSharedMemory(core::smart_refctd_ptr<CCUDASharedMemory>* outMem, struct CCUDASharedMemory::SCreationParams&& inParams); // NOTE(review): presumably writes the new object to *outMem on success - defined out of line, confirm there
+		bool isMatchingDevice(const IPhysicalDevice* device) const { return device && !memcmp(device->getProperties().deviceUUID, m_vulkanDevice->getProperties().deviceUUID, 16); } // false for nullptr; otherwise compares the first 16 bytes of both deviceUUIDs. const: only reads state
 
-
-		template<typename ObjType>
-		static CUresult acquireResourcesFromGraphics(void* tmpStorage, GraphicsAPIObjLink<ObjType>* linksBegin, GraphicsAPIObjLink<ObjType>* linksEnd, CUstream stream)
-		{
-			auto count = std::distance(linksBegin,linksEnd);
-
-			auto resources = reinterpret_cast<CUgraphicsResource*>(tmpStorage);
-			auto rit = resources;
-			for (auto iit=linksBegin; iit!=linksEnd; iit++,rit++)
-			{
-				if (iit->acquired)
-					return CUDA_ERROR_UNKNOWN;
-				*rit = iit->cudaHandle;
-			}
-
-			auto retval = cuda.pcuGraphicsMapResources(count,resources,stream);
-			for (auto iit=linksBegin; iit!=linksEnd; iit++)
-				iit->acquired = true;
-			return retval;
-		}
-		template<typename ObjType>
-		static CUresult releaseResourcesToGraphics(void* tmpStorage, GraphicsAPIObjLink<ObjType>* linksBegin, GraphicsAPIObjLink<ObjType>* linksEnd, CUstream stream)
-		{
-			auto count = std::distance(linksBegin,linksEnd);
-
-			auto resources = reinterpret_cast<CUgraphicsResource*>(tmpStorage);
-			auto rit = resources;
-			for (auto iit=linksBegin; iit!=linksEnd; iit++,rit++)
-			{
-				if (!iit->acquired)
-					return CUDA_ERROR_UNKNOWN;
-				*rit = iit->cudaHandle;
-			}
-
-			auto retval = cuda.pcuGraphicsUnmapResources(count,resources,stream);
-			for (auto iit=linksBegin; iit!=linksEnd; iit++)
-				iit->acquired = false;
-			return retval;
-		}
-
-		static CUresult acquireAndGetPointers(GraphicsAPIObjLink<video::IGPUBuffer>* linksBegin, GraphicsAPIObjLink<video::IGPUBuffer>* linksEnd, CUstream stream, size_t* outbufferSizes = nullptr);
-		static CUresult acquireAndGetMipmappedArray(GraphicsAPIObjLink<video::IGPUImage>* linksBegin, GraphicsAPIObjLink<video::IGPUImage>* linksEnd, CUstream stream);
-		static CUresult acquireAndGetArray(GraphicsAPIObjLink<video::IGPUImage>* linksBegin, GraphicsAPIObjLink<video::IGPUImage>* linksEnd, uint32_t* arrayIndices, uint32_t* mipLevels, CUstream stream);
-#endif
+		size_t roundToGranularity(CUmemLocationType location, size_t size) const; // NOTE(review): presumably rounds `size` up to the granularity cached in m_allocationGranularity for `location` - confirm against the definition
 
 	protected:
+		CUresult reserveAdrressAndMapMemory(CUdeviceptr* outPtr, size_t size, size_t alignment, CUmemLocationType location, CUmemGenericAllocationHandle memory); // NOTE(review): "Adrress" is a typo for "Address"; a rename must also update the out-of-line definition and every caller
+
 		friend class CCUDAHandler;
-		CCUDADevice(core::smart_refctd_ptr<CVulkanConnection>&& _vulkanConnection, IPhysicalDevice* const _vulkanDevice, const E_VIRTUAL_ARCHITECTURE _virtualArchitecture);
-		~CCUDADevice() = default;
+		friend class CCUDASharedMemory;
+		friend class CCUDASharedSemaphore;
+
+		struct SCUDACleaner : video::ICleanup
+		{
+			// Reference held for as long as this cleanup object exists.
+			core::smart_refctd_ptr<const core::IReferenceCounted> resource;
+
+			SCUDACleaner(core::smart_refctd_ptr<const core::IReferenceCounted> _resource) : resource(std::move(_resource)) {}
+		};
+
+		CCUDADevice(core::smart_refctd_ptr<CVulkanConnection>&& _vulkanConnection, IPhysicalDevice* const _vulkanDevice, const E_VIRTUAL_ARCHITECTURE _virtualArchitecture, CUdevice _handle, core::smart_refctd_ptr<CCUDAHandler>&& _handler);
+		~CCUDADevice();
 
 		std::vector<const char*> m_defaultCompileOptions;
 		core::smart_refctd_ptr<CVulkanConnection> m_vulkanConnection;
 		IPhysicalDevice* const m_vulkanDevice;
 		E_VIRTUAL_ARCHITECTURE m_virtualArchitecture;
+		core::smart_refctd_ptr<CCUDAHandler> m_handler; // owning reference to the handler; exposed read-only through getHandler()
+		CUdevice m_handle; // returned by getInternalObject()
+		CUcontext m_context; // NOTE(review): creation/destruction is not visible in this header - presumably managed by the constructor and ~CCUDADevice()
+		size_t m_allocationGranularity[4]; // NOTE(review): presumably one entry per CUmemLocationType value, queried via cuMemGetAllocationGranularity - confirm the index mapping and the bound of 4
 };
 
 }
 
 #endif // _NBL_COMPILE_WITH_CUDA_
 
-#endif
+#endif
diff --git a/include/nbl/video/CCUDAHandler.h b/include/nbl/video/CCUDAHandler.h
@@ -34,7 +34,7 @@ class CCUDAHandler : public core::IReferenceCounted
 		static T* cast_CUDA_ptr(CUdeviceptr ptr) { return reinterpret_cast<T*>(ptr); }
 
 		//
-		core::smart_refctd_ptr<CCUDAHandler> create(system::ISystem* system, core::smart_refctd_ptr<system::ILogger>&& _logger);
+		static core::smart_refctd_ptr<CCUDAHandler> create(system::ISystem* system, core::smart_refctd_ptr<system::ILogger>&& _logger);
 
 		//
 		using LibLoader = system::DefaultFuncPtrLoader;
@@ -119,6 +119,23 @@ class CCUDAHandler : public core::IReferenceCounted
 			,cuSurfObjectDestroy
 			,cuTexObjectCreate
 			,cuTexObjectDestroy
+			,cuImportExternalMemory
+			,cuDestroyExternalMemory
+			,cuExternalMemoryGetMappedBuffer
+			,cuMemUnmap
+			,cuMemAddressFree
+			,cuMemGetAllocationGranularity
+			,cuMemAddressReserve
+			,cuMemCreate
+			,cuMemExportToShareableHandle
+			,cuMemMap
+			,cuMemRelease
+			,cuMemSetAccess
+			,cuMemImportFromShareableHandle
+			,cuLaunchHostFunc
+			,cuDestroyExternalSemaphore
+			,cuImportExternalSemaphore
+			,cuSignalExternalSemaphoresAsync
 		);
 		const CUDA& getCUDAFunctionTable() const {return m_cuda;}
 
@@ -157,9 +174,9 @@ class CCUDAHandler : public core::IReferenceCounted
 			const auto filesize = file->getSize();
 			std::string source(filesize+1u,'0');
 
-			system::future<size_t> bytesRead;
+			system::IFile::success_t bytesRead;
 			file->read(bytesRead,source.data(),0u,file->getSize());
-			source.resize(bytesRead.get());
+			source.resize(bytesRead.getBytesProcessed());
 
 			return createProgram(prog,std::move(source),file->getFileName().string().c_str(),headerCount,headerContents,includeNames);
 		}
@@ -226,8 +243,7 @@ class CCUDAHandler : public core::IReferenceCounted
 		}
 
 		core::smart_refctd_ptr<CCUDADevice> createDevice(core::smart_refctd_ptr<CVulkanConnection>&& vulkanConnection, IPhysicalDevice* physicalDevice);
-
-	protected:
+protected:
 		CCUDAHandler(CUDA&& _cuda, NVRTC&& _nvrtc, core::vector<core::smart_refctd_ptr<system::IFile>>&& _headers, core::smart_refctd_ptr<system::ILogger>&& _logger, int _version)
 			: m_cuda(std::move(_cuda)), m_nvrtc(std::move(_nvrtc)), m_headers(std::move(_headers)), m_logger(std::move(_logger)), m_version(_version)
 		{
@@ -239,7 +255,8 @@ class CCUDAHandler : public core::IReferenceCounted
 			}
 		}
 		~CCUDAHandler() = default;
-
+
+
 		//
 		inline ptx_and_nvrtcResult_t compileDirectlyToPTX_impl(nvrtcResult result, nvrtcProgram program, core::SRange<const char* const> nvrtcOptions, std::string* log)
 		{
@@ -272,4 +289,4 @@ class CCUDAHandler : public core::IReferenceCounted
 
 #endif // _NBL_COMPILE_WITH_CUDA_
 
-#endif
+#endif
diff --git a/include/nbl/video/CCUDASharedMemory.h b/include/nbl/video/CCUDASharedMemory.h
@@ -0,0 +1,74 @@
+// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _NBL_VIDEO_C_CUDA_SHARED_MEMORY_H_
+#define _NBL_VIDEO_C_CUDA_SHARED_MEMORY_H_
+
+
+#ifdef _NBL_COMPILE_WITH_CUDA_
+
+#include "cuda.h"
+#include "nvrtc.h"
+#if CUDA_VERSION < 9000
+	#error "Need CUDA 9.0 SDK or higher."
+#endif
+
+// useful includes in the future
+//#include "cudaEGL.h"
+//#include "cudaVDPAU.h"
+
+namespace nbl::video
+{
+class CCUDADevice; // forward declaration: this header can be included before CCUDADevice is declared, and a `friend class` line alone does not make the name usable below
+class CCUDAMemoryMapping: public core::IReferenceCounted
+{
+};
+
+// Reference-counted holder of a CCUDADevice reference plus the cached creation parameters (size, alignment, location, device pointer, OS handle/fd).
+class CCUDASharedMemory : public core::IReferenceCounted
+{
+public:
+    friend class CCUDADevice;
+    CUdeviceptr getDeviceptr() const { return m_params.ptr;  }
+
+    struct SCreationParams
+    {
+        size_t            size;
+        uint32_t          alignment;
+        CUmemLocationType location;
+    };
+    // The caller-supplied parameters extended with the values stored next to them in m_params.
+    struct SCachedCreationParams : SCreationParams
+    {
+        size_t granularSize;
+        CUdeviceptr ptr;
+        union // NOTE(review): presumably `osHandle` is the active member on Windows and `fd` elsewhere - confirm against the implementation
+        {
+            void* osHandle;
+            int fd;
+        };
+    };
+
+    const SCreationParams& getCreationParams() const { return m_params; } // base-class view of m_params; the cached fields are not reachable through it
+
+    core::smart_refctd_ptr<IDeviceMemoryAllocation> exportAsMemory(ILogicalDevice* device, IDeviceMemoryBacked* dedication = nullptr) const;
+
+    core::smart_refctd_ptr<IGPUImage>  exportAsImage(ILogicalDevice* device, asset::IImage::SCreationParams&& params) const;
+
+protected:
+    // Constructible only by CCUDADevice (friend) and derived classes; both arguments are moved into the members.
+    CCUDASharedMemory(core::smart_refctd_ptr<CCUDADevice> device, SCachedCreationParams&& params)
+        : m_device(std::move(device))
+        , m_params(std::move(params))
+    {}
+    ~CCUDASharedMemory() override;
+
+    core::smart_refctd_ptr<CCUDADevice> m_device;
+    SCachedCreationParams m_params;
+};
+
+}
+
+#endif // _NBL_COMPILE_WITH_CUDA_
+
+#endif
diff --git a/include/nbl/video/CCUDASharedSemaphore.h b/include/nbl/video/CCUDASharedSemaphore.h
@@ -0,0 +1,49 @@
+// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _NBL_VIDEO_C_CUDA_SHARED_SEMAPHORE_H_
+#define _NBL_VIDEO_C_CUDA_SHARED_SEMAPHORE_H_
+
+#ifdef _NBL_COMPILE_WITH_CUDA_
+
+#include "cuda.h"
+#include "nvrtc.h"
+#if CUDA_VERSION < 9000
+	#error "Need CUDA 9.0 SDK or higher."
+#endif
+
+// useful includes in the future
+//#include "cudaEGL.h"
+//#include "cudaVDPAU.h"
+
+namespace nbl::video
+{
+class CCUDADevice; // forward declaration: this header can be included before CCUDADevice is declared, and a `friend class` line alone does not make the name usable below
+// Reference-counted wrapper that stores a CUexternalSemaphore together with references to the ISemaphore and CCUDADevice passed at construction.
+class CCUDASharedSemaphore : public core::IReferenceCounted
+{
+public:
+    friend class CCUDADevice;
+    CUexternalSemaphore getInternalObject() const { return m_handle; }
+
+protected:
+    // Constructible only by CCUDADevice (friend) and derived classes; `device` and `src` are moved into the members.
+    CCUDASharedSemaphore(core::smart_refctd_ptr<CCUDADevice> device, core::smart_refctd_ptr<ISemaphore> src, CUexternalSemaphore semaphore, void* osHandle)
+        : m_device(std::move(device))
+        , m_src(std::move(src)) // must name the `src` parameter; the member m_src is not yet initialized at this point
+        , m_handle(semaphore)
+        , m_osHandle(osHandle)
+    {}
+    ~CCUDASharedSemaphore() override;
+
+    core::smart_refctd_ptr<CCUDADevice> m_device;
+    core::smart_refctd_ptr<ISemaphore> m_src;
+    CUexternalSemaphore m_handle;
+    void* m_osHandle;
+};
+
+}
+
+#endif // _NBL_COMPILE_WITH_CUDA_
+
+#endif