diff --git a/3rdparty/jitify b/3rdparty/jitify
index 0d6dbd8ccd..1a0ca0e837 160000
--- a/3rdparty/jitify
+++ b/3rdparty/jitify
@@ -1 +1 @@
-Subproject commit 0d6dbd8ccd07e6bfc811d363a54912dfc6d4799a
+Subproject commit 1a0ca0e837405506f3b8f7883bacb71c20d86d96
diff --git a/examples_tests b/examples_tests
index 9e980e729b..73f147941e 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 9e980e729bb6e2813d0b2a60e20c182b837d4fce
+Subproject commit 73f147941ef5362d0adee47ae72b4088b8c49aa5
diff --git a/include/nbl/asset/IBuffer.h b/include/nbl/asset/IBuffer.h
index e11d8faf7d..d50a415e69 100644
--- a/include/nbl/asset/IBuffer.h
+++ b/include/nbl/asset/IBuffer.h
@@ -42,6 +42,8 @@ class IBuffer : public core::IBuffer, public IDescriptor
 		//! synthetic Nabla inventions
 		// whether `IGPUCommandBuffer::updateBuffer` can be used on this buffer
 		EUF_INLINE_UPDATE_VIA_CMDBUF = 0x80000000u,
+
+		EUF_SYNTHEHIC_FLAGS_MASK = EUF_INLINE_UPDATE_VIA_CMDBUF | 0 /* fill out as needed if any more synthetic flags are added */
 	};
 
 	//!
diff --git a/include/nbl/video/CCUDADevice.h b/include/nbl/video/CCUDADevice.h
index 1120224fdb..551c2a7e5b 100644
--- a/include/nbl/video/CCUDADevice.h
+++ b/include/nbl/video/CCUDADevice.h
@@ -6,7 +6,8 @@
 
 #include "nbl/video/IPhysicalDevice.h"
 
-
+#include "nbl/video/CCUDASharedMemory.h"
+#include "nbl/video/CCUDASharedSemaphore.h"
 
 #ifdef _NBL_COMPILE_WITH_CUDA_
 
@@ -23,10 +24,27 @@ namespace nbl::video
 {
 
 class CCUDAHandler;
+class CCUDASharedMemory;
+class CCUDASharedSemaphore;
 
 class CCUDADevice : public core::IReferenceCounted
 {
 	public:
+#ifdef _WIN32
+		static constexpr IDeviceMemoryAllocation::E_EXTERNAL_HANDLE_TYPE EXTERNAL_MEMORY_HANDLE_TYPE = IDeviceMemoryAllocation::EHT_OPAQUE_WIN32;
+		static constexpr CUmemAllocationHandleType ALLOCATION_HANDLE_TYPE = CU_MEM_HANDLE_TYPE_WIN32;
+#else
+		static constexpr IDeviceMemoryBacked::E_EXTERNAL_HANDLE_TYPE EXTERNAL_MEMORY_HANDLE_TYPE = IDeviceMemoryBacked::EHT_OPAQUE_FD;
+		static constexpr CUmemAllocationHandleType ALLOCATION_HANDLE_TYPE = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR;
+#endif
+		struct SCUDACleaner : video::ICleanup
+		{
+			core::smart_refctd_ptr resource;
+			SCUDACleaner(core::smart_refctd_ptr resource)
+				: resource(std::move(resource))
+			{ }
+		};
+
 		enum E_VIRTUAL_ARCHITECTURE
 		{
 			EVA_30,
@@ -72,127 +90,37 @@ class CCUDADevice : public core::IReferenceCounted
 		// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#vulkan-interoperability
 		// Watch out, use Driver API (`cu` functions) NOT the Runtime API (`cuda` functions)
 		// Also maybe separate this out into its own `CCUDA` class instead of nesting it here?
-#if 0 - template - struct GraphicsAPIObjLink - { - GraphicsAPIObjLink() : obj(nullptr), cudaHandle(nullptr), acquired(false) - { - asImage = {nullptr}; - } - GraphicsAPIObjLink(core::smart_refctd_ptr&& _obj) : GraphicsAPIObjLink() - { - obj = std::move(_obj); - } - GraphicsAPIObjLink(GraphicsAPIObjLink&& other) : GraphicsAPIObjLink() - { - operator=(std::move(other)); - } - - GraphicsAPIObjLink(const GraphicsAPIObjLink& other) = delete; - GraphicsAPIObjLink& operator=(const GraphicsAPIObjLink& other) = delete; - GraphicsAPIObjLink& operator=(GraphicsAPIObjLink&& other) - { - std::swap(obj,other.obj); - std::swap(cudaHandle,other.cudaHandle); - std::swap(acquired,other.acquired); - std::swap(asImage,other.asImage); - return *this; - } - - ~GraphicsAPIObjLink() - { - assert(!acquired); // you've fucked up, there's no way for us to fix it, you need to release the objects on a proper stream - if (obj) - CCUDAHandler::cuda.pcuGraphicsUnregisterResource(cudaHandle); - } - - // - auto* getObject() const {return obj.get();} - - private: - core::smart_refctd_ptr obj; - CUgraphicsResource cudaHandle; - bool acquired; - - friend class CCUDAHandler; - public: - union - { - struct - { - CUdeviceptr pointer; - } asBuffer; - struct - { - CUmipmappedArray mipmappedArray; - CUarray array; - } asImage; - }; - }; - // - static CUresult registerBuffer(GraphicsAPIObjLink* link, uint32_t flags = CU_GRAPHICS_REGISTER_FLAGS_NONE); - static CUresult registerImage(GraphicsAPIObjLink* link, uint32_t flags = CU_GRAPHICS_REGISTER_FLAGS_NONE); + CUdevice getInternalObject() const { return m_handle; } + const CCUDAHandler* getHandler() const { return m_handler.get(); } + CUresult importGPUSemaphore(core::smart_refctd_ptr* outPtr, ISemaphore* sem); + CUresult createSharedMemory(core::smart_refctd_ptr* outMem, struct CCUDASharedMemory::SCreationParams&& inParams); + bool isMatchingDevice(const IPhysicalDevice* device) { return device && !memcmp(device->getProperties().deviceUUID, m_vulkanDevice->getProperties().deviceUUID, 16); } + size_t roundToGranularity(CUmemLocationType location, size_t size) const; - template - static CUresult acquireResourcesFromGraphics(void* tmpStorage, GraphicsAPIObjLink* linksBegin, GraphicsAPIObjLink* linksEnd, CUstream stream) - { - auto count = std::distance(linksBegin,linksEnd); - - auto resources = reinterpret_cast(tmpStorage); - auto rit = resources; - for (auto iit=linksBegin; iit!=linksEnd; iit++,rit++) - { - if (iit->acquired) - return CUDA_ERROR_UNKNOWN; - *rit = iit->cudaHandle; - } - - auto retval = cuda.pcuGraphicsMapResources(count,resources,stream); - for (auto iit=linksBegin; iit!=linksEnd; iit++) - iit->acquired = true; - return retval; - } - template - static CUresult releaseResourcesToGraphics(void* tmpStorage, GraphicsAPIObjLink* linksBegin, GraphicsAPIObjLink* linksEnd, CUstream stream) - { - auto count = std::distance(linksBegin,linksEnd); - - auto resources = reinterpret_cast(tmpStorage); - auto rit = resources; - for (auto iit=linksBegin; iit!=linksEnd; iit++,rit++) - { - if (!iit->acquired) - return CUDA_ERROR_UNKNOWN; - *rit = iit->cudaHandle; - } - - auto retval = cuda.pcuGraphicsUnmapResources(count,resources,stream); - for (auto iit=linksBegin; iit!=linksEnd; iit++) - iit->acquired = false; - return retval; - } + protected: + CUresult reserveAdrressAndMapMemory(CUdeviceptr* outPtr, size_t size, size_t alignment, CUmemLocationType location, CUmemGenericAllocationHandle memory); - static CUresult acquireAndGetPointers(GraphicsAPIObjLink* linksBegin, 
GraphicsAPIObjLink* linksEnd, CUstream stream, size_t* outbufferSizes = nullptr); - static CUresult acquireAndGetMipmappedArray(GraphicsAPIObjLink* linksBegin, GraphicsAPIObjLink* linksEnd, CUstream stream); - static CUresult acquireAndGetArray(GraphicsAPIObjLink* linksBegin, GraphicsAPIObjLink* linksEnd, uint32_t* arrayIndices, uint32_t* mipLevels, CUstream stream); -#endif - protected: + // CUDAHandler creates CUDADevice, it needs to access ctor friend class CCUDAHandler; - CCUDADevice(core::smart_refctd_ptr&& _vulkanConnection, IPhysicalDevice* const _vulkanDevice, const E_VIRTUAL_ARCHITECTURE _virtualArchitecture); - ~CCUDADevice() = default; + + CCUDADevice(core::smart_refctd_ptr&& _vulkanConnection, IPhysicalDevice* const _vulkanDevice, const E_VIRTUAL_ARCHITECTURE _virtualArchitecture, CUdevice _handle, core::smart_refctd_ptr&& _handler); + ~CCUDADevice(); std::vector m_defaultCompileOptions; core::smart_refctd_ptr m_vulkanConnection; IPhysicalDevice* const m_vulkanDevice; E_VIRTUAL_ARCHITECTURE m_virtualArchitecture; + core::smart_refctd_ptr m_handler; + CUdevice m_handle; + CUcontext m_context; + size_t m_allocationGranularity[4]; }; } #endif // _NBL_COMPILE_WITH_CUDA_ -#endif +#endif \ No newline at end of file diff --git a/include/nbl/video/CCUDAHandler.h b/include/nbl/video/CCUDAHandler.h index 01774b25d2..022024e856 100644 --- a/include/nbl/video/CCUDAHandler.h +++ b/include/nbl/video/CCUDAHandler.h @@ -34,7 +34,7 @@ class CCUDAHandler : public core::IReferenceCounted static T* cast_CUDA_ptr(CUdeviceptr ptr) { return reinterpret_cast(ptr); } // - core::smart_refctd_ptr create(system::ISystem* system, core::smart_refctd_ptr&& _logger); + static core::smart_refctd_ptr create(system::ISystem* system, core::smart_refctd_ptr&& _logger); // using LibLoader = system::DefaultFuncPtrLoader; @@ -119,6 +119,24 @@ class CCUDAHandler : public core::IReferenceCounted ,cuSurfObjectDestroy ,cuTexObjectCreate ,cuTexObjectDestroy + ,cuImportExternalMemory + ,cuDestroyExternalMemory + ,cuExternalMemoryGetMappedBuffer + ,cuMemUnmap + ,cuMemAddressFree + ,cuMemGetAllocationGranularity + ,cuMemAddressReserve + ,cuMemCreate + ,cuMemExportToShareableHandle + ,cuMemMap + ,cuMemRelease + ,cuMemSetAccess + ,cuMemImportFromShareableHandle + ,cuLaunchHostFunc + ,cuDestroyExternalSemaphore + ,cuImportExternalSemaphore + ,cuSignalExternalSemaphoresAsync + ,cuWaitExternalSemaphoresAsync ); const CUDA& getCUDAFunctionTable() const {return m_cuda;} @@ -157,13 +175,25 @@ class CCUDAHandler : public core::IReferenceCounted const auto filesize = file->getSize(); std::string source(filesize+1u,'0'); - system::future bytesRead; + system::IFile::success_t bytesRead; file->read(bytesRead,source.data(),0u,file->getSize()); - source.resize(bytesRead.get()); + source.resize(bytesRead.getBytesProcessed()); return createProgram(prog,std::move(source),file->getFileName().string().c_str(),headerCount,headerContents,includeNames); } + struct SCUDADeviceInfo + { + CUdevice handle = {}; + CUuuid uuid = {}; + int attributes[CU_DEVICE_ATTRIBUTE_MAX] = {}; + }; + + inline core::vector const& getAvailableDevices() const + { + return m_availableDevices; + } + // inline nvrtcResult compileProgram(nvrtcProgram prog, core::SRange options) { @@ -199,6 +229,7 @@ class CCUDAHandler : public core::IReferenceCounted result = createProgram(&program,std::move(source),filename,headerCount,headerContents,includeNames); return compileDirectlyToPTX_impl(result,program,nvrtcOptions,log); } + inline ptx_and_nvrtcResult_t compileDirectlyToPTX( 
const char* source, const char* filename, core::SRange nvrtcOptions, const int headerCount=0, const char* const* headerContents=nullptr, const char* const* includeNames=nullptr, @@ -207,6 +238,7 @@ class CCUDAHandler : public core::IReferenceCounted { return compileDirectlyToPTX(std::string(source),filename,nvrtcOptions,headerCount,headerContents,includeNames,log); } + inline ptx_and_nvrtcResult_t compileDirectlyToPTX( system::IFile* file, core::SRange nvrtcOptions, const int headerCount=0, const char* const* headerContents=nullptr, const char* const* includeNames=nullptr, @@ -226,20 +258,12 @@ class CCUDAHandler : public core::IReferenceCounted } core::smart_refctd_ptr createDevice(core::smart_refctd_ptr&& vulkanConnection, IPhysicalDevice* physicalDevice); +protected: + CCUDAHandler(CUDA&& _cuda, NVRTC&& _nvrtc, core::vector>&& _headers, core::smart_refctd_ptr&& _logger, int _version); - protected: - CCUDAHandler(CUDA&& _cuda, NVRTC&& _nvrtc, core::vector>&& _headers, core::smart_refctd_ptr&& _logger, int _version) - : m_cuda(std::move(_cuda)), m_nvrtc(std::move(_nvrtc)), m_headers(std::move(_headers)), m_logger(std::move(_logger)), m_version(_version) - { - for (auto& header : m_headers) - { - m_headerContents.push_back(reinterpret_cast(header->getMappedPointer())); - m_headerNamesStorage.push_back(header->getFileName().string()); - m_headerNames.push_back(m_headerNamesStorage.back().c_str()); - } - } ~CCUDAHandler() = default; - + + // inline ptx_and_nvrtcResult_t compileDirectlyToPTX_impl(nvrtcResult result, nvrtcProgram program, core::SRange nvrtcOptions, std::string* log) { @@ -266,10 +290,12 @@ class CCUDAHandler : public core::IReferenceCounted core::vector m_headerNames; system::logger_opt_smart_ptr m_logger; int m_version; + + core::vector m_availableDevices; }; } #endif // _NBL_COMPILE_WITH_CUDA_ -#endif +#endif \ No newline at end of file diff --git a/include/nbl/video/CCUDASharedMemory.h b/include/nbl/video/CCUDASharedMemory.h new file mode 100644 index 0000000000..d900087d06 --- /dev/null +++ b/include/nbl/video/CCUDASharedMemory.h @@ -0,0 +1,71 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_VIDEO_C_CUDA_SHARED_MEMORY_H_ +#define _NBL_VIDEO_C_CUDA_SHARED_MEMORY_H_ + + +#ifdef _NBL_COMPILE_WITH_CUDA_ + +#include "cuda.h" +#include "nvrtc.h" +#if CUDA_VERSION < 9000 + #error "Need CUDA 9.0 SDK or higher." 
+#endif + +// useful includes in the future +//#include "cudaEGL.h" +//#include "cudaVDPAU.h" + +namespace nbl::video +{ + +class CCUDASharedMemory : public core::IReferenceCounted +{ +public: + // required for us to see the move ctor + friend class CCUDADevice; + + CUdeviceptr getDeviceptr() const { return m_params.ptr; } + + struct SCreationParams + { + size_t size; + uint32_t alignment; + CUmemLocationType location; + }; + + struct SCachedCreationParams : SCreationParams + { + size_t granularSize; + CUdeviceptr ptr; + union + { + void* osHandle; + int fd; + }; + }; + + const SCreationParams& getCreationParams() const { return m_params; } + + core::smart_refctd_ptr exportAsMemory(ILogicalDevice* device, IDeviceMemoryBacked* dedication = nullptr) const; + + core::smart_refctd_ptr createAndBindImage(ILogicalDevice* device, asset::IImage::SCreationParams&& params) const; + +protected: + + CCUDASharedMemory(core::smart_refctd_ptr&& device, SCachedCreationParams&& params) + : m_device(std::move(device)) + , m_params(std::move(params)) + {} + ~CCUDASharedMemory() override; + + core::smart_refctd_ptr m_device; + SCachedCreationParams m_params; +}; + +} + +#endif // _NBL_COMPILE_WITH_CUDA_ + +#endif \ No newline at end of file diff --git a/include/nbl/video/CCUDASharedSemaphore.h b/include/nbl/video/CCUDASharedSemaphore.h new file mode 100644 index 0000000000..882e794bd4 --- /dev/null +++ b/include/nbl/video/CCUDASharedSemaphore.h @@ -0,0 +1,49 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_VIDEO_C_CUDA_SHARED_SEMAPHORE_H_ +#define _NBL_VIDEO_C_CUDA_SHARED_SEMAPHORE_H_ + +#ifdef _NBL_COMPILE_WITH_CUDA_ + +#include "cuda.h" +#include "nvrtc.h" +#if CUDA_VERSION < 9000 + #error "Need CUDA 9.0 SDK or higher." 
+#endif
+
+// useful includes in the future
+//#include "cudaEGL.h"
+//#include "cudaVDPAU.h"
+
+namespace nbl::video
+{
+
+class CCUDASharedSemaphore : public core::IReferenceCounted
+{
+public:
+	friend class CCUDADevice;
+
+	CUexternalSemaphore getInternalObject() const { return m_handle; }
+
+protected:
+
+	CCUDASharedSemaphore(core::smart_refctd_ptr device, core::smart_refctd_ptr src, CUexternalSemaphore semaphore, void* osHandle)
+		: m_device(std::move(device))
+		, m_src(std::move(src))
+		, m_handle(semaphore)
+		, m_osHandle(osHandle)
+	{}
+	~CCUDASharedSemaphore() override;
+
+	core::smart_refctd_ptr m_device;
+	core::smart_refctd_ptr m_src;
+	CUexternalSemaphore m_handle;
+	void* m_osHandle;
+};
+
+}
+
+#endif // _NBL_COMPILE_WITH_CUDA_
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/video/CVulkanDeviceMemoryBacked.h b/include/nbl/video/CVulkanDeviceMemoryBacked.h
index c996000e04..2505de6865 100644
--- a/include/nbl/video/CVulkanDeviceMemoryBacked.h
+++ b/include/nbl/video/CVulkanDeviceMemoryBacked.h
@@ -35,11 +35,11 @@ class CVulkanDeviceMemoryBacked : public Interface
 	protected:
 		// special constructor for when memory requirements are known up-front (so far only swapchains and internal forwarding here)
 		CVulkanDeviceMemoryBacked(const CVulkanLogicalDevice* dev, Interface::SCreationParams&& _creationParams, const IDeviceMemoryBacked::SDeviceMemoryRequirements& _memReqs, const VkResource_t vkHandle);
-		CVulkanDeviceMemoryBacked(const CVulkanLogicalDevice* dev, Interface::SCreationParams&& _creationParams, const VkResource_t vkHandle) :
-			CVulkanDeviceMemoryBacked(dev,std::move(_creationParams),obtainRequirements(dev,vkHandle),vkHandle) {}
+		CVulkanDeviceMemoryBacked(const CVulkanLogicalDevice* dev, Interface::SCreationParams&& _creationParams, bool dedicatedOnly, const VkResource_t vkHandle) :
+			CVulkanDeviceMemoryBacked(dev,std::move(_creationParams), obtainRequirements(dev, dedicatedOnly, vkHandle),vkHandle) {}
 
 	private:
-		static IDeviceMemoryBacked::SDeviceMemoryRequirements obtainRequirements(const CVulkanLogicalDevice* device, const VkResource_t vkHandle);
+		static IDeviceMemoryBacked::SDeviceMemoryRequirements obtainRequirements(const CVulkanLogicalDevice* device, bool dedicatedOnly, const VkResource_t vkHandle);
 
 		core::smart_refctd_ptr m_memory = nullptr;
 		size_t m_offset = 0u;
diff --git a/include/nbl/video/EApiType.h b/include/nbl/video/EApiType.h
index e670dc90d8..275e3f0a7a 100644
--- a/include/nbl/video/EApiType.h
+++ b/include/nbl/video/EApiType.h
@@ -13,6 +13,15 @@ enum E_API_TYPE : uint32_t
 	//EAT_WEBGPU
 };
 
+
+using ExternalHandleType =
+#ifdef _WIN32
+void*
+#else
+int
+#endif
+;
+
 }
 
 #endif
diff --git a/include/nbl/video/IDeviceMemoryAllocation.h b/include/nbl/video/IDeviceMemoryAllocation.h
index 673f1834e6..d162a029be 100644
--- a/include/nbl/video/IDeviceMemoryAllocation.h
+++ b/include/nbl/video/IDeviceMemoryAllocation.h
@@ -24,6 +24,8 @@ We only support persistently mapped buffers with ARB_buffer_storage. Please don't ask us to support Buffer Orphaning. */
 class IDeviceMemoryAllocation : public virtual core::IReferenceCounted
 {
+	friend class IDeviceMemoryAllocator;
+	friend class ILogicalDevice;
 	public:
 		//! Access flags for how the application plans to use mapped memory (if any)
 		/** When you create the memory you can allow for it to be mapped (be given a pointer)
@@ -68,6 +70,19 @@ class IDeviceMemoryAllocation : public virtual core::IReferenceCounted
 			EMHF_MULTI_INSTANCE_BIT = 0x00000002,
 		};
 
+		//!
Flags for imported/exported allocation + enum E_EXTERNAL_HANDLE_TYPE : uint32_t + { + EHT_NONE = 0, + EHT_OPAQUE_WIN32 = 0x00000002, + EHT_OPAQUE_WIN32_KMT = 0x00000004, + EHT_D3D11_TEXTURE = 0x00000008, + EHT_D3D11_TEXTURE_KMT = 0x00000010, + EHT_D3D12_HEAP = 0x00000020, + EHT_D3D12_RESOURCE = 0x00000040, + EHT_HOST_MAPPED_FOREIGN_MEMORY = 0x00000100, + }; + // const ILogicalDevice* getOriginDevice() const {return m_originDevice;} @@ -75,25 +90,25 @@ class IDeviceMemoryAllocation : public virtual core::IReferenceCounted E_API_TYPE getAPIType() const; //! Whether the allocation was made for a specific resource and is supposed to only be bound to that resource. - inline bool isDedicated() const {return m_dedicated;} + inline bool isDedicated() const {return m_params.dedicated;} //! Returns the size of the memory allocation - inline size_t getAllocationSize() const {return m_allocationSize;} + inline size_t getAllocationSize() const {return m_params.allocationSize;} //! - inline core::bitflag getAllocateFlags() const { return m_allocateFlags; } + inline core::bitflag getAllocateFlags() const { return m_params.allocateFlags; } //! - inline core::bitflag getMemoryPropertyFlags() const { return m_memoryPropertyFlags; } + inline core::bitflag getMemoryPropertyFlags() const { return m_params.memoryPropertyFlags; } //! Utility function, tells whether the allocation can be mapped (whether mapMemory will ever return anything other than nullptr) - inline bool isMappable() const {return m_memoryPropertyFlags.hasFlags(EMPF_HOST_READABLE_BIT)||m_memoryPropertyFlags.hasFlags(EMPF_HOST_WRITABLE_BIT);} + inline bool isMappable() const {return m_params.memoryPropertyFlags.hasFlags(EMPF_HOST_READABLE_BIT)|| m_params.memoryPropertyFlags.hasFlags(EMPF_HOST_WRITABLE_BIT);} //! Utility function, tell us if writes by the CPU or GPU need extra visibility operations to become visible for reading on the other processor /** Only execute flushes or invalidations if the allocation requires them, and batch them (flush one combined range instead of two or more) for greater efficiency. To execute a flush or invalidation, use IDriver::flushMappedAllocationRanges and IDriver::invalidateMappedAllocationRanges respectively. */ inline bool haveToMakeVisible() const { - return !m_memoryPropertyFlags.hasFlags(EMPF_HOST_COHERENT_BIT); + return !m_params.memoryPropertyFlags.hasFlags(EMPF_HOST_COHERENT_BIT); } //! @@ -106,9 +121,9 @@ class IDeviceMemoryAllocation : public virtual core::IReferenceCounted { if (isCurrentlyMapped()) return nullptr; - if(accessHint.hasFlags(EMCAF_READ) && !m_memoryPropertyFlags.hasFlags(EMPF_HOST_READABLE_BIT)) + if(accessHint.hasFlags(EMCAF_READ) && !m_params.memoryPropertyFlags.hasFlags(EMPF_HOST_READABLE_BIT)) return nullptr; - if(accessHint.hasFlags(EMCAF_WRITE) && !m_memoryPropertyFlags.hasFlags(EMPF_HOST_WRITABLE_BIT)) + if(accessHint.hasFlags(EMCAF_WRITE) && !m_params.memoryPropertyFlags.hasFlags(EMPF_HOST_WRITABLE_BIT)) return nullptr; m_mappedPtr = reinterpret_cast(map_impl(range,accessHint)); if (m_mappedPtr) @@ -149,23 +164,49 @@ class IDeviceMemoryAllocation : public virtual core::IReferenceCounted //! Constant variant of getMappedPointer inline const void* getMappedPointer() const { return m_mappedPtr; } + struct SInfo + { + uint64_t allocationSize = 0; + core::bitflag allocateFlags = IDeviceMemoryAllocation::EMAF_NONE; + // Handle Type for external resources + IDeviceMemoryAllocation::E_EXTERNAL_HANDLE_TYPE externalHandleType = IDeviceMemoryAllocation::EHT_NONE; + //! 
Imports the given handle if externalHandle != nullptr && externalHandleType != EHT_NONE + //! Creates exportable memory if externalHandle == nullptr && externalHandleType != EHT_NONE + ExternalHandleType externalHandle = 0; + }; + + struct SCreationParams: SInfo + { + core::bitflag memoryPropertyFlags = E_MEMORY_PROPERTY_FLAGS::EMPF_NONE; + const bool dedicated = false; + }; + + inline const SCreationParams& getCreationParams() const { return m_params; } + protected: - inline IDeviceMemoryAllocation( - const ILogicalDevice* const originDevice, const size_t _size, const core::bitflag allocateFlags, const core::bitflag memoryPropertyFlags, const bool dedicated - ) : m_originDevice(originDevice), m_allocationSize(_size), m_allocateFlags(allocateFlags), m_memoryPropertyFlags(memoryPropertyFlags), m_dedicated(dedicated) {} + inline void setPostDestroyCleanup(std::unique_ptr&& cleanup) + { + m_postDestroyCleanup = std::move(cleanup); + } + + IDeviceMemoryAllocation( + const ILogicalDevice* originDevice, SCreationParams&& params = {}) + : m_originDevice(originDevice) + , m_params(std::move(params)) + , m_mappedPtr(nullptr) + , m_mappedRange{ 0, 0 } + , m_currentMappingAccess(EMCAF_NO_MAPPING_ACCESS) + {} virtual void* map_impl(const MemoryRange& range, const core::bitflag accessHint) = 0; virtual bool unmap_impl() = 0; - - const ILogicalDevice* const m_originDevice; - const size_t m_allocationSize; + const ILogicalDevice* m_originDevice = nullptr; + SCreationParams m_params = {}; uint8_t* m_mappedPtr = nullptr; MemoryRange m_mappedRange = {}; core::bitflag m_currentMappingAccess = EMCAF_NO_MAPPING_ACCESS; - const core::bitflag m_allocateFlags; - const core::bitflag m_memoryPropertyFlags; - const bool m_dedicated; + std::unique_ptr m_postDestroyCleanup = nullptr; }; NBL_ENUM_ADD_BITWISE_OPERATORS(IDeviceMemoryAllocation::E_MEMORY_PROPERTY_FLAGS) diff --git a/include/nbl/video/IDeviceMemoryAllocator.h b/include/nbl/video/IDeviceMemoryAllocator.h index 0712ec24f6..22ea3c8238 100644 --- a/include/nbl/video/IDeviceMemoryAllocator.h +++ b/include/nbl/video/IDeviceMemoryAllocator.h @@ -12,11 +12,9 @@ namespace nbl::video class IDeviceMemoryAllocator { public: - struct SAllocateInfo + struct SAllocateInfo: IDeviceMemoryAllocation::SInfo { - size_t size : 54 = 0ull; - size_t flags : 5 = 0u; // IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS - size_t memoryTypeIndex : 5 = 0u; + uint32_t memoryTypeIndex = 0u; IDeviceMemoryBacked* dedication = nullptr; // if you make the info have a `dedication` the memory will be bound right away, also it will use VK_KHR_dedicated_allocation on vulkan // size_t opaqueCaptureAddress = 0u; Note that this mechanism is intended only to support capture/replay tools, and is not recommended for use in other applications. 
}; @@ -27,8 +25,15 @@ class IDeviceMemoryAllocator class IMemoryTypeIterator { public: - IMemoryTypeIterator(const IDeviceMemoryBacked::SDeviceMemoryRequirements& reqs, core::bitflag allocateFlags) - : m_allocateFlags(static_cast(allocateFlags.value)), m_reqs(reqs) {} + IMemoryTypeIterator(const IDeviceMemoryBacked::SDeviceMemoryRequirements& reqs, + core::bitflag allocateFlags, + IDeviceMemoryAllocation::E_EXTERNAL_HANDLE_TYPE handleType, + void* handle) + : m_allocateFlags(static_cast(allocateFlags.value)) + , m_reqs(reqs) + , m_handleType(handleType) + , m_handle(handle) + {} static inline uint32_t end() {return 32u;} @@ -40,11 +45,13 @@ class IDeviceMemoryAllocator inline SAllocateInfo operator()(IDeviceMemoryBacked* dedication) { - SAllocateInfo ret; - ret.size = m_reqs.size; - ret.flags = m_allocateFlags; + SAllocateInfo ret = {}; + ret.allocationSize = m_reqs.size; + ret.allocateFlags = core::bitflag(m_allocateFlags); ret.memoryTypeIndex = dereference(); ret.dedication = dedication; + ret.externalHandleType = m_handleType; + ret.externalHandle = m_handle; return ret; } @@ -57,17 +64,24 @@ class IDeviceMemoryAllocator IDeviceMemoryBacked::SDeviceMemoryRequirements m_reqs; uint32_t m_allocateFlags; + IDeviceMemoryAllocation::E_EXTERNAL_HANDLE_TYPE m_handleType; + void* m_handle; }; //! DefaultMemoryTypeIterator will iterate through set bits of memoryTypeBits from LSB to MSB class DefaultMemoryTypeIterator : public IMemoryTypeIterator { public: - DefaultMemoryTypeIterator(const IDeviceMemoryBacked::SDeviceMemoryRequirements& reqs, core::bitflag allocateFlags) : IMemoryTypeIterator(reqs, allocateFlags) + DefaultMemoryTypeIterator(const IDeviceMemoryBacked::SDeviceMemoryRequirements& reqs, + core::bitflag allocateFlags, + IDeviceMemoryAllocation::E_EXTERNAL_HANDLE_TYPE handleType, + void* handle) + : IMemoryTypeIterator(reqs, allocateFlags, handleType, handle) { currentIndex = hlsl::findLSB(m_reqs.memoryTypeBits); } + protected: uint32_t dereference() const override { @@ -100,19 +114,26 @@ class IDeviceMemoryAllocator }; virtual SAllocation allocate(const SAllocateInfo& info) = 0; - template - inline SAllocation allocate( - const IDeviceMemoryBacked::SDeviceMemoryRequirements& reqs, IDeviceMemoryBacked* dedication=nullptr, - const core::bitflag allocateFlags=IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_NONE) + template + SAllocation allocate( + const IDeviceMemoryBacked::SDeviceMemoryRequirements& reqs, + IDeviceMemoryBacked* dedication = nullptr, + const core::bitflag allocateFlags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_NONE, + IDeviceMemoryAllocation::E_EXTERNAL_HANDLE_TYPE handleType = IDeviceMemoryAllocation::EHT_NONE, + void* handle = nullptr, + std::unique_ptr&& postDestroyCleanup = nullptr) { - for(memory_type_iterator_t memTypeIt(reqs, allocateFlags); memTypeIt!=IMemoryTypeIterator::end(); ++memTypeIt) + for (memory_type_iterator_t memTypeIt(reqs, allocateFlags, handleType, handle); memTypeIt != IMemoryTypeIterator::end(); ++memTypeIt) { SAllocateInfo allocateInfo = memTypeIt.operator()(dedication); - auto allocation = allocate(allocateInfo); + SAllocation allocation = allocate(allocateInfo); if (allocation.isValid()) + { + allocation.memory->setPostDestroyCleanup(std::move(postDestroyCleanup)); return allocation; + } } - return {}; + return { }; } }; diff --git a/include/nbl/video/IDeviceMemoryBacked.h b/include/nbl/video/IDeviceMemoryBacked.h index f2b449557c..c5c28ad717 100644 --- a/include/nbl/video/IDeviceMemoryBacked.h +++ 
b/include/nbl/video/IDeviceMemoryBacked.h @@ -19,6 +19,15 @@ namespace nbl::video struct NBL_API2 ICleanup { virtual ~ICleanup() = 0; + + std::unique_ptr next; + + static void chain(std::unique_ptr& first, std::unique_ptr&& next) + { + if (first) + return chain(first->next, std::move(next)); + first = std::move(next); + } }; //! Interface from which resources backed by IDeviceMemoryAllocation inherit from @@ -37,6 +46,8 @@ class IDeviceMemoryBacked : public IBackendObject // Thus the destructor will skip the call to `vkDestroy` or `glDelete` on the handle, this is only useful for "imported" objects bool skipHandleDestroy = false; + core::bitflag externalHandleTypes = IDeviceMemoryAllocation::EHT_NONE; + //! If you specify queue family indices, then you're concurrent sharing inline bool isConcurrentSharing() const { @@ -92,7 +103,12 @@ class IDeviceMemoryBacked : public IBackendObject { const uint32_t* queueFamilyIndices = nullptr; }; - + + void chainPreDestroyCleanup(std::unique_ptr first) + { + ICleanup::chain(m_cachedCreationParams.preDestroyCleanup, std::move(first)); + } + protected: inline IDeviceMemoryBacked(core::smart_refctd_ptr&& originDevice, SCreationParams&& creationParams, const SDeviceMemoryRequirements& reqs) : IBackendObject(std::move(originDevice)), m_cachedCreationParams(std::move(creationParams)), m_cachedMemoryReqs(reqs) {} @@ -107,10 +123,9 @@ class IDeviceMemoryBacked : public IBackendObject m_cachedCreationParams.preDestroyCleanup = nullptr; } - //! members SCachedCreationParams m_cachedCreationParams; - SDeviceMemoryRequirements m_cachedMemoryReqs; + const SDeviceMemoryRequirements m_cachedMemoryReqs; }; } // end namespace nbl::video diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 24e1731cab..a102005371 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -147,7 +147,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe virtual IQueue::RESULT waitIdle() const = 0; //! Semaphore Stuff - virtual core::smart_refctd_ptr createSemaphore(const uint64_t initialValue) = 0; + virtual core::smart_refctd_ptr createSemaphore(uint64_t initialValue = 0, ISemaphore::SCreationParams&& = {}) = 0; virtual ISemaphore::WAIT_RESULT waitForSemaphores(const std::span infos, const bool waitAll, const uint64_t timeout) = 0; // Forever waiting variant if you're confident that the fence will eventually be signalled inline ISemaphore::WAIT_RESULT blockForSemaphores(const std::span infos, const bool waitAll=true) @@ -285,29 +285,14 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe //! 
Descriptor Creation // Buffer (@see ICPUBuffer) - inline core::smart_refctd_ptr createBuffer(IGPUBuffer::SCreationParams&& creationParams) - { - const auto maxSize = getPhysicalDeviceLimits().maxBufferSize; - if (creationParams.size>maxSize) - { - m_logger.log("Failed to create Buffer, size %d larger than Device %p's limit!",system::ILogger::ELL_ERROR,creationParams.size,this,maxSize); - return nullptr; - } - return createBuffer_impl(std::move(creationParams)); - } + core::smart_refctd_ptr createBuffer(IGPUBuffer::SCreationParams&& creationParams); + // Create a BufferView, to a shader; a fake 1D-like texture with no interpolation (@see ICPUBufferView) core::smart_refctd_ptr createBufferView(const asset::SBufferRange& underlying, const asset::E_FORMAT _fmt); + // Creates an Image (@see ICPUImage) - inline core::smart_refctd_ptr createImage(IGPUImage::SCreationParams&& creationParams) - { - if (!IGPUImage::validateCreationParameters(creationParams)) - { - m_logger.log("Failed to create Image, invalid creation parameters!",system::ILogger::ELL_ERROR); - return nullptr; - } - // TODO: @Cyprian validation of creationParams against the device's limits (sample counts, etc.) see vkCreateImage - return createImage_impl(std::move(creationParams)); - } + core::smart_refctd_ptr createImage(IGPUImage::SCreationParams&& params); + // Create an ImageView that can actually be used by shaders (@see ICPUImageView) inline core::smart_refctd_ptr createImageView(IGPUImageView::SCreationParams&& params) { @@ -765,9 +750,9 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe virtual bool bindBufferMemory_impl(const uint32_t count, const SBindBufferMemoryInfo* pInfos) = 0; virtual bool bindImageMemory_impl(const uint32_t count, const SBindImageMemoryInfo* pInfos) = 0; - virtual core::smart_refctd_ptr createBuffer_impl(IGPUBuffer::SCreationParams&& creationParams) = 0; + virtual core::smart_refctd_ptr createBuffer_impl(IGPUBuffer::SCreationParams&& creationParams, bool dedicatedOnly = false) = 0; virtual core::smart_refctd_ptr createBufferView_impl(const asset::SBufferRange& underlying, const asset::E_FORMAT _fmt) = 0; - virtual core::smart_refctd_ptr createImage_impl(IGPUImage::SCreationParams&& params) = 0; + virtual core::smart_refctd_ptr createImage_impl(IGPUImage::SCreationParams&& params, bool dedicatedOnly = false) = 0; virtual core::smart_refctd_ptr createImageView_impl(IGPUImageView::SCreationParams&& params) = 0; virtual core::smart_refctd_ptr createBottomLevelAccelerationStructure_impl(IGPUAccelerationStructure::SCreationParams&& params) = 0; virtual core::smart_refctd_ptr createTopLevelAccelerationStructure_impl(IGPUTopLevelAccelerationStructure::SCreationParams&& params) = 0; diff --git a/include/nbl/video/IPhysicalDevice.h b/include/nbl/video/IPhysicalDevice.h index 583c8ac9d0..870a435f5e 100644 --- a/include/nbl/video/IPhysicalDevice.h +++ b/include/nbl/video/IPhysicalDevice.h @@ -26,8 +26,53 @@ namespace nbl::video { + + class NBL_API2 IPhysicalDevice : public core::Interface, public core::Unmovable { + template static constexpr bool is_bitflag = false; + template static constexpr bool is_bitflag> = true; + + template struct RequestMapTraits; + templatestruct RequestMapTraits : RequestMapTraits {}; + template struct RequestMapTraits + { + using Key = std::tuple...>; + struct Hasher + { + template + static size_t hash(size_t seed, Key const& key) + { + if constexpr (0 == N) + return seed; + else + { + using cur = std::remove_cvref_t(key))>; + + if constexpr 
(is_bitflag) + core::hash_combine(seed, cur::UNDERLYING_TYPE(std::get(key).value)); + else if constexpr (std::is_convertible_v) + core::hash_combine(seed, size_t(std::get(key))); + else + core::hash_combine(seed, std::get(key)); + + return hash(seed, key); + } + + } + + size_t operator()(Key const& key) const + { + return hash(0, key); + } + }; + + using Map = std::unordered_map; + }; + + template + using RequestMap = typename RequestMapTraits::Map; + public: // virtual E_API_TYPE getAPIType() const = 0; @@ -242,6 +287,7 @@ class NBL_API2 IPhysicalDevice : public core::Interface, public core::Unmovable !! Same goes for `vkGetPhysicalDeviceSparseImageFormatProperties2` */ + struct SFormatBufferUsages { struct SUsage @@ -687,6 +733,82 @@ class NBL_API2 IPhysicalDevice : public core::Interface, public core::Unmovable return createLogicalDevice_impl(std::move(params)); } + + /* ExternalMemoryProperties *//* provided by VK_KHR_external_memory_capabilities */ + struct SExternalMemoryProperties + { + uint32_t exportableTypes : 7 = ~0u; + uint32_t compatibleTypes : 7 = ~0u; + uint32_t dedicatedOnly : 1 = 0u; + uint32_t exportable : 1 = ~0u; + uint32_t importable : 1 = ~0u; + + bool operator == (SExternalMemoryProperties const& rhs) const = default; + + SExternalMemoryProperties operator &(SExternalMemoryProperties rhs) const + { + rhs.exportableTypes &= exportableTypes; + rhs.compatibleTypes &= compatibleTypes; + rhs.dedicatedOnly |= dedicatedOnly; + rhs.exportable &= exportable; + rhs.importable &= importable; + return rhs; + } + }; + + static_assert(sizeof(SExternalMemoryProperties) == sizeof(uint32_t)); + + struct SImageFormatProperties + { + VkExtent3D maxExtent = {}; + uint32_t maxMipLevels = {}; + uint32_t maxArrayLayers = {}; + IGPUImage::E_SAMPLE_COUNT_FLAGS sampleCounts = IGPUImage::ESCF_1_BIT; + uint64_t maxResourceSize = 0; + + bool operator == (SImageFormatProperties const& rhs) const = default; + }; + + struct SExternalImageFormatProperties : SImageFormatProperties, SExternalMemoryProperties + { + }; + + SExternalMemoryProperties getExternalBufferProperties( + core::bitflag usage, + IDeviceMemoryAllocation::E_EXTERNAL_HANDLE_TYPE handleType) const + { + usage &= ~asset::IBuffer::EUF_SYNTHEHIC_FLAGS_MASK; // mask out synthetic flags + { + std::shared_lock lock(m_externalBufferPropertiesMutex); + auto it = m_externalBufferProperties.find({ usage, handleType }); + if (it != m_externalBufferProperties.end()) + return it->second; + } + + std::unique_lock lock(m_externalBufferPropertiesMutex); + return m_externalBufferProperties[{ usage, handleType }] = getExternalBufferProperties_impl(usage, handleType); + } + + SExternalImageFormatProperties getExternalImageProperties( + asset::E_FORMAT format, + IGPUImage::TILING tiling, + IGPUImage::E_TYPE type, + core::bitflag usage, + core::bitflag flags, + IDeviceMemoryAllocation::E_EXTERNAL_HANDLE_TYPE handleType) const + { + auto key = std::tuple{ format, tiling, type, usage, flags, handleType }; + { + std::shared_lock lock(m_externalImagePropertiesMutex); + auto it = m_externalImageProperties.find(key); + if (it != m_externalImageProperties.end()) + return it->second; + } + + std::unique_lock lock(m_externalImagePropertiesMutex); + return m_externalImageProperties[key] = getExternalImageProperties_impl(format, tiling, type, usage, flags, handleType); + } + protected: struct SInitData final { @@ -745,6 +867,24 @@ class NBL_API2 IPhysicalDevice : public core::Interface, public core::Unmovable return 220u; // largest from above } + // external memory 
+ /* ExternalBufferProperties *//* provided by VK_KHR_external_memory_capabilities */ + + + virtual SExternalMemoryProperties getExternalBufferProperties_impl(core::bitflag usage, IDeviceMemoryAllocation::E_EXTERNAL_HANDLE_TYPE handleType) const = 0; + mutable RequestMap m_externalBufferProperties; + mutable std::shared_mutex m_externalBufferPropertiesMutex; + + virtual SExternalImageFormatProperties getExternalImageProperties_impl( + asset::E_FORMAT format, + IGPUImage::TILING tiling, + IGPUImage::E_TYPE type, + core::bitflag usage, + core::bitflag flags, + IDeviceMemoryAllocation::E_EXTERNAL_HANDLE_TYPE handleType) const = 0; + mutable RequestMap m_externalImageProperties; + mutable std::shared_mutex m_externalImagePropertiesMutex; + // Format Promotion struct SBufferFormatPromotionRequestHash { diff --git a/include/nbl/video/IQueue.h b/include/nbl/video/IQueue.h index 654d95a847..4000fcd2a3 100644 --- a/include/nbl/video/IQueue.h +++ b/include/nbl/video/IQueue.h @@ -68,7 +68,7 @@ class IQueue : public core::Interface, public core::Unmovable { SUCCESS, DEVICE_LOST, - OTHER_ERROR + OTHER_ERROR, }; // struct SSubmitInfo @@ -92,16 +92,23 @@ class IQueue : public core::Interface, public core::Unmovable std::span commandBuffers = {}; std::span signalSemaphores = {}; - inline bool valid() const + enum Validity + { + INVALID, + VALID, + WORK_WITHOUT_SYNC, + }; + + inline Validity valid() const { // any two being empty is wrong if (commandBuffers.empty() && signalSemaphores.empty()) // wait and do nothing - return false; + return INVALID; if (waitSemaphores.empty() && signalSemaphores.empty()) // work without sync - return false; + return WORK_WITHOUT_SYNC; if (waitSemaphores.empty() && commandBuffers.empty()) // signal without doing work first - return false; - return true; + return INVALID; + return VALID; } }; virtual RESULT submit(const std::span _submits); diff --git a/include/nbl/video/ISemaphore.h b/include/nbl/video/ISemaphore.h index b0e0452850..07506067af 100644 --- a/include/nbl/video/ISemaphore.h +++ b/include/nbl/video/ISemaphore.h @@ -44,9 +44,46 @@ class ISemaphore : public IBackendObject // Vulkan: const VkSemaphore* virtual const void* getNativeHandle() const = 0; + //! Flags for imported/exported allocation + enum E_EXTERNAL_HANDLE_TYPE : uint32_t + { + EHT_NONE = 0x00000000, + EHT_OPAQUE_FD = 0x00000001, + EHT_OPAQUE_WIN32 = 0x00000002, + EHT_OPAQUE_WIN32_KMT = 0x00000004, + EHT_D3D12_FENCE = 0x00000008, + EHT_SYNC_FD = 0x00000010, + }; + + //! + struct SCreationParams + { + // A Pre-Destroy-Step is called out just before a `vkDestory` or `glDelete`, this is only useful for "imported" resources + std::unique_ptr preDestroyCleanup = nullptr; + // A Post-Destroy-Step is called in this class' destructor, this is only useful for "imported" resources + std::unique_ptr postDestroyCleanup = nullptr; + // Thus the destructor will skip the call to `vkDestroy` or `glDelete` on the handle, this is only useful for "imported" objects + bool skipHandleDestroy = false; + // Handle Type for external resources + core::bitflag externalHandleTypes = EHT_NONE; + //! Imports the given handle if externalHandle != nullptr && externalMemoryHandleType != EHT_NONE + //! 
Creates exportable memory if externalHandle == nullptr && externalMemoryHandleType != EHT_NONE + ExternalHandleType externalHandle = nullptr; + }; + + auto const& getCreationParams() const + { + return m_creationParams; + } + protected: - inline ISemaphore(core::smart_refctd_ptr&& dev) : IBackendObject(std::move(dev)) {} + ISemaphore(core::smart_refctd_ptr&& dev, SCreationParams&& params = {}) + : IBackendObject(std::move(dev)) + , m_creationParams(std::move(params)) + {} virtual ~ISemaphore() = default; + + const SCreationParams m_creationParams; }; } diff --git a/include/nbl/video/SPhysicalDeviceLimits.h b/include/nbl/video/SPhysicalDeviceLimits.h index ebb3e0dcbd..b639f37230 100644 --- a/include/nbl/video/SPhysicalDeviceLimits.h +++ b/include/nbl/video/SPhysicalDeviceLimits.h @@ -552,7 +552,6 @@ struct SPhysicalDeviceLimits /* CooperativeMatrixPropertiesKHR *//* VK_KHR_cooperative_matrix */ core::bitflag cooperativeMatrixSupportedStages = asset::IShader::ESS_UNKNOWN; - /* Always enabled if available, reported as limits */ // Core 1.0 Features diff --git a/include/nbl/video/utilities/IUtilities.h b/include/nbl/video/utilities/IUtilities.h index 492a1db027..d91fe09107 100644 --- a/include/nbl/video/utilities/IUtilities.h +++ b/include/nbl/video/utilities/IUtilities.h @@ -234,7 +234,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted //! WARNING: This function blocks CPU and stalls the GPU! inline bool autoSubmitAndBlock(const SIntendedSubmitInfo::SFrontHalf& submit, const std::function& what) { - auto semaphore = m_device->createSemaphore(0); + auto semaphore = m_device->createSemaphore(); // so we begin latching everything on the value of 1, but if we overflow it increases IQueue::SSubmitInfo::SSemaphoreInfo info = {semaphore.get(),1}; diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 96218c67e5..517485d08c 100755 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -117,7 +117,6 @@ nbl_get_conf_dir(NABLA_CONF_DIR_RELEASE Release) nbl_get_conf_dir(NABLA_CONF_DIR_RELWITHDEBINFO RelWithDebInfo) if (NBL_COMPILE_WITH_CUDA) - message(STATUS "Building with CUDA interop") set(_NBL_COMPILE_WITH_CUDA_ ${NBL_COMPILE_WITH_CUDA}) if (NBL_BUILD_OPTIX) set(_NBL_BUILD_OPTIX_ ${NBL_BUILD_OPTIX}) @@ -314,6 +313,8 @@ set(NBL_VIDEO_SOURCES # CUDA ${NBL_ROOT_PATH}/src/nbl/video/CCUDAHandler.cpp ${NBL_ROOT_PATH}/src/nbl/video/CCUDADevice.cpp + ${NBL_ROOT_PATH}/src/nbl/video/CCUDASharedMemory.cpp + ${NBL_ROOT_PATH}/src/nbl/video/CCUDASharedSemaphore.cpp ) set(NBL_SCENE_SOURCES @@ -378,6 +379,10 @@ endif() target_compile_definitions(Nabla PRIVATE __NBL_BUILDING_NABLA__) +if (NBL_COMPILE_WITH_CUDA) + target_compile_definitions(Nabla PUBLIC _NBL_COMPILE_WITH_CUDA_) +endif() + if (ANDROID) add_library(android_native_app_glue STATIC ${ANDROID_NDK_ROOT_PATH}/sources/android/native_app_glue/android_native_app_glue.c diff --git a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp index 4ad0710dbf..6eb93d7242 100644 --- a/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CPLYMeshFileLoader.cpp @@ -96,6 +96,7 @@ void CPLYMeshFileLoader::initialize() auto pipelineBundle = defaultOverride.findCachedAsset(pipelineCacheHash, types, fakeContext, _hierarchyLevel + ICPURenderpassIndependentPipeline::DESC_SET_HIERARCHYLEVELS_BELOW); if (pipelineBundle.getContents().empty()) { +#if 0 // WHAT IS THIS? 
auto mbVertexShader = core::smart_refctd_ptr(); auto mbFragmentShader = core::smart_refctd_ptr(); { @@ -108,6 +109,7 @@ void CPLYMeshFileLoader::initialize() mbVertexShader = core::smart_refctd_ptr_static_cast(vertexShaderBundle->begin()->getContents().begin()[0]); mbFragmentShader = core::smart_refctd_ptr_static_cast(fragmentShaderBundle->begin()->getContents().begin()[0]); } +#endif auto mbPipelineLayout = defaultOverride.findDefaultAsset("nbl/builtin/pipeline_layout/loader/PLY", fakeContext, 0u).first; @@ -130,7 +132,7 @@ void CPLYMeshFileLoader::initialize() const auto currentBitmask = core::createBitmask({ attrib }); inputParams.enabledBindingFlags |= currentBitmask; inputParams.enabledAttribFlags |= currentBitmask; - inputParams.bindings[attrib] = { asset::getTexelOrBlockBytesize(static_cast(vertexAttribParamsAllOptions[attrib].format)), EVIR_PER_VERTEX }; + inputParams.bindings[attrib] = { asset::getTexelOrBlockBytesize(static_cast(vertexAttribParamsAllOptions[attrib].format)), SVertexInputBindingParams::EVIR_PER_VERTEX}; inputParams.attributes[attrib] = vertexAttribParamsAllOptions[attrib]; } @@ -143,14 +145,15 @@ void CPLYMeshFileLoader::initialize() SRasterizationParams rastarizationParmas; +#if 0 // WHAT IS THIS? auto mbPipeline = core::make_smart_refctd_ptr(std::move(mbPipelineLayout), nullptr, nullptr, inputParams, blendParams, primitiveAssemblyParams, rastarizationParmas); { mbPipeline->setShaderAtStage(asset::IShader::ESS_VERTEX, mbVertexShader.get()); mbPipeline->setShaderAtStage(asset::IShader::ESS_FRAGMENT, mbFragmentShader.get()); - asset::SAssetBundle newPipelineBundle(nullptr, { core::smart_refctd_ptr(mbPipeline) }); defaultOverride.insertAssetIntoCache(newPipelineBundle, pipelineCacheHash, fakeContext, _hierarchyLevel + ICPURenderpassIndependentPipeline::DESC_SET_HIERARCHYLEVELS_BELOW); } +#endif } else return; diff --git a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp index c080857c63..b507153916 100644 --- a/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp +++ b/src/nbl/asset/interchange/CSTLMeshFileLoader.cpp @@ -52,6 +52,7 @@ void CSTLMeshFileLoader::initialize() auto pipelineBundle = defaultOverride.findCachedAsset(pipelineCacheHash, types, fakeContext, _hierarchyLevel + ICPURenderpassIndependentPipeline::DESC_SET_HIERARCHYLEVELS_BELOW); if (pipelineBundle.getContents().empty()) { +#if 0 // WHAT IS THIS? auto mbVertexShader = core::smart_refctd_ptr(); auto mbFragmentShader = core::smart_refctd_ptr(); { @@ -64,7 +65,7 @@ void CSTLMeshFileLoader::initialize() mbVertexShader = core::smart_refctd_ptr_static_cast(vertexShaderBundle->begin()->getContents().begin()[0]); mbFragmentShader = core::smart_refctd_ptr_static_cast(fragmentShaderBundle->begin()->getContents().begin()[0]); } - +#endif auto defaultOverride = IAssetLoaderOverride(m_assetMgr); const IAssetLoader::SAssetLoadContext fakeContext(IAssetLoader::SAssetLoadParams{}, nullptr); @@ -79,7 +80,7 @@ void CSTLMeshFileLoader::initialize() const auto stride = positionFormatByteSize + colorFormatByteSize + normalFormatByteSize; mbInputParams.enabledBindingFlags |= core::createBitmask({ 0 }); mbInputParams.enabledAttribFlags |= core::createBitmask({ POSITION_ATTRIBUTE, NORMAL_ATTRIBUTE, withColorAttribute ? 
COLOR_ATTRIBUTE : 0 }); - mbInputParams.bindings[0] = { stride, EVIR_PER_VERTEX }; + mbInputParams.bindings[0] = { stride, SVertexInputBindingParams::EVIR_PER_VERTEX }; mbInputParams.attributes[POSITION_ATTRIBUTE].format = EF_R32G32B32_SFLOAT; mbInputParams.attributes[POSITION_ATTRIBUTE].relativeOffset = 0; @@ -102,14 +103,15 @@ void CSTLMeshFileLoader::initialize() SRasterizationParams rastarizationParmas; +#if 0 // WHAT IS THIS? auto mbPipeline = core::make_smart_refctd_ptr(std::move(mbPipelineLayout), nullptr, nullptr, mbInputParams, blendParams, primitiveAssemblyParams, rastarizationParmas); { mbPipeline->setShaderAtStage(asset::IShader::ESS_VERTEX, mbVertexShader.get()); mbPipeline->setShaderAtStage(asset::IShader::ESS_FRAGMENT, mbFragmentShader.get()); } - asset::SAssetBundle newPipelineBundle(nullptr, {core::smart_refctd_ptr(mbPipeline)}); defaultOverride.insertAssetIntoCache(newPipelineBundle, pipelineCacheHash, fakeContext, _hierarchyLevel + ICPURenderpassIndependentPipeline::DESC_SET_HIERARCHYLEVELS_BELOW); +#endif } else return; diff --git a/src/nbl/video/CCUDADevice.cpp b/src/nbl/video/CCUDADevice.cpp index 4d2e880095..9fbb635f52 100644 --- a/src/nbl/video/CCUDADevice.cpp +++ b/src/nbl/video/CCUDADevice.cpp @@ -1,131 +1,159 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include "nbl/video/CCUDADevice.h" +#include "nbl/video/CCUDAHandler.h" #ifdef _NBL_COMPILE_WITH_CUDA_ namespace nbl::video { -CCUDADevice::CCUDADevice(core::smart_refctd_ptr&& _vulkanConnection, IPhysicalDevice* const _vulkanDevice, const E_VIRTUAL_ARCHITECTURE _virtualArchitecture) - : m_defaultCompileOptions(), m_vulkanConnection(std::move(_vulkanConnection)), m_vulkanDevice(_vulkanDevice), m_virtualArchitecture(_virtualArchitecture) +CCUDADevice::CCUDADevice(core::smart_refctd_ptr&& _vulkanConnection, IPhysicalDevice* const _vulkanDevice, const E_VIRTUAL_ARCHITECTURE _virtualArchitecture, CUdevice _handle, core::smart_refctd_ptr&& _handler) + : m_defaultCompileOptions(), m_vulkanConnection(std::move(_vulkanConnection)), m_vulkanDevice(_vulkanDevice), m_virtualArchitecture(_virtualArchitecture), m_handle(_handle), m_handler(std::move(_handler)), m_allocationGranularity{} { m_defaultCompileOptions.push_back("--std=c++14"); m_defaultCompileOptions.push_back(virtualArchCompileOption[m_virtualArchitecture]); m_defaultCompileOptions.push_back("-dc"); m_defaultCompileOptions.push_back("-use_fast_math"); -} + auto& cu = m_handler->getCUDAFunctionTable(); + + CUresult re = cu.pcuCtxCreate_v2(&m_context, 0, m_handle); + assert(CUDA_SUCCESS == re); + re = cu.pcuCtxSetCurrent(m_context); + assert(CUDA_SUCCESS == re); + + for (uint32_t i = 0; i < ARRAYSIZE(m_allocationGranularity); ++i) + { + uint32_t metaData[16] = { 48 }; + CUmemAllocationProp prop = { + .type = CU_MEM_ALLOCATION_TYPE_PINNED, + .requestedHandleTypes = ALLOCATION_HANDLE_TYPE, + .location = {.type = static_cast(i), .id = m_handle }, + .win32HandleMetaData = metaData, + }; + auto re = cu.pcuMemGetAllocationGranularity(&m_allocationGranularity[i], &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM); + + assert(CUDA_SUCCESS == re); + } +} -#if 0 -CUresult CCUDAHandler::registerBuffer(GraphicsAPIObjLink* link, uint32_t flags) +CCUDADevice::~CCUDADevice() { - assert(link->obj); - auto glbuf = static_cast(link->obj.get()); - auto retval = cuda.pcuGraphicsGLRegisterBuffer(&link->cudaHandle,glbuf->getOpenGLName(),flags); - if 
(retval!=CUDA_SUCCESS) - link->obj = nullptr; - return retval; + m_handler->getCUDAFunctionTable().pcuCtxDestroy_v2(m_context); } -CUresult CCUDAHandler::registerImage(GraphicsAPIObjLink* link, uint32_t flags) + +size_t CCUDADevice::roundToGranularity(CUmemLocationType location, size_t size) const { - assert(link->obj); - - auto format = link->obj->getCreationParameters().format; - if (asset::isBlockCompressionFormat(format) || asset::isDepthOrStencilFormat(format) || asset::isScaledFormat(format) || asset::isPlanarFormat(format)) - return CUDA_ERROR_INVALID_IMAGE; - - auto glimg = static_cast(link->obj.get()); - GLenum target = glimg->getOpenGLTarget(); - switch (target) - { - case GL_TEXTURE_2D: - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_CUBE_MAP: - case GL_TEXTURE_3D: - break; - default: - return CUDA_ERROR_INVALID_IMAGE; - break; - } - auto retval = cuda.pcuGraphicsGLRegisterImage(&link->cudaHandle,glimg->getOpenGLName(),target,flags); - if (retval != CUDA_SUCCESS) - link->obj = nullptr; - return retval; + return ((size - 1) / m_allocationGranularity[location] + 1) * m_allocationGranularity[location]; } - -constexpr auto MaxAquireOps = 4096u; - -CUresult CCUDAHandler::acquireAndGetPointers(GraphicsAPIObjLink* linksBegin, GraphicsAPIObjLink* linksEnd, CUstream stream, size_t* outbufferSizes) +CUresult CCUDADevice::reserveAdrressAndMapMemory(CUdeviceptr* outPtr, size_t size, size_t alignment, CUmemLocationType location, CUmemGenericAllocationHandle memory) { - if (linksBegin+MaxAquireOpsgetCUDAFunctionTable(); + + CUdeviceptr ptr = 0; + if (auto err = cu.pcuMemAddressReserve(&ptr, size, alignment, 0, 0); CUDA_SUCCESS != err) + return err; - size_t tmp = 0xdeadbeefbadc0ffeull; - size_t* sit = outbufferSizes; - for (auto iit=linksBegin; iit!=linksEnd; iit++,sit++) + if (auto err = cu.pcuMemMap(ptr, size, 0, memory, 0); CUDA_SUCCESS != err) { - if (!iit->acquired) - return CUDA_ERROR_UNKNOWN; + cu.pcuMemAddressFree(ptr, size); + return err; + } + + CUmemAccessDesc accessDesc = { + .location = { .type = location, .id = m_handle }, + .flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE, + }; - result = cuda::CCUDAHandler::cuda.pcuGraphicsResourceGetMappedPointer_v2(&iit->asBuffer.pointer,outbufferSizes ? 
sit:&tmp,iit->cudaHandle); - if (result != CUDA_SUCCESS) - return result; + if (auto err = cu.pcuMemSetAccess(ptr, size, &accessDesc, 1); CUDA_SUCCESS != err) + { + cu.pcuMemUnmap(ptr, size); + cu.pcuMemAddressFree(ptr, size); + return err; } + + *outPtr = ptr; + return CUDA_SUCCESS; } -CUresult CCUDAHandler::acquireAndGetMipmappedArray(GraphicsAPIObjLink* linksBegin, GraphicsAPIObjLink* linksEnd, CUstream stream) + +CUresult CCUDADevice::createSharedMemory( + core::smart_refctd_ptr* outMem, + CCUDASharedMemory::SCreationParams&& inParams) { - if (linksBegin+MaxAquireOpsacquired) - return CUDA_ERROR_UNKNOWN; + auto& cu = m_handler->getCUDAFunctionTable(); - result = cuda::CCUDAHandler::cuda.pcuGraphicsResourceGetMappedMipmappedArray(&iit->asImage.mipmappedArray,iit->cudaHandle); - if (result != CUDA_SUCCESS) - return result; - } - return CUDA_SUCCESS; -} -CUresult CCUDAHandler::acquireAndGetArray(GraphicsAPIObjLink* linksBegin, GraphicsAPIObjLink* linksEnd, uint32_t* arrayIndices, uint32_t* mipLevels, CUstream stream) -{ - if (linksBegin+MaxAquireOpsacquired) - return CUDA_ERROR_UNKNOWN; + cu.pcuMemRelease(mem); + return err; + } - result = cuda::CCUDAHandler::cuda.pcuGraphicsSubResourceGetMappedArray(&iit->asImage.array,iit->cudaHandle,*ait,*mit); - if (result != CUDA_SUCCESS) - return result; + if (auto err = reserveAdrressAndMapMemory(¶ms.ptr, params.granularSize, params.alignment, params.location, mem); CUDA_SUCCESS != err) + { + CloseHandle(params.osHandle); + cu.pcuMemRelease(mem); + return err; } + + if (auto err = cu.pcuMemRelease(mem); CUDA_SUCCESS != err) + { + CloseHandle(params.osHandle); + return err; + } + + *outMem = core::smart_refctd_ptr(new CCUDASharedMemory(core::smart_refctd_ptr(this), std::move(params)), core::dont_grab); + return CUDA_SUCCESS; } -#endif + +CUresult CCUDADevice::importGPUSemaphore(core::smart_refctd_ptr* outPtr, ISemaphore* sema) +{ + if (!sema || !outPtr) + return CUDA_ERROR_INVALID_VALUE; + + auto& cu = m_handler->getCUDAFunctionTable(); + auto handleType = sema->getCreationParams().externalHandleTypes; + auto handle = sema->getCreationParams().externalHandle; + + if (!handleType.hasFlags(ISemaphore::EHT_OPAQUE_WIN32) || !handle) + return CUDA_ERROR_INVALID_VALUE; + + CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC desc = { + .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32, + .handle = {.win32 = {.handle = handle }}, + }; + + CUexternalSemaphore cusema; + if (auto err = cu.pcuImportExternalSemaphore(&cusema, &desc); CUDA_SUCCESS != err) + return err; + + *outPtr = core::smart_refctd_ptr(new CCUDASharedSemaphore(core::smart_refctd_ptr(this), core::smart_refctd_ptr(sema), cusema, handle), core::dont_grab); + return CUDA_SUCCESS; +} + } -#endif // _NBL_COMPILE_WITH_CUDA_ +#endif // _NBL_COMPILE_WITH_CUDA_ \ No newline at end of file diff --git a/src/nbl/video/CCUDAHandler.cpp b/src/nbl/video/CCUDAHandler.cpp index 183afe6b43..2789bed2a6 100644 --- a/src/nbl/video/CCUDAHandler.cpp +++ b/src/nbl/video/CCUDAHandler.cpp @@ -3,6 +3,7 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/video/CCUDAHandler.h" +#include "nbl/system/CFileView.h" #ifdef _NBL_COMPILE_WITH_CUDA_ #include "jitify/jitify.hpp" @@ -10,7 +11,49 @@ namespace nbl::video { - + +CCUDAHandler::CCUDAHandler( + CUDA&& _cuda, + NVRTC&& _nvrtc, + core::vector>&& _headers, + core::smart_refctd_ptr&& _logger, + int _version) + : m_cuda(std::move(_cuda)) + , m_nvrtc(std::move(_nvrtc)) + , m_headers(std::move(_headers)) + , m_logger(std::move(_logger)) + 
, m_version(_version) +{ + for (auto& header : m_headers) + { + m_headerContents.push_back(reinterpret_cast(header->getMappedPointer())); + m_headerNamesStorage.push_back(header->getFileName().string()); + m_headerNames.push_back(m_headerNamesStorage.back().c_str()); + } + + int deviceCount = 0; + if (m_cuda.pcuDeviceGetCount(&deviceCount) != CUDA_SUCCESS || deviceCount <= 0) + return; + + for (int ordinal = 0; ordinal < deviceCount; ordinal++) + { + CUdevice handle = -1; + if (m_cuda.pcuDeviceGet(&handle, ordinal) != CUDA_SUCCESS || handle < 0) + continue; + + CUuuid uuid = {}; + if (m_cuda.pcuDeviceGetUuid(&uuid, handle) != CUDA_SUCCESS) + continue; + + m_availableDevices.emplace_back(handle, uuid); + + int* attributes = m_availableDevices.back().attributes; + for (int i = 0; i < CU_DEVICE_ATTRIBUTE_MAX; i++) + m_cuda.pcuDeviceGetAttribute(attributes + i, static_cast(i), handle); + + } +} + bool CCUDAHandler::defaultHandleResult(CUresult result, const system::logger_opt_ptr& logger) { switch (result) @@ -410,7 +453,7 @@ core::smart_refctd_ptr CCUDAHandler::create(system::ISystem* syste NVRTC nvrtc = {}; #if defined(_NBL_WINDOWS_API_) // Perpetual TODO: any new CUDA releases we need to account for? - const char* nvrtc64_versions[] = { "nvrtc64_111","nvrtc64_110","nvrtc64_102","nvrtc64_101","nvrtc64_100","nvrtc64_92","nvrtc64_91","nvrtc64_90","nvrtc64_80","nvrtc64_75","nvrtc64_70",nullptr }; + const char* nvrtc64_versions[] = { "nvrtc64_120", "nvrtc64_111","nvrtc64_110","nvrtc64_102","nvrtc64_101","nvrtc64_100","nvrtc64_92","nvrtc64_91","nvrtc64_90","nvrtc64_80","nvrtc64_75","nvrtc64_70",nullptr }; const char* nvrtc64_suffices[] = {"","_","_0","_1","_2",nullptr}; for (auto verpath=nvrtc64_versions; *verpath; verpath++) { @@ -468,7 +511,7 @@ core::smart_refctd_ptr CCUDAHandler::create(system::ISystem* syste { const void* contents = it.second.data(); headers.push_back(core::make_smart_refctd_ptr>( - core::smart_refctd_ptr(system),it.first.c_str(), + it.first.c_str(), core::bitflag(system::IFile::ECF_READ)|system::IFile::ECF_MAPPABLE, const_cast(contents),it.second.size()+1u )); @@ -514,7 +557,8 @@ CCUDAHandler::ptx_and_nvrtcResult_t CCUDAHandler::getPTX(nvrtcProgram prog) return {nullptr,NVRTC_ERROR_INVALID_INPUT}; auto ptx = core::make_smart_refctd_ptr(_size); - return {std::move(ptx),m_nvrtc.pnvrtcGetPTX(prog,reinterpret_cast(ptx->getPointer()))}; + nvrtcResult result = m_nvrtc.pnvrtcGetPTX(prog, reinterpret_cast(ptx->getPointer())); + return {std::move(ptx),result}; } core::smart_refctd_ptr CCUDAHandler::createDevice(core::smart_refctd_ptr&& vulkanConnection, IPhysicalDevice* physicalDevice) @@ -525,112 +569,98 @@ core::smart_refctd_ptr CCUDAHandler::createDevice(core::smart_refct if (std::find(devices.begin(),devices.end(),physicalDevice)==devices.end()) return nullptr; - int deviceCount = 0; - if (m_cuda.pcuDeviceGetCount(&deviceCount)!=CUDA_SUCCESS || deviceCount<=0) - return nullptr; - - for (int ordinal=0; ordinalgetLimits().deviceUUID,VK_UUID_SIZE)) + if (!memcmp(&device.uuid, &physicalDevice->getProperties().deviceUUID, VK_UUID_SIZE)) { - int attributes[CU_DEVICE_ATTRIBUTE_MAX] = {}; - for (int i=0; i(i),handle); - CCUDADevice::E_VIRTUAL_ARCHITECTURE arch = CCUDADevice::EVA_COUNT; - const int& archMajor = attributes[CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR]; - const int& archMinor = attributes[CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR]; + const int& archMajor = device.attributes[CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR]; + const int& archMinor = 
device.attributes[CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR]; switch (archMajor) { - case 3: - switch (archMinor) - { - case 0: - arch = CCUDADevice::EVA_30; - break; - case 2: - arch = CCUDADevice::EVA_32; - break; - case 5: - arch = CCUDADevice::EVA_35; - break; - case 7: - arch = CCUDADevice::EVA_37; - break; - default: - break; - } + case 3: + switch (archMinor) + { + case 0: + arch = CCUDADevice::EVA_30; break; - case 5: - switch (archMinor) - { - case 0: - arch = CCUDADevice::EVA_50; - break; - case 2: - arch = CCUDADevice::EVA_52; - break; - case 3: - arch = CCUDADevice::EVA_53; - break; - default: - break; - } + case 2: + arch = CCUDADevice::EVA_32; break; - case 6: - switch (archMinor) - { - case 0: - arch = CCUDADevice::EVA_60; - break; - case 1: - arch = CCUDADevice::EVA_61; - break; - case 2: - arch = CCUDADevice::EVA_62; - break; - default: - break; - } + case 5: + arch = CCUDADevice::EVA_35; break; case 7: - switch (archMinor) - { - case 0: - arch = CCUDADevice::EVA_70; - break; - case 2: - arch = CCUDADevice::EVA_72; - break; - case 5: - arch = CCUDADevice::EVA_75; - break; - default: - break; - } + arch = CCUDADevice::EVA_37; + break; + default: + break; + } + break; + case 5: + switch (archMinor) + { + case 0: + arch = CCUDADevice::EVA_50; + break; + case 2: + arch = CCUDADevice::EVA_52; + break; + case 3: + arch = CCUDADevice::EVA_53; break; default: - if (archMajor>=8) - arch = CCUDADevice::EVA_80; break; + } + break; + case 6: + switch (archMinor) + { + case 0: + arch = CCUDADevice::EVA_60; + break; + case 1: + arch = CCUDADevice::EVA_61; + break; + case 2: + arch = CCUDADevice::EVA_62; + break; + default: + break; + } + break; + case 7: + switch (archMinor) + { + case 0: + arch = CCUDADevice::EVA_70; + break; + case 2: + arch = CCUDADevice::EVA_72; + break; + case 5: + arch = CCUDADevice::EVA_75; + break; + default: + break; + } + break; + default: + if (archMajor >= 8) + arch = CCUDADevice::EVA_80; + break; } - if (arch==CCUDADevice::EVA_COUNT) + if (arch == CCUDADevice::EVA_COUNT) continue; - auto device = new CCUDADevice(std::move(vulkanConnection),physicalDevice,arch); - return core::smart_refctd_ptr(device,core::dont_grab); - } - } + return core::smart_refctd_ptr(new CCUDADevice(std::move(vulkanConnection), physicalDevice, arch, device.handle, core::smart_refctd_ptr(this)), core::dont_grab); + } + } + return nullptr; } } -#endif // _NBL_COMPILE_WITH_CUDA_ +#endif // _NBL_COMPILE_WITH_CUDA_ \ No newline at end of file diff --git a/src/nbl/video/CCUDASharedMemory.cpp b/src/nbl/video/CCUDASharedMemory.cpp new file mode 100644 index 0000000000..a5b8011920 --- /dev/null +++ b/src/nbl/video/CCUDASharedMemory.cpp @@ -0,0 +1,65 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
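// The switch above maps the cached COMPUTE_CAPABILITY_MAJOR/MINOR attributes
// onto Nabla's E_VIRTUAL_ARCHITECTURE enum; the equivalent NVRTC option string
// can also be derived arithmetically. Minimal sketch, independent of the enum:
#include <string>

static std::string virtualArchOption(int ccMajor, int ccMinor)
{
    // e.g. (7,5) -> "--gpu-architecture=compute_75"
    return "--gpu-architecture=compute_" + std::to_string(ccMajor * 10 + ccMinor);
}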
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/video/CCUDADevice.h" + +#ifdef _NBL_COMPILE_WITH_CUDA_ +namespace nbl::video +{ + +core::smart_refctd_ptr CCUDASharedMemory::exportAsMemory(ILogicalDevice* device, IDeviceMemoryBacked* dedication) const +{ + auto pd = device->getPhysicalDevice(); + uint32_t memoryTypeBits = (1 << pd->getMemoryProperties().memoryTypeCount) - 1; + uint32_t vram = pd->getDeviceLocalMemoryTypeBits(); + + switch (m_params.location) + { + case CU_MEM_LOCATION_TYPE_HOST: memoryTypeBits &= ~vram; break; + case CU_MEM_LOCATION_TYPE_DEVICE: memoryTypeBits &= vram; break; + // TODO(Atil): Figure out how to handle these + case CU_MEM_LOCATION_TYPE_HOST_NUMA: + case CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT: + default: break; + } + + IDeviceMemoryBacked::SDeviceMemoryRequirements req = {}; + req.size = m_params.granularSize; + req.memoryTypeBits = memoryTypeBits; + req.prefersDedicatedAllocation = nullptr != dedication; + req.requiresDedicatedAllocation = nullptr != dedication; + + return device->allocate(req, + dedication, + IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_NONE, + CCUDADevice::EXTERNAL_MEMORY_HANDLE_TYPE, m_params.osHandle, + std::make_unique(core::smart_refctd_ptr(this))).memory; +} + +core::smart_refctd_ptr CCUDASharedMemory::createAndBindImage(ILogicalDevice* device, asset::IImage::SCreationParams&& params) const +{ + if (!device || !m_device->isMatchingDevice(device->getPhysicalDevice())) + return nullptr; + + auto img = device->createImage({ std::move(params), { {.externalHandleTypes = CCUDADevice::EXTERNAL_MEMORY_HANDLE_TYPE } }, IGPUImage::TILING::LINEAR }); + + if (exportAsMemory(device, img.get())) + return img; + + return nullptr; +} + +CCUDASharedMemory::~CCUDASharedMemory() +{ + auto& cu = m_device->getHandler()->getCUDAFunctionTable(); + + CUresult re[] = { + cu.pcuMemUnmap(m_params.ptr, m_params.granularSize), + }; + CloseHandle(m_params.osHandle); + +} +} + +#endif // _NBL_COMPILE_WITH_CUDA_ \ No newline at end of file diff --git a/src/nbl/video/CCUDASharedSemaphore.cpp b/src/nbl/video/CCUDASharedSemaphore.cpp new file mode 100644 index 0000000000..4d6d3aacc9 --- /dev/null +++ b/src/nbl/video/CCUDASharedSemaphore.cpp @@ -0,0 +1,18 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
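// Usage sketch for the CUDA-to-Vulkan direction shown above: memory allocated
// through CCUDADevice::createSharedMemory is bound to a freshly created
// IGPUImage via createAndBindImage. `device` and `mem` are assumed to exist
// already and every image parameter below is a placeholder value.
#include "nbl/video/CCUDASharedMemory.h"
#include "nbl/video/ILogicalDevice.h"

static nbl::core::smart_refctd_ptr<nbl::video::IGPUImage> makeSharedImage(
    nbl::video::ILogicalDevice* device, nbl::video::CCUDASharedMemory* mem)
{
    using namespace nbl;
    asset::IImage::SCreationParams params = {};
    params.type = asset::IImage::ET_2D;
    params.format = asset::EF_R8G8B8A8_UNORM;
    params.extent = { 1024u, 1024u, 1u };
    params.mipLevels = 1u;
    params.arrayLayers = 1u;
    params.samples = asset::IImage::ESCF_1_BIT;
    params.usage = asset::IImage::EUF_SAMPLED_BIT;
    // createAndBindImage creates the image with the matching external handle
    // type and binds the exported CUDA allocation to it (see above).
    return mem->createAndBindImage(device, std::move(params));
}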
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/video/CCUDADevice.h" + +#ifdef _NBL_COMPILE_WITH_CUDA_ +namespace nbl::video +{ +CCUDASharedSemaphore::~CCUDASharedSemaphore() +{ + auto& cu = m_device->getHandler()->getCUDAFunctionTable(); + cu.pcuDestroyExternalSemaphore(m_handle); + CloseHandle(m_osHandle); +} +} + +#endif // _NBL_COMPILE_WITH_CUDA_ \ No newline at end of file diff --git a/src/nbl/video/CVulkanBuffer.cpp b/src/nbl/video/CVulkanBuffer.cpp index c24e13ceb8..2e542944d3 100644 --- a/src/nbl/video/CVulkanBuffer.cpp +++ b/src/nbl/video/CVulkanBuffer.cpp @@ -8,7 +8,7 @@ namespace nbl::video CVulkanBuffer::~CVulkanBuffer() { preDestroyStep(); - if (m_cachedCreationParams.skipHandleDestroy) + if (!m_cachedCreationParams.skipHandleDestroy) { const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); auto* vk = vulkanDevice->getFunctionTable(); diff --git a/src/nbl/video/CVulkanBuffer.h b/src/nbl/video/CVulkanBuffer.h index 4596981c2a..988d50c2ec 100644 --- a/src/nbl/video/CVulkanBuffer.h +++ b/src/nbl/video/CVulkanBuffer.h @@ -16,7 +16,7 @@ class CVulkanBuffer : public CVulkanDeviceMemoryBacked using base_t = CVulkanDeviceMemoryBacked; public: - inline CVulkanBuffer(const CVulkanLogicalDevice* dev, IGPUBuffer::SCreationParams&& creationParams, const VkBuffer buffer) : base_t(dev,std::move(creationParams),buffer) {} + inline CVulkanBuffer(const CVulkanLogicalDevice* dev, IGPUBuffer::SCreationParams&& creationParams, bool dedicatedOnly, const VkBuffer buffer) : base_t(dev,std::move(creationParams), dedicatedOnly, buffer) {} void setObjectDebugName(const char* label) const override; diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index 2b1f9d9070..64ec5f68c0 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -48,25 +48,41 @@ void fill(vk_barrier_t& out, const ResourceBarrier& in, uint32_t selfQueueFamily // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkBufferMemoryBarrier2-buffer-04088 if (concurrentSharing) selfQueueFamilyIndex = IQueue::FamilyIgnored; + + auto mapQFIdx = [](uint32_t idx) + { + switch (idx) + { + case IQueue::FamilyExternal: + case IQueue::FamilyIgnored: + case IQueue::FamilyForeign: + idx |= 1u << 31; + break; + } + return idx; + }; + if constexpr (!std::is_same_v) { - out.srcQueueFamilyIndex = selfQueueFamilyIndex; - out.dstQueueFamilyIndex = selfQueueFamilyIndex; + out.srcQueueFamilyIndex = mapQFIdx(selfQueueFamilyIndex); + out.dstQueueFamilyIndex = mapQFIdx(selfQueueFamilyIndex); } const asset::SMemoryBarrier* memoryBarrier; if constexpr (std::is_same_v) { memoryBarrier = &in.dep; // in.otherQueueFamilyIndex==selfQueueFamilyIndex not resulting in ownership transfer is implicit - if (!concurrentSharing && in.otherQueueFamilyIndex!=IQueue::FamilyIgnored) - switch (in.ownershipOp) + if (!concurrentSharing && in.otherQueueFamilyIndex != IQueue::FamilyIgnored) { + switch (in.ownershipOp) + { case IGPUCommandBuffer::SOwnershipTransferBarrier::OWNERSHIP_OP::RELEASE: - out.dstQueueFamilyIndex = in.otherQueueFamilyIndex; + out.dstQueueFamilyIndex = mapQFIdx(in.otherQueueFamilyIndex); break; case IGPUCommandBuffer::SOwnershipTransferBarrier::OWNERSHIP_OP::ACQUIRE: - out.srcQueueFamilyIndex = in.otherQueueFamilyIndex; + out.srcQueueFamilyIndex = mapQFIdx(in.otherQueueFamilyIndex); break; + } } } else diff --git a/src/nbl/video/CVulkanDeviceMemoryBacked.cpp 
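// At the raw Vulkan level, acquiring ownership of a buffer released by an
// external API (the case the mapQFIdx helper above has to preserve) is a
// buffer memory barrier whose source family is VK_QUEUE_FAMILY_EXTERNAL.
// Plain-Vulkan sketch; `buf`, `size` and `selfFamily` are assumed inputs.
#include <vulkan/vulkan.h>

static VkBufferMemoryBarrier2 acquireFromExternal(VkBuffer buf, VkDeviceSize size, uint32_t selfFamily)
{
    VkBufferMemoryBarrier2 barrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2 };
    barrier.srcStageMask = VK_PIPELINE_STAGE_2_NONE;         // visibility is handled by the shared semaphore wait
    barrier.srcAccessMask = VK_ACCESS_2_NONE;
    barrier.dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
    barrier.dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT;
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL;  // releasing side lives outside Vulkan
    barrier.dstQueueFamilyIndex = selfFamily;                // queue family that records this barrier
    barrier.buffer = buf;
    barrier.offset = 0;
    barrier.size = size;
    return barrier;
}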
b/src/nbl/video/CVulkanDeviceMemoryBacked.cpp index 2bec9e9d06..8f08f9aa67 100644 --- a/src/nbl/video/CVulkanDeviceMemoryBacked.cpp +++ b/src/nbl/video/CVulkanDeviceMemoryBacked.cpp @@ -6,7 +6,7 @@ namespace nbl::video { template -IDeviceMemoryBacked::SDeviceMemoryRequirements CVulkanDeviceMemoryBacked::obtainRequirements(const CVulkanLogicalDevice* device, const VkResource_t vkHandle) +IDeviceMemoryBacked::SDeviceMemoryRequirements CVulkanDeviceMemoryBacked::obtainRequirements(const CVulkanLogicalDevice* device, bool dedicatedOnly, const VkResource_t vkHandle) { const std::conditional_t vk_memoryRequirementsInfo = { IsImage ? VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2:VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,nullptr,vkHandle @@ -24,8 +24,8 @@ IDeviceMemoryBacked::SDeviceMemoryRequirements CVulkanDeviceMemoryBacked(getOriginDevice()); diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 91d158b0ea..607aa69caa 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -45,21 +45,74 @@ CVulkanLogicalDevice::CVulkanLogicalDevice(core::smart_refctd_ptr CVulkanLogicalDevice::createSemaphore(const uint64_t initialValue) +core::smart_refctd_ptr CVulkanLogicalDevice::createSemaphore(uint64_t initialValue, ISemaphore::SCreationParams&& params) { +#ifdef _WIN32 + VkImportSemaphoreWin32HandleInfoKHR importInfo = { + .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR, + .handleType = static_cast(params.externalHandleTypes.value), + .handle = params.externalHandle, + }; + VkExportSemaphoreWin32HandleInfoKHR handleInfo = { + .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR, + .dwAccess = GENERIC_ALL + }; +#else + VkImportSemaphoreFdInfoKHR importInfo = { + .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR, + .handleType = static_cast(params.externalHandleTypes.value), + .fd = params.externalHandle, + }; +#endif + + VkExportSemaphoreCreateInfo exportInfo = { + .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, +#ifdef _WIN32 + .pNext = &handleInfo, +#endif + .handleTypes = static_cast(params.externalHandleTypes.value) + }; + + + const bool importing = params.externalHandleTypes.value && params.externalHandle; + const bool exporting = params.externalHandleTypes.value && !params.externalHandle; + VkSemaphoreTypeCreateInfoKHR type = { VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR }; - type.pNext = nullptr; // Each pNext member of any structure (including this one) in the pNext chain must be either NULL or a pointer to a valid instance of VkExportSemaphoreCreateInfo, VkExportSemaphoreWin32HandleInfoKHR + type.pNext = exporting ? 
&exportInfo : nullptr; // Each pNext member of any structure (including this one) in the pNext chain must be either NULL or a pointer to a valid instance of VkExportSemaphoreCreateInfo, VkExportSemaphoreWin32HandleInfoKHR type.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR; type.initialValue = initialValue; - VkSemaphoreCreateInfo createInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,&type }; + VkSemaphoreCreateInfo createInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, &type }; createInfo.flags = static_cast(0); // flags must be 0 VkSemaphore semaphore; - if (m_devf.vk.vkCreateSemaphore(m_vkdev,&createInfo,nullptr,&semaphore)==VK_SUCCESS) - return core::make_smart_refctd_ptr(core::smart_refctd_ptr(this),semaphore); - else + if (VK_SUCCESS != m_devf.vk.vkCreateSemaphore(m_vkdev, &createInfo, nullptr, &semaphore)) return nullptr; + + VkSemaphoreGetWin32HandleInfoKHR props = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR, + .semaphore = semaphore, + .handleType = static_cast(params.externalHandleTypes.value), + }; + +#ifdef _WIN32 + auto importfn = m_devf.vk.vkImportSemaphoreWin32HandleKHR; + auto exportfn = m_devf.vk.vkGetSemaphoreWin32HandleKHR; +#else + auto importfn = m_devf.vk.vkImportSemaphoreFdKHR; + auto exportfn = m_devf.vk.vkGetSemaphoreFdKHR; +#endif + + if ( + (importing && (VK_SUCCESS != importfn(m_vkdev, &importInfo))) || + (exporting && (VK_SUCCESS != exportfn(m_vkdev, &props, ¶ms.externalHandle))) + ) + { + m_devf.vk.vkDestroySemaphore(m_vkdev, semaphore, 0); + return nullptr; + } + + return core::make_smart_refctd_ptr(core::smart_refctd_ptr(this), semaphore, std::move(params)); } ISemaphore::WAIT_RESULT CVulkanLogicalDevice::waitForSemaphores(const std::span infos, const bool waitAll, const uint64_t timeout) { @@ -125,26 +178,89 @@ core::smart_refctd_ptr CVulkanLogicalDevice::createDeferredO return core::smart_refctd_ptr(reinterpret_cast(memory),core::dont_grab); } +void* DupeHandle(uint64_t pid, void* handle) +{ +#ifdef _WIN32 + DWORD flags; + HANDLE re = 0; + + HANDLE cur = GetCurrentProcess(); + HANDLE src = pid ? 
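// Usage sketch for the export path of the createSemaphore overload above:
// request an exportable timeline semaphore (externalHandleTypes set,
// externalHandle left null so the freshly exported handle is written back into
// the stored creation params), then hand it to the CUDA side via
// importGPUSemaphore. Assumes Win32 opaque handles and pre-existing `device`
// and `cudaDevice` objects; the createSemaphore signature is the one this
// patch introduces.
#include "nbl/video/CCUDADevice.h"
#include "nbl/video/ILogicalDevice.h"

static nbl::core::smart_refctd_ptr<nbl::video::CCUDASharedSemaphore> makeSharedTimeline(
    nbl::video::ILogicalDevice* device, nbl::video::CCUDADevice* cudaDevice)
{
    using namespace nbl::video;
    ISemaphore::SCreationParams params = {};
    params.externalHandleTypes = ISemaphore::EHT_OPAQUE_WIN32; // export, don't import
    auto sema = device->createSemaphore(0ull, std::move(params));
    if (!sema)
        return nullptr;

    nbl::core::smart_refctd_ptr<CCUDASharedSemaphore> shared;
    if (cudaDevice->importGPUSemaphore(&shared, sema.get()) != CUDA_SUCCESS)
        return nullptr;
    return shared;
}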
OpenProcess(GENERIC_ALL, false, pid) : cur; + + if (!DuplicateHandle(src, handle, cur, &re, GENERIC_ALL, 0, DUPLICATE_SAME_ACCESS)) + return 0; + + CloseHandle(src); + return re; +#endif + return handle; +} IDeviceMemoryAllocator::SAllocation CVulkanLogicalDevice::allocate(const SAllocateInfo& info) { - IDeviceMemoryAllocator::SAllocation ret = {}; if (info.memoryTypeIndex>=m_physicalDevice->getMemoryProperties().memoryTypeCount) - return ret; + return {}; - const core::bitflag allocateFlags(info.flags); VkMemoryAllocateFlagsInfo vk_allocateFlagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, nullptr }; { - if (allocateFlags.hasFlags(IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT)) + if (info.allocateFlags.hasFlags(IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT)) vk_allocateFlagsInfo.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT; vk_allocateFlagsInfo.deviceMask = 0u; // unused: for now } VkMemoryDedicatedAllocateInfo vk_dedicatedInfo = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, nullptr}; + VkMemoryAllocateInfo vk_allocateInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, &vk_allocateFlagsInfo }; + vk_allocateInfo.allocationSize = info.allocationSize; + vk_allocateInfo.memoryTypeIndex = info.memoryTypeIndex; + +#ifdef _WIN32 + VkImportMemoryWin32HandleInfoKHR importInfo = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR, + .handleType = static_cast(info.externalHandleType), + .handle = info.externalHandle + }; + + VkExportMemoryWin32HandleInfoKHR handleInfo = { + .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR, + .dwAccess = GENERIC_ALL, + }; +#else + VkImportMemoryFdInfoKHR importInfo = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, + .handleType = static_cast(info.externalHandleType), + .fd = (int)info.externalHandle, + }; +#endif + + VkExportMemoryAllocateInfo exportInfo = { + .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, +#ifdef _WIN32 + .pNext = &handleInfo, +#endif + .handleTypes = static_cast(info.externalHandleType), + }; + + const void** pNext = &vk_allocateFlagsInfo.pNext; + + if (info.externalHandleType) + { + if (info.externalHandle) //importing + { + auto duped = DupeHandle(0, info.externalHandle); + const_cast(info.externalHandle) = duped; + *pNext = &importInfo; + } + else // exporting + *pNext = &exportInfo; + pNext = (const void**)&((VkBaseInStructure*)*pNext)->pNext; + } + if(info.dedication) { // VK_KHR_dedicated_allocation is in core 1.1, no querying for support needed static_assert(MinimumVulkanApiVersion >= VK_MAKE_API_VERSION(0,1,1,0)); - vk_allocateFlagsInfo.pNext = &vk_dedicatedInfo; + *pNext = &vk_dedicatedInfo; + pNext = &vk_dedicatedInfo.pNext; + switch (info.dedication->getObjectType()) { case IDeviceMemoryBacked::EOT_BUFFER: @@ -155,23 +271,65 @@ IDeviceMemoryAllocator::SAllocation CVulkanLogicalDevice::allocate(const SAlloca break; default: assert(false); - return ret; + return {}; break; } } - VkMemoryAllocateInfo vk_allocateInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, &vk_allocateFlagsInfo}; - vk_allocateInfo.allocationSize = info.size; - vk_allocateInfo.memoryTypeIndex = info.memoryTypeIndex; VkDeviceMemory vk_deviceMemory; auto vk_res = m_devf.vk.vkAllocateMemory(m_vkdev, &vk_allocateInfo, nullptr, &vk_deviceMemory); if (vk_res!=VK_SUCCESS) - return ret; + return {}; + + const bool exported = info.externalHandleType && !info.externalHandle; + + if (exported) + { +#ifdef _WIN32 + VkMemoryGetWin32HandleInfoKHR +#else + VkMemoryGetFdInfoKHR +#endif + handleInfo = { .sType = +#ifdef 
_WIN32 + VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, +#else + VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, +#endif + .memory = vk_deviceMemory, + .handleType = static_cast(info.externalHandleType), + }; + + /* + For handle types defined as NT handles, + the handles returned by vkGetMemoryWin32HandleKHR are owned by the application + and hold a reference to their payload. To avoid leaking resources, + the application must release ownership of them + using the CloseHandle system call when they are no longer needed. + */ + + if (VK_SUCCESS != m_devf.vk. +#ifdef _WIN32 + vkGetMemoryWin32HandleKHR +#else + vkGetMemoryFdKHR +#endif + (m_vkdev, &handleInfo, const_cast(&info.externalHandle))) + { + m_devf.vk.vkFreeMemory(m_vkdev, vk_deviceMemory, 0); + return {}; + } + + } // automatically allocation goes out of scope and frees itself if no success later on const auto memoryPropertyFlags = m_physicalDevice->getMemoryProperties().memoryTypes[info.memoryTypeIndex].propertyFlags; - ret.memory = core::make_smart_refctd_ptr(this,info.size,allocateFlags,memoryPropertyFlags,info.dedication,vk_deviceMemory); + + CVulkanMemoryAllocation::SCreationParams params = { info, memoryPropertyFlags, !!info.dedication }; + IDeviceMemoryAllocator::SAllocation ret = {}; + ret.memory = core::make_smart_refctd_ptr(this, vk_deviceMemory, std::move(params)); + ret.offset = 0ull; // LogicalDevice doesn't suballocate, so offset is always 0, if you want to suballocate, write/use an allocator + if(info.dedication) { bool dedicationSuccess = false; @@ -288,22 +446,29 @@ bool CVulkanLogicalDevice::bindImageMemory_impl(const uint32_t count, const SBin } -core::smart_refctd_ptr CVulkanLogicalDevice::createBuffer_impl(IGPUBuffer::SCreationParams&& creationParams) +core::smart_refctd_ptr CVulkanLogicalDevice::createBuffer_impl(IGPUBuffer::SCreationParams&& creationParams, bool dedicatedOnly) { VkBufferCreateInfo vk_createInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; - // VkBufferDeviceAddressCreateInfoEXT, VkExternalMemoryBufferCreateInfo, VkVideoProfileKHR, or VkVideoProfilesKHR - vk_createInfo.pNext = nullptr; + // Each pNext member of any structure (including this one) in the pNext chain must be either NULL or a pointer to a valid instance of VkBufferDeviceAddressCreateInfoEXT, VkBufferOpaqueCaptureAddressCreateInfo, VkDedicatedAllocationBufferCreateInfoNV, VkExternalMemoryBufferCreateInfo, VkVideoProfileKHR, or VkVideoProfilesKHR + + VkExternalMemoryBufferCreateInfo externalMemoryInfo = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, + .handleTypes = creationParams.externalHandleTypes.value, + }; + + vk_createInfo.pNext = creationParams.externalHandleTypes.value ? &externalMemoryInfo : nullptr; + vk_createInfo.flags = static_cast(0u); // Nabla doesn't support any of these flags vk_createInfo.size = static_cast(creationParams.size); vk_createInfo.usage = getVkBufferUsageFlagsFromBufferUsageFlags(creationParams.usage); - vk_createInfo.sharingMode = creationParams.isConcurrentSharing() ? 
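// Raw-Vulkan sketch of the export path that allocate() above implements, shown
// for the POSIX fd flavour to complement the Win32-heavy code: chain
// VkExportMemoryAllocateInfo into the allocation, then retrieve the fd with
// vkGetMemoryFdKHR (note this struct's own sType,
// VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR). Assumes VK_KHR_external_memory_fd
// is enabled; `device`, `size` and `memoryTypeIndex` are assumed inputs.
#include <vulkan/vulkan.h>

static int allocateExportableFd(VkDevice device, VkDeviceSize size, uint32_t memoryTypeIndex, VkDeviceMemory* outMem)
{
    VkExportMemoryAllocateInfo exportInfo = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO };
    exportInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;

    VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, &exportInfo };
    allocInfo.allocationSize = size;
    allocInfo.memoryTypeIndex = memoryTypeIndex;
    if (vkAllocateMemory(device, &allocInfo, nullptr, outMem) != VK_SUCCESS)
        return -1;

    // extension entry point, fetched through the device dispatch
    auto pfnGetMemoryFd = reinterpret_cast<PFN_vkGetMemoryFdKHR>(vkGetDeviceProcAddr(device, "vkGetMemoryFdKHR"));

    VkMemoryGetFdInfoKHR getInfo = { VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR };
    getInfo.memory = *outMem;
    getInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;

    int fd = -1;
    if (!pfnGetMemoryFd || pfnGetMemoryFd(device, &getInfo, &fd) != VK_SUCCESS)
    {
        vkFreeMemory(device, *outMem, nullptr);
        return -1;
    }
    return fd; // caller owns the fd and must close() it
}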
VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE; vk_createInfo.queueFamilyIndexCount = creationParams.queueFamilyIndexCount; vk_createInfo.pQueueFamilyIndices = creationParams.queueFamilyIndices; + VkBuffer vk_buffer; if (m_devf.vk.vkCreateBuffer(m_vkdev,&vk_createInfo,nullptr,&vk_buffer)!=VK_SUCCESS) return nullptr; - return core::make_smart_refctd_ptr(this,std::move(creationParams),vk_buffer); + return core::make_smart_refctd_ptr(this,std::move(creationParams), dedicatedOnly, vk_buffer); } core::smart_refctd_ptr CVulkanLogicalDevice::createBufferView_impl(const asset::SBufferRange& underlying, const asset::E_FORMAT _fmt) @@ -322,19 +487,24 @@ core::smart_refctd_ptr CVulkanLogicalDevice::createBufferView_im return nullptr; } -core::smart_refctd_ptr CVulkanLogicalDevice::createImage_impl(IGPUImage::SCreationParams&& params) +core::smart_refctd_ptr CVulkanLogicalDevice::createImage_impl(IGPUImage::SCreationParams&& params, bool dedicatedOnly) { - VkImageStencilUsageCreateInfo vk_stencilUsage = { VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO, nullptr }; - vk_stencilUsage.stencilUsage = getVkImageUsageFlagsFromImageUsageFlags(params.actualStencilUsage().value,true); + VkExternalMemoryImageCreateInfo externalMemoryInfo = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, + .handleTypes = params.externalHandleTypes.value, + }; + + VkImageStencilUsageCreateInfo vk_stencilUsage = { VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO, &externalMemoryInfo }; + vk_stencilUsage.stencilUsage = getVkImageUsageFlagsFromImageUsageFlags(params.actualStencilUsage().value, true); - std::array vk_formatList; + std::array vk_formatList; VkImageFormatListCreateInfo vk_formatListStruct = { VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO, &vk_stencilUsage }; vk_formatListStruct.viewFormatCount = 0u; // if only there existed a nice iterator that would let me iterate over set bits 64 faster if (params.viewFormats.any()) - for (auto fmt=0; fmt(fmt)); + for (auto fmt = 0; fmt < vk_formatList.size(); fmt++) + if (params.viewFormats.test(fmt)) + vk_formatList[vk_formatListStruct.viewFormatCount++] = getVkFormatFromFormat(static_cast(fmt)); vk_formatListStruct.pViewFormats = vk_formatList.data(); VkImageCreateInfo vk_createInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, &vk_formatListStruct }; @@ -346,16 +516,17 @@ core::smart_refctd_ptr CVulkanLogicalDevice::createImage_impl(IGPUIma vk_createInfo.arrayLayers = params.arrayLayers; vk_createInfo.samples = static_cast(params.samples); vk_createInfo.tiling = static_cast(params.tiling); - vk_createInfo.usage = getVkImageUsageFlagsFromImageUsageFlags(params.usage.value,asset::isDepthOrStencilFormat(params.format)); - vk_createInfo.sharingMode = params.isConcurrentSharing() ? VK_SHARING_MODE_CONCURRENT:VK_SHARING_MODE_EXCLUSIVE; + vk_createInfo.usage = getVkImageUsageFlagsFromImageUsageFlags(params.usage.value, asset::isDepthOrStencilFormat(params.format)); + vk_createInfo.sharingMode = params.isConcurrentSharing() ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE; vk_createInfo.queueFamilyIndexCount = params.queueFamilyIndexCount; vk_createInfo.pQueueFamilyIndices = params.queueFamilyIndices; - vk_createInfo.initialLayout = params.preinitialized ? VK_IMAGE_LAYOUT_PREINITIALIZED:VK_IMAGE_LAYOUT_UNDEFINED; + vk_createInfo.initialLayout = params.preinitialized ? 
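// The view-format loop above visits every format index even when only a few
// bits are set ("if only there existed a nice iterator..."). With C++20 bit
// operations one can jump straight between set bits instead. Illustrative
// sketch over a plain 64-bit mask; the engine's viewFormats bitset is wider,
// so this shows the idiom rather than a drop-in replacement.
#include <bit>
#include <cstdint>
#include <vector>

static std::vector<uint32_t> setBitIndices(uint64_t mask)
{
    std::vector<uint32_t> out;
    while (mask)
    {
        out.push_back(static_cast<uint32_t>(std::countr_zero(mask))); // lowest set bit
        mask &= mask - 1ull;                                          // clear it
    }
    return out;
}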
VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED; + VkImage vk_image; - if (m_devf.vk.vkCreateImage(m_vkdev,&vk_createInfo,nullptr,&vk_image)!=VK_SUCCESS) + if (m_devf.vk.vkCreateImage(m_vkdev, &vk_createInfo, nullptr, &vk_image) != VK_SUCCESS) return nullptr; - return core::make_smart_refctd_ptr(this,std::move(params),vk_image); + return core::make_smart_refctd_ptr(this, std::move(params), dedicatedOnly, vk_image); } core::smart_refctd_ptr CVulkanLogicalDevice::createImageView_impl(IGPUImageView::SCreationParams&& params) diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h index 65489d9c53..f18fb3dad4 100644 --- a/src/nbl/video/CVulkanLogicalDevice.h +++ b/src/nbl/video/CVulkanLogicalDevice.h @@ -52,7 +52,7 @@ class CVulkanLogicalDevice final : public ILogicalDevice return CVulkanQueue::getResultFrom(m_devf.vk.vkDeviceWaitIdle(m_vkdev)); } - core::smart_refctd_ptr createSemaphore(const uint64_t initialValue) override; + core::smart_refctd_ptr createSemaphore(uint64_t initialValue, ISemaphore::SCreationParams &&) override; ISemaphore::WAIT_RESULT waitForSemaphores(const std::span infos, const bool waitAll, const uint64_t timeout) override; core::smart_refctd_ptr createEvent(const IEvent::CREATE_FLAGS flags) override; @@ -103,9 +103,9 @@ class CVulkanLogicalDevice final : public ILogicalDevice bool bindImageMemory_impl(const uint32_t count, const SBindImageMemoryInfo* pInfos) override; // descriptor creation - core::smart_refctd_ptr createBuffer_impl(IGPUBuffer::SCreationParams&& creationParams) override; + core::smart_refctd_ptr createBuffer_impl(IGPUBuffer::SCreationParams&& creationParams, bool dedicatedOnly) override; core::smart_refctd_ptr createBufferView_impl(const asset::SBufferRange& underlying, const asset::E_FORMAT _fmt) override; - core::smart_refctd_ptr createImage_impl(IGPUImage::SCreationParams&& params) override; + core::smart_refctd_ptr createImage_impl(IGPUImage::SCreationParams&& params, bool dedicatedOnly) override; core::smart_refctd_ptr createImageView_impl(IGPUImageView::SCreationParams&& params) override; VkAccelerationStructureKHR createAccelerationStructure(const IGPUAccelerationStructure::SCreationParams& params, const VkAccelerationStructureTypeKHR type, const VkAccelerationStructureMotionInfoNV* motionInfo=nullptr); inline core::smart_refctd_ptr createBottomLevelAccelerationStructure_impl(IGPUAccelerationStructure::SCreationParams&& params) override diff --git a/src/nbl/video/CVulkanMemoryAllocation.cpp b/src/nbl/video/CVulkanMemoryAllocation.cpp index 5a4dfd5ff5..7597e33717 100644 --- a/src/nbl/video/CVulkanMemoryAllocation.cpp +++ b/src/nbl/video/CVulkanMemoryAllocation.cpp @@ -4,14 +4,24 @@ namespace nbl::video { CVulkanMemoryAllocation::CVulkanMemoryAllocation( - const CVulkanLogicalDevice* dev, const size_t size, - const core::bitflag flags, - const core::bitflag memoryPropertyFlags, - const bool isDedicated, const VkDeviceMemory deviceMemoryHandle -) : IDeviceMemoryAllocation(dev,size,flags,memoryPropertyFlags,isDedicated), m_vulkanDevice(dev), m_deviceMemoryHandle(deviceMemoryHandle) {} + const CVulkanLogicalDevice* dev, + const VkDeviceMemory deviceMemoryHandle, + SCreationParams&& params +) + : IDeviceMemoryAllocation(dev,std::move(params)) + , m_vulkanDevice(dev) + , m_deviceMemoryHandle(deviceMemoryHandle) +{ +} CVulkanMemoryAllocation::~CVulkanMemoryAllocation() { + if (m_params.externalHandle) + { + bool re = CloseHandle(getCreationParams().externalHandle); + assert(re); + } + 
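// Several destructors in this patch close exported NT handles by hand
// (CVulkanMemoryAllocation above, the CUDA wrappers earlier). A tiny RAII
// guard expresses the same ownership rule; hypothetical helper, not part of
// the patch, Win32 only.
#ifdef _WIN32
#include <windows.h>

class UniqueNTHandle
{
    public:
        explicit UniqueNTHandle(HANDLE h = nullptr) : m_handle(h) {}
        UniqueNTHandle(const UniqueNTHandle&) = delete;
        UniqueNTHandle& operator=(const UniqueNTHandle&) = delete;
        UniqueNTHandle(UniqueNTHandle&& other) noexcept : m_handle(other.release()) {}
        ~UniqueNTHandle() { if (m_handle) CloseHandle(m_handle); }

        HANDLE get() const { return m_handle; }
        HANDLE release() { HANDLE h = m_handle; m_handle = nullptr; return h; }

    private:
        HANDLE m_handle;
};
#endif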
m_vulkanDevice->getFunctionTable()->vk.vkFreeMemory(m_vulkanDevice->getInternalObject(),m_deviceMemoryHandle,nullptr); } diff --git a/src/nbl/video/CVulkanMemoryAllocation.h b/src/nbl/video/CVulkanMemoryAllocation.h index 470e914ae3..d9508411b0 100644 --- a/src/nbl/video/CVulkanMemoryAllocation.h +++ b/src/nbl/video/CVulkanMemoryAllocation.h @@ -15,10 +15,9 @@ class CVulkanMemoryAllocation : public IDeviceMemoryAllocation { public: CVulkanMemoryAllocation( - const CVulkanLogicalDevice* dev, const size_t size, - const core::bitflag flags, - const core::bitflag memoryPropertyFlags, - const bool isDedicated, const VkDeviceMemory deviceMemoryHandle + const CVulkanLogicalDevice* dev, + const VkDeviceMemory deviceMemoryHandle, + SCreationParams&& params ); inline VkDeviceMemory getInternalObject() const { return m_deviceMemoryHandle; } diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index c6304ec836..62dcde7d42 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -1204,6 +1204,7 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart if (isExtensionSupported(VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME)) properties.limits.cooperativeMatrixRobustness = cooperativeMatrixFeatures.robustness; #endif + } // we compare all limits against the defaults easily! diff --git a/src/nbl/video/CVulkanPhysicalDevice.h b/src/nbl/video/CVulkanPhysicalDevice.h index c1552c88f1..9cfebccd3f 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.h +++ b/src/nbl/video/CVulkanPhysicalDevice.h @@ -109,6 +109,79 @@ class CVulkanPhysicalDevice final : public IPhysicalDevice // [NOOP] If sparseImageFloat32AtomicMinMax is enabled, shaderImageFloat32AtomicMinMax must be enabled } + inline static SExternalMemoryProperties mapExternalMemoryProps(VkExternalMemoryProperties const& props) + { + return { + .exportableTypes = props.exportFromImportedHandleTypes, + .compatibleTypes = props.compatibleHandleTypes, + .dedicatedOnly = props.externalMemoryFeatures & VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT ? 1u : 0u, + .exportable = props.externalMemoryFeatures & VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT ? 1u : 0u, + .importable = props.externalMemoryFeatures & VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT ? 
1u : 0u, + }; + } + + SExternalMemoryProperties getExternalBufferProperties_impl(core::bitflag usage, IDeviceMemoryAllocation::E_EXTERNAL_HANDLE_TYPE handleType) const override + { + assert(!(handleType & (handleType - 1))); + VkPhysicalDeviceExternalBufferInfo info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO, + .usage = static_cast(usage.value), + .handleType = static_cast(handleType) + }; + VkExternalBufferProperties externalProps = { VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES }; + vkGetPhysicalDeviceExternalBufferProperties(m_vkPhysicalDevice, &info, &externalProps); + return mapExternalMemoryProps(externalProps.externalMemoryProperties); + } + + SExternalImageFormatProperties getExternalImageProperties_impl( + asset::E_FORMAT format, + IGPUImage::TILING tiling, + IGPUImage::E_TYPE type, + core::bitflag usage, + core::bitflag flags, + IDeviceMemoryAllocation::E_EXTERNAL_HANDLE_TYPE handleType) const override + { + assert(!(handleType & (handleType - 1))); + + VkPhysicalDeviceExternalImageFormatInfo extInfo = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO, + .handleType = static_cast(handleType), + }; + + VkPhysicalDeviceImageFormatInfo2 info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, + .pNext = &extInfo, + .format = getVkFormatFromFormat(format), + .type = static_cast(type), + .tiling = static_cast(tiling), + .usage = usage.value, + .flags = flags.value, + }; + + VkExternalImageFormatProperties externalProps = { VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES }; + + VkImageFormatProperties2 props = { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, + .pNext = &externalProps, + }; + + VkResult re = vkGetPhysicalDeviceImageFormatProperties2(m_vkPhysicalDevice, &info, &props); + if(VK_SUCCESS != re) + return {}; + + return + { + { + .maxExtent = props.imageFormatProperties.maxExtent, + .maxMipLevels = props.imageFormatProperties.maxMipLevels, + .maxArrayLayers = props.imageFormatProperties.maxArrayLayers, + .sampleCounts = static_cast(props.imageFormatProperties.sampleCounts), + .maxResourceSize = props.imageFormatProperties.maxResourceSize, + }, + mapExternalMemoryProps(externalProps.externalMemoryProperties) + }; + } + core::smart_refctd_ptr createLogicalDevice_impl(ILogicalDevice::SCreationParams&& params) override; private: diff --git a/src/nbl/video/CVulkanSemaphore.h b/src/nbl/video/CVulkanSemaphore.h index 9290110d8d..2beb7cb21b 100644 --- a/src/nbl/video/CVulkanSemaphore.h +++ b/src/nbl/video/CVulkanSemaphore.h @@ -15,8 +15,11 @@ class ILogicalDevice; class CVulkanSemaphore final : public ISemaphore { public: - inline CVulkanSemaphore(core::smart_refctd_ptr&& _vkdev, const VkSemaphore semaphore) - : ISemaphore(std::move(_vkdev)), m_semaphore(semaphore) {} + inline CVulkanSemaphore(core::smart_refctd_ptr&& dev, const VkSemaphore semaphore, SCreationParams&& params = {}) + : ISemaphore(std::move(dev), std::move(params)) + , m_semaphore(semaphore) + {} + ~CVulkanSemaphore(); uint64_t getCounterValue() const override; diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 97030ccbba..2902ff7509 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -111,7 +111,7 @@ bool ILogicalDevice::supportsMask(const uint32_t queueFamilyIndex, core::bitflag return getSupportedStageMask(queueFamilyIndex).hasFlags(stageMask); } -bool ILogicalDevice::supportsMask(const uint32_t queueFamilyIndex, core::bitflag stageMask) const +bool 
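// Raw-Vulkan sketch of the capability query the two overrides above wrap: ask
// whether a usage/handle-type pair can be exported, imported, and whether it
// requires a dedicated allocation. `physDev` is an assumed input.
#include <vulkan/vulkan.h>

struct ExternalBufferCaps { bool exportable, importable, dedicatedOnly; };

static ExternalBufferCaps queryExternalBufferCaps(
    VkPhysicalDevice physDev, VkBufferUsageFlags usage, VkExternalMemoryHandleTypeFlagBits handleType)
{
    VkPhysicalDeviceExternalBufferInfo info = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO };
    info.usage = usage;
    info.handleType = handleType;

    VkExternalBufferProperties props = { VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES };
    vkGetPhysicalDeviceExternalBufferProperties(physDev, &info, &props);

    const VkExternalMemoryFeatureFlags f = props.externalMemoryProperties.externalMemoryFeatures;
    return {
        (f & VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT) != 0,
        (f & VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT) != 0,
        (f & VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT) != 0,
    };
}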
ILogicalDevice::supportsMask(const uint32_t queueFamilyIndex, core::bitflag accesMask) const { if (queueFamilyIndex>m_queueFamilyInfos->size()) return false; @@ -119,15 +119,15 @@ bool ILogicalDevice::supportsMask(const uint32_t queueFamilyIndex, core::bitflag const auto& familyProps = m_physicalDevice->getQueueFamilyProperties()[queueFamilyIndex].queueFlags; const bool shaderCapableFamily = bool(familyProps&(q_family_flags_t::COMPUTE_BIT|q_family_flags_t::GRAPHICS_BIT)); // strip special values - if (stageMask.hasFlags(asset::ACCESS_FLAGS::MEMORY_READ_BITS)) - stageMask ^= asset::ACCESS_FLAGS::MEMORY_READ_BITS; - else if (stageMask.hasFlags(asset::ACCESS_FLAGS::SHADER_READ_BITS) && shaderCapableFamily) - stageMask ^= asset::ACCESS_FLAGS::SHADER_READ_BITS; - if (stageMask.hasFlags(asset::ACCESS_FLAGS::MEMORY_WRITE_BITS)) - stageMask ^= asset::ACCESS_FLAGS::MEMORY_WRITE_BITS; - else if (stageMask.hasFlags(asset::ACCESS_FLAGS::SHADER_WRITE_BITS) && shaderCapableFamily) - stageMask ^= asset::ACCESS_FLAGS::SHADER_WRITE_BITS; - return getSupportedAccessMask(queueFamilyIndex).hasFlags(stageMask); + if (accesMask.hasFlags(asset::ACCESS_FLAGS::MEMORY_READ_BITS)) + accesMask ^= asset::ACCESS_FLAGS::MEMORY_READ_BITS; + else if (accesMask.hasFlags(asset::ACCESS_FLAGS::SHADER_READ_BITS) && shaderCapableFamily) + accesMask ^= asset::ACCESS_FLAGS::SHADER_READ_BITS; + if (accesMask.hasFlags(asset::ACCESS_FLAGS::MEMORY_WRITE_BITS)) + accesMask ^= asset::ACCESS_FLAGS::MEMORY_WRITE_BITS; + else if (accesMask.hasFlags(asset::ACCESS_FLAGS::SHADER_WRITE_BITS) && shaderCapableFamily) + accesMask ^= asset::ACCESS_FLAGS::SHADER_WRITE_BITS; + return getSupportedAccessMask(queueFamilyIndex).hasFlags(accesMask); } bool ILogicalDevice::validateMemoryBarrier(const uint32_t queueFamilyIndex, asset::SMemoryBarrier barrier) const @@ -647,4 +647,74 @@ bool ILogicalDevice::createGraphicsPipelines( if (!output[i]) return false; return true; +} + +core::smart_refctd_ptr ILogicalDevice::createBuffer(IGPUBuffer::SCreationParams&& creationParams) +{ + const auto maxSize = getPhysicalDeviceLimits().maxBufferSize; + if (creationParams.size > maxSize) + { + m_logger.log("Failed to create Buffer, size %d larger than Device %p's limit!", system::ILogger::ELL_ERROR, creationParams.size, this, maxSize); + return nullptr; + } + + bool dedicatedOnly = false; + if (creationParams.externalHandleTypes.value) + { + core::bitflag requestedTypes = creationParams.externalHandleTypes; + + while (const auto idx = hlsl::findLSB(static_cast(requestedTypes.value)) + 1) + { + const auto handleType = static_cast(1u << (idx - 1)); + requestedTypes ^= handleType; + + auto props = m_physicalDevice->getExternalBufferProperties(creationParams.usage, handleType); + + if (!core::bitflag(static_cast(props.compatibleTypes)).hasFlags(creationParams.externalHandleTypes)) // incompatibility between requested types + return nullptr; + + dedicatedOnly |= props.dedicatedOnly; + } + } + return createBuffer_impl(std::move(creationParams), dedicatedOnly); +} + +core::smart_refctd_ptr ILogicalDevice::createImage(IGPUImage::SCreationParams&& params) +{ + if (!IGPUImage::validateCreationParameters(params)) + { + m_logger.log("Failed to create Image, invalid creation parameters!", system::ILogger::ELL_ERROR); + return nullptr; + } + + const bool external = params.externalHandleTypes.value; + bool dedicatedOnly = false; + if (external) + { + core::bitflag requestedTypes = params.externalHandleTypes; + while (const auto idx = hlsl::findLSB(static_cast(requestedTypes.value)) 
+ 1) + { + const auto handleType = static_cast(1u << (idx - 1)); + requestedTypes ^= handleType; + + auto props = m_physicalDevice->getExternalImageProperties(params.format, params.tiling, params.type, params.usage, params.flags, handleType); + + if (props.maxArrayLayers < params.arrayLayers || + !core::bitflag(props.sampleCounts).hasFlags(params.samples) || + /* props.maxResourceSize?? */ + props.maxExtent.width < params.extent.width || + props.maxExtent.height < params.extent.height || + props.maxExtent.depth < params.extent.depth) + { + return nullptr; + } + + if (!core::bitflag(static_cast(props.compatibleTypes)).hasFlags(params.externalHandleTypes)) // incompatibility between requested types + return nullptr; + + dedicatedOnly |= props.dedicatedOnly; + } + } + // TODO: @Cyprian validation of creationParams against the device's limits (sample counts, etc.) see vkCreateImage + return createImage_impl(std::move(params), dedicatedOnly); } \ No newline at end of file diff --git a/src/nbl/video/IQueue.cpp b/src/nbl/video/IQueue.cpp index e75e7b2cad..2527562bac 100644 --- a/src/nbl/video/IQueue.cpp +++ b/src/nbl/video/IQueue.cpp @@ -13,8 +13,15 @@ auto IQueue::submit(const std::span _submits) -> RESULT auto* logger = m_originDevice->getPhysicalDevice()->getDebugCallback()->getLogger(); for (const auto& submit : _submits) { - if (!submit.valid()) + switch (submit.valid()) + { + case SSubmitInfo::INVALID: return RESULT::OTHER_ERROR; + case SSubmitInfo::WORK_WITHOUT_SYNC: + logger->log("Work without sync!", system::ILogger::ELL_WARNING); + default: + break; + } auto invalidSemaphores = [this,logger](const std::span semaphoreInfos) -> bool {
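// The createBuffer/createImage overloads above walk the requested external
// handle-type mask one bit at a time and fold the per-type "dedicatedOnly"
// requirement into a single flag. Equivalent loop in standard C++20, with the
// per-type query left abstract (`queryDedicatedOnly` is a placeholder).
#include <bit>
#include <cstdint>

static bool anyTypeDedicatedOnly(uint32_t handleTypeMask, bool (*queryDedicatedOnly)(uint32_t singleType))
{
    bool dedicatedOnly = false;
    while (handleTypeMask)
    {
        const uint32_t singleType = 1u << std::countr_zero(handleTypeMask); // lowest requested type
        handleTypeMask ^= singleType;                                       // strip it, like the findLSB loop above
        dedicatedOnly |= queryDedicatedOnly(singleType);
    }
    return dedicatedOnly;
}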