diff --git a/src/d3d11/d3d11_device.cpp b/src/d3d11/d3d11_device.cpp index 9398e484208..db0d5faf26f 100644 --- a/src/d3d11/d3d11_device.cpp +++ b/src/d3d11/d3d11_device.cpp @@ -15,6 +15,7 @@ #include "d3d11_device.h" #include "d3d11_fence.h" #include "d3d11_input_layout.h" +#include "d3d11_interfaces.h" #include "d3d11_interop.h" #include "d3d11_query.h" #include "d3d11_resource.h" @@ -2469,12 +2470,14 @@ namespace dxvk { return deviceFeatures.nvxBinaryImport && deviceFeatures.vk12.bufferDeviceAddress; + case D3D11_VK_NV_LOW_LATENCY_2: + return deviceFeatures.nvLowLatency2; + default: return false; } } - - + bool STDMETHODCALLTYPE D3D11DeviceExt::GetCudaTextureObjectNVX(uint32_t srvDriverHandle, uint32_t samplerDriverHandle, uint32_t* pCudaTextureHandle) { ID3D11ShaderResourceView* srv = HandleToSrvNVX(srvDriverHandle); @@ -2783,8 +2786,132 @@ namespace dxvk { - - + + D3D11LowLatencyDevice::D3D11LowLatencyDevice( + D3D11DXGIDevice* pContainer, + D3D11Device* pDevice) + : m_container(pContainer), m_device(pDevice) { + + } + + + ULONG STDMETHODCALLTYPE D3D11LowLatencyDevice::AddRef() { + return m_container->AddRef(); + } + + + ULONG STDMETHODCALLTYPE D3D11LowLatencyDevice::Release() { + return m_container->Release(); + } + + + HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::QueryInterface( + REFIID riid, + void** ppvObject) { + return m_container->QueryInterface(riid, ppvObject); + } + + + BOOL STDMETHODCALLTYPE D3D11LowLatencyDevice::SupportsLowLatency() { + return m_device->GetDXVKDevice()->features().nvLowLatency2; + } + + + HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::LatencySleep() { + if (!m_device->GetDXVKDevice()->features().nvLowLatency2) { + return E_NOINTERFACE; + } + + D3D11SwapChain* pSwapChain = m_device->GetLowLatencySwapChain(); + if (pSwapChain && pSwapChain->LowLatencyEnabled()) { + pSwapChain->LatencySleep(); + } + + return S_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::SetLatencySleepMode(BOOL lowLatencyMode, BOOL 
lowLatencyBoost, uint32_t minimumIntervalUs) { + if (!m_device->GetDXVKDevice()->features().nvLowLatency2) { + return E_NOINTERFACE; + } + + D3D11SwapChain* pSwapChain = m_device->GetLowLatencySwapChain(); + if (pSwapChain) { + pSwapChain->SetLatencySleepMode(lowLatencyMode, lowLatencyBoost, minimumIntervalUs); + } + + return S_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::SetLatencyMarker(uint64_t frameID, uint32_t markerType) { + if (!m_device->GetDXVKDevice()->features().nvLowLatency2) { + return E_NOINTERFACE; + } + + D3D11SwapChain* pSwapChain = m_device->GetLowLatencySwapChain(); + VkLatencyMarkerNV marker = static_cast(markerType); + uint64_t internalFrameId = frameID + DXGI_MAX_SWAP_CHAIN_BUFFERS; + + m_device->GetDXVKDevice()->setLatencyMarker(marker, internalFrameId); + + if (pSwapChain && pSwapChain->LowLatencyEnabled()) { + pSwapChain->SetLatencyMarker(marker, internalFrameId); + } + + return S_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::GetLatencyInfo(D3D11_LATENCY_RESULTS* latencyResults) + { + if (!m_device->GetDXVKDevice()->features().nvLowLatency2) { + return E_NOINTERFACE; + } + + constexpr uint32_t frameReportSize = 64; + D3D11SwapChain* pSwapChain = m_device->GetLowLatencySwapChain(); + + if (pSwapChain && pSwapChain->LowLatencyEnabled()) { + std::vector frameReports; + pSwapChain->GetLatencyTimings(frameReports); + + if (frameReports.size() >= frameReportSize) { + for (uint32_t i = 0; i < frameReportSize; i++) { + VkLatencyTimingsFrameReportNV& frameReport = frameReports[i]; + latencyResults->frame_reports[i].frameID = frameReport.presentID - DXGI_MAX_SWAP_CHAIN_BUFFERS; + latencyResults->frame_reports[i].inputSampleTime = frameReport.inputSampleTimeUs; + latencyResults->frame_reports[i].simStartTime = frameReport.simStartTimeUs; + latencyResults->frame_reports[i].simEndTime = frameReport.simEndTimeUs; + latencyResults->frame_reports[i].renderSubmitStartTime = frameReport.renderSubmitStartTimeUs; + 
latencyResults->frame_reports[i].renderSubmitEndTime = frameReport.renderSubmitEndTimeUs; + latencyResults->frame_reports[i].presentStartTime = frameReport.presentStartTimeUs; + latencyResults->frame_reports[i].presentEndTime = frameReport.presentEndTimeUs; + latencyResults->frame_reports[i].driverStartTime = frameReport.driverStartTimeUs; + latencyResults->frame_reports[i].driverEndTime = frameReport.driverEndTimeUs; + latencyResults->frame_reports[i].osRenderQueueStartTime = frameReport.osRenderQueueStartTimeUs; + latencyResults->frame_reports[i].osRenderQueueEndTime = frameReport.osRenderQueueEndTimeUs; + latencyResults->frame_reports[i].gpuRenderStartTime = frameReport.gpuRenderStartTimeUs; + latencyResults->frame_reports[i].gpuRenderEndTime = frameReport.gpuRenderEndTimeUs; + latencyResults->frame_reports[i].gpuActiveRenderTimeUs = + frameReport.gpuRenderEndTimeUs - frameReport.gpuRenderStartTimeUs; + latencyResults->frame_reports[i].gpuFrameTimeUs = 0; + + if (i) { + latencyResults->frame_reports[i].gpuFrameTimeUs = + frameReports[i].gpuRenderEndTimeUs - frameReports[i - 1].gpuRenderEndTimeUs; + } + } + } + } + + return S_OK; + } + + + + D3D11VideoDevice::D3D11VideoDevice( D3D11DXGIDevice* pContainer, D3D11Device* pDevice) @@ -3021,7 +3148,11 @@ namespace dxvk { Com presenter = new D3D11SwapChain( m_container, m_device, pSurfaceFactory, pDesc); - + + if (m_device->GetDXVKDevice()->features().nvLowLatency2) { + m_device->AddSwapchain(presenter.ref()); + } + *ppSwapChain = presenter.ref(); return S_OK; } catch (const DxvkError& e) { @@ -3078,17 +3209,18 @@ namespace dxvk { Rc pDxvkDevice, D3D_FEATURE_LEVEL FeatureLevel, UINT FeatureFlags) - : m_dxgiAdapter (pAdapter), - m_dxvkInstance (pDxvkInstance), - m_dxvkAdapter (pDxvkAdapter), - m_dxvkDevice (pDxvkDevice), - m_d3d11Device (this, FeatureLevel, FeatureFlags), - m_d3d11DeviceExt(this, &m_d3d11Device), - m_d3d11Interop (this, &m_d3d11Device), - m_d3d11Video (this, &m_d3d11Device), - m_d3d11on12 (this, 
&m_d3d11Device, pD3D12Device, pD3D12Queue), - m_metaDevice (this), - m_dxvkFactory (this, &m_d3d11Device) { + : m_dxgiAdapter (pAdapter), + m_dxvkInstance (pDxvkInstance), + m_dxvkAdapter (pDxvkAdapter), + m_dxvkDevice (pDxvkDevice), + m_d3d11Device (this, FeatureLevel, FeatureFlags), + m_d3d11DeviceExt (this, &m_d3d11Device), + m_d3d11Interop (this, &m_d3d11Device), + m_d3dLowLatencyDevice (this, &m_d3d11Device), + m_d3d11Video (this, &m_d3d11Device), + m_d3d11on12 (this, &m_d3d11Device, pD3D12Device, pD3D12Queue), + m_metaDevice (this), + m_dxvkFactory (this, &m_d3d11Device) { } @@ -3142,7 +3274,12 @@ namespace dxvk { *ppvObject = ref(&m_d3d11DeviceExt); return S_OK; } - + + if (riid == __uuidof(ID3DLowLatencyDevice)) { + *ppvObject = ref(&m_d3dLowLatencyDevice); + return S_OK; + } + if (riid == __uuidof(IDXGIDXVKDevice)) { *ppvObject = ref(&m_metaDevice); return S_OK; diff --git a/src/d3d11/d3d11_device.h b/src/d3d11/d3d11_device.h index 7a44b5ad99c..c04fcd75768 100644 --- a/src/d3d11/d3d11_device.h +++ b/src/d3d11/d3d11_device.h @@ -24,6 +24,7 @@ #include "d3d11_options.h" #include "d3d11_shader.h" #include "d3d11_state.h" +#include "d3d11_swapchain.h" #include "d3d11_util.h" namespace dxvk { @@ -428,6 +429,18 @@ namespace dxvk { bool Is11on12Device() const; + void AddSwapchain(D3D11SwapChain* swapchain) { + m_swapchains.push_back(swapchain); + } + + void RemoveSwapchain(D3D11SwapChain* swapchain) { + m_swapchains.erase(std::remove(m_swapchains.begin(), m_swapchains.end(), swapchain), m_swapchains.end()); /* range erase: no-op if the swap chain is absent */ + } + + D3D11SwapChain* GetLowLatencySwapChain() { + return (m_swapchains.size() == 1) ? 
m_swapchains[0] : nullptr; + } + static D3D_FEATURE_LEVEL GetMaxFeatureLevel( const Rc& Instance, const Rc& Adapter); @@ -464,6 +477,8 @@ namespace dxvk { D3D_FEATURE_LEVEL m_maxFeatureLevel; D3D11DeviceFeatures m_deviceFeatures; + std::vector m_swapchains; + HRESULT CreateShaderModule( D3D11CommonShader* pShaderModule, DxvkShaderKey ShaderKey, @@ -545,28 +560,28 @@ namespace dxvk { uint64_t* gpuVAStart, uint64_t* gpuVASize); - bool STDMETHODCALLTYPE CreateUnorderedAccessViewAndGetDriverHandleNVX( + bool STDMETHODCALLTYPE CreateUnorderedAccessViewAndGetDriverHandleNVX( ID3D11Resource* pResource, const D3D11_UNORDERED_ACCESS_VIEW_DESC* pDesc, ID3D11UnorderedAccessView** ppUAV, uint32_t* pDriverHandle); - bool STDMETHODCALLTYPE CreateShaderResourceViewAndGetDriverHandleNVX( + bool STDMETHODCALLTYPE CreateShaderResourceViewAndGetDriverHandleNVX( ID3D11Resource* pResource, const D3D11_SHADER_RESOURCE_VIEW_DESC* pDesc, ID3D11ShaderResourceView** ppSRV, uint32_t* pDriverHandle); - bool STDMETHODCALLTYPE CreateSamplerStateAndGetDriverHandleNVX( + bool STDMETHODCALLTYPE CreateSamplerStateAndGetDriverHandleNVX( const D3D11_SAMPLER_DESC* pSamplerDesc, ID3D11SamplerState** ppSamplerState, uint32_t* pDriverHandle); - + private: D3D11DXGIDevice* m_container; D3D11Device* m_device; - + void AddSamplerAndHandleNVX( ID3D11SamplerState* pSampler, uint32_t Handle); @@ -586,6 +601,46 @@ namespace dxvk { std::unordered_map m_srvHandleToPtr; }; + /** + * \brief D3D11 low latency device + */ + class D3D11LowLatencyDevice : public ID3DLowLatencyDevice { + + public: + + D3D11LowLatencyDevice( + D3D11DXGIDevice* pContainer, + D3D11Device* pDevice); + + ULONG STDMETHODCALLTYPE AddRef(); + + ULONG STDMETHODCALLTYPE Release(); + + HRESULT STDMETHODCALLTYPE QueryInterface( + REFIID riid, + void** ppvObject); + + BOOL STDMETHODCALLTYPE SupportsLowLatency(); + + HRESULT STDMETHODCALLTYPE LatencySleep(); + + HRESULT STDMETHODCALLTYPE SetLatencySleepMode( + BOOL lowLatencyMode, + BOOL 
lowLatencyBoost, + uint32_t minimumIntervalUs); + + HRESULT STDMETHODCALLTYPE SetLatencyMarker( + uint64_t frameID, + uint32_t markerType); + + HRESULT STDMETHODCALLTYPE GetLatencyInfo( + D3D11_LATENCY_RESULTS* latencyResults); + + private: + + D3D11DXGIDevice* m_container; + D3D11Device* m_device; + }; /** * \brief D3D11 video device @@ -856,12 +911,13 @@ namespace dxvk { Rc m_dxvkAdapter; Rc m_dxvkDevice; - D3D11Device m_d3d11Device; - D3D11DeviceExt m_d3d11DeviceExt; - D3D11VkInterop m_d3d11Interop; - D3D11VideoDevice m_d3d11Video; - D3D11on12Device m_d3d11on12; - DXGIDXVKDevice m_metaDevice; + D3D11Device m_d3d11Device; + D3D11DeviceExt m_d3d11DeviceExt; + D3D11VkInterop m_d3d11Interop; + D3D11LowLatencyDevice m_d3dLowLatencyDevice; + D3D11VideoDevice m_d3d11Video; + D3D11on12Device m_d3d11on12; + DXGIDXVKDevice m_metaDevice; DXGIVkSwapChainFactory m_dxvkFactory; diff --git a/src/d3d11/d3d11_initializer.cpp b/src/d3d11/d3d11_initializer.cpp index 105485fabb9..9a97905fab6 100644 --- a/src/d3d11/d3d11_initializer.cpp +++ b/src/d3d11/d3d11_initializer.cpp @@ -280,7 +280,7 @@ namespace dxvk { void D3D11Initializer::FlushInternal() { - m_context->flushCommandList(nullptr); + m_context->flushCommandList(nullptr, false); m_transferCommands = 0; m_transferMemory = 0; diff --git a/src/d3d11/d3d11_interfaces.h b/src/d3d11/d3d11_interfaces.h index 8a2e6fcf4ff..f33eb6f34e2 100644 --- a/src/d3d11/d3d11_interfaces.h +++ b/src/d3d11/d3d11_interfaces.h @@ -16,6 +16,7 @@ enum D3D11_VK_EXTENSION : uint32_t { D3D11_VK_EXT_BARRIER_CONTROL = 3, D3D11_VK_NVX_BINARY_IMPORT = 4, D3D11_VK_NVX_IMAGE_VIEW_HANDLE = 5, + D3D11_VK_NV_LOW_LATENCY_2 = 6 }; @@ -27,6 +28,33 @@ enum D3D11_VK_BARRIER_CONTROL : uint32_t { D3D11_VK_BARRIER_CONTROL_IGNORE_GRAPHICS_UAV = 1 << 1, }; +/** + * \brief Frame Report Info + */ +typedef struct D3D11_LATENCY_RESULTS +{ + UINT32 version; + struct D3D11_FRAME_REPORT { + UINT64 frameID; + UINT64 inputSampleTime; + UINT64 simStartTime; + UINT64 simEndTime; + 
UINT64 renderSubmitStartTime; + UINT64 renderSubmitEndTime; + UINT64 presentStartTime; + UINT64 presentEndTime; + UINT64 driverStartTime; + UINT64 driverEndTime; + UINT64 osRenderQueueStartTime; + UINT64 osRenderQueueEndTime; + UINT64 gpuRenderStartTime; + UINT64 gpuRenderEndTime; + UINT32 gpuActiveRenderTimeUs; + UINT32 gpuFrameTimeUs; + UINT8 rsvd[120]; + } frame_reports[64]; + UINT8 rsvd[32]; +} D3D11_LATENCY_RESULTS; /** * \brief Extended shader interface @@ -114,6 +142,33 @@ ID3D11VkExtDevice1 : public ID3D11VkExtDevice { uint32_t* pCudaTextureHandle) = 0; }; +/** + * \brief D3D low latency device + * + * Introduces methods to query low latency support, + * configure the latency sleep mode, sleep, set latency + * markers and retrieve latency timing results. + */ +MIDL_INTERFACE("f3112584-41f9-348d-a59b-00b7e1d285d6") +ID3DLowLatencyDevice : public IUnknown { + static const GUID guid; + + virtual BOOL STDMETHODCALLTYPE SupportsLowLatency() = 0; + + virtual HRESULT STDMETHODCALLTYPE LatencySleep() = 0; + + virtual HRESULT STDMETHODCALLTYPE SetLatencySleepMode( + BOOL lowLatencyMode, + BOOL lowLatencyBoost, + uint32_t minimumIntervalUs) = 0; + + virtual HRESULT STDMETHODCALLTYPE SetLatencyMarker( + uint64_t frameID, + uint32_t markerType) = 0; + + virtual HRESULT STDMETHODCALLTYPE GetLatencyInfo( + D3D11_LATENCY_RESULTS* latencyResults) = 0; +}; /** * \brief Extended D3D11 context @@ -189,4 +244,5 @@ __CRT_UUID_DECL(ID3D11VkExtDevice, 0x8a6e3c42,0xf74c,0x45b7,0x82,0x65,0x __CRT_UUID_DECL(ID3D11VkExtDevice1, 0xcfcf64ef,0x9586,0x46d0,0xbc,0xa4,0x97,0xcf,0x2c,0xa6,0x1b,0x06); __CRT_UUID_DECL(ID3D11VkExtContext, 0xfd0bca13,0x5cb6,0x4c3a,0x98,0x7e,0x47,0x50,0xde,0x2c,0xa7,0x91); __CRT_UUID_DECL(ID3D11VkExtContext1, 0x874b09b2,0xae0b,0x41d8,0x84,0x76,0x5f,0x3b,0x7a,0x0e,0x87,0x9d); +__CRT_UUID_DECL(ID3DLowLatencyDevice, 0xf3112584,0x41f9,0x348d,0xa5,0x9b,0x00,0xb7,0xe1,0xd2,0x85,0xd6); #endif diff --git a/src/d3d11/d3d11_swapchain.cpp b/src/d3d11/d3d11_swapchain.cpp index 
0e823f410ef..d224187515f 100644 --- a/src/d3d11/d3d11_swapchain.cpp +++ b/src/d3d11/d3d11_swapchain.cpp @@ -84,7 +84,11 @@ namespace dxvk { m_device->waitForSubmission(&m_presentStatus); m_device->waitForIdle(); - + + if (m_device->features().nvLowLatency2) { + m_parent->RemoveSwapchain(this); + } + DestroyFrameLatencyEvent(); } @@ -352,6 +356,43 @@ namespace dxvk { } + void D3D11SwapChain::SetLatencySleepMode( + bool lowLatencyMode, + bool lowLatencyBoost, + uint32_t minimumIntervalUs) { + m_presenter->setLatencySleepMode(lowLatencyMode, lowLatencyBoost, minimumIntervalUs); + + // The swapchain will have its low latency state updated + // when it gets recreated. The swapchain needs to be recreated + // to ensure we can use the frameId provided by the application + // as a presentId as presentation time. + m_dirty = true; + } + + + void D3D11SwapChain::LatencySleep() { + m_presenter->latencySleep(); + } + + + void D3D11SwapChain::SetLatencyMarker( + VkLatencyMarkerNV marker, + uint64_t presentId) { + m_presenter->setLatencyMarker(marker, presentId); + } + + + VkResult D3D11SwapChain::GetLatencyTimings( + std::vector& frameReports) { + return m_presenter->getLatencyTimings(frameReports); + } + + + bool D3D11SwapChain::LowLatencyEnabled() { + return m_presenter->lowLatencyEnabled(); + } + + HRESULT D3D11SwapChain::PresentImage(UINT SyncInterval) { // Flush pending rendering commands before auto immediateContext = m_parent->GetContext(); @@ -410,9 +451,11 @@ namespace dxvk { uint32_t Repeat) { auto lock = pContext->LockContext(); - // Bump frame ID as necessary - if (!Repeat) - m_frameId += 1; + if (!Repeat) { + m_frameId = (m_presenter->lowLatencyEnabled() && m_device->getLatencyMarkers().present) ? + m_device->getLatencyMarkers().present : + m_frameId + 1; + } // Present from CS thread so that we don't // have to synchronize with it first. 
diff --git a/src/d3d11/d3d11_swapchain.h b/src/d3d11/d3d11_swapchain.h index 00073d7690e..cfd8852c9ce 100644 --- a/src/d3d11/d3d11_swapchain.h +++ b/src/d3d11/d3d11_swapchain.h @@ -86,6 +86,22 @@ namespace dxvk { void STDMETHODCALLTYPE GetFrameStatistics( DXGI_VK_FRAME_STATISTICS* pFrameStatistics); + void SetLatencySleepMode( + bool lowLatencyMode, + bool lowLatencyBoost, + uint32_t minimumIntervalUs); + + void LatencySleep(); + + void SetLatencyMarker( + VkLatencyMarkerNV marker, + uint64_t presentId); + + VkResult GetLatencyTimings( + std::vector& frameReports); + + bool LowLatencyEnabled(); + private: enum BindingIds : uint32_t { @@ -176,4 +192,4 @@ namespace dxvk { }; -} \ No newline at end of file +} diff --git a/src/dxvk/dxvk_adapter.cpp b/src/dxvk/dxvk_adapter.cpp index cf4c3cce68f..6a3b4f08180 100644 --- a/src/dxvk/dxvk_adapter.cpp +++ b/src/dxvk/dxvk_adapter.cpp @@ -927,6 +927,9 @@ namespace dxvk { m_deviceFeatures.khrPresentWait.pNext = std::exchange(m_deviceFeatures.core.pNext, &m_deviceFeatures.khrPresentWait); } + if (m_deviceExtensions.supports(VK_NV_LOW_LATENCY_2_EXTENSION_NAME)) + m_deviceFeatures.nvLowLatency2 = VK_TRUE; + if (m_deviceExtensions.supports(VK_NVX_BINARY_IMPORT_EXTENSION_NAME)) m_deviceFeatures.nvxBinaryImport = VK_TRUE; @@ -994,6 +997,7 @@ namespace dxvk { &devExtensions.khrPresentWait, &devExtensions.khrSwapchain, &devExtensions.khrWin32KeyedMutex, + &devExtensions.nvLowLatency2, &devExtensions.nvxBinaryImport, &devExtensions.nvxImageViewHandle, }}; @@ -1133,8 +1137,13 @@ namespace dxvk { enabledFeatures.khrPresentWait.pNext = std::exchange(enabledFeatures.core.pNext, &enabledFeatures.khrPresentWait); } - if (devExtensions.nvxBinaryImport) + if (devExtensions.nvxBinaryImport) { enabledFeatures.nvxBinaryImport = VK_TRUE; + } + + if (devExtensions.nvLowLatency2) { + enabledFeatures.nvLowLatency2 = VK_TRUE; + } if (devExtensions.nvxImageViewHandle) enabledFeatures.nvxImageViewHandle = VK_TRUE; @@ -1279,6 +1288,8 @@ namespace dxvk { 
"\n presentId : ", features.khrPresentId.presentId ? "1" : "0", "\n", VK_KHR_PRESENT_WAIT_EXTENSION_NAME, "\n presentWait : ", features.khrPresentWait.presentWait ? "1" : "0", + "\n", VK_NV_LOW_LATENCY_2_EXTENSION_NAME, + "\n extension supported : ", features.nvLowLatency2 ? "1" : "0", "\n", VK_NVX_BINARY_IMPORT_EXTENSION_NAME, "\n extension supported : ", features.nvxBinaryImport ? "1" : "0", "\n", VK_NVX_IMAGE_VIEW_HANDLE_EXTENSION_NAME, diff --git a/src/dxvk/dxvk_cmdlist.cpp b/src/dxvk/dxvk_cmdlist.cpp index 3bd3aa953d4..54b50ea533c 100644 --- a/src/dxvk/dxvk_cmdlist.cpp +++ b/src/dxvk/dxvk_cmdlist.cpp @@ -56,10 +56,12 @@ namespace dxvk { VkResult DxvkCommandSubmission::submit( DxvkDevice* device, - VkQueue queue) { + VkQueue queue, + uint64_t frameId) { auto vk = device->vkd(); VkSubmitInfo2 submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO_2 }; + VkLatencySubmissionPresentIdNV latencySubmitInfo = { VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV }; if (!m_semaphoreWaits.empty()) { submitInfo.waitSemaphoreInfoCount = m_semaphoreWaits.size(); @@ -76,6 +78,11 @@ namespace dxvk { submitInfo.pSignalSemaphoreInfos = m_semaphoreSignals.data(); } + if (device->features().nvLowLatency2 && frameId && !m_commandBuffers.empty()) { + latencySubmitInfo.presentID = frameId; + latencySubmitInfo.pNext = std::exchange(submitInfo.pNext, &latencySubmitInfo); + } + VkResult vr = VK_SUCCESS; if (!this->isEmpty()) @@ -206,7 +213,7 @@ namespace dxvk { } - VkResult DxvkCommandList::submit() { + VkResult DxvkCommandList::submit(uint64_t frameId) { VkResult status = VK_SUCCESS; const auto& graphics = m_device->queues().graphics; @@ -238,7 +245,7 @@ namespace dxvk { // for any prior submissions, then block any subsequent ones m_commandSubmission.signalSemaphore(m_bindSemaphore, 0, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT); - if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle))) + if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle, frameId))) return 
status; sparseBind->waitSemaphore(m_bindSemaphore, 0); @@ -259,7 +266,7 @@ namespace dxvk { if (m_device->hasDedicatedTransferQueue() && !m_commandSubmission.isEmpty()) { m_commandSubmission.signalSemaphore(m_sdmaSemaphore, 0, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT); - if ((status = m_commandSubmission.submit(m_device, transfer.queueHandle))) + if ((status = m_commandSubmission.submit(m_device, transfer.queueHandle, frameId))) return status; m_commandSubmission.waitSemaphore(m_sdmaSemaphore, 0, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT); @@ -297,7 +304,7 @@ namespace dxvk { } // Finally, submit all graphics commands of the current submission - if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle))) + if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle, frameId))) return status; } diff --git a/src/dxvk/dxvk_cmdlist.h b/src/dxvk/dxvk_cmdlist.h index b9b9a165dd3..f9527516e17 100644 --- a/src/dxvk/dxvk_cmdlist.h +++ b/src/dxvk/dxvk_cmdlist.h @@ -94,7 +94,8 @@ namespace dxvk { */ VkResult submit( DxvkDevice* device, - VkQueue queue); + VkQueue queue, + uint64_t frameId); /** * \brief Resets object @@ -199,7 +200,7 @@ namespace dxvk { * \brief Submits command list * \returns Submission status */ - VkResult submit(); + VkResult submit(uint64_t frameId); /** * \brief Stat counters diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index 80c2620de8e..f80ef9954b2 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -105,9 +105,9 @@ namespace dxvk { } - void DxvkContext::flushCommandList(DxvkSubmitStatus* status) { + void DxvkContext::flushCommandList(DxvkSubmitStatus* status, bool enableFrameId) { m_device->submitCommandList( - this->endRecording(), status); + this->endRecording(), status, enableFrameId); this->beginRecording( m_device->createCommandList()); diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h index 93ed91e3603..0d1d094b074 100644 --- a/src/dxvk/dxvk_context.h +++ 
b/src/dxvk/dxvk_context.h @@ -65,8 +65,9 @@ namespace dxvk { * Transparently submits the current command * buffer and allocates a new one. * \param [out] status Submission feedback + * \param [in] enableFrameId Submission should include the frame id */ - void flushCommandList(DxvkSubmitStatus* status); + void flushCommandList(DxvkSubmitStatus* status, bool enableFrameId = true); /** * \brief Begins generating query data diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp index 9a053791a7b..1e2c34b958c 100644 --- a/src/dxvk/dxvk_device.cpp +++ b/src/dxvk/dxvk_device.cpp @@ -18,6 +18,7 @@ namespace dxvk { m_properties (adapter->devicePropertiesExt()), m_perfHints (getPerfHints()), m_objects (this), + m_latencyMarkers ({}), m_queues (queues), m_submissionQueue (this, queueCallback) { @@ -271,9 +272,12 @@ namespace dxvk { void DxvkDevice::submitCommandList( const Rc& commandList, - DxvkSubmitStatus* status) { + DxvkSubmitStatus* status, + bool enableFrameId) { DxvkSubmitInfo submitInfo = { }; submitInfo.cmdList = commandList; + submitInfo.frameId = enableFrameId ? + m_latencyMarkers.render : 0; m_submissionQueue.submit(submitInfo, status); std::lock_guard statLock(m_statLock); diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h index a24ee311bf5..305e6e00efb 100644 --- a/src/dxvk/dxvk_device.h +++ b/src/dxvk/dxvk_device.h @@ -66,7 +66,16 @@ namespace dxvk { DxvkDeviceQueue transfer; DxvkDeviceQueue sparse; }; - + + /** + * \brief Latency marker frame ids + */ + struct DxvkDeviceLowLatencyMarkers { + uint64_t simulation; + uint64_t render; + uint64_t present; + }; + /** * \brief DXVK device * @@ -471,10 +480,12 @@ namespace dxvk { * the given set of optional synchronization primitives. 
* \param [in] commandList The command list to submit * \param [out] status Submission feedback + * \param [in] enableFrameId Submission should include the frame id */ void submitCommandList( const Rc& commandList, - DxvkSubmitStatus* status); + DxvkSubmitStatus* status, + bool enableFrameId = true); /** * \brief Locks submission queue @@ -534,6 +545,44 @@ namespace dxvk { * used by the GPU can be safely destroyed. */ void waitForIdle(); + + /** + * \brief Updates the frame id for the given frame marker + * + * \param [in] marker The marker to set the frame ID for + * \param [in] id The frame ID to set + */ + void setLatencyMarker(VkLatencyMarkerNV marker, uint64_t id) { + switch (marker) { + case VK_LATENCY_MARKER_SIMULATION_START_NV: + m_latencyMarkers.simulation = id; + break; + case VK_LATENCY_MARKER_RENDERSUBMIT_START_NV: + m_latencyMarkers.render = id; + break; + case VK_LATENCY_MARKER_PRESENT_START_NV: + m_latencyMarkers.present = id; + break; + default: + break; + } + } + + /** + * \brief Resets the latency markers back to zero + */ + void resetLatencyMarkers() { + m_latencyMarkers = {}; + } + + /** + * \brief Returns the current set of latency marker frame IDs + * + * \returns The current set of frame marker IDs + */ + DxvkDeviceLowLatencyMarkers getLatencyMarkers() { + return m_latencyMarkers; + } private: @@ -549,6 +598,8 @@ namespace dxvk { DxvkDevicePerfHints m_perfHints; DxvkObjects m_objects; + DxvkDeviceLowLatencyMarkers m_latencyMarkers; + sync::Spinlock m_statLock; DxvkStatCounters m_statCounters; diff --git a/src/dxvk/dxvk_device_info.h b/src/dxvk/dxvk_device_info.h index e23a0e1812e..ec0bc5a645e 100644 --- a/src/dxvk/dxvk_device_info.h +++ b/src/dxvk/dxvk_device_info.h @@ -68,9 +68,10 @@ namespace dxvk { VkPhysicalDeviceMaintenance5FeaturesKHR khrMaintenance5; VkPhysicalDevicePresentIdFeaturesKHR khrPresentId; VkPhysicalDevicePresentWaitFeaturesKHR khrPresentWait; + VkBool32 nvLowLatency2; VkBool32 nvxBinaryImport; VkBool32 nvxImageViewHandle; 
VkBool32 khrWin32KeyedMutex; }; -} \ No newline at end of file +} diff --git a/src/dxvk/dxvk_extensions.h b/src/dxvk/dxvk_extensions.h index ae4c8a74f0a..acd5faf56be 100644 --- a/src/dxvk/dxvk_extensions.h +++ b/src/dxvk/dxvk_extensions.h @@ -325,6 +325,7 @@ namespace dxvk { DxvkExt khrPresentWait = { VK_KHR_PRESENT_WAIT_EXTENSION_NAME, DxvkExtMode::Optional }; DxvkExt khrSwapchain = { VK_KHR_SWAPCHAIN_EXTENSION_NAME, DxvkExtMode::Required }; DxvkExt khrWin32KeyedMutex = { VK_KHR_WIN32_KEYED_MUTEX_EXTENSION_NAME, DxvkExtMode::Optional }; + DxvkExt nvLowLatency2 = { VK_NV_LOW_LATENCY_2_EXTENSION_NAME, DxvkExtMode::Optional }; DxvkExt nvxBinaryImport = { VK_NVX_BINARY_IMPORT_EXTENSION_NAME, DxvkExtMode::Disabled }; DxvkExt nvxImageViewHandle = { VK_NVX_IMAGE_VIEW_HANDLE_EXTENSION_NAME, DxvkExtMode::Disabled }; }; diff --git a/src/dxvk/dxvk_presenter.cpp b/src/dxvk/dxvk_presenter.cpp index 10f13da2783..041e9b3d974 100644 --- a/src/dxvk/dxvk_presenter.cpp +++ b/src/dxvk/dxvk_presenter.cpp @@ -18,6 +18,15 @@ namespace dxvk { // with present operations and periodically signals the event if (m_device->features().khrPresentWait.presentWait && m_signal != nullptr) m_frameThread = dxvk::thread([this] { runFrameThread(); }); + + // If nvLowLatency2 is supported create the fence + if (m_device->features().nvLowLatency2) { + DxvkFenceCreateInfo info = {}; + info.initialValue = 0; + info.sharedType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_FLAG_BITS_MAX_ENUM; + + m_lowLatencyFence = DxvkFenceValuePair(m_device->createFence(info), 0u); + } } @@ -72,7 +81,7 @@ namespace dxvk { VkPresentIdKHR presentId = { VK_STRUCTURE_TYPE_PRESENT_ID_KHR }; presentId.swapchainCount = 1; - presentId.pPresentIds = &frameId; + presentId.pPresentIds = &frameId; VkSwapchainPresentModeInfoEXT modeInfo = { VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_EXT }; modeInfo.swapchainCount = 1; @@ -88,8 +97,11 @@ namespace dxvk { if (m_device->features().khrPresentId.presentId && frameId) presentId.pNext = 
const_cast(std::exchange(info.pNext, &presentId)); - if (m_device->features().extSwapchainMaintenance1.swapchainMaintenance1) + if (m_device->features().extSwapchainMaintenance1.swapchainMaintenance1) { + if (m_device->features().nvLowLatency2) + m_presentSupportsLowLatency = std::find(m_lowLatencyModes.begin(), m_lowLatencyModes.end(), mode) != m_lowLatencyModes.end(); modeInfo.pNext = const_cast(std::exchange(info.pNext, &modeInfo)); + } VkResult status = m_vkd->vkQueuePresentKHR( m_device->queues().graphics.queueHandle, &info); @@ -173,6 +185,7 @@ namespace dxvk { std::vector formats; std::vector modes; + std::vector lowLatencyModes; VkResult status; @@ -283,6 +296,23 @@ namespace dxvk { dynamicModes.clear(); } + if (m_device->features().nvLowLatency2) { + VkLatencySurfaceCapabilitiesNV latencySurfaceCaps { VK_STRUCTURE_TYPE_LATENCY_SURFACE_CAPABILITIES_NV }; + + caps.pNext = &latencySurfaceCaps; + + if((status = m_vki->vkGetPhysicalDeviceSurfaceCapabilities2KHR(m_device->adapter()->handle(), &surfaceInfo, &caps))) + return status; + + lowLatencyModes.resize(latencySurfaceCaps.presentModeCount); + latencySurfaceCaps.pPresentModes = lowLatencyModes.data(); + + if ((status = m_vki->vkGetPhysicalDeviceSurfaceCapabilities2KHR(m_device->adapter()->handle(), &surfaceInfo, &caps))) + return status; + + caps.pNext = nullptr; + } + // Compute swap chain image count based on available info m_info.imageCount = pickImageCount(minImageCount, maxImageCount, desc.imageCount); @@ -293,6 +323,9 @@ namespace dxvk { modeInfo.presentModeCount = compatibleModes.size(); modeInfo.pPresentModes = compatibleModes.data(); + VkSwapchainLatencyCreateInfoNV lowLatencyInfo = { VK_STRUCTURE_TYPE_SWAPCHAIN_LATENCY_CREATE_INFO_NV }; + lowLatencyInfo.latencyModeEnable = VK_TRUE; + VkSwapchainCreateInfoKHR swapInfo = { VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR }; swapInfo.surface = m_surface; swapInfo.minImageCount = m_info.imageCount; @@ -314,6 +347,9 @@ namespace dxvk { if 
(m_device->features().extSwapchainMaintenance1.swapchainMaintenance1) modeInfo.pNext = std::exchange(swapInfo.pNext, &modeInfo); + if (m_device->features().nvLowLatency2) + lowLatencyInfo.pNext = std::exchange(swapInfo.pNext, &lowLatencyInfo); + Logger::info(str::format( "Presenter: Actual swap chain properties:" "\n Format: ", m_info.format.format, @@ -322,11 +358,29 @@ namespace dxvk { "\n Buffer size: ", m_info.imageExtent.width, "x", m_info.imageExtent.height, "\n Image count: ", m_info.imageCount, "\n Exclusive FS: ", desc.fullScreenExclusive)); - + if ((status = m_vkd->vkCreateSwapchainKHR(m_vkd->device(), &swapInfo, nullptr, &m_swapchain))) return status; - + + if (m_device->features().nvLowLatency2) { + std::lock_guard lock(m_lowLatencyMutex); + + if (!m_lowLatencyEnabled) + m_device->resetLatencyMarkers(); + + VkLatencySleepModeInfoNV sleepModeInfo = { VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV }; + sleepModeInfo.lowLatencyMode = m_lowLatencyEnabled; + sleepModeInfo.lowLatencyBoost = m_lowLatencyBoost; + sleepModeInfo.minimumIntervalUs = m_minimumIntervalUs; + + if ((status = m_vkd->vkSetLatencySleepModeNV(m_vkd->device(), m_swapchain, &sleepModeInfo))) + return status; + + m_presentSupportsLowLatency = std::find( + lowLatencyModes.begin(), lowLatencyModes.end(), m_info.presentMode) != lowLatencyModes.end(); + } + // Acquire images and create views std::vector images; @@ -377,6 +431,7 @@ namespace dxvk { m_acquireStatus = VK_NOT_READY; m_dynamicModes = std::move(dynamicModes); + m_lowLatencyModes = std::move(lowLatencyModes); return VK_SUCCESS; } @@ -423,6 +478,75 @@ namespace dxvk { } + void Presenter::setLatencySleepMode(bool lowLatencyMode, bool lowLatencyBoost, uint32_t minimumIntervalUs) { + if (lowLatencyMode == m_lowLatencyEnabled + && lowLatencyBoost == m_lowLatencyBoost + && minimumIntervalUs == m_minimumIntervalUs) { + return; + } + + std::lock_guard lock(m_lowLatencyMutex); + + m_lowLatencyEnabled = lowLatencyMode; + m_lowLatencyBoost = 
lowLatencyBoost; + m_minimumIntervalUs = minimumIntervalUs; + } + + + void Presenter::latencySleep() { + VkSemaphore sem = m_lowLatencyFence.fence->handle(); + uint64_t waitValue = m_lowLatencyFence.value + 1; + m_lowLatencyFence.value++; + + VkLatencySleepInfoNV sleepInfo = { VK_STRUCTURE_TYPE_LATENCY_SLEEP_INFO_NV }; + sleepInfo.signalSemaphore = sem; + sleepInfo.value = waitValue; + + bool shouldSleep = false; + + { + std::lock_guard lock(m_lowLatencyMutex); + if (m_swapchain) { + /* wait only if the sleep request succeeded, otherwise the fence may never reach waitValue */ + shouldSleep = m_vkd->vkLatencySleepNV(m_vkd->device(), m_swapchain, &sleepInfo) == VK_SUCCESS; + } + } + + if (shouldSleep) + m_lowLatencyFence.fence->wait(waitValue); + } + + + void Presenter::setLatencyMarker(VkLatencyMarkerNV marker, uint64_t presentId) { + VkSetLatencyMarkerInfoNV markerInfo = { VK_STRUCTURE_TYPE_SET_LATENCY_MARKER_INFO_NV }; + markerInfo.presentID = presentId; + markerInfo.marker = marker; + + std::lock_guard lock(m_lowLatencyMutex); + if (m_swapchain) + m_vkd->vkSetLatencyMarkerNV(m_vkd->device(), m_swapchain, &markerInfo); + } + + + VkResult Presenter::getLatencyTimings(std::vector& frameReports) { + std::lock_guard lock(m_lowLatencyMutex); + + if (m_swapchain) { + VkGetLatencyMarkerInfoNV markerInfo = { VK_STRUCTURE_TYPE_GET_LATENCY_MARKER_INFO_NV }; + m_vkd->vkGetLatencyTimingsNV(m_vkd->device(), m_swapchain, &markerInfo); + + if (markerInfo.timingCount != 0) { + frameReports.resize(markerInfo.timingCount, { VK_STRUCTURE_TYPE_LATENCY_TIMINGS_FRAME_REPORT_NV }); + markerInfo.pTimings = frameReports.data(); + + m_vkd->vkGetLatencyTimingsNV(m_vkd->device(), m_swapchain, &markerInfo); + } + } + + return VK_SUCCESS; + } + + + VkResult Presenter::getSupportedFormats(std::vector& formats, VkFullScreenExclusiveEXT fullScreenExclusive) const { uint32_t numFormats = 0; @@ -617,6 +741,8 @@ namespace dxvk { void Presenter::destroySwapchain() { + std::lock_guard lock(m_lowLatencyMutex); + if (m_signal != nullptr) m_signal->wait(m_lastFrameId.load(std::memory_order_acquire)); diff 
--git a/src/dxvk/dxvk_presenter.h b/src/dxvk/dxvk_presenter.h index c5ba1273364..017858535b7 100644 --- a/src/dxvk/dxvk_presenter.h +++ b/src/dxvk/dxvk_presenter.h @@ -15,6 +15,7 @@ #include "../vulkan/vulkan_loader.h" #include "dxvk_format.h" +#include "dxvk_fence.h" namespace dxvk { @@ -224,6 +225,42 @@ namespace dxvk { */ void setHdrMetadata(const VkHdrMetadataEXT& hdrMetadata); + /** + * \brief Set the latency mode of the swapchain + * + * \param [in] enableLowLatency Determines if the low latency + * mode should be enabled of disabled + */ + void setLatencySleepMode(bool lowLatencyMode, bool lowLatencyBoost, uint32_t minimumIntervalUs); + + /** + * \brief Delay rendering work for lower latency + */ + void latencySleep(); + + /** + * \brief Set a latency marker for the given stage + * + * \param [in] marker The stage this marker is for + * \param [in] presentId The presentId this marker is for + */ + void setLatencyMarker(VkLatencyMarkerNV marker, uint64_t presentId); + + /** + * \brief Get the low latency timing info + * + * \param [out] latencyInfo The structure to place + * the latency timings into + */ + VkResult getLatencyTimings(std::vector& frameReports); + + /** + * \brief Returns the low latency enabled state + */ + bool lowLatencyEnabled() { + return m_lowLatencyEnabled && m_presentSupportsLowLatency; + } + private: Rc m_device; @@ -237,10 +274,17 @@ namespace dxvk { VkSurfaceKHR m_surface = VK_NULL_HANDLE; VkSwapchainKHR m_swapchain = VK_NULL_HANDLE; + DxvkFenceValuePair m_lowLatencyFence = {}; + bool m_lowLatencyEnabled = false; + bool m_lowLatencyBoost = false; + uint32_t m_minimumIntervalUs = 0; + bool m_presentSupportsLowLatency = false; + std::vector m_images; std::vector m_semaphores; std::vector m_dynamicModes; + std::vector m_lowLatencyModes; uint32_t m_imageIndex = 0; uint32_t m_frameIndex = 0; @@ -250,6 +294,7 @@ namespace dxvk { FpsLimiter m_fpsLimiter; dxvk::mutex m_frameMutex; + dxvk::mutex m_lowLatencyMutex; dxvk::condition_variable 
m_frameCond;
     dxvk::thread m_frameThread;
     std::queue m_frameQueue;
diff --git a/src/dxvk/dxvk_queue.cpp b/src/dxvk/dxvk_queue.cpp
index 7273a37d608..546a1f838b8 100644
--- a/src/dxvk/dxvk_queue.cpp
+++ b/src/dxvk/dxvk_queue.cpp
@@ -126,7 +126,7 @@ namespace dxvk {
         m_callback(true);
 
       if (entry.submit.cmdList != nullptr)
-        entry.result = entry.submit.cmdList->submit();
+        entry.result = entry.submit.cmdList->submit(entry.submit.frameId); // pass the submission's frame ID along with the command list
       else if (entry.present.presenter != nullptr)
         entry.result = entry.present.presenter->presentImage(entry.present.presentMode, entry.present.frameId);
 
@@ -226,4 +226,4 @@ namespace dxvk {
     }
   }
 
-}
\ No newline at end of file
+}
diff --git a/src/dxvk/dxvk_queue.h b/src/dxvk/dxvk_queue.h
index 38d91f5dd09..a3c6e581b31 100644
--- a/src/dxvk/dxvk_queue.h
+++ b/src/dxvk/dxvk_queue.h
@@ -32,6 +32,7 @@ namespace dxvk {
    */
   struct DxvkSubmitInfo {
     Rc cmdList;
+    uint64_t frameId; // frame ID given to cmdList->submit() in dxvk_queue.cpp; no default value, the creator must set it
   };
 
 
diff --git a/src/vulkan/vulkan_loader.h b/src/vulkan/vulkan_loader.h
index 1741ccb8722..6b0f80ea248 100644
--- a/src/vulkan/vulkan_loader.h
+++ b/src/vulkan/vulkan_loader.h
@@ -452,6 +452,14 @@ namespace dxvk::vk {
     VULKAN_FN(wine_vkAcquireKeyedMutex);
     VULKAN_FN(wine_vkReleaseKeyedMutex);
     #endif
+
+    #ifdef VK_NV_LOW_LATENCY_2_EXTENSION_NAME // low-latency entry points, only declared when the Vulkan headers define this macro
+    VULKAN_FN(vkSetLatencySleepModeNV);
+    VULKAN_FN(vkLatencySleepNV);
+    VULKAN_FN(vkSetLatencyMarkerNV);
+    VULKAN_FN(vkGetLatencyTimingsNV);
+    VULKAN_FN(vkQueueNotifyOutOfBandNV);
+    #endif // VK_NV_LOW_LATENCY_2_EXTENSION_NAME
   };
 
 }