From bf4332b9e738a082b6151debd98b1da4259560ce Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Sun, 21 Aug 2022 17:50:56 -0500 Subject: [PATCH] Prefer D3D11VA in non-FSE modes for improved performance --- .../video/ffmpeg-renderers/d3d11va.cpp | 5 +-- .../video/ffmpeg-renderers/d3d11va.h | 4 ++- .../video/ffmpeg-renderers/dxva2.cpp | 32 +++++++++++++++++-- app/streaming/video/ffmpeg-renderers/dxva2.h | 5 ++- app/streaming/video/ffmpeg.cpp | 20 +++++++++--- 5 files changed, 55 insertions(+), 11 deletions(-) diff --git a/app/streaming/video/ffmpeg-renderers/d3d11va.cpp b/app/streaming/video/ffmpeg-renderers/d3d11va.cpp index d7f1b317..12e3fce8 100644 --- a/app/streaming/video/ffmpeg-renderers/d3d11va.cpp +++ b/app/streaming/video/ffmpeg-renderers/d3d11va.cpp @@ -73,8 +73,9 @@ typedef struct _CSC_CONST_BUF } CSC_CONST_BUF, *PCSC_CONST_BUF; static_assert(sizeof(CSC_CONST_BUF) % 16 == 0, "Constant buffer sizes must be a multiple of 16"); -D3D11VARenderer::D3D11VARenderer() - : m_Factory(nullptr), +D3D11VARenderer::D3D11VARenderer(int decoderSelectionPass) + : m_DecoderSelectionPass(decoderSelectionPass), + m_Factory(nullptr), m_Device(nullptr), m_SwapChain(nullptr), m_DeviceContext(nullptr), diff --git a/app/streaming/video/ffmpeg-renderers/d3d11va.h b/app/streaming/video/ffmpeg-renderers/d3d11va.h index fc49742c..2439f605 100644 --- a/app/streaming/video/ffmpeg-renderers/d3d11va.h +++ b/app/streaming/video/ffmpeg-renderers/d3d11va.h @@ -13,7 +13,7 @@ extern "C" { class D3D11VARenderer : public IFFmpegRenderer { public: - D3D11VARenderer(); + D3D11VARenderer(int decoderSelectionPass); virtual ~D3D11VARenderer() override; virtual bool initialize(PDECODER_PARAMETERS params) override; virtual bool prepareDecoderContext(AVCodecContext* context, AVDictionary**) override; @@ -36,6 +36,8 @@ private: bool checkDecoderSupport(IDXGIAdapter* adapter); bool createDeviceByAdapterIndex(int adapterIndex, bool* adapterNotFound = nullptr); + int m_DecoderSelectionPass; + IDXGIFactory5* m_Factory; ID3D11Device* m_Device; IDXGISwapChain4* m_SwapChain; diff --git a/app/streaming/video/ffmpeg-renderers/dxva2.cpp b/app/streaming/video/ffmpeg-renderers/dxva2.cpp index e8972904..cc9db16a 100644 --- a/app/streaming/video/ffmpeg-renderers/dxva2.cpp +++ b/app/streaming/video/ffmpeg-renderers/dxva2.cpp @@ -28,7 +28,8 @@ typedef struct _VERTEX float tu, tv; } VERTEX, *PVERTEX; -DXVA2Renderer::DXVA2Renderer() : +DXVA2Renderer::DXVA2Renderer(int decoderSelectionPass) : + m_DecoderSelectionPass(decoderSelectionPass), m_DecService(nullptr), m_Decoder(nullptr), m_SurfacesUsed(0), @@ -434,6 +435,13 @@ bool DXVA2Renderer::initializeDeviceQuirks() // For other GPUs, we'll avoid populating it as was our previous behavior. m_DeviceQuirks |= DXVA2_QUIRK_SET_DEST_FORMAT; } + + // Tag this display device if it has a WDDM 2.0+ driver for the decoder selection logic + if (HIWORD(id.DriverVersion.HighPart) >= 20) { + SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION, + "Detected WDDM 2.0 or later display driver"); + m_DeviceQuirks |= DXVA2_QUIRK_WDDM_20_PLUS; + } } return true; @@ -707,7 +715,7 @@ bool DXVA2Renderer::initialize(PDECODER_PARAMETERS params) return false; } #else - else if (qgetenv("DXVA2_ENABLED") != "1") { + else if (qgetenv("DXVA2_ENABLED") != "1" && m_DecoderSelectionPass == 0) { SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION, "DXVA2 is disabled by default on ARM64. Set DXVA2_ENABLED=1 to override."); return false; @@ -759,6 +767,26 @@ bool DXVA2Renderer::initialize(PDECODER_PARAMETERS params) return false; } + // If we have a WDDM 2.0 or later display driver and we're not running in + // full-screen exclusive mode, prefer the D3D11VA renderer. + // + // D3D11VA is better in this case because it can enable tearing in non-FSE + // modes when the user has V-Sync disabled. In non-FSE V-Sync cases, D3D11VA + // provides lower display latency on systems that support Independent Flip + // in windowed mode. When using D3D9, DWM will not promote us to IFlip unless + // we're full-screen (exclusive or not). + // + // NB: The reason we only do this for WDDM 2.0 and later is because older + // AMD drivers (such as those for the HD 5570) render garbage when using + // the D3D11VA renderer. + if (m_DecoderSelectionPass == 0 && + (m_DeviceQuirks & DXVA2_QUIRK_WDDM_20_PLUS) && + !((SDL_GetWindowFlags(params->window) & SDL_WINDOW_FULLSCREEN_DESKTOP) == SDL_WINDOW_FULLSCREEN)) { + SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION, + "Defaulting to D3D11VA for non-FSE mode"); + return false; + } + if (!initializeDecoder()) { return false; } diff --git a/app/streaming/video/ffmpeg-renderers/dxva2.h b/app/streaming/video/ffmpeg-renderers/dxva2.h index c1280a77..b6217b70 100644 --- a/app/streaming/video/ffmpeg-renderers/dxva2.h +++ b/app/streaming/video/ffmpeg-renderers/dxva2.h @@ -13,7 +13,7 @@ extern "C" { class DXVA2Renderer : public IFFmpegRenderer { public: - DXVA2Renderer(); + DXVA2Renderer(int decoderSelectionPass); virtual ~DXVA2Renderer() override; virtual bool initialize(PDECODER_PARAMETERS params) override; virtual bool prepareDecoderContext(AVCodecContext* context, AVDictionary** options) override; @@ -44,6 +44,8 @@ private: static int ffGetBuffer2(AVCodecContext* context, AVFrame* frame, int flags); + int m_DecoderSelectionPass; + int m_VideoFormat; int m_VideoWidth; int m_VideoHeight; @@ -77,5 +79,6 @@ private: #define DXVA2_QUIRK_NO_VP 0x01 #define DXVA2_QUIRK_SET_DEST_FORMAT 0x02 +#define DXVA2_QUIRK_WDDM_20_PLUS 0x04 int m_DeviceQuirks; }; diff --git a/app/streaming/video/ffmpeg.cpp b/app/streaming/video/ffmpeg.cpp index 066a50d5..98228a07 100644 --- a/app/streaming/video/ffmpeg.cpp +++ b/app/streaming/video/ffmpeg.cpp @@ -601,13 +601,14 @@ IFFmpegRenderer* FFmpegVideoDecoder::createHwAccelRenderer(const AVCodecHWConfig if (pass == 0) { switch (hwDecodeCfg->device_type) { #ifdef Q_OS_WIN32 - // DXVA2 appears in the hwaccel list before D3D11VA, so we will implicitly - // prefer it. When we want to switch to D3D11VA by default, we'll need to - // move it into the second pass set below. + // DXVA2 appears in the hwaccel list before D3D11VA, so we will prefer it. + // + // There is logic in DXVA2 that may elect to fail on the first selection pass + // to allow D3D11VA to be used in cases where it is known to be better. case AV_HWDEVICE_TYPE_DXVA2: - return new DXVA2Renderer(); + return new DXVA2Renderer(pass); case AV_HWDEVICE_TYPE_D3D11VA: - return new D3D11VARenderer(); + return new D3D11VARenderer(pass); #endif #ifdef Q_OS_DARWIN case AV_HWDEVICE_TYPE_VIDEOTOOLBOX: @@ -636,6 +637,15 @@ IFFmpegRenderer* FFmpegVideoDecoder::createHwAccelRenderer(const AVCodecHWConfig case AV_HWDEVICE_TYPE_CUDA: // CUDA should only be used to cover the NVIDIA+Wayland case return new CUDARenderer(); +#endif +#ifdef Q_OS_WIN32 + // This gives DXVA2 and D3D11VA another shot at handling cases where they + // chose to purposefully fail in the first selection pass to allow a more + // optimal decoder to be tried. + case AV_HWDEVICE_TYPE_DXVA2: + return new DXVA2Renderer(pass); + case AV_HWDEVICE_TYPE_D3D11VA: + return new D3D11VARenderer(pass); #endif default: return nullptr;