diff --git a/app/streaming/video/ffmpeg-renderers/d3d11va.cpp b/app/streaming/video/ffmpeg-renderers/d3d11va.cpp index 585cbb9f..8c0b8c02 100644 --- a/app/streaming/video/ffmpeg-renderers/d3d11va.cpp +++ b/app/streaming/video/ffmpeg-renderers/d3d11va.cpp @@ -193,26 +193,12 @@ bool D3D11VARenderer::createDeviceByAdapterIndex(int adapterIndex, bool* adapter goto Exit; } - bool ok; - m_BindDecoderOutputTextures = !!qEnvironmentVariableIntValue("D3D11VA_FORCE_BIND", &ok); - if (!ok) { - // Skip copying to our own internal texture on Intel GPUs due to - // significant performance impact of the extra copy. See: - // https://github.com/moonlight-stream/moonlight-qt/issues/1304 - m_BindDecoderOutputTextures = adapterDesc.VendorId == 0x8086; - } - else { - SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, - "Using D3D11VA_FORCE_BIND to override default bind/copy logic"); - } - SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION, - "Detected GPU %d: %S (%x:%x) (decoder output: %s)", + "Detected GPU %d: %S (%x:%x)", adapterIndex, adapterDesc.Description, adapterDesc.VendorId, - adapterDesc.DeviceId, - m_BindDecoderOutputTextures ? "bind" : "copy"); + adapterDesc.DeviceId); hr = D3D11CreateDevice(adapter.Get(), D3D_DRIVER_TYPE_UNKNOWN, @@ -240,6 +226,47 @@ bool D3D11VARenderer::createDeviceByAdapterIndex(int adapterIndex, bool* adapter m_DevicesWithFL11Support++; } + bool ok; + m_BindDecoderOutputTextures = !!qEnvironmentVariableIntValue("D3D11VA_FORCE_BIND", &ok); + if (!ok) { + D3D11_FEATURE_DATA_D3D11_OPTIONS2 options = {}; + m_Device->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS2, &options, sizeof(options)); + + SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION, + "Unified memory: %s", + options.UnifiedMemoryArchitecture ? "yes" : "no"); + + // Skip copying to our own internal texture on Intel GPUs due to + // significant performance impact of the extra copy. See: + // https://github.com/moonlight-stream/moonlight-qt/issues/1304 + // + // We also don't copy for modern UMA GPUs from other vendors to + // avoid performance impact due to shared system memory accesses. + m_BindDecoderOutputTextures = + adapterDesc.VendorId == 0x8086 || + (featureLevel >= D3D_FEATURE_LEVEL_11_1 && options.UnifiedMemoryArchitecture); + } + else { + SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, + "Using D3D11VA_FORCE_BIND to override default bind/copy logic"); + } + + m_UseFenceHack = !!qEnvironmentVariableIntValue("D3D11VA_FORCE_FENCE", &ok); + if (!ok) { + // Old Intel GPUs (HD 4000) require a fence to properly synchronize + // the video engine with the 3D engine for texture sampling. + m_UseFenceHack = adapterDesc.VendorId == 0x8086 && featureLevel < D3D_FEATURE_LEVEL_11_1; + } + else { + SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, + "Using D3D11VA_FORCE_FENCE to override default fence workaround logic"); + } + + SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION, + "Decoder texture access: %s (fence: %s)", + m_BindDecoderOutputTextures ? "bind" : "copy", + (m_BindDecoderOutputTextures && m_UseFenceHack) ? "yes" : "no"); + // Check which fence types are supported by this GPU { m_FenceType = SupportedFenceType::None; @@ -821,9 +848,9 @@ void D3D11VARenderer::renderVideo(AVFrame* frame) // Ensure decoding operations have completed using a dummy fence. // This is not necessary on modern GPU drivers, but it is required - // on some older GPU drivers that don't properly synchronize the - // video engine with 3D operations. - if (m_FenceType != SupportedFenceType::None) { + // on some older Intel GPU drivers that don't properly synchronize + // the video engine with 3D operations. + if (m_UseFenceHack && m_FenceType != SupportedFenceType::None) { ComPtr device5; ComPtr deviceContext4; if (SUCCEEDED(m_Device.As(&device5)) && SUCCEEDED(m_DeviceContext.As(&deviceContext4))) { diff --git a/app/streaming/video/ffmpeg-renderers/d3d11va.h b/app/streaming/video/ffmpeg-renderers/d3d11va.h index 82a35ce9..f37b815a 100644 --- a/app/streaming/video/ffmpeg-renderers/d3d11va.h +++ b/app/streaming/video/ffmpeg-renderers/d3d11va.h @@ -67,6 +67,7 @@ private: SupportedFenceType m_FenceType; SDL_mutex* m_ContextLock; bool m_BindDecoderOutputTextures; + bool m_UseFenceHack; DECODER_PARAMETERS m_DecoderParams; int m_TextureAlignment;