moonlight-qt/app/streaming/video/ffmpeg-renderers/d3d11va.cpp
Cameron Gutman a0a4c1ea83 Use decoder texture binding by default when using separate devices
This significantly improves performance on the Ryzen 3300U and should generally perform as well or better everywhere.

This decoder->SRV path has been prone to driver bugs, so we may need to adjust this logic if driver issues crop up.
2026-01-19 16:46:50 -06:00


// For D3D11_DECODER_PROFILE values
#include <initguid.h>
#include "d3d11va.h"
#include "dxutil.h"
#include "path.h"
#include "utils.h"
#include "streaming/streamutils.h"
#include "streaming/session.h"
#include <SDL_syswm.h>
#include <VersionHelpers.h>
#include <dwmapi.h>
using Microsoft::WRL::ComPtr;
// Standard DXVA GUIDs for HEVC RExt profiles (redefined for compatibility with pre-24H2 SDKs)
DEFINE_GUID(k_D3D11_DECODER_PROFILE_HEVC_VLD_MAIN_444, 0x4008018f, 0xf537, 0x4b36, 0x98, 0xcf, 0x61, 0xaf, 0x8a, 0x2c, 0x1a, 0x33);
DEFINE_GUID(k_D3D11_DECODER_PROFILE_HEVC_VLD_MAIN10_444, 0x0dabeffa, 0x4458, 0x4602, 0xbc, 0x03, 0x07, 0x95, 0x65, 0x9d, 0x61, 0x7c);
typedef struct _VERTEX
{
float x, y;
float tu, tv;
} VERTEX, *PVERTEX;
#define CSC_MATRIX_RAW_ELEMENT_COUNT 9
#define CSC_MATRIX_PACKED_ELEMENT_COUNT 12
#define OFFSETS_ELEMENT_COUNT 3
typedef struct _CSC_CONST_BUF
{
// CscMatrix value from above but packed and scaled
float cscMatrix[CSC_MATRIX_PACKED_ELEMENT_COUNT];
// YUV offset values
float offsets[OFFSETS_ELEMENT_COUNT];
// Padding float to reach a 16-byte boundary
float padding;
// Chroma offset values
float chromaOffset[2];
// Max UV coordinates to avoid sampling alignment padding
float chromaUVMax[2];
} CSC_CONST_BUF, *PCSC_CONST_BUF;
static_assert(sizeof(CSC_CONST_BUF) % 16 == 0, "Constant buffer sizes must be a multiple of 16");
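// For reference, this struct mirrors an HLSL constant buffer laid out roughly
// as sketched below (a best-effort illustration; the shader's actual
// declarations may differ). Each float3 column of the matrix occupies a full
// float4 register, which is why the packed array holds 12 floats rather than 9:
//
// cbuffer CscConstBuf : register(b0)
// {
//     float3x3 cscMatrix;    // column-major, columns padded to float4
//     float3   offsets;      // + 1 float of implicit padding
//     float2   chromaOffset;
//     float2   chromaUVMax;
// };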
static const std::array<const char*, D3D11VARenderer::PixelShaders::_COUNT> k_VideoShaderNames =
{
"d3d11_yuv420_pixel.fxc",
"d3d11_ayuv_pixel.fxc",
"d3d11_y410_pixel.fxc",
};
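// For orientation, a rough sketch of what the 4:2:0 pixel shader consumes,
// given the t0/t1 SRVs and s0 sampler bound in renderVideo() and
// setupRenderingResources(). Identifiers and exact arithmetic are
// illustrative, not the actual shader source:
//
// Texture2D<float>  luma   : register(t0); // R8/R16 luma SRV
// Texture2D<float2> chroma : register(t1); // R8G8/R16G16 chroma SRV
// SamplerState      samp   : register(s0);
//
// float4 main(float2 tex : TEXCOORD) : SV_Target
// {
//     float3 yuv;
//     yuv.x  = luma.Sample(samp, tex);
//     yuv.yz = chroma.Sample(samp, min(tex + chromaOffset, chromaUVMax));
//     return float4(mul(cscMatrix, yuv) + offsets, 1.0);
// }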
D3D11VARenderer::D3D11VARenderer(int decoderSelectionPass)
: IFFmpegRenderer(RendererType::D3D11VA),
m_DecoderSelectionPass(decoderSelectionPass),
m_DevicesWithFL11Support(0),
m_DevicesWithCodecSupport(0),
m_LastColorTrc(AVCOL_TRC_UNSPECIFIED),
m_AllowTearing(false),
m_OverlayLock(0),
m_HwDeviceContext(nullptr)
{
m_ContextLock = SDL_CreateMutex();
DwmEnableMMCSS(TRUE);
}
D3D11VARenderer::~D3D11VARenderer()
{
DwmEnableMMCSS(FALSE);
SDL_DestroyMutex(m_ContextLock);
m_VideoVertexBuffer.Reset();
for (auto& shader : m_VideoPixelShaders) {
shader.Reset();
}
for (auto& textureSrvs : m_VideoTextureResourceViews) {
for (auto& srv : textureSrvs) {
srv.Reset();
}
}
m_VideoTexture.Reset();
for (auto& buffer : m_OverlayVertexBuffers) {
buffer.Reset();
}
for (auto& srv : m_OverlayTextureResourceViews) {
srv.Reset();
}
for (auto& texture : m_OverlayTextures) {
texture.Reset();
}
m_OverlayPixelShader.Reset();
m_OverlayBlendState.Reset();
m_VideoBlendState.Reset();
m_DecodeD2RFence.Reset();
m_DecodeR2DFence.Reset();
m_RenderD2RFence.Reset();
m_RenderR2DFence.Reset();
m_RenderTargetView.Reset();
m_SwapChain.Reset();
m_RenderSharedTextureArray.Reset();
av_buffer_unref(&m_HwDeviceContext);
m_DecodeDevice.Reset();
m_DecodeDeviceContext.Reset();
// Force destruction of the swapchain immediately
if (m_RenderDeviceContext != nullptr) {
m_RenderDeviceContext->ClearState();
m_RenderDeviceContext->Flush();
}
m_RenderDevice.Reset();
m_RenderDeviceContext.Reset();
m_Factory.Reset();
}
bool D3D11VARenderer::createSharedFencePair(UINT64 initialValue, ID3D11Device5* dev1, ID3D11Device5* dev2, ComPtr<ID3D11Fence>& dev1Fence, ComPtr<ID3D11Fence>& dev2Fence)
{
HRESULT hr;
D3D11_FENCE_FLAG flags;
flags = D3D11_FENCE_FLAG_SHARED;
if (m_FenceType == SupportedFenceType::NonMonitored) {
flags |= D3D11_FENCE_FLAG_NON_MONITORED;
}
hr = dev1->CreateFence(initialValue, flags, IID_PPV_ARGS(&dev1Fence));
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device5::CreateFence() failed: %x",
hr);
return false;
}
HANDLE fenceHandle;
hr = dev1Fence->CreateSharedHandle(nullptr, GENERIC_ALL, nullptr, &fenceHandle);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Fence::CreateSharedHandle() failed: %x",
hr);
dev1Fence.Reset();
return false;
}
hr = dev2->OpenSharedFence(fenceHandle, IID_PPV_ARGS(&dev2Fence));
CloseHandle(fenceHandle);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device5::OpenSharedFence() failed: %x",
hr);
dev1Fence.Reset();
return false;
}
return true;
}
bool D3D11VARenderer::setupSharedDevice(IDXGIAdapter1* adapter)
{
const D3D_FEATURE_LEVEL supportedFeatureLevels[] = { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0 };
D3D_FEATURE_LEVEL featureLevel;
HRESULT hr;
ComPtr<ID3D11Device> device;
ComPtr<ID3D11DeviceContext> deviceContext;
bool success = false;
// We don't support cross-device sharing without fences
if (m_FenceType == SupportedFenceType::None) {
return false;
}
// Create a dedicated decoding device on the same adapter as the rendering device
hr = D3D11CreateDevice(adapter,
D3D_DRIVER_TYPE_UNKNOWN,
nullptr,
D3D11_CREATE_DEVICE_VIDEO_SUPPORT
#ifdef QT_DEBUG
| D3D11_CREATE_DEVICE_DEBUG
#endif
,
supportedFeatureLevels,
ARRAYSIZE(supportedFeatureLevels),
D3D11_SDK_VERSION,
&device,
&featureLevel,
&deviceContext);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"D3D11CreateDevice() failed: %x",
hr);
return false;
}
hr = device.As(&m_DecodeDevice);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::QueryInterface(ID3D11Device1) failed: %x",
hr);
goto Exit;
}
hr = deviceContext.As(&m_DecodeDeviceContext);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11DeviceContext::QueryInterface(ID3D11DeviceContext1) failed: %x",
hr);
goto Exit;
}
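// The two fence pairs created below gate the pipeline in opposite directions:
// renderVideo() signals D2R on the decode context before the render context
// samples a frame, and signals R2D on the render context before the decoder
// may reuse that surface.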
// Create our decode->render fence
m_DecodeRenderSyncFenceValue = 1;
if (!createSharedFencePair(0, m_DecodeDevice.Get(), m_RenderDevice.Get(), m_DecodeD2RFence, m_RenderD2RFence)) {
goto Exit;
}
// Create our render->decode fence
if (!createSharedFencePair(0, m_DecodeDevice.Get(), m_RenderDevice.Get(), m_DecodeR2DFence, m_RenderR2DFence)) {
goto Exit;
}
success = true;
Exit:
if (!success) {
m_DecodeD2RFence.Reset();
m_RenderD2RFence.Reset();
m_DecodeR2DFence.Reset();
m_RenderR2DFence.Reset();
m_DecodeDevice.Reset();
}
return success;
}
bool D3D11VARenderer::createDeviceByAdapterIndex(int adapterIndex, bool* adapterNotFound)
{
const D3D_FEATURE_LEVEL supportedFeatureLevels[] = { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0 };
bool success = false;
ComPtr<IDXGIAdapter1> adapter;
DXGI_ADAPTER_DESC1 adapterDesc;
D3D_FEATURE_LEVEL featureLevel;
HRESULT hr;
ComPtr<ID3D11Device> device;
ComPtr<ID3D11DeviceContext> deviceContext;
SDL_assert(!m_RenderDevice);
SDL_assert(!m_RenderDeviceContext);
SDL_assert(!m_DecodeDevice);
SDL_assert(!m_DecodeDeviceContext);
hr = m_Factory->EnumAdapters1(adapterIndex, &adapter);
if (hr == DXGI_ERROR_NOT_FOUND) {
// Expected at the end of enumeration
goto Exit;
}
else if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGIFactory::EnumAdapters1() failed: %x",
hr);
goto Exit;
}
hr = adapter->GetDesc1(&adapterDesc);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGIAdapter::GetDesc() failed: %x",
hr);
goto Exit;
}
if (adapterDesc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) {
// Skip the WARP device. We know it will fail.
goto Exit;
}
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Detected GPU %d: %S (%x:%x)",
adapterIndex,
adapterDesc.Description,
adapterDesc.VendorId,
adapterDesc.DeviceId);
hr = D3D11CreateDevice(adapter.Get(),
D3D_DRIVER_TYPE_UNKNOWN,
nullptr,
D3D11_CREATE_DEVICE_VIDEO_SUPPORT
#ifdef QT_DEBUG
| D3D11_CREATE_DEVICE_DEBUG
#endif
,
supportedFeatureLevels,
ARRAYSIZE(supportedFeatureLevels),
D3D11_SDK_VERSION,
&device,
&featureLevel,
&deviceContext);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"D3D11CreateDevice() failed: %x",
hr);
goto Exit;
}
else if (adapterDesc.VendorId == 0x8086 && featureLevel <= D3D_FEATURE_LEVEL_11_0 && !qEnvironmentVariableIntValue("D3D11VA_ENABLED")) {
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Avoiding D3D11VA on old pre-FL11.1 Intel GPU. Set D3D11VA_ENABLED=1 to override.");
goto Exit;
}
else if (featureLevel >= D3D_FEATURE_LEVEL_11_0) {
// Remember that we found a non-software D3D11 device with support for
// feature level 11.0 or later (Fermi, Terascale 2, or Ivy Bridge and later)
m_DevicesWithFL11Support++;
}
hr = device.As(&m_RenderDevice);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::QueryInterface(ID3D11Device1) failed: %x",
hr);
goto Exit;
}
hr = deviceContext.As(&m_RenderDeviceContext);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11DeviceContext::QueryInterface(ID3D11DeviceContext1) failed: %x",
hr);
goto Exit;
}
// Check which fence types are supported by this GPU
{
m_FenceType = SupportedFenceType::None;
ComPtr<IDXGIAdapter4> adapter4;
if (SUCCEEDED(adapter.As(&adapter4))) {
DXGI_ADAPTER_DESC3 desc3;
if (SUCCEEDED(adapter4->GetDesc3(&desc3))) {
if (desc3.Flags & DXGI_ADAPTER_FLAG3_SUPPORT_MONITORED_FENCES) {
// Monitored fences must be used when they are supported
m_FenceType = SupportedFenceType::Monitored;
}
else if (desc3.Flags & DXGI_ADAPTER_FLAG3_SUPPORT_NON_MONITORED_FENCES) {
// Non-monitored fences must only be used when monitored fences are unsupported
m_FenceType = SupportedFenceType::NonMonitored;
}
}
}
}
bool separateDevices;
if (Utils::getEnvironmentVariableOverride("D3D11VA_FORCE_SEPARATE_DEVICES", &separateDevices)) {
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Using D3D11VA_FORCE_SEPARATE_DEVICES to override default logic");
}
else {
D3D11_FEATURE_DATA_D3D11_OPTIONS d3d11Options;
// Check if cross-device sharing works for YUV textures and fences are supported
hr = m_RenderDevice->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS, &d3d11Options, sizeof(d3d11Options));
separateDevices = SUCCEEDED(hr) && d3d11Options.ExtendedResourceSharing && m_FenceType != SupportedFenceType::None;
// The Radeon HD 5570 GPU drivers deadlock when decoding into shared texture arrays, so let's
// limit usage of separate devices to FL 11.1+ GPUs to try to exclude old GPU drivers. We'll
// exempt Intel GPUs because those have been confirmed to work properly (and the extra fence
// that this device separation uses acts as a workaround for a bug in their old drivers where
// they don't properly synchronize between decoder output usage and SRV usage).
if (separateDevices && featureLevel < D3D_FEATURE_LEVEL_11_1 && adapterDesc.VendorId != 0x8086) {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Avoiding texture sharing for old pre-FL11.1 GPU");
separateDevices = false;
}
}
// If we're going to use separate devices for decoding and rendering, create the decoding device
if (!separateDevices || !setupSharedDevice(adapter.Get())) {
m_DecodeDevice = m_RenderDevice;
m_DecodeDeviceContext = m_RenderDeviceContext;
separateDevices = false;
}
if (Utils::getEnvironmentVariableOverride("D3D11VA_FORCE_BIND", &m_BindDecoderOutputTextures)) {
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Using D3D11VA_FORCE_BIND to override default bind/copy logic");
}
else {
// Skip copying to our own internal texture on Intel GPUs due to
// significant performance impact of the extra copy. See:
// https://github.com/moonlight-stream/moonlight-qt/issues/1304
//
// Also bind SRVs when using separate decoding and rendering
// devices as this improves render times by about 2x on my
// Ryzen 3300U system. The fences we use between decoding
// and rendering contexts should hopefully avoid any of the
// synchronization issues we've seen between decoder and SRVs.
m_BindDecoderOutputTextures = adapterDesc.VendorId == 0x8086 || separateDevices;
}
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Decoder texture access: %s (fence: %s)",
m_BindDecoderOutputTextures ? "bind" : "copy",
m_FenceType == SupportedFenceType::Monitored ? "monitored" :
(m_FenceType == SupportedFenceType::NonMonitored ? "non-monitored" : "unsupported"));
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Using %s device for decoding and rendering",
separateDevices ? "separate" : "shared");
if (!checkDecoderSupport(adapter.Get())) {
goto Exit;
}
else {
// Remember that we found a device with support for decoding this codec
m_DevicesWithCodecSupport++;
}
success = true;
Exit:
if (adapterNotFound != nullptr) {
*adapterNotFound = !adapter;
}
if (!success) {
m_RenderDeviceContext.Reset();
m_RenderDevice.Reset();
m_DecodeDeviceContext.Reset();
m_DecodeDevice.Reset();
}
return success;
}
bool D3D11VARenderer::initialize(PDECODER_PARAMETERS params)
{
int outputIndex;
HRESULT hr;
m_DecoderParams = *params;
if (qgetenv("D3D11VA_ENABLED") == "0") {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"D3D11VA is disabled by environment variable");
return false;
}
else if (!IsWindows10OrGreater()) {
// Use DXVA2 on anything older than Win10, so we don't have to handle a bunch
// of legacy Win7/Win8 codepaths in here.
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"D3D11VA renderer is only supported on Windows 10 or later.");
return false;
}
if (!SDL_DXGIGetOutputInfo(SDL_GetWindowDisplayIndex(params->window),
&m_AdapterIndex, &outputIndex)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"SDL_DXGIGetOutputInfo() failed: %s",
SDL_GetError());
return false;
}
hr = CreateDXGIFactory2(
#ifdef QT_DEBUG
DXGI_CREATE_FACTORY_DEBUG,
#else
0,
#endif
__uuidof(IDXGIFactory5),
(void**)&m_Factory);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"CreateDXGIFactory() failed: %x",
hr);
return false;
}
// First try the adapter corresponding to the display where our window resides.
// This will let us avoid a copy if the display GPU has the required decoder.
if (!createDeviceByAdapterIndex(m_AdapterIndex)) {
// If that didn't work, we'll try all GPUs in order until we find one
// or run out of GPUs (DXGI_ERROR_NOT_FOUND from EnumAdapters())
bool adapterNotFound = false;
for (int i = 0; !adapterNotFound; i++) {
if (i == m_AdapterIndex) {
// Don't try the same GPU again
continue;
}
if (createDeviceByAdapterIndex(i, &adapterNotFound)) {
// This GPU worked! Continue initialization.
break;
}
}
if (adapterNotFound) {
SDL_assert(!m_RenderDevice);
SDL_assert(!m_RenderDeviceContext);
return false;
}
}
DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {};
swapChainDesc.Stereo = FALSE;
swapChainDesc.SampleDesc.Count = 1;
swapChainDesc.SampleDesc.Quality = 0;
swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
swapChainDesc.Scaling = DXGI_SCALING_STRETCH;
swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
swapChainDesc.AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED;
swapChainDesc.Flags = 0;
// 3 front buffers (default GetMaximumFrameLatency() count)
// + 1 back buffer
// + 1 extra for DWM to hold on to for DirectFlip
//
// Even though we allocate 3 front buffers for pre-rendered frames,
// they won't actually increase presentation latency because we
// always use SyncInterval 0 which replaces the last one.
//
// IDXGIDevice1 has a SetMaximumFrameLatency() function, but counter-
// intuitively we must avoid it to reduce latency. If we set our max
// frame latency to 1 on the device, our SyncInterval 0 Present() calls
// will block on DWM (acting like SyncInterval 1) rather than doing
// the non-blocking present we expect.
//
// NB: 3 total buffers seems sufficient on NVIDIA hardware but
// causes performance issues (buffer starvation) on AMD GPUs.
swapChainDesc.BufferCount = 3 + 1 + 1;
// Use the current window size as the swapchain size
SDL_GetWindowSize(params->window, (int*)&swapChainDesc.Width, (int*)&swapChainDesc.Height);
m_DisplayWidth = swapChainDesc.Width;
m_DisplayHeight = swapChainDesc.Height;
if (params->videoFormat & VIDEO_FORMAT_MASK_10BIT) {
swapChainDesc.Format = DXGI_FORMAT_R10G10B10A2_UNORM;
}
else {
swapChainDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
}
// Use DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING with flip mode for non-vsync case, if possible.
// NOTE: This is only possible in windowed or borderless windowed mode.
if (!params->enableVsync) {
BOOL allowTearing = FALSE;
hr = m_Factory->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING,
&allowTearing,
sizeof(allowTearing));
if (SUCCEEDED(hr)) {
if (allowTearing) {
// Use flip discard with allow tearing mode if possible.
swapChainDesc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;
m_AllowTearing = true;
}
else {
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"OS/GPU doesn't support DXGI_FEATURE_PRESENT_ALLOW_TEARING");
}
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGIFactory::CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING) failed: %x",
hr);
// Non-fatal
}
// DXVA2 may let us take over for FSE V-sync off cases. However, if we don't have DXGI_FEATURE_PRESENT_ALLOW_TEARING
// then we should not attempt to do this unless there's no other option (HDR, DXVA2 failed in pass 1, etc).
if (!m_AllowTearing && m_DecoderSelectionPass == 0 && !(params->videoFormat & VIDEO_FORMAT_MASK_10BIT) &&
(SDL_GetWindowFlags(params->window) & SDL_WINDOW_FULLSCREEN_DESKTOP) == SDL_WINDOW_FULLSCREEN) {
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Defaulting to DXVA2 for FSE without DXGI_FEATURE_PRESENT_ALLOW_TEARING support");
return false;
}
}
SDL_SysWMinfo info;
SDL_VERSION(&info.version);
SDL_GetWindowWMInfo(params->window, &info);
SDL_assert(info.subsystem == SDL_SYSWM_WINDOWS);
// Always use windowed or borderless windowed mode. SDL does mode-setting for us in
// full-screen exclusive mode (SDL_WINDOW_FULLSCREEN), so this actually works out okay.
ComPtr<IDXGISwapChain1> swapChain;
hr = m_Factory->CreateSwapChainForHwnd(m_RenderDevice.Get(),
info.info.win.window,
&swapChainDesc,
nullptr,
nullptr,
&swapChain);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGIFactory::CreateSwapChainForHwnd() failed: %x",
hr);
return false;
}
hr = swapChain.As(&m_SwapChain);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGISwapChain::QueryInterface(IDXGISwapChain4) failed: %x",
hr);
return false;
}
// Disable Alt+Enter, PrintScreen, and window message snooping. This makes
// it safe to run the renderer on a separate rendering thread rather than
// requiring the main (message loop) thread.
hr = m_Factory->MakeWindowAssociation(info.info.win.window, DXGI_MWA_NO_WINDOW_CHANGES);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGIFactory::MakeWindowAssociation() failed: %x",
hr);
return false;
}
{
m_HwDeviceContext = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA);
if (!m_HwDeviceContext) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to allocate D3D11VA device context");
return false;
}
AVHWDeviceContext* deviceContext = (AVHWDeviceContext*)m_HwDeviceContext->data;
AVD3D11VADeviceContext* d3d11vaDeviceContext = (AVD3D11VADeviceContext*)deviceContext->hwctx;
// FFmpeg will take ownership of these pointers, so we use CopyTo() to bump the ref count
m_DecodeDevice.CopyTo(&d3d11vaDeviceContext->device);
m_DecodeDeviceContext.CopyTo(&d3d11vaDeviceContext->device_context);
// Set lock functions that we will use to synchronize with FFmpeg's usage of our device context
d3d11vaDeviceContext->lock = lockContext;
d3d11vaDeviceContext->unlock = unlockContext;
d3d11vaDeviceContext->lock_ctx = this;
int err = av_hwdevice_ctx_init(m_HwDeviceContext);
if (err < 0) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to initialize D3D11VA device context: %d",
err);
return false;
}
}
if (!setupRenderingResources()) {
return false;
}
return true;
}
bool D3D11VARenderer::prepareDecoderContext(AVCodecContext* context, AVDictionary**)
{
context->hw_device_ctx = av_buffer_ref(m_HwDeviceContext);
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Using D3D11VA accelerated renderer");
return true;
}
bool D3D11VARenderer::prepareDecoderContextInGetFormat(AVCodecContext *context, AVPixelFormat pixelFormat)
{
// Create a new hardware frames context suitable for decoding our specified format
av_buffer_unref(&context->hw_frames_ctx);
int err = avcodec_get_hw_frames_parameters(context, m_HwDeviceContext, pixelFormat, &context->hw_frames_ctx);
if (err < 0) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to get hwframes context parameters: %d",
err);
return false;
}
auto framesContext = (AVHWFramesContext*)context->hw_frames_ctx->data;
auto d3d11vaFramesContext = (AVD3D11VAFramesContext*)framesContext->hwctx;
// If we're binding output textures directly, we need to add the SRV bind flag
if (m_BindDecoderOutputTextures) {
d3d11vaFramesContext->BindFlags |= D3D11_BIND_SHADER_RESOURCE;
}
// If we're using separate decode and render devices, we need to create shared textures
if (m_DecodeDevice != m_RenderDevice) {
d3d11vaFramesContext->MiscFlags |= D3D11_RESOURCE_MISC_SHARED | D3D11_RESOURCE_MISC_SHARED_NTHANDLE;
}
// Mimic the logic in ff_decode_get_hw_frames_ctx() which adds an extra 3 frames
if (framesContext->initial_pool_size) {
framesContext->initial_pool_size += 3;
}
err = av_hwframe_ctx_init(context->hw_frames_ctx);
if (err < 0) {
av_buffer_unref(&context->hw_frames_ctx);
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed initialize hwframes context: %d",
err);
return false;
}
if (!setupFrameRenderingResources(framesContext)) {
av_buffer_unref(&context->hw_frames_ctx);
return false;
}
return true;
}
void D3D11VARenderer::renderFrame(AVFrame* frame)
{
// Acquire the context lock for rendering to prevent concurrent
// access from inside FFmpeg's decoding code
if (m_DecodeDevice == m_RenderDevice) {
lockContext(this);
}
// Clear the back buffer
const float clearColor[4] = {0.0f, 0.0f, 0.0f, 1.0f};
m_RenderDeviceContext->ClearRenderTargetView(m_RenderTargetView.Get(), clearColor);
// Bind the back buffer. This needs to be done each time,
// because the render target view will be unbound by Present().
m_RenderDeviceContext->OMSetRenderTargets(1, m_RenderTargetView.GetAddressOf(), nullptr);
// Render our video frame with the aspect-ratio adjusted viewport
renderVideo(frame);
// Render overlays on top of the video stream
for (int i = 0; i < Overlay::OverlayMax; i++) {
renderOverlay((Overlay::OverlayType)i);
}
UINT flags;
if (m_AllowTearing) {
SDL_assert(!m_DecoderParams.enableVsync);
// If tearing is allowed, use DXGI_PRESENT_ALLOW_TEARING with syncInterval 0.
// It is not valid to use any other syncInterval values in tearing mode.
flags = DXGI_PRESENT_ALLOW_TEARING;
}
else {
// Otherwise, we'll submit as fast as possible and DWM will discard excess
// frames for us. If frame pacing is also enabled or we're in full-screen,
// our Vsync source will keep us in sync with VBlank.
flags = 0;
}
HRESULT hr;
if (frame->color_trc != m_LastColorTrc) {
if (frame->color_trc == AVCOL_TRC_SMPTE2084) {
// Switch to Rec 2020 PQ (SMPTE ST 2084) colorspace for HDR10 rendering
hr = m_SwapChain->SetColorSpace1(DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGISwapChain::SetColorSpace1(DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020) failed: %x",
hr);
}
}
else {
// Restore default sRGB colorspace
hr = m_SwapChain->SetColorSpace1(DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGISwapChain::SetColorSpace1(DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709) failed: %x",
hr);
}
}
m_LastColorTrc = frame->color_trc;
}
// Present according to the decoder parameters
hr = m_SwapChain->Present(0, flags);
if (m_DecodeDevice == m_RenderDevice) {
// Release the context lock
unlockContext(this);
}
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGISwapChain::Present() failed: %x",
hr);
// The card may have been removed or crashed. Reset the decoder.
SDL_Event event;
event.type = SDL_RENDER_TARGETS_RESET;
SDL_PushEvent(&event);
return;
}
}
void D3D11VARenderer::renderOverlay(Overlay::OverlayType type)
{
if (!Session::get()->getOverlayManager().isOverlayEnabled(type)) {
return;
}
// If the overlay is being updated, just skip rendering it this frame
if (!SDL_AtomicTryLock(&m_OverlayLock)) {
return;
}
// Reference these objects so they don't immediately go away if the
// overlay update thread tries to release them.
ComPtr<ID3D11Texture2D> overlayTexture = m_OverlayTextures[type];
ComPtr<ID3D11Buffer> overlayVertexBuffer = m_OverlayVertexBuffers[type];
ComPtr<ID3D11ShaderResourceView> overlayTextureResourceView = m_OverlayTextureResourceViews[type];
SDL_AtomicUnlock(&m_OverlayLock);
if (!overlayTexture) {
return;
}
// If there was a texture, there must also be a vertex buffer and SRV
SDL_assert(overlayVertexBuffer);
SDL_assert(overlayTextureResourceView);
// Bind vertex buffer
UINT stride = sizeof(VERTEX);
UINT offset = 0;
m_RenderDeviceContext->IASetVertexBuffers(0, 1, overlayVertexBuffer.GetAddressOf(), &stride, &offset);
// Bind pixel shader and resources
m_RenderDeviceContext->PSSetShader(m_OverlayPixelShader.Get(), nullptr, 0);
m_RenderDeviceContext->PSSetShaderResources(0, 1, overlayTextureResourceView.GetAddressOf());
// Draw the overlay with alpha blending
m_RenderDeviceContext->OMSetBlendState(m_OverlayBlendState.Get(), nullptr, 0xffffffff);
m_RenderDeviceContext->DrawIndexed(6, 0, 0);
m_RenderDeviceContext->OMSetBlendState(m_VideoBlendState.Get(), nullptr, 0xffffffff);
}
void D3D11VARenderer::bindVideoVertexBuffer(bool frameChanged, AVFrame* frame)
{
if (frameChanged || !m_VideoVertexBuffer) {
// Scale video to the window size while preserving aspect ratio
SDL_Rect src, dst;
src.x = src.y = 0;
src.w = frame->width;
src.h = frame->height;
dst.x = dst.y = 0;
dst.w = m_DisplayWidth;
dst.h = m_DisplayHeight;
StreamUtils::scaleSourceToDestinationSurface(&src, &dst);
// Convert screen space to normalized device coordinates
SDL_FRect renderRect;
StreamUtils::screenSpaceToNormalizedDeviceCoords(&dst, &renderRect, m_DisplayWidth, m_DisplayHeight);
// Don't sample from the alignment padding area
auto framesContext = (AVHWFramesContext*)frame->hw_frames_ctx->data;
float uMax = (float)frame->width / framesContext->width;
float vMax = (float)frame->height / framesContext->height;
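// Note that v is flipped relative to NDC y below: texture coordinates run
// top-down while normalized device coordinates run bottom-up, so the
// bottom-left vertex samples tv = vMax.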
VERTEX verts[] =
{
{renderRect.x, renderRect.y, 0, vMax},
{renderRect.x, renderRect.y+renderRect.h, 0, 0},
{renderRect.x+renderRect.w, renderRect.y, uMax, vMax},
{renderRect.x+renderRect.w, renderRect.y+renderRect.h, uMax, 0},
};
D3D11_BUFFER_DESC vbDesc = {};
vbDesc.ByteWidth = sizeof(verts);
vbDesc.Usage = D3D11_USAGE_IMMUTABLE;
vbDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
vbDesc.CPUAccessFlags = 0;
vbDesc.MiscFlags = 0;
vbDesc.StructureByteStride = sizeof(VERTEX);
D3D11_SUBRESOURCE_DATA vbData = {};
vbData.pSysMem = verts;
HRESULT hr = m_RenderDevice->CreateBuffer(&vbDesc, &vbData, &m_VideoVertexBuffer);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateBuffer() failed: %x",
hr);
return;
}
}
// Bind video rendering vertex buffer
UINT stride = sizeof(VERTEX);
UINT offset = 0;
m_RenderDeviceContext->IASetVertexBuffers(0, 1, m_VideoVertexBuffer.GetAddressOf(), &stride, &offset);
}
void D3D11VARenderer::bindColorConversion(bool frameChanged, AVFrame* frame)
{
bool yuv444 = (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_YUV444);
auto framesContext = (AVHWFramesContext*)frame->hw_frames_ctx->data;
if (yuv444) {
// We'll need to use one of the 4:4:4 shaders for this pixel format
switch (m_TextureFormat)
{
case DXGI_FORMAT_AYUV:
m_RenderDeviceContext->PSSetShader(m_VideoPixelShaders[PixelShaders::GENERIC_AYUV].Get(), nullptr, 0);
break;
case DXGI_FORMAT_Y410:
m_RenderDeviceContext->PSSetShader(m_VideoPixelShaders[PixelShaders::GENERIC_Y410].Get(), nullptr, 0);
break;
default:
SDL_assert(false);
}
}
else {
// We'll need to use the generic 4:2:0 shader for this colorspace and color range combo
m_RenderDeviceContext->PSSetShader(m_VideoPixelShaders[PixelShaders::GENERIC_YUV_420].Get(), nullptr, 0);
}
// If nothing has changed since last frame, we're done
if (!frameChanged) {
return;
}
D3D11_BUFFER_DESC constDesc = {};
constDesc.ByteWidth = sizeof(CSC_CONST_BUF);
constDesc.Usage = D3D11_USAGE_IMMUTABLE;
constDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
constDesc.CPUAccessFlags = 0;
constDesc.MiscFlags = 0;
CSC_CONST_BUF constBuf = {};
std::array<float, 9> cscMatrix;
std::array<float, 3> yuvOffsets;
getFramePremultipliedCscConstants(frame, cscMatrix, yuvOffsets);
std::copy(yuvOffsets.cbegin(), yuvOffsets.cend(), constBuf.offsets);
// We need to adjust our CSC matrix to be column-major and with float3 vectors
// padded with a float in between each of them to adhere to HLSL requirements.
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
constBuf.cscMatrix[i * 4 + j] = cscMatrix[j * 3 + i];
}
}
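// e.g. packed floats [0..3] now hold the first column of the source matrix
// plus one float of padding, matching HLSL's float4-aligned register layout.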
std::array<float, 2> chromaOffset;
getFrameChromaCositingOffsets(frame, chromaOffset);
constBuf.chromaOffset[0] = chromaOffset[0] / framesContext->width;
constBuf.chromaOffset[1] = chromaOffset[1] / framesContext->height;
// Limit chroma texcoords to avoid sampling from alignment texels
constBuf.chromaUVMax[0] = frame->width != framesContext->width ?
((float)(frame->width - 1) / framesContext->width) : 1.0f;
constBuf.chromaUVMax[1] = frame->height != (int)framesContext->height ?
((float)(frame->height - 1) / framesContext->height) : 1.0f;
D3D11_SUBRESOURCE_DATA constData = {};
constData.pSysMem = &constBuf;
ComPtr<ID3D11Buffer> constantBuffer;
HRESULT hr = m_RenderDevice->CreateBuffer(&constDesc, &constData, &constantBuffer);
if (SUCCEEDED(hr)) {
m_RenderDeviceContext->PSSetConstantBuffers(0, 1, constantBuffer.GetAddressOf());
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateBuffer() failed: %x",
hr);
return;
}
}
void D3D11VARenderer::renderVideo(AVFrame* frame)
{
// Insert a fence to force the render context to wait for the decode context to finish writing
if (m_DecodeDevice != m_RenderDevice) {
SDL_assert(m_DecodeD2RFence);
SDL_assert(m_RenderD2RFence);
lockContext(this);
if (SUCCEEDED(m_DecodeDeviceContext->Signal(m_DecodeD2RFence.Get(), m_DecodeRenderSyncFenceValue))) {
m_RenderDeviceContext->Wait(m_RenderD2RFence.Get(), m_DecodeRenderSyncFenceValue++);
}
unlockContext(this);
}
UINT srvIndex;
if (m_BindDecoderOutputTextures) {
// Our indexing logic depends on a direct mapping into m_VideoTextureResourceViews
// based on the texture index provided by FFmpeg.
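// (For AV_PIX_FMT_D3D11 frames, FFmpeg stores the ID3D11Texture2D in
// data[0] and the texture array slice index in data[1].)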
srvIndex = (uintptr_t)frame->data[1];
SDL_assert(srvIndex < m_VideoTextureResourceViews.size());
if (srvIndex >= m_VideoTextureResourceViews.size()) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Unexpected texture index: %u",
srvIndex);
return;
}
}
else {
// Copy this frame into our video texture
m_RenderDeviceContext->CopySubresourceRegion1(m_VideoTexture.Get(), 0, 0, 0, 0,
m_RenderSharedTextureArray.Get(),
(int)(intptr_t)frame->data[1],
nullptr, D3D11_COPY_DISCARD);
// SRV 0 is always mapped to the video texture
srvIndex = 0;
}
bool frameChanged = hasFrameFormatChanged(frame);
// Bind our vertex buffer
bindVideoVertexBuffer(frameChanged, frame);
// Bind our CSC shader (and constant buffer, if required)
bindColorConversion(frameChanged, frame);
// Bind SRVs for this frame
ID3D11ShaderResourceView* frameSrvs[] = { m_VideoTextureResourceViews[srvIndex][0].Get(), m_VideoTextureResourceViews[srvIndex][1].Get() };
m_RenderDeviceContext->PSSetShaderResources(0, 2, frameSrvs);
// Draw the video
m_RenderDeviceContext->DrawIndexed(6, 0, 0);
// Unbind SRVs for this frame
ID3D11ShaderResourceView* nullSrvs[2] = {};
m_RenderDeviceContext->PSSetShaderResources(0, 2, nullSrvs);
// Insert a fence to force the decode context to wait for the render context to finish reading
if (m_DecodeDevice != m_RenderDevice) {
SDL_assert(m_DecodeR2DFence);
SDL_assert(m_RenderR2DFence);
if (SUCCEEDED(m_RenderDeviceContext->Signal(m_RenderR2DFence.Get(), m_DecodeRenderSyncFenceValue))) {
lockContext(this);
m_DecodeDeviceContext->Wait(m_DecodeR2DFence.Get(), m_DecodeRenderSyncFenceValue++);
unlockContext(this);
}
}
}
// This function must NOT use any DXGI or ID3D11DeviceContext methods
// since it can be called on an arbitrary thread!
void D3D11VARenderer::notifyOverlayUpdated(Overlay::OverlayType type)
{
HRESULT hr;
SDL_Surface* newSurface = Session::get()->getOverlayManager().getUpdatedOverlaySurface(type);
bool overlayEnabled = Session::get()->getOverlayManager().isOverlayEnabled(type);
if (newSurface == nullptr && overlayEnabled) {
// The overlay is enabled and there is no new surface. Leave the old texture alone.
return;
}
SDL_AtomicLock(&m_OverlayLock);
ComPtr<ID3D11Texture2D> oldTexture = std::move(m_OverlayTextures[type]);
ComPtr<ID3D11Buffer> oldVertexBuffer = std::move(m_OverlayVertexBuffers[type]);
ComPtr<ID3D11ShaderResourceView> oldTextureResourceView = std::move(m_OverlayTextureResourceViews[type]);
SDL_AtomicUnlock(&m_OverlayLock);
// If the overlay is disabled, we're done
if (!overlayEnabled) {
SDL_FreeSurface(newSurface);
return;
}
// Create a texture with our pixel data
SDL_assert(!SDL_MUSTLOCK(newSurface));
SDL_assert(newSurface->format->format == SDL_PIXELFORMAT_ARGB8888);
D3D11_TEXTURE2D_DESC texDesc = {};
texDesc.Width = newSurface->w;
texDesc.Height = newSurface->h;
texDesc.MipLevels = 1;
texDesc.ArraySize = 1;
texDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
texDesc.SampleDesc.Count = 1;
texDesc.SampleDesc.Quality = 0;
texDesc.Usage = D3D11_USAGE_IMMUTABLE;
texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
texDesc.CPUAccessFlags = 0;
texDesc.MiscFlags = 0;
D3D11_SUBRESOURCE_DATA texData = {};
texData.pSysMem = newSurface->pixels;
texData.SysMemPitch = newSurface->pitch;
ComPtr<ID3D11Texture2D> newTexture;
hr = m_RenderDevice->CreateTexture2D(&texDesc, &texData, &newTexture);
if (FAILED(hr)) {
SDL_FreeSurface(newSurface);
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateTexture2D() failed: %x",
hr);
return;
}
ComPtr<ID3D11ShaderResourceView> newTextureResourceView;
hr = m_RenderDevice->CreateShaderResourceView((ID3D11Resource*)newTexture.Get(), nullptr, &newTextureResourceView);
if (FAILED(hr)) {
SDL_FreeSurface(newSurface);
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateShaderResourceView() failed: %x",
hr);
return;
}
ComPtr<ID3D11Buffer> newVertexBuffer;
if (!createOverlayVertexBuffer(type, newSurface->w, newSurface->h, newVertexBuffer)) {
SDL_FreeSurface(newSurface);
return;
}
// The surface is no longer required
SDL_FreeSurface(newSurface);
newSurface = nullptr;
SDL_AtomicLock(&m_OverlayLock);
m_OverlayVertexBuffers[type] = std::move(newVertexBuffer);
m_OverlayTextures[type] = std::move(newTexture);
m_OverlayTextureResourceViews[type] = std::move(newTextureResourceView);
SDL_AtomicUnlock(&m_OverlayLock);
}
bool D3D11VARenderer::createOverlayVertexBuffer(Overlay::OverlayType type, int width, int height, ComPtr<ID3D11Buffer>& newVertexBuffer)
{
SDL_FRect renderRect = {};
if (type == Overlay::OverlayStatusUpdate) {
// Bottom Left
renderRect.x = 0;
renderRect.y = 0;
}
else if (type == Overlay::OverlayDebug) {
// Top left
renderRect.x = 0;
renderRect.y = m_DisplayHeight - height;
}
renderRect.w = width;
renderRect.h = height;
// Convert screen space to normalized device coordinates
StreamUtils::screenSpaceToNormalizedDeviceCoords(&renderRect, m_DisplayWidth, m_DisplayHeight);
VERTEX verts[] =
{
{renderRect.x, renderRect.y, 0, 1},
{renderRect.x, renderRect.y+renderRect.h, 0, 0},
{renderRect.x+renderRect.w, renderRect.y, 1, 1},
{renderRect.x+renderRect.w, renderRect.y+renderRect.h, 1, 0},
};
D3D11_BUFFER_DESC vbDesc = {};
vbDesc.ByteWidth = sizeof(verts);
vbDesc.Usage = D3D11_USAGE_IMMUTABLE;
vbDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
vbDesc.CPUAccessFlags = 0;
vbDesc.MiscFlags = 0;
vbDesc.StructureByteStride = sizeof(VERTEX);
D3D11_SUBRESOURCE_DATA vbData = {};
vbData.pSysMem = verts;
HRESULT hr = m_RenderDevice->CreateBuffer(&vbDesc, &vbData, &newVertexBuffer);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateBuffer() failed: %x",
hr);
return false;
}
return true;
}
bool D3D11VARenderer::notifyWindowChanged(PWINDOW_STATE_CHANGE_INFO stateInfo)
{
if (stateInfo->stateChangeFlags & WINDOW_STATE_CHANGE_DISPLAY) {
int adapterIndex, outputIndex;
if (!SDL_DXGIGetOutputInfo(stateInfo->displayIndex,
&adapterIndex, &outputIndex)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"SDL_DXGIGetOutputInfo() failed: %s",
SDL_GetError());
return false;
}
// If the window moved to a different GPU, recreate the renderer
// to see if we can use that new GPU for decoding
if (adapterIndex != m_AdapterIndex) {
return false;
}
// If an adapter was added or removed, we can't trust that our
// old indexes are still valid for comparison.
if (!m_Factory->IsCurrent()) {
return false;
}
// We've handled this state change
stateInfo->stateChangeFlags &= ~WINDOW_STATE_CHANGE_DISPLAY;
}
if (stateInfo->stateChangeFlags & WINDOW_STATE_CHANGE_SIZE) {
// Resize our swapchain and reconstruct size-dependent resources
DXGI_SWAP_CHAIN_DESC1 swapchainDesc;
m_SwapChain->GetDesc1(&swapchainDesc);
// Lock the context to avoid concurrent rendering
lockContext(this);
m_DisplayWidth = stateInfo->width;
m_DisplayHeight = stateInfo->height;
// Release the video vertex buffer so we will upload a new one after resize
m_VideoVertexBuffer.Reset();
// Create new vertex buffers for active overlays
SDL_AtomicLock(&m_OverlayLock);
for (size_t i = 0; i < m_OverlayVertexBuffers.size(); i++) {
if (!m_OverlayTextures[i]) {
continue;
}
D3D11_TEXTURE2D_DESC textureDesc;
m_OverlayTextures[i]->GetDesc(&textureDesc);
createOverlayVertexBuffer((Overlay::OverlayType)i, textureDesc.Width, textureDesc.Height, m_OverlayVertexBuffers[i]);
}
SDL_AtomicUnlock(&m_OverlayLock);
// We must release all references to the back buffer
m_RenderTargetView.Reset();
m_RenderDeviceContext->Flush();
HRESULT hr = m_SwapChain->ResizeBuffers(0, stateInfo->width, stateInfo->height, DXGI_FORMAT_UNKNOWN, swapchainDesc.Flags);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGISwapChain::ResizeBuffers() failed: %x",
hr);
unlockContext(this);
return false;
}
// Reset swapchain-dependent resources (RTV, viewport, etc)
if (!setupSwapchainDependentResources()) {
unlockContext(this);
return false;
}
unlockContext(this);
// We've handled this state change
stateInfo->stateChangeFlags &= ~WINDOW_STATE_CHANGE_SIZE;
}
// Check if we've handled all state changes
return stateInfo->stateChangeFlags == 0;
}
bool D3D11VARenderer::checkDecoderSupport(IDXGIAdapter* adapter)
{
HRESULT hr;
Microsoft::WRL::ComPtr<ID3D11VideoDevice> videoDevice;
DXGI_ADAPTER_DESC adapterDesc;
hr = adapter->GetDesc(&adapterDesc);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGIAdapter::GetDesc() failed: %x",
hr);
return false;
}
// Derive an ID3D11VideoDevice from our ID3D11Device.
hr = m_RenderDevice.As(&videoDevice);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::QueryInterface(ID3D11VideoDevice) failed: %x",
hr);
return false;
}
// Check if the format is supported by this decoder
BOOL supported;
switch (m_DecoderParams.videoFormat)
{
case VIDEO_FORMAT_H264:
if (FAILED(videoDevice->CheckVideoDecoderFormat(&D3D11_DECODER_PROFILE_H264_VLD_NOFGT, DXGI_FORMAT_NV12, &supported))) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support H.264 decoding");
return false;
}
else if (!supported) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support H.264 decoding to NV12 format");
return false;
}
break;
case VIDEO_FORMAT_H264_HIGH8_444:
// Unsupported by DXVA
return false;
case VIDEO_FORMAT_H265:
if (FAILED(videoDevice->CheckVideoDecoderFormat(&D3D11_DECODER_PROFILE_HEVC_VLD_MAIN, DXGI_FORMAT_NV12, &supported))) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support HEVC decoding");
return false;
}
else if (!supported) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support HEVC decoding to NV12 format");
return false;
}
break;
case VIDEO_FORMAT_H265_MAIN10:
if (FAILED(videoDevice->CheckVideoDecoderFormat(&D3D11_DECODER_PROFILE_HEVC_VLD_MAIN10, DXGI_FORMAT_P010, &supported))) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support HEVC Main10 decoding");
return false;
}
else if (!supported) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support HEVC Main10 decoding to P010 format");
return false;
}
break;
case VIDEO_FORMAT_H265_REXT8_444:
if (FAILED(videoDevice->CheckVideoDecoderFormat(&k_D3D11_DECODER_PROFILE_HEVC_VLD_MAIN_444, DXGI_FORMAT_AYUV, &supported)))
{
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support HEVC Main 444 8-bit decoding via D3D11VA");
return false;
}
else if (!supported) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support HEVC Main 444 8-bit decoding to AYUV format");
return false;
}
break;
case VIDEO_FORMAT_H265_REXT10_444:
if (FAILED(videoDevice->CheckVideoDecoderFormat(&k_D3D11_DECODER_PROFILE_HEVC_VLD_MAIN10_444, DXGI_FORMAT_Y410, &supported))) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support HEVC Main 444 10-bit decoding via D3D11VA");
return false;
}
else if (!supported) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support HEVC Main 444 10-bit decoding to Y410 format");
return false;
}
break;
case VIDEO_FORMAT_AV1_MAIN8:
if (FAILED(videoDevice->CheckVideoDecoderFormat(&D3D11_DECODER_PROFILE_AV1_VLD_PROFILE0, DXGI_FORMAT_NV12, &supported))) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support AV1 decoding");
return false;
}
else if (!supported) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support AV1 decoding to NV12 format");
return false;
}
break;
case VIDEO_FORMAT_AV1_MAIN10:
if (FAILED(videoDevice->CheckVideoDecoderFormat(&D3D11_DECODER_PROFILE_AV1_VLD_PROFILE0, DXGI_FORMAT_P010, &supported))) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support AV1 Main 10-bit decoding");
return false;
}
else if (!supported) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support AV1 Main 10-bit decoding to P010 format");
return false;
}
break;
case VIDEO_FORMAT_AV1_HIGH8_444:
if (FAILED(videoDevice->CheckVideoDecoderFormat(&D3D11_DECODER_PROFILE_AV1_VLD_PROFILE1, DXGI_FORMAT_AYUV, &supported))) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support AV1 High 444 8-bit decoding");
return false;
}
else if (!supported) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support AV1 High 444 8-bit decoding to AYUV format");
return false;
}
break;
case VIDEO_FORMAT_AV1_HIGH10_444:
if (FAILED(videoDevice->CheckVideoDecoderFormat(&D3D11_DECODER_PROFILE_AV1_VLD_PROFILE1, DXGI_FORMAT_Y410, &supported))) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support AV1 High 444 10-bit decoding");
return false;
}
else if (!supported) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support AV1 High 444 10-bit decoding to Y410 format");
return false;
}
break;
default:
SDL_assert(false);
return false;
}
if (DXUtil::isFormatHybridDecodedByHardware(m_DecoderParams.videoFormat, adapterDesc.VendorId, adapterDesc.DeviceId)) {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"GPU decoding for format %x is blocked due to hardware limitations",
m_DecoderParams.videoFormat);
return false;
}
return true;
}
int D3D11VARenderer::getRendererAttributes()
{
int attributes = 0;
// This renderer supports HDR
attributes |= RENDERER_ATTRIBUTE_HDR_SUPPORT;
// This renderer requires frame pacing to synchronize with VBlank when we're in full-screen.
// In windowed mode, we will render as fast as we can and DWM will grab whatever is latest at the
// time unless the user opts for pacing. We will use pacing in full-screen mode and normal DWM
// sequencing in full-screen desktop mode to behave similarly to the DXVA2 renderer.
if ((SDL_GetWindowFlags(m_DecoderParams.window) & SDL_WINDOW_FULLSCREEN_DESKTOP) == SDL_WINDOW_FULLSCREEN) {
attributes |= RENDERER_ATTRIBUTE_FORCE_PACING;
}
return attributes;
}
int D3D11VARenderer::getDecoderCapabilities()
{
return CAPABILITY_REFERENCE_FRAME_INVALIDATION_HEVC |
CAPABILITY_REFERENCE_FRAME_INVALIDATION_AV1;
}
IFFmpegRenderer::InitFailureReason D3D11VARenderer::getInitFailureReason()
{
// In the specific case where we found at least one D3D11 hardware device but none of the
// enumerated devices have support for the specified codec, tell the FFmpeg decoder not to
// bother trying other hwaccels. We don't want to try loading D3D9 if the device doesn't
// even have hardware support for the codec.
//
// NB: We use feature level 11.0 support as a gate here because we want to avoid returning
// this failure reason in cases where we might have an extremely old GPU with support for
// DXVA2 on D3D9 but not D3D11VA on D3D11. I'm unsure if any such drivers/hardware exists,
// but better be safe than sorry.
//
// NB2: We're also assuming that no GPU exists which lacks any D3D11 driver but has drivers
// for non-DX APIs like Vulkan. I believe this is a Windows Logo requirement so it should be
// safe to assume.
//
// NB3: Sigh, there *are* GPU drivers with greater codec support available via Vulkan than
// D3D11VA even when both D3D11 and Vulkan APIs are supported. This is the case for HEVC RExt
// profiles that were not supported by Microsoft until the Windows 11 24H2 SDK. Don't report
// that hardware support is missing for YUV444 profiles since the Vulkan driver may support it.
if (m_DevicesWithFL11Support != 0 && m_DevicesWithCodecSupport == 0 && !(m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_YUV444)) {
return InitFailureReason::NoHardwareSupport;
}
else {
return InitFailureReason::Unknown;
}
}
void D3D11VARenderer::lockContext(void *lock_ctx)
{
auto me = (D3D11VARenderer*)lock_ctx;
SDL_LockMutex(me->m_ContextLock);
}
void D3D11VARenderer::unlockContext(void *lock_ctx)
{
auto me = (D3D11VARenderer*)lock_ctx;
SDL_UnlockMutex(me->m_ContextLock);
}
bool D3D11VARenderer::setupRenderingResources()
{
HRESULT hr;
m_RenderDeviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
// We use a common vertex shader for all pixel shaders
{
QByteArray vertexShaderBytecode = Path::readDataFile("d3d11_vertex.fxc");
ComPtr<ID3D11VertexShader> vertexShader;
hr = m_RenderDevice->CreateVertexShader(vertexShaderBytecode.constData(), vertexShaderBytecode.length(), nullptr, &vertexShader);
if (SUCCEEDED(hr)) {
m_RenderDeviceContext->VSSetShader(vertexShader.Get(), nullptr, 0);
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateVertexShader() failed: %x",
hr);
return false;
}
const D3D11_INPUT_ELEMENT_DESC vertexDesc[] =
{
{ "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0 },
};
ComPtr<ID3D11InputLayout> inputLayout;
hr = m_RenderDevice->CreateInputLayout(vertexDesc, ARRAYSIZE(vertexDesc), vertexShaderBytecode.constData(), vertexShaderBytecode.length(), &inputLayout);
if (SUCCEEDED(hr)) {
m_RenderDeviceContext->IASetInputLayout(inputLayout.Get());
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateInputLayout() failed: %x",
hr);
return false;
}
}
{
QByteArray overlayPixelShaderBytecode = Path::readDataFile("d3d11_overlay_pixel.fxc");
hr = m_RenderDevice->CreatePixelShader(overlayPixelShaderBytecode.constData(), overlayPixelShaderBytecode.length(), nullptr, &m_OverlayPixelShader);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreatePixelShader() failed: %x",
hr);
return false;
}
}
for (int i = 0; i < PixelShaders::_COUNT; i++)
{
QByteArray videoPixelShaderBytecode = Path::readDataFile(k_VideoShaderNames[i]);
hr = m_RenderDevice->CreatePixelShader(videoPixelShaderBytecode.constData(), videoPixelShaderBytecode.length(), nullptr, &m_VideoPixelShaders[i]);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreatePixelShader() failed: %x",
hr);
return false;
}
}
// We use a common sampler for all pixel shaders
{
D3D11_SAMPLER_DESC samplerDesc = {};
samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
samplerDesc.MipLODBias = 0.0f;
samplerDesc.MaxAnisotropy = 1;
samplerDesc.ComparisonFunc = D3D11_COMPARISON_ALWAYS;
samplerDesc.MinLOD = 0.0f;
samplerDesc.MaxLOD = D3D11_FLOAT32_MAX;
ComPtr<ID3D11SamplerState> sampler;
hr = m_RenderDevice->CreateSamplerState(&samplerDesc, &sampler);
if (SUCCEEDED(hr)) {
m_RenderDeviceContext->PSSetSamplers(0, 1, sampler.GetAddressOf());
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateSamplerState() failed: %x",
hr);
return false;
}
}
// We use a common index buffer for all geometry
{
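// Two triangles, (0, 1, 2) and (3, 2, 1), covering the quad described by the
// 4-vertex buffers created elsewhere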
const int indexes[] = {0, 1, 2, 3, 2, 1};
D3D11_BUFFER_DESC indexBufferDesc = {};
indexBufferDesc.ByteWidth = sizeof(indexes);
indexBufferDesc.Usage = D3D11_USAGE_IMMUTABLE;
indexBufferDesc.BindFlags = D3D11_BIND_INDEX_BUFFER;
indexBufferDesc.CPUAccessFlags = 0;
indexBufferDesc.MiscFlags = 0;
indexBufferDesc.StructureByteStride = sizeof(int);
D3D11_SUBRESOURCE_DATA indexBufferData = {};
indexBufferData.pSysMem = indexes;
indexBufferData.SysMemPitch = sizeof(int);
ComPtr<ID3D11Buffer> indexBuffer;
hr = m_RenderDevice->CreateBuffer(&indexBufferDesc, &indexBufferData, &indexBuffer);
if (SUCCEEDED(hr)) {
m_RenderDeviceContext->IASetIndexBuffer(indexBuffer.Get(), DXGI_FORMAT_R32_UINT, 0);
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateBuffer() failed: %x",
hr);
return false;
}
}
// Create our overlay blend state
{
D3D11_BLEND_DESC blendDesc = {};
blendDesc.AlphaToCoverageEnable = FALSE;
blendDesc.IndependentBlendEnable = FALSE;
blendDesc.RenderTarget[0].BlendEnable = TRUE;
blendDesc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA;
blendDesc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA;
blendDesc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD;
blendDesc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
blendDesc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO;
blendDesc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
blendDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL;
hr = m_RenderDevice->CreateBlendState(&blendDesc, &m_OverlayBlendState);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateBlendState() failed: %x",
hr);
return false;
}
}
// Create and bind our video blend state
{
D3D11_BLEND_DESC blendDesc = {};
blendDesc.AlphaToCoverageEnable = FALSE;
blendDesc.IndependentBlendEnable = FALSE;
blendDesc.RenderTarget[0].BlendEnable = FALSE;
blendDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL;
hr = m_RenderDevice->CreateBlendState(&blendDesc, &m_VideoBlendState);
if (SUCCEEDED(hr)) {
m_RenderDeviceContext->OMSetBlendState(m_VideoBlendState.Get(), nullptr, 0xffffffff);
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateBlendState() failed: %x",
hr);
return false;
}
}
if (!setupSwapchainDependentResources()) {
return false;
}
return true;
}
bool D3D11VARenderer::setupSwapchainDependentResources()
{
HRESULT hr;
// Create our render target view
{
ComPtr<ID3D11Resource> backBufferResource;
hr = m_SwapChain->GetBuffer(0, IID_PPV_ARGS(&backBufferResource));
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGISwapChain::GetBuffer() failed: %x",
hr);
return false;
}
hr = m_RenderDevice->CreateRenderTargetView(backBufferResource.Get(), nullptr, &m_RenderTargetView);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateRenderTargetView() failed: %x",
hr);
return false;
}
}
// Set a viewport that fills the window
{
D3D11_VIEWPORT viewport;
viewport.TopLeftX = 0;
viewport.TopLeftY = 0;
viewport.Width = m_DisplayWidth;
viewport.Height = m_DisplayHeight;
viewport.MinDepth = 0;
viewport.MaxDepth = 1;
m_RenderDeviceContext->RSSetViewports(1, &viewport);
}
return true;
}
// NB: This can be called more than once (and with different frame dimensions!)
bool D3D11VARenderer::setupFrameRenderingResources(AVHWFramesContext* framesContext)
{
auto d3d11vaFramesContext = (AVD3D11VAFramesContext*)framesContext->hwctx;
// Open the decoder texture array on the renderer device if we're using separate devices
if (m_DecodeDevice != m_RenderDevice) {
ComPtr<IDXGIResource1> dxgiDecoderResource;
HRESULT hr = d3d11vaFramesContext->texture_infos->texture->QueryInterface(IID_PPV_ARGS(&dxgiDecoderResource));
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Texture2D::QueryInterface(IDXGIResource1) failed: %x",
hr);
return false;
}
HANDLE sharedHandle;
hr = dxgiDecoderResource->CreateSharedHandle(nullptr, DXGI_SHARED_RESOURCE_READ, nullptr, &sharedHandle);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGIResource1::CreateSharedHandle() failed: %x",
hr);
return false;
}
hr = m_RenderDevice->OpenSharedResource1(sharedHandle, IID_PPV_ARGS(&m_RenderSharedTextureArray));
CloseHandle(sharedHandle);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device1::OpenSharedResource1() failed: %x",
hr);
return false;
}
}
else {
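// ComPtr::Attach() takes ownership without calling AddRef(), so bump the
// reference count manually to keep the FFmpeg-owned texture alive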
d3d11vaFramesContext->texture_infos->texture->AddRef();
m_RenderSharedTextureArray.Attach(d3d11vaFramesContext->texture_infos->texture);
}
// Query the format of the underlying texture array
D3D11_TEXTURE2D_DESC textureDesc;
m_RenderSharedTextureArray->GetDesc(&textureDesc);
m_TextureFormat = textureDesc.Format;
if (m_BindDecoderOutputTextures) {
// Create SRVs for all textures in the decoder pool
if (!setupTexturePoolViews(framesContext)) {
return false;
}
}
else {
// Create our internal texture to copy and render
if (!setupVideoTexture(framesContext)) {
return false;
}
}
return true;
}
std::vector<DXGI_FORMAT> D3D11VARenderer::getVideoTextureSRVFormats()
{
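// NV12 is sampled as R8 (Y plane) + R8G8 (interleaved UV plane) and P010 as
// R16 + R16G16, while the packed AYUV/Y410 4:4:4 formats need only one SRV.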
if (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_YUV444) {
// YUV 4:4:4 formats don't use a second SRV
return { (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_10BIT) ?
DXGI_FORMAT_R10G10B10A2_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM };
}
else if (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_10BIT) {
return { DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM };
}
else {
return { DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM };
}
}
bool D3D11VARenderer::setupVideoTexture(AVHWFramesContext* framesContext)
{
SDL_assert(!m_BindDecoderOutputTextures);
HRESULT hr;
D3D11_TEXTURE2D_DESC texDesc = {};
texDesc.Width = framesContext->width;
texDesc.Height = framesContext->height;
texDesc.MipLevels = 1;
texDesc.ArraySize = 1;
texDesc.Format = m_TextureFormat;
texDesc.SampleDesc.Quality = 0;
texDesc.SampleDesc.Count = 1;
texDesc.Usage = D3D11_USAGE_DEFAULT;
texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
texDesc.CPUAccessFlags = 0;
texDesc.MiscFlags = 0;
hr = m_RenderDevice->CreateTexture2D(&texDesc, nullptr, &m_VideoTexture);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateTexture2D() failed: %x",
hr);
return false;
}
// We will only have one set of SRVs
m_VideoTextureResourceViews.resize(1);
// Create SRVs for the texture
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
srvDesc.Texture2D.MostDetailedMip = 0;
srvDesc.Texture2D.MipLevels = 1;
size_t srvIndex = 0;
for (DXGI_FORMAT srvFormat : getVideoTextureSRVFormats()) {
SDL_assert(srvIndex < m_VideoTextureResourceViews[0].size());
srvDesc.Format = srvFormat;
hr = m_RenderDevice->CreateShaderResourceView(m_VideoTexture.Get(), &srvDesc, &m_VideoTextureResourceViews[0][srvIndex]);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateShaderResourceView() failed: %x",
hr);
return false;
}
srvIndex++;
}
return true;
}
bool D3D11VARenderer::setupTexturePoolViews(AVHWFramesContext* framesContext)
{
AVD3D11VAFramesContext* d3d11vaFramesContext = (AVD3D11VAFramesContext*)framesContext->hwctx;
SDL_assert(m_BindDecoderOutputTextures);
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY;
srvDesc.Texture2DArray.MostDetailedMip = 0;
srvDesc.Texture2DArray.MipLevels = 1;
srvDesc.Texture2DArray.ArraySize = 1;
m_VideoTextureResourceViews.resize(framesContext->initial_pool_size);
// Create luminance and chrominance SRVs for each texture in the pool
for (int i = 0; i < framesContext->initial_pool_size; i++) {
HRESULT hr;
// Our rendering logic depends on the texture index working to map into our SRV array
SDL_assert(i == d3d11vaFramesContext->texture_infos[i].index);
srvDesc.Texture2DArray.FirstArraySlice = d3d11vaFramesContext->texture_infos[i].index;
size_t srvIndex = 0;
for (DXGI_FORMAT srvFormat : getVideoTextureSRVFormats()) {
SDL_assert(srvIndex < m_VideoTextureResourceViews[i].size());
srvDesc.Format = srvFormat;
hr = m_RenderDevice->CreateShaderResourceView(m_RenderSharedTextureArray.Get(),
&srvDesc,
&m_VideoTextureResourceViews[i][srvIndex]);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateShaderResourceView() failed: %x",
hr);
return false;
}
srvIndex++;
}
}
return true;
}