Cameron Gutman 335ed0e8e6 Only sample directly from the video decoder output texture on Intel GPUs
This technique seems to actually make performance worse on some AMD GPUs (RX 480)
and causes rendering errors on others (HD 5570). These might be AMD-specific bugs
but let's not risk a behavior change for AMD/NVIDIA where nobody was having perf
issues with the old copy method anyway.
2024-06-29 14:44:08 -05:00

1542 lines
56 KiB
C++

// For D3D11_DECODER_PROFILE values
#include <initguid.h>
#include "d3d11va.h"
#include "dxutil.h"
#include "path.h"
#include "streaming/streamutils.h"
#include "streaming/session.h"
#include <SDL_syswm.h>
#include <VersionHelpers.h>
#include <dwmapi.h>
#define SAFE_COM_RELEASE(x) if (x) { (x)->Release(); }
typedef struct _VERTEX
{
float x, y;
float tu, tv;
} VERTEX, *PVERTEX;
#define CSC_MATRIX_RAW_ELEMENT_COUNT 9
#define CSC_MATRIX_PACKED_ELEMENT_COUNT 12
static const float k_CscMatrix_Bt601Lim[CSC_MATRIX_RAW_ELEMENT_COUNT] = {
1.1644f, 1.1644f, 1.1644f,
0.0f, -0.3917f, 2.0172f,
1.5960f, -0.8129f, 0.0f,
};
static const float k_CscMatrix_Bt601Full[CSC_MATRIX_RAW_ELEMENT_COUNT] = {
1.0f, 1.0f, 1.0f,
0.0f, -0.3441f, 1.7720f,
1.4020f, -0.7141f, 0.0f,
};
static const float k_CscMatrix_Bt709Lim[CSC_MATRIX_RAW_ELEMENT_COUNT] = {
1.1644f, 1.1644f, 1.1644f,
0.0f, -0.2132f, 2.1124f,
1.7927f, -0.5329f, 0.0f,
};
static const float k_CscMatrix_Bt709Full[CSC_MATRIX_RAW_ELEMENT_COUNT] = {
1.0f, 1.0f, 1.0f,
0.0f, -0.1873f, 1.8556f,
1.5748f, -0.4681f, 0.0f,
};
static const float k_CscMatrix_Bt2020Lim[CSC_MATRIX_RAW_ELEMENT_COUNT] = {
1.1644f, 1.1644f, 1.1644f,
0.0f, -0.1874f, 2.1418f,
1.6781f, -0.6505f, 0.0f,
};
static const float k_CscMatrix_Bt2020Full[CSC_MATRIX_RAW_ELEMENT_COUNT] = {
1.0f, 1.0f, 1.0f,
0.0f, -0.1646f, 1.8814f,
1.4746f, -0.5714f, 0.0f,
};
#define OFFSETS_ELEMENT_COUNT 3
static const float k_Offsets_Lim[OFFSETS_ELEMENT_COUNT] = { 16.0f / 255.0f, 128.0f / 255.0f, 128.0f / 255.0f };
static const float k_Offsets_Full[OFFSETS_ELEMENT_COUNT] = { 0.0f, 128.0f / 255.0f, 128.0f / 255.0f };
typedef struct _CSC_CONST_BUF
{
// CscMatrix value from above but packed appropriately
float cscMatrix[CSC_MATRIX_PACKED_ELEMENT_COUNT];
// YUV offset values from above
float offsets[OFFSETS_ELEMENT_COUNT];
// Padding float to be a multiple of 16 bytes
float padding;
} CSC_CONST_BUF, *PCSC_CONST_BUF;
static_assert(sizeof(CSC_CONST_BUF) % 16 == 0, "Constant buffer sizes must be a multiple of 16");
D3D11VARenderer::D3D11VARenderer(int decoderSelectionPass)
: m_DecoderSelectionPass(decoderSelectionPass),
m_DevicesWithFL11Support(0),
m_DevicesWithCodecSupport(0),
m_Factory(nullptr),
m_Device(nullptr),
m_SwapChain(nullptr),
m_DeviceContext(nullptr),
m_RenderTargetView(nullptr),
m_LastColorSpace(-1),
m_LastFullRange(false),
m_LastColorTrc(AVCOL_TRC_UNSPECIFIED),
m_AllowTearing(false),
m_VideoGenericPixelShader(nullptr),
m_VideoBt601LimPixelShader(nullptr),
m_VideoBt2020LimPixelShader(nullptr),
m_VideoVertexBuffer(nullptr),
m_VideoTexture(nullptr),
m_OverlayLock(0),
m_OverlayPixelShader(nullptr),
m_HwDeviceContext(nullptr),
m_HwFramesContext(nullptr)
{
RtlZeroMemory(m_OverlayVertexBuffers, sizeof(m_OverlayVertexBuffers));
RtlZeroMemory(m_OverlayTextures, sizeof(m_OverlayTextures));
RtlZeroMemory(m_OverlayTextureResourceViews, sizeof(m_OverlayTextureResourceViews));
RtlZeroMemory(m_VideoTextureResourceViews, sizeof(m_VideoTextureResourceViews));
m_ContextLock = SDL_CreateMutex();
DwmEnableMMCSS(TRUE);
}
D3D11VARenderer::~D3D11VARenderer()
{
DwmEnableMMCSS(FALSE);
SDL_DestroyMutex(m_ContextLock);
SAFE_COM_RELEASE(m_VideoVertexBuffer);
SAFE_COM_RELEASE(m_VideoBt2020LimPixelShader);
SAFE_COM_RELEASE(m_VideoBt601LimPixelShader);
SAFE_COM_RELEASE(m_VideoGenericPixelShader);
for (int i = 0; i < ARRAYSIZE(m_VideoTextureResourceViews); i++) {
SAFE_COM_RELEASE(m_VideoTextureResourceViews[i][0]);
SAFE_COM_RELEASE(m_VideoTextureResourceViews[i][1]);
}
SAFE_COM_RELEASE(m_VideoTexture);
for (int i = 0; i < ARRAYSIZE(m_OverlayVertexBuffers); i++) {
SAFE_COM_RELEASE(m_OverlayVertexBuffers[i]);
}
for (int i = 0; i < ARRAYSIZE(m_OverlayTextureResourceViews); i++) {
SAFE_COM_RELEASE(m_OverlayTextureResourceViews[i]);
}
for (int i = 0; i < ARRAYSIZE(m_OverlayTextures); i++) {
SAFE_COM_RELEASE(m_OverlayTextures[i]);
}
SAFE_COM_RELEASE(m_OverlayPixelShader);
SAFE_COM_RELEASE(m_RenderTargetView);
SAFE_COM_RELEASE(m_SwapChain);
if (m_HwFramesContext != nullptr) {
av_buffer_unref(&m_HwFramesContext);
}
// Force destruction of the swapchain immediately
if (m_DeviceContext != nullptr) {
m_DeviceContext->ClearState();
m_DeviceContext->Flush();
}
if (m_HwDeviceContext != nullptr) {
// This will release m_Device and m_DeviceContext too
av_buffer_unref(&m_HwDeviceContext);
}
else {
SAFE_COM_RELEASE(m_Device);
SAFE_COM_RELEASE(m_DeviceContext);
}
SAFE_COM_RELEASE(m_Factory);
}
bool D3D11VARenderer::createDeviceByAdapterIndex(int adapterIndex, bool* adapterNotFound)
{
const D3D_FEATURE_LEVEL supportedFeatureLevels[] = { D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_11_1 };
bool success = false;
IDXGIAdapter1* adapter = nullptr;
DXGI_ADAPTER_DESC1 adapterDesc;
D3D_FEATURE_LEVEL featureLevel;
HRESULT hr;
SDL_assert(m_Device == nullptr);
SDL_assert(m_DeviceContext == nullptr);
hr = m_Factory->EnumAdapters1(adapterIndex, &adapter);
if (hr == DXGI_ERROR_NOT_FOUND) {
// Expected at the end of enumeration
goto Exit;
}
else if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGIFactory::EnumAdapters1() failed: %x",
hr);
goto Exit;
}
hr = adapter->GetDesc1(&adapterDesc);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGIAdapter::GetDesc() failed: %x",
hr);
goto Exit;
}
if (adapterDesc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) {
// Skip the WARP device. We know it will fail.
goto Exit;
}
bool ok;
m_BindDecoderOutputTextures = !!qEnvironmentVariableIntValue("D3D11VA_FORCE_BIND", &ok);
if (!ok) {
// Skip copying to our own internal texture on Intel GPUs due to
// significant performance impact of the extra copy. See:
// https://github.com/moonlight-stream/moonlight-qt/issues/1304
m_BindDecoderOutputTextures = adapterDesc.VendorId == 0x8086;
}
else {
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Using D3D11VA_FORCE_BIND to override default bind/copy logic");
}
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Detected GPU %d: %S (%x:%x) (decoder output: %s)",
adapterIndex,
adapterDesc.Description,
adapterDesc.VendorId,
adapterDesc.DeviceId,
m_BindDecoderOutputTextures ? "bind" : "copy");
hr = D3D11CreateDevice(adapter,
D3D_DRIVER_TYPE_UNKNOWN,
nullptr,
D3D11_CREATE_DEVICE_VIDEO_SUPPORT
#ifdef QT_DEBUG
| D3D11_CREATE_DEVICE_DEBUG
#endif
,
supportedFeatureLevels,
ARRAYSIZE(supportedFeatureLevels),
D3D11_SDK_VERSION,
&m_Device,
&featureLevel,
&m_DeviceContext);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"D3D11CreateDevice() failed: %x",
hr);
goto Exit;
}
else if (featureLevel >= D3D_FEATURE_LEVEL_11_0) {
// Remember that we found a non-software D3D11 devices with support for
// feature level 11.0 or later (Fermi, Terascale 2, or Ivy Bridge and later)
m_DevicesWithFL11Support++;
}
if (!checkDecoderSupport(adapter)) {
m_DeviceContext->Release();
m_DeviceContext = nullptr;
m_Device->Release();
m_Device = nullptr;
goto Exit;
}
else {
// Remember that we found a device with support for decoding this codec
m_DevicesWithCodecSupport++;
}
success = true;
Exit:
if (adapterNotFound != nullptr) {
*adapterNotFound = (adapter == nullptr);
}
SAFE_COM_RELEASE(adapter);
return success;
}
bool D3D11VARenderer::initialize(PDECODER_PARAMETERS params)
{
int adapterIndex, outputIndex;
HRESULT hr;
m_DecoderParams = *params;
if (qgetenv("D3D11VA_ENABLED") == "0") {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"D3D11VA is disabled by environment variable");
return false;
}
else if (!IsWindows10OrGreater()) {
// Use DXVA2 on anything older than Win10, so we don't have to handle a bunch
// of legacy Win7/Win8 codepaths in here.
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"D3D11VA renderer is only supported on Windows 10 or later.");
return false;
}
if (!SDL_DXGIGetOutputInfo(SDL_GetWindowDisplayIndex(params->window),
&adapterIndex, &outputIndex)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"SDL_DXGIGetOutputInfo() failed: %s",
SDL_GetError());
return false;
}
hr = CreateDXGIFactory(__uuidof(IDXGIFactory5), (void**)&m_Factory);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"CreateDXGIFactory() failed: %x",
hr);
return false;
}
// First try the adapter corresponding to the display where our window resides.
// This will let us avoid a copy if the display GPU has the required decoder.
if (!createDeviceByAdapterIndex(adapterIndex)) {
// If that didn't work, we'll try all GPUs in order until we find one
// or run out of GPUs (DXGI_ERROR_NOT_FOUND from EnumAdapters())
bool adapterNotFound = false;
for (int i = 0; !adapterNotFound; i++) {
if (i == adapterIndex) {
// Don't try the same GPU again
continue;
}
if (createDeviceByAdapterIndex(i, &adapterNotFound)) {
// This GPU worked! Continue initialization.
break;
}
}
if (adapterNotFound) {
SDL_assert(m_Device == nullptr);
SDL_assert(m_DeviceContext == nullptr);
return false;
}
}
DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {};
swapChainDesc.Stereo = FALSE;
swapChainDesc.SampleDesc.Count = 1;
swapChainDesc.SampleDesc.Quality = 0;
swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
swapChainDesc.Scaling = DXGI_SCALING_STRETCH;
swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
swapChainDesc.AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED;
swapChainDesc.Flags = 0;
// 3 front buffers (default GetMaximumFrameLatency() count)
// + 1 back buffer
// + 1 extra for DWM to hold on to for DirectFlip
//
// Even though we allocate 3 front buffers for pre-rendered frames,
// they won't actually increase presentation latency because we
// always use SyncInterval 0 which replaces the last one.
//
// IDXGIDevice1 has a SetMaximumFrameLatency() function, but counter-
// intuitively we must avoid it to reduce latency. If we set our max
// frame latency to 1 on thedevice, our SyncInterval 0 Present() calls
// will block on DWM (acting like SyncInterval 1) rather than doing
// the non-blocking present we expect.
//
// NB: 3 total buffers seems sufficient on NVIDIA hardware but
// causes performance issues (buffer starvation) on AMD GPUs.
swapChainDesc.BufferCount = 3 + 1 + 1;
// Use the current window size as the swapchain size
SDL_GetWindowSize(params->window, (int*)&swapChainDesc.Width, (int*)&swapChainDesc.Height);
m_DisplayWidth = swapChainDesc.Width;
m_DisplayHeight = swapChainDesc.Height;
if (params->videoFormat & VIDEO_FORMAT_MASK_10BIT) {
swapChainDesc.Format = DXGI_FORMAT_R10G10B10A2_UNORM;
}
else {
swapChainDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
}
// Use DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING with flip mode for non-vsync case, if possible.
// NOTE: This is only possible in windowed or borderless windowed mode.
if (!params->enableVsync) {
BOOL allowTearing = FALSE;
hr = m_Factory->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING,
&allowTearing,
sizeof(allowTearing));
if (SUCCEEDED(hr)) {
if (allowTearing) {
// Use flip discard with allow tearing mode if possible.
swapChainDesc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;
m_AllowTearing = true;
}
else {
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"OS/GPU doesn't support DXGI_FEATURE_PRESENT_ALLOW_TEARING");
}
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGIFactory::CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING) failed: %x",
hr);
// Non-fatal
}
// DXVA2 may let us take over for FSE V-sync off cases. However, if we don't have DXGI_FEATURE_PRESENT_ALLOW_TEARING
// then we should not attempt to do this unless there's no other option (HDR, DXVA2 failed in pass 1, etc).
if (!m_AllowTearing && m_DecoderSelectionPass == 0 && !(params->videoFormat & VIDEO_FORMAT_MASK_10BIT) &&
(SDL_GetWindowFlags(params->window) & SDL_WINDOW_FULLSCREEN_DESKTOP) == SDL_WINDOW_FULLSCREEN) {
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Defaulting to DXVA2 for FSE without DXGI_FEATURE_PRESENT_ALLOW_TEARING support");
return false;
}
}
SDL_SysWMinfo info;
SDL_VERSION(&info.version);
SDL_GetWindowWMInfo(params->window, &info);
SDL_assert(info.subsystem == SDL_SYSWM_WINDOWS);
// Always use windowed or borderless windowed mode.. SDL does mode-setting for us in
// full-screen exclusive mode (SDL_WINDOW_FULLSCREEN), so this actually works out okay.
IDXGISwapChain1* swapChain;
hr = m_Factory->CreateSwapChainForHwnd(m_Device,
info.info.win.window,
&swapChainDesc,
nullptr,
nullptr,
&swapChain);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGIFactory::CreateSwapChainForHwnd() failed: %x",
hr);
return false;
}
hr = swapChain->QueryInterface(__uuidof(IDXGISwapChain4), (void**)&m_SwapChain);
swapChain->Release();
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGISwapChain::QueryInterface(IDXGISwapChain4) failed: %x",
hr);
return false;
}
// Disable Alt+Enter, PrintScreen, and window message snooping. This makes
// it safe to run the renderer on a separate rendering thread rather than
// requiring the main (message loop) thread.
hr = m_Factory->MakeWindowAssociation(info.info.win.window, DXGI_MWA_NO_WINDOW_CHANGES);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGIFactory::MakeWindowAssociation() failed: %x",
hr);
return false;
}
// Surfaces must be 16 pixel aligned for H.264 and 128 pixel aligned for everything else
// https://github.com/FFmpeg/FFmpeg/blob/a234e5cd80224c95a205c1f3e297d8c04a1374c3/libavcodec/dxva2.c#L609-L616
m_TextureAlignment = (params->videoFormat & VIDEO_FORMAT_MASK_H264) ? 16 : 128;
if (!setupRenderingResources()) {
return false;
}
{
m_HwDeviceContext = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA);
if (!m_HwDeviceContext) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to allocate D3D11VA device context");
return false;
}
AVHWDeviceContext* deviceContext = (AVHWDeviceContext*)m_HwDeviceContext->data;
AVD3D11VADeviceContext* d3d11vaDeviceContext = (AVD3D11VADeviceContext*)deviceContext->hwctx;
// AVHWDeviceContext takes ownership of these objects
d3d11vaDeviceContext->device = m_Device;
d3d11vaDeviceContext->device_context = m_DeviceContext;
// Set lock functions that we will use to synchronize with FFmpeg's usage of our device context
d3d11vaDeviceContext->lock = lockContext;
d3d11vaDeviceContext->unlock = unlockContext;
d3d11vaDeviceContext->lock_ctx = this;
int err = av_hwdevice_ctx_init(m_HwDeviceContext);
if (err < 0) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to initialize D3D11VA device context: %d",
err);
return false;
}
}
{
m_HwFramesContext = av_hwframe_ctx_alloc(m_HwDeviceContext);
if (!m_HwFramesContext) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to allocate D3D11VA frame context");
return false;
}
AVHWFramesContext* framesContext = (AVHWFramesContext*)m_HwFramesContext->data;
// We require NV12 or P010 textures for our shader
framesContext->format = AV_PIX_FMT_D3D11;
framesContext->sw_format = (params->videoFormat & VIDEO_FORMAT_MASK_10BIT) ?
AV_PIX_FMT_P010 : AV_PIX_FMT_NV12;
framesContext->width = FFALIGN(params->width, m_TextureAlignment);
framesContext->height = FFALIGN(params->height, m_TextureAlignment);
// We can have up to 16 reference frames plus a working surface
framesContext->initial_pool_size = DECODER_BUFFER_POOL_SIZE;
AVD3D11VAFramesContext* d3d11vaFramesContext = (AVD3D11VAFramesContext*)framesContext->hwctx;
d3d11vaFramesContext->BindFlags = D3D11_BIND_DECODER;
if (m_BindDecoderOutputTextures) {
// We need to override the default D3D11VA bind flags to bind the textures as a shader resources
d3d11vaFramesContext->BindFlags |= D3D11_BIND_SHADER_RESOURCE;
}
int err = av_hwframe_ctx_init(m_HwFramesContext);
if (err < 0) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to initialize D3D11VA frame context: %d",
err);
return false;
}
if (m_BindDecoderOutputTextures) {
// Create SRVs for all textures in the decoder pool
if (!setupTexturePoolViews(d3d11vaFramesContext)) {
return false;
}
}
else {
// Create our internal texture to copy and render
if (!setupVideoTexture()) {
return false;
}
}
}
return true;
}
bool D3D11VARenderer::prepareDecoderContext(AVCodecContext* context, AVDictionary**)
{
context->hw_device_ctx = av_buffer_ref(m_HwDeviceContext);
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Using D3D11VA accelerated renderer");
return true;
}
bool D3D11VARenderer::prepareDecoderContextInGetFormat(AVCodecContext *context, AVPixelFormat)
{
// hw_frames_ctx must be initialized in ffGetFormat().
context->hw_frames_ctx = av_buffer_ref(m_HwFramesContext);
return true;
}
void D3D11VARenderer::renderFrame(AVFrame* frame)
{
// Acquire the context lock for rendering to prevent concurrent
// access from inside FFmpeg's decoding code
lockContext(this);
// Clear the back buffer
const float clearColor[4] = {0.0f, 0.0f, 0.0f, 0.0f};
m_DeviceContext->ClearRenderTargetView(m_RenderTargetView, clearColor);
// Bind the back buffer. This needs to be done each time,
// because the render target view will be unbound by Present().
m_DeviceContext->OMSetRenderTargets(1, &m_RenderTargetView, nullptr);
// Render our video frame with the aspect-ratio adjusted viewport
renderVideo(frame);
// Render overlays on top of the video stream
for (int i = 0; i < Overlay::OverlayMax; i++) {
renderOverlay((Overlay::OverlayType)i);
}
UINT flags;
if (m_AllowTearing) {
SDL_assert(!m_DecoderParams.enableVsync);
// If tearing is allowed, use DXGI_PRESENT_ALLOW_TEARING with syncInterval 0.
// It is not valid to use any other syncInterval values in tearing mode.
flags = DXGI_PRESENT_ALLOW_TEARING;
}
else {
// Otherwise, we'll submit as fast as possible and DWM will discard excess
// frames for us. If frame pacing is also enabled or we're in full-screen,
// our Vsync source will keep us in sync with VBlank.
flags = 0;
}
HRESULT hr;
if (frame->color_trc != m_LastColorTrc) {
if (frame->color_trc == AVCOL_TRC_SMPTE2084) {
// Switch to Rec 2020 PQ (SMPTE ST 2084) colorspace for HDR10 rendering
hr = m_SwapChain->SetColorSpace1(DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGISwapChain::SetColorSpace1(DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020) failed: %x",
hr);
}
}
else {
// Restore default sRGB colorspace
hr = m_SwapChain->SetColorSpace1(DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGISwapChain::SetColorSpace1(DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709) failed: %x",
hr);
}
}
m_LastColorTrc = frame->color_trc;
}
// Present according to the decoder parameters
hr = m_SwapChain->Present(0, flags);
// Release the context lock
unlockContext(this);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGISwapChain::Present() failed: %x",
hr);
// The card may have been removed or crashed. Reset the decoder.
SDL_Event event;
event.type = SDL_RENDER_TARGETS_RESET;
SDL_PushEvent(&event);
return;
}
}
void D3D11VARenderer::renderOverlay(Overlay::OverlayType type)
{
if (!Session::get()->getOverlayManager().isOverlayEnabled(type)) {
return;
}
// If the overlay is being updated, just skip rendering it this frame
if (!SDL_AtomicTryLock(&m_OverlayLock)) {
return;
}
ID3D11Texture2D* overlayTexture = m_OverlayTextures[type];
ID3D11Buffer* overlayVertexBuffer = m_OverlayVertexBuffers[type];
ID3D11ShaderResourceView* overlayTextureResourceView = m_OverlayTextureResourceViews[type];
if (overlayTexture == nullptr) {
SDL_AtomicUnlock(&m_OverlayLock);
return;
}
// Reference these objects so they don't immediately go away if the
// overlay update thread tries to release them.
SDL_assert(overlayVertexBuffer != nullptr);
overlayTexture->AddRef();
overlayVertexBuffer->AddRef();
overlayTextureResourceView->AddRef();
SDL_AtomicUnlock(&m_OverlayLock);
// Bind vertex buffer
UINT stride = sizeof(VERTEX);
UINT offset = 0;
m_DeviceContext->IASetVertexBuffers(0, 1, &overlayVertexBuffer, &stride, &offset);
// Bind pixel shader and resources
m_DeviceContext->PSSetShader(m_OverlayPixelShader, nullptr, 0);
m_DeviceContext->PSSetShaderResources(0, 1, &overlayTextureResourceView);
// Draw the overlay
m_DeviceContext->DrawIndexed(6, 0, 0);
overlayTextureResourceView->Release();
overlayTexture->Release();
overlayVertexBuffer->Release();
}
void D3D11VARenderer::bindColorConversion(AVFrame* frame)
{
bool fullRange = isFrameFullRange(frame);
int colorspace = getFrameColorspace(frame);
// We have purpose-built shaders for the common Rec 601 (SDR) and Rec 2020 (HDR) cases
if (!fullRange && colorspace == COLORSPACE_REC_601) {
m_DeviceContext->PSSetShader(m_VideoBt601LimPixelShader, nullptr, 0);
}
else if (!fullRange && colorspace == COLORSPACE_REC_2020) {
m_DeviceContext->PSSetShader(m_VideoBt2020LimPixelShader, nullptr, 0);
}
else {
// We'll need to use the generic shader for this colorspace and color range combo
m_DeviceContext->PSSetShader(m_VideoGenericPixelShader, nullptr, 0);
// If nothing has changed since last frame, we're done
if (colorspace == m_LastColorSpace && fullRange == m_LastFullRange) {
return;
}
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Falling back to generic video pixel shader for %d (%s range)",
colorspace,
fullRange ? "full" : "limited");
D3D11_BUFFER_DESC constDesc = {};
constDesc.ByteWidth = sizeof(CSC_CONST_BUF);
constDesc.Usage = D3D11_USAGE_IMMUTABLE;
constDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
constDesc.CPUAccessFlags = 0;
constDesc.MiscFlags = 0;
CSC_CONST_BUF constBuf = {};
const float* rawCscMatrix;
switch (colorspace) {
case COLORSPACE_REC_601:
rawCscMatrix = fullRange ? k_CscMatrix_Bt601Full : k_CscMatrix_Bt601Lim;
break;
case COLORSPACE_REC_709:
rawCscMatrix = fullRange ? k_CscMatrix_Bt709Full : k_CscMatrix_Bt709Lim;
break;
case COLORSPACE_REC_2020:
rawCscMatrix = fullRange ? k_CscMatrix_Bt2020Full : k_CscMatrix_Bt2020Lim;
break;
default:
SDL_assert(false);
return;
}
// We need to adjust our raw CSC matrix to be column-major and with float3 vectors
// padded with a float in between each of them to adhere to HLSL requirements.
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
constBuf.cscMatrix[i * 4 + j] = rawCscMatrix[j * 3 + i];
}
}
// No adjustments are needed to the float[3] array of offsets, so it can just
// be copied with memcpy().
memcpy(constBuf.offsets,
fullRange ? k_Offsets_Full : k_Offsets_Lim,
sizeof(constBuf.offsets));
D3D11_SUBRESOURCE_DATA constData = {};
constData.pSysMem = &constBuf;
ID3D11Buffer* constantBuffer;
HRESULT hr = m_Device->CreateBuffer(&constDesc, &constData, &constantBuffer);
if (SUCCEEDED(hr)) {
m_DeviceContext->PSSetConstantBuffers(1, 1, &constantBuffer);
constantBuffer->Release();
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateBuffer() failed: %x",
hr);
return;
}
}
m_LastColorSpace = colorspace;
m_LastFullRange = fullRange;
}
void D3D11VARenderer::renderVideo(AVFrame* frame)
{
// Bind video rendering vertex buffer
UINT stride = sizeof(VERTEX);
UINT offset = 0;
m_DeviceContext->IASetVertexBuffers(0, 1, &m_VideoVertexBuffer, &stride, &offset);
UINT srvIndex;
if (m_BindDecoderOutputTextures) {
// Our indexing logic depends on a direct mapping into m_VideoTextureResourceViews
// based on the texture index provided by FFmpeg.
srvIndex = (uintptr_t)frame->data[1];
SDL_assert(srvIndex < DECODER_BUFFER_POOL_SIZE);
if (srvIndex >= DECODER_BUFFER_POOL_SIZE) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Unexpected texture index: %u",
srvIndex);
return;
}
}
else {
// Copy this frame (minus alignment padding) into our video texture
D3D11_BOX srcBox;
srcBox.left = 0;
srcBox.top = 0;
srcBox.right = m_DecoderParams.width;
srcBox.bottom = m_DecoderParams.height;
srcBox.front = 0;
srcBox.back = 1;
m_DeviceContext->CopySubresourceRegion(m_VideoTexture, 0, 0, 0, 0, (ID3D11Resource*)frame->data[0], (int)(intptr_t)frame->data[1], &srcBox);
// SRV 0 is always mapped to the video texture
srvIndex = 0;
}
// Bind our CSC shader (and constant buffer, if required)
bindColorConversion(frame);
// Bind SRVs for this frame
m_DeviceContext->PSSetShaderResources(0, 2, m_VideoTextureResourceViews[srvIndex]);
// Draw the video
m_DeviceContext->DrawIndexed(6, 0, 0);
// Unbind SRVs for this frame
ID3D11ShaderResourceView* nullSrvs[2] = {};
m_DeviceContext->PSSetShaderResources(0, 2, nullSrvs);
}
// This function must NOT use any DXGI or ID3D11DeviceContext methods
// since it can be called on an arbitrary thread!
void D3D11VARenderer::notifyOverlayUpdated(Overlay::OverlayType type)
{
HRESULT hr;
SDL_Surface* newSurface = Session::get()->getOverlayManager().getUpdatedOverlaySurface(type);
bool overlayEnabled = Session::get()->getOverlayManager().isOverlayEnabled(type);
if (newSurface == nullptr && overlayEnabled) {
// The overlay is enabled and there is no new surface. Leave the old texture alone.
return;
}
SDL_AtomicLock(&m_OverlayLock);
ID3D11Texture2D* oldTexture = m_OverlayTextures[type];
m_OverlayTextures[type] = nullptr;
ID3D11Buffer* oldVertexBuffer = m_OverlayVertexBuffers[type];
m_OverlayVertexBuffers[type] = nullptr;
ID3D11ShaderResourceView* oldTextureResourceView = m_OverlayTextureResourceViews[type];
m_OverlayTextureResourceViews[type] = nullptr;
SDL_AtomicUnlock(&m_OverlayLock);
SAFE_COM_RELEASE(oldTextureResourceView);
SAFE_COM_RELEASE(oldTexture);
SAFE_COM_RELEASE(oldVertexBuffer);
// If the overlay is disabled, we're done
if (!overlayEnabled) {
SDL_FreeSurface(newSurface);
return;
}
// Create a texture with our pixel data
SDL_assert(!SDL_MUSTLOCK(newSurface));
SDL_assert(newSurface->format->format == SDL_PIXELFORMAT_ARGB8888);
D3D11_TEXTURE2D_DESC texDesc = {};
texDesc.Width = newSurface->w;
texDesc.Height = newSurface->h;
texDesc.MipLevels = 1;
texDesc.ArraySize = 1;
texDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
texDesc.SampleDesc.Count = 1;
texDesc.SampleDesc.Quality = 0;
texDesc.Usage = D3D11_USAGE_IMMUTABLE;
texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
texDesc.CPUAccessFlags = 0;
texDesc.MiscFlags = 0;
D3D11_SUBRESOURCE_DATA texData = {};
texData.pSysMem = newSurface->pixels;
texData.SysMemPitch = newSurface->pitch;
ID3D11Texture2D* newTexture;
hr = m_Device->CreateTexture2D(&texDesc, &texData, &newTexture);
if (FAILED(hr)) {
SDL_FreeSurface(newSurface);
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateTexture2D() failed: %x",
hr);
return;
}
ID3D11ShaderResourceView* newTextureResourceView = nullptr;
hr = m_Device->CreateShaderResourceView((ID3D11Resource*)newTexture, nullptr, &newTextureResourceView);
if (FAILED(hr)) {
SAFE_COM_RELEASE(newTexture);
SDL_FreeSurface(newSurface);
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateShaderResourceView() failed: %x",
hr);
return;
}
SDL_FRect renderRect = {};
if (type == Overlay::OverlayStatusUpdate) {
// Bottom Left
renderRect.x = 0;
renderRect.y = 0;
}
else if (type == Overlay::OverlayDebug) {
// Top left
renderRect.x = 0;
renderRect.y = m_DisplayHeight - newSurface->h;
}
renderRect.w = newSurface->w;
renderRect.h = newSurface->h;
// Convert screen space to normalized device coordinates
StreamUtils::screenSpaceToNormalizedDeviceCoords(&renderRect, m_DisplayWidth, m_DisplayHeight);
// The surface is no longer required
SDL_FreeSurface(newSurface);
newSurface = nullptr;
VERTEX verts[] =
{
{renderRect.x, renderRect.y, 0, 1},
{renderRect.x, renderRect.y+renderRect.h, 0, 0},
{renderRect.x+renderRect.w, renderRect.y, 1, 1},
{renderRect.x+renderRect.w, renderRect.y+renderRect.h, 1, 0},
};
D3D11_BUFFER_DESC vbDesc = {};
vbDesc.ByteWidth = sizeof(verts);
vbDesc.Usage = D3D11_USAGE_IMMUTABLE;
vbDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
vbDesc.CPUAccessFlags = 0;
vbDesc.MiscFlags = 0;
vbDesc.StructureByteStride = sizeof(VERTEX);
D3D11_SUBRESOURCE_DATA vbData = {};
vbData.pSysMem = verts;
ID3D11Buffer* newVertexBuffer;
hr = m_Device->CreateBuffer(&vbDesc, &vbData, &newVertexBuffer);
if (FAILED(hr)) {
SAFE_COM_RELEASE(newTextureResourceView);
SAFE_COM_RELEASE(newTexture);
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateBuffer() failed: %x",
hr);
return;
}
SDL_AtomicLock(&m_OverlayLock);
m_OverlayVertexBuffers[type] = newVertexBuffer;
m_OverlayTextures[type] = newTexture;
m_OverlayTextureResourceViews[type] = newTextureResourceView;
SDL_AtomicUnlock(&m_OverlayLock);
}
bool D3D11VARenderer::checkDecoderSupport(IDXGIAdapter* adapter)
{
HRESULT hr;
ID3D11VideoDevice* videoDevice;
// Derive a ID3D11VideoDevice from our ID3D11Device.
hr = m_Device->QueryInterface(__uuidof(ID3D11VideoDevice), (void**)&videoDevice);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::QueryInterface(ID3D11VideoDevice) failed: %x",
hr);
return false;
}
// Check if the format is supported by this decoder
BOOL supported;
switch (m_DecoderParams.videoFormat)
{
case VIDEO_FORMAT_H264:
if (FAILED(videoDevice->CheckVideoDecoderFormat(&D3D11_DECODER_PROFILE_H264_VLD_NOFGT, DXGI_FORMAT_NV12, &supported))) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support H.264 decoding");
videoDevice->Release();
return false;
}
else if (!supported) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support H.264 decoding to NV12 format");
videoDevice->Release();
return false;
}
break;
case VIDEO_FORMAT_H265:
if (FAILED(videoDevice->CheckVideoDecoderFormat(&D3D11_DECODER_PROFILE_HEVC_VLD_MAIN, DXGI_FORMAT_NV12, &supported))) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support HEVC decoding");
videoDevice->Release();
return false;
}
else if (!supported) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support HEVC decoding to NV12 format");
videoDevice->Release();
return false;
}
break;
case VIDEO_FORMAT_H265_MAIN10:
if (FAILED(videoDevice->CheckVideoDecoderFormat(&D3D11_DECODER_PROFILE_HEVC_VLD_MAIN10, DXGI_FORMAT_P010, &supported))) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support HEVC Main10 decoding");
videoDevice->Release();
return false;
}
else if (!supported) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support HEVC Main10 decoding to P010 format");
videoDevice->Release();
return false;
}
break;
case VIDEO_FORMAT_AV1_MAIN8:
if (FAILED(videoDevice->CheckVideoDecoderFormat(&D3D11_DECODER_PROFILE_AV1_VLD_PROFILE0, DXGI_FORMAT_NV12, &supported))) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support AV1 decoding");
videoDevice->Release();
return false;
}
else if (!supported) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support AV1 decoding to NV12 format");
videoDevice->Release();
return false;
}
break;
case VIDEO_FORMAT_AV1_MAIN10:
if (FAILED(videoDevice->CheckVideoDecoderFormat(&D3D11_DECODER_PROFILE_AV1_VLD_PROFILE0, DXGI_FORMAT_P010, &supported))) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support AV1 Main10 decoding");
videoDevice->Release();
return false;
}
else if (!supported) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"GPU doesn't support AV1 Main10 decoding to P010 format");
videoDevice->Release();
return false;
}
break;
default:
SDL_assert(false);
videoDevice->Release();
return false;
}
videoDevice->Release();
DXGI_ADAPTER_DESC adapterDesc;
hr = adapter->GetDesc(&adapterDesc);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGIAdapter::GetDesc() failed: %x",
hr);
return false;
}
if (DXUtil::isFormatHybridDecodedByHardware(m_DecoderParams.videoFormat, adapterDesc.VendorId, adapterDesc.DeviceId)) {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"GPU decoding for format %x is blocked due to hardware limitations",
m_DecoderParams.videoFormat);
return false;
}
return true;
}
int D3D11VARenderer::getRendererAttributes()
{
int attributes = 0;
// This renderer supports HDR
attributes |= RENDERER_ATTRIBUTE_HDR_SUPPORT;
// This renderer requires frame pacing to synchronize with VBlank when we're in full-screen.
// In windowed mode, we will render as fast we can and DWM will grab whatever is latest at the
// time unless the user opts for pacing. We will use pacing in full-screen mode and normal DWM
// sequencing in full-screen desktop mode to behave similarly to the DXVA2 renderer.
if ((SDL_GetWindowFlags(m_DecoderParams.window) & SDL_WINDOW_FULLSCREEN_DESKTOP) == SDL_WINDOW_FULLSCREEN) {
attributes |= RENDERER_ATTRIBUTE_FORCE_PACING;
}
return attributes;
}
int D3D11VARenderer::getDecoderCapabilities()
{
return CAPABILITY_REFERENCE_FRAME_INVALIDATION_HEVC |
CAPABILITY_REFERENCE_FRAME_INVALIDATION_AV1;
}
bool D3D11VARenderer::needsTestFrame()
{
// We can usually determine when D3D11VA will work based on which decoder GUIDs are supported,
// however there are some strange cases (Quadro P400 + Radeon HD 5570) where something goes
// horribly wrong and D3D11VideoDevice::CreateVideoDecoder() fails inside FFmpeg. We need to
// catch that case before we commit to using D3D11VA.
return true;
}
IFFmpegRenderer::InitFailureReason D3D11VARenderer::getInitFailureReason()
{
// In the specific case where we found at least one D3D11 hardware device but none of the
// enumerated devices have support for the specified codec, tell the FFmpeg decoder not to
// bother trying other hwaccels. We don't want to try loading D3D9 if the device doesn't
// even have hardware support for the codec.
//
// NB: We use feature level 11.0 support as a gate here because we want to avoid returning
// this failure reason in cases where we might have an extremely old GPU with support for
// DXVA2 on D3D9 but not D3D11VA on D3D11. I'm unsure if any such drivers/hardware exists,
// but better be safe than sorry.
//
// NB2: We're also assuming that no GPU exists which lacks any D3D11 driver but has drivers
// for non-DX APIs like Vulkan. I believe this is a Windows Logo requirement so it should be
// safe to assume.
if (m_DevicesWithFL11Support != 0 && m_DevicesWithCodecSupport == 0) {
return InitFailureReason::NoHardwareSupport;
}
else {
return InitFailureReason::Unknown;
}
}
void D3D11VARenderer::lockContext(void *lock_ctx)
{
auto me = (D3D11VARenderer*)lock_ctx;
SDL_LockMutex(me->m_ContextLock);
}
void D3D11VARenderer::unlockContext(void *lock_ctx)
{
auto me = (D3D11VARenderer*)lock_ctx;
SDL_UnlockMutex(me->m_ContextLock);
}
bool D3D11VARenderer::setupRenderingResources()
{
HRESULT hr;
m_DeviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
// We use a common vertex shader for all pixel shaders
{
QByteArray vertexShaderBytecode = Path::readDataFile("d3d11_vertex.fxc");
ID3D11VertexShader* vertexShader;
hr = m_Device->CreateVertexShader(vertexShaderBytecode.constData(), vertexShaderBytecode.length(), nullptr, &vertexShader);
if (SUCCEEDED(hr)) {
m_DeviceContext->VSSetShader(vertexShader, nullptr, 0);
vertexShader->Release();
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateVertexShader() failed: %x",
hr);
return false;
}
const D3D11_INPUT_ELEMENT_DESC vertexDesc[] =
{
{ "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0 },
};
ID3D11InputLayout* inputLayout;
hr = m_Device->CreateInputLayout(vertexDesc, ARRAYSIZE(vertexDesc), vertexShaderBytecode.constData(), vertexShaderBytecode.length(), &inputLayout);
if (SUCCEEDED(hr)) {
m_DeviceContext->IASetInputLayout(inputLayout);
inputLayout->Release();
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateInputLayout() failed: %x",
hr);
return false;
}
}
{
QByteArray overlayPixelShaderBytecode = Path::readDataFile("d3d11_overlay_pixel.fxc");
hr = m_Device->CreatePixelShader(overlayPixelShaderBytecode.constData(), overlayPixelShaderBytecode.length(), nullptr, &m_OverlayPixelShader);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreatePixelShader() failed: %x",
hr);
return false;
}
}
{
QByteArray videoPixelShaderBytecode = Path::readDataFile("d3d11_genyuv_pixel.fxc");
hr = m_Device->CreatePixelShader(videoPixelShaderBytecode.constData(), videoPixelShaderBytecode.length(), nullptr, &m_VideoGenericPixelShader);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreatePixelShader() failed: %x",
hr);
return false;
}
}
{
QByteArray videoPixelShaderBytecode = Path::readDataFile("d3d11_bt601lim_pixel.fxc");
hr = m_Device->CreatePixelShader(videoPixelShaderBytecode.constData(), videoPixelShaderBytecode.length(), nullptr, &m_VideoBt601LimPixelShader);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreatePixelShader() failed: %x",
hr);
return false;
}
}
{
QByteArray videoPixelShaderBytecode = Path::readDataFile("d3d11_bt2020lim_pixel.fxc");
hr = m_Device->CreatePixelShader(videoPixelShaderBytecode.constData(), videoPixelShaderBytecode.length(), nullptr, &m_VideoBt2020LimPixelShader);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreatePixelShader() failed: %x",
hr);
return false;
}
}
// We use a common sampler for all pixel shaders
{
D3D11_SAMPLER_DESC samplerDesc = {};
samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
samplerDesc.MipLODBias = 0.0f;
samplerDesc.MaxAnisotropy = 1;
samplerDesc.ComparisonFunc = D3D11_COMPARISON_ALWAYS;
samplerDesc.MinLOD = 0.0f;
samplerDesc.MaxLOD = D3D11_FLOAT32_MAX;
ID3D11SamplerState* sampler;
hr = m_Device->CreateSamplerState(&samplerDesc, &sampler);
if (SUCCEEDED(hr)) {
m_DeviceContext->PSSetSamplers(0, 1, &sampler);
sampler->Release();
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateSamplerState() failed: %x",
hr);
return false;
}
}
// Create our render target view
{
ID3D11Resource* backBufferResource;
hr = m_SwapChain->GetBuffer(0, __uuidof(ID3D11Resource), (void**)&backBufferResource);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"IDXGISwapChain::GetBuffer() failed: %x",
hr);
return false;
}
hr = m_Device->CreateRenderTargetView(backBufferResource, nullptr, &m_RenderTargetView);
backBufferResource->Release();
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateRenderTargetView() failed: %x",
hr);
return false;
}
}
// We use a common index buffer for all geometry
{
const int indexes[] = {0, 1, 2, 3, 2, 1};
D3D11_BUFFER_DESC indexBufferDesc = {};
indexBufferDesc.ByteWidth = sizeof(indexes);
indexBufferDesc.Usage = D3D11_USAGE_IMMUTABLE;
indexBufferDesc.BindFlags = D3D11_BIND_INDEX_BUFFER;
indexBufferDesc.CPUAccessFlags = 0;
indexBufferDesc.MiscFlags = 0;
indexBufferDesc.StructureByteStride = sizeof(int);
D3D11_SUBRESOURCE_DATA indexBufferData = {};
indexBufferData.pSysMem = indexes;
indexBufferData.SysMemPitch = sizeof(int);
ID3D11Buffer* indexBuffer;
hr = m_Device->CreateBuffer(&indexBufferDesc, &indexBufferData, &indexBuffer);
if (SUCCEEDED(hr)) {
m_DeviceContext->IASetIndexBuffer(indexBuffer, DXGI_FORMAT_R32_UINT, 0);
indexBuffer->Release();
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateBuffer() failed: %x",
hr);
return false;
}
}
// Create our fixed vertex buffer for video rendering
{
// Scale video to the window size while preserving aspect ratio
SDL_Rect src, dst;
src.x = src.y = 0;
src.w = m_DecoderParams.width;
src.h = m_DecoderParams.height;
dst.x = dst.y = 0;
dst.w = m_DisplayWidth;
dst.h = m_DisplayHeight;
StreamUtils::scaleSourceToDestinationSurface(&src, &dst);
// Convert screen space to normalized device coordinates
SDL_FRect renderRect;
StreamUtils::screenSpaceToNormalizedDeviceCoords(&dst, &renderRect, m_DisplayWidth, m_DisplayHeight);
// If we're binding the decoder output textures directly, don't sample from the alignment padding area
SDL_assert(m_TextureAlignment != 0);
float uMax = m_BindDecoderOutputTextures ? ((float)m_DecoderParams.width / FFALIGN(m_DecoderParams.width, m_TextureAlignment)) : 1.0f;
float vMax = m_BindDecoderOutputTextures ? ((float)m_DecoderParams.height / FFALIGN(m_DecoderParams.height, m_TextureAlignment)) : 1.0f;
VERTEX verts[] =
{
{renderRect.x, renderRect.y, 0, vMax},
{renderRect.x, renderRect.y+renderRect.h, 0, 0},
{renderRect.x+renderRect.w, renderRect.y, uMax, vMax},
{renderRect.x+renderRect.w, renderRect.y+renderRect.h, uMax, 0},
};
D3D11_BUFFER_DESC vbDesc = {};
vbDesc.ByteWidth = sizeof(verts);
vbDesc.Usage = D3D11_USAGE_IMMUTABLE;
vbDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
vbDesc.CPUAccessFlags = 0;
vbDesc.MiscFlags = 0;
vbDesc.StructureByteStride = sizeof(VERTEX);
D3D11_SUBRESOURCE_DATA vbData = {};
vbData.pSysMem = verts;
hr = m_Device->CreateBuffer(&vbDesc, &vbData, &m_VideoVertexBuffer);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateBuffer() failed: %x",
hr);
return false;
}
}
// Create our fixed constant buffer to limit chroma texcoords and avoid sampling from alignment texels.
{
D3D11_BUFFER_DESC constDesc = {};
constDesc.ByteWidth = sizeof(CSC_CONST_BUF);
constDesc.Usage = D3D11_USAGE_IMMUTABLE;
constDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
constDesc.CPUAccessFlags = 0;
constDesc.MiscFlags = 0;
int textureWidth = m_BindDecoderOutputTextures ? FFALIGN(m_DecoderParams.width, m_TextureAlignment) : m_DecoderParams.width;
int textureHeight = m_BindDecoderOutputTextures ? FFALIGN(m_DecoderParams.height, m_TextureAlignment) : m_DecoderParams.height;
float chromaUVMax[3] = {};
chromaUVMax[0] = m_DecoderParams.width != textureWidth ? ((float)(m_DecoderParams.width - 1) / textureWidth) : 1.0f;
chromaUVMax[1] = m_DecoderParams.height != textureHeight ? ((float)(m_DecoderParams.height - 1) / textureHeight) : 1.0f;
D3D11_SUBRESOURCE_DATA constData = {};
constData.pSysMem = chromaUVMax;
ID3D11Buffer* constantBuffer;
HRESULT hr = m_Device->CreateBuffer(&constDesc, &constData, &constantBuffer);
if (SUCCEEDED(hr)) {
m_DeviceContext->PSSetConstantBuffers(0, 1, &constantBuffer);
constantBuffer->Release();
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateBuffer() failed: %x",
hr);
return false;
}
}
// Create our blend state
{
D3D11_BLEND_DESC blendDesc = {};
blendDesc.AlphaToCoverageEnable = FALSE;
blendDesc.IndependentBlendEnable = FALSE;
blendDesc.RenderTarget[0].BlendEnable = TRUE;
blendDesc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA;
blendDesc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA;
blendDesc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD;
blendDesc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
blendDesc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO;
blendDesc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
blendDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL;
ID3D11BlendState* blendState;
hr = m_Device->CreateBlendState(&blendDesc, &blendState);
if (SUCCEEDED(hr)) {
m_DeviceContext->OMSetBlendState(blendState, nullptr, 0xffffffff);
blendState->Release();
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateBlendState() failed: %x",
hr);
return false;
}
}
// Set a viewport that fills the window
{
D3D11_VIEWPORT viewport;
viewport.TopLeftX = 0;
viewport.TopLeftY = 0;
viewport.Width = m_DisplayWidth;
viewport.Height = m_DisplayHeight;
viewport.MinDepth = 0;
viewport.MaxDepth = 1;
m_DeviceContext->RSSetViewports(1, &viewport);
}
return true;
}
bool D3D11VARenderer::setupVideoTexture()
{
SDL_assert(!m_BindDecoderOutputTextures);
HRESULT hr;
D3D11_TEXTURE2D_DESC texDesc = {};
texDesc.Width = m_DecoderParams.width;
texDesc.Height = m_DecoderParams.height;
texDesc.MipLevels = 1;
texDesc.ArraySize = 1;
texDesc.Format = (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_10BIT) ? DXGI_FORMAT_P010 : DXGI_FORMAT_NV12;
texDesc.SampleDesc.Quality = 0;
texDesc.SampleDesc.Count = 1;
texDesc.Usage = D3D11_USAGE_DEFAULT;
texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
texDesc.CPUAccessFlags = 0;
texDesc.MiscFlags = 0;
hr = m_Device->CreateTexture2D(&texDesc, nullptr, &m_VideoTexture);
if (FAILED(hr)) {
m_VideoTexture = nullptr;
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateTexture2D() failed: %x",
hr);
return false;
}
// Create luminance and chrominance SRVs for each plane of the texture
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
srvDesc.Texture2D.MostDetailedMip = 0;
srvDesc.Texture2D.MipLevels = 1;
srvDesc.Format = (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_10BIT) ? DXGI_FORMAT_R16_UNORM : DXGI_FORMAT_R8_UNORM;
hr = m_Device->CreateShaderResourceView(m_VideoTexture, &srvDesc, &m_VideoTextureResourceViews[0][0]);
if (FAILED(hr)) {
m_VideoTextureResourceViews[0][0] = nullptr;
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateShaderResourceView() failed: %x",
hr);
return false;
}
srvDesc.Format = (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_10BIT) ? DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM;
hr = m_Device->CreateShaderResourceView(m_VideoTexture, &srvDesc, &m_VideoTextureResourceViews[0][1]);
if (FAILED(hr)) {
m_VideoTextureResourceViews[0][1] = nullptr;
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateShaderResourceView() failed: %x",
hr);
return false;
}
return true;
}
bool D3D11VARenderer::setupTexturePoolViews(AVD3D11VAFramesContext* frameContext)
{
SDL_assert(m_BindDecoderOutputTextures);
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY;
srvDesc.Texture2DArray.MostDetailedMip = 0;
srvDesc.Texture2DArray.MipLevels = 1;
srvDesc.Texture2DArray.ArraySize = 1;
// Create luminance and chrominance SRVs for each texture in the pool
for (int i = 0; i < DECODER_BUFFER_POOL_SIZE; i++) {
HRESULT hr;
// Our rendering logic depends on the texture index working to map into our SRV array
SDL_assert(i == frameContext->texture_infos[i].index);
srvDesc.Texture2DArray.FirstArraySlice = frameContext->texture_infos[i].index;
srvDesc.Format = (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_10BIT) ? DXGI_FORMAT_R16_UNORM : DXGI_FORMAT_R8_UNORM;
hr = m_Device->CreateShaderResourceView(frameContext->texture_infos[i].texture, &srvDesc, &m_VideoTextureResourceViews[i][0]);
if (FAILED(hr)) {
m_VideoTextureResourceViews[i][0] = nullptr;
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateShaderResourceView() failed: %x",
hr);
return false;
}
srvDesc.Format = (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_10BIT) ? DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM;
hr = m_Device->CreateShaderResourceView(frameContext->texture_infos[i].texture, &srvDesc, &m_VideoTextureResourceViews[i][1]);
if (FAILED(hr)) {
m_VideoTextureResourceViews[i][1] = nullptr;
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateShaderResourceView() failed: %x",
hr);
return false;
}
}
return true;
}