mirror of
https://github.com/moonlight-stream/moonlight-qt.git
synced 2025-07-02 15:55:39 +00:00
Only sample directly from the video decoder output texture on Intel GPUs
This technique seems to actually make performance worse on some AMD GPUs (RX 480) and causes rendering errors on others (HD 5570). These might be AMD-specific bugs but let's not risk a behavior change for AMD/NVIDIA where nobody was having perf issues with the old copy method anyway.
This commit is contained in:
parent
0abb9fd7c2
commit
335ed0e8e6
@ -90,6 +90,7 @@ D3D11VARenderer::D3D11VARenderer(int decoderSelectionPass)
|
|||||||
m_VideoBt601LimPixelShader(nullptr),
|
m_VideoBt601LimPixelShader(nullptr),
|
||||||
m_VideoBt2020LimPixelShader(nullptr),
|
m_VideoBt2020LimPixelShader(nullptr),
|
||||||
m_VideoVertexBuffer(nullptr),
|
m_VideoVertexBuffer(nullptr),
|
||||||
|
m_VideoTexture(nullptr),
|
||||||
m_OverlayLock(0),
|
m_OverlayLock(0),
|
||||||
m_OverlayPixelShader(nullptr),
|
m_OverlayPixelShader(nullptr),
|
||||||
m_HwDeviceContext(nullptr),
|
m_HwDeviceContext(nullptr),
|
||||||
@ -121,6 +122,8 @@ D3D11VARenderer::~D3D11VARenderer()
|
|||||||
SAFE_COM_RELEASE(m_VideoTextureResourceViews[i][1]);
|
SAFE_COM_RELEASE(m_VideoTextureResourceViews[i][1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SAFE_COM_RELEASE(m_VideoTexture);
|
||||||
|
|
||||||
for (int i = 0; i < ARRAYSIZE(m_OverlayVertexBuffers); i++) {
|
for (int i = 0; i < ARRAYSIZE(m_OverlayVertexBuffers); i++) {
|
||||||
SAFE_COM_RELEASE(m_OverlayVertexBuffers[i]);
|
SAFE_COM_RELEASE(m_OverlayVertexBuffers[i]);
|
||||||
}
|
}
|
||||||
@ -197,12 +200,26 @@ bool D3D11VARenderer::createDeviceByAdapterIndex(int adapterIndex, bool* adapter
|
|||||||
goto Exit;
|
goto Exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ok;
|
||||||
|
m_BindDecoderOutputTextures = !!qEnvironmentVariableIntValue("D3D11VA_FORCE_BIND", &ok);
|
||||||
|
if (!ok) {
|
||||||
|
// Skip copying to our own internal texture on Intel GPUs due to
|
||||||
|
// significant performance impact of the extra copy. See:
|
||||||
|
// https://github.com/moonlight-stream/moonlight-qt/issues/1304
|
||||||
|
m_BindDecoderOutputTextures = adapterDesc.VendorId == 0x8086;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
|
||||||
|
"Using D3D11VA_FORCE_BIND to override default bind/copy logic");
|
||||||
|
}
|
||||||
|
|
||||||
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
|
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
|
||||||
"Detected GPU %d: %S (%x:%x)",
|
"Detected GPU %d: %S (%x:%x) (decoder output: %s)",
|
||||||
adapterIndex,
|
adapterIndex,
|
||||||
adapterDesc.Description,
|
adapterDesc.Description,
|
||||||
adapterDesc.VendorId,
|
adapterDesc.VendorId,
|
||||||
adapterDesc.DeviceId);
|
adapterDesc.DeviceId,
|
||||||
|
m_BindDecoderOutputTextures ? "bind" : "copy");
|
||||||
|
|
||||||
hr = D3D11CreateDevice(adapter,
|
hr = D3D11CreateDevice(adapter,
|
||||||
D3D_DRIVER_TYPE_UNKNOWN,
|
D3D_DRIVER_TYPE_UNKNOWN,
|
||||||
@ -493,8 +510,11 @@ bool D3D11VARenderer::initialize(PDECODER_PARAMETERS params)
|
|||||||
|
|
||||||
AVD3D11VAFramesContext* d3d11vaFramesContext = (AVD3D11VAFramesContext*)framesContext->hwctx;
|
AVD3D11VAFramesContext* d3d11vaFramesContext = (AVD3D11VAFramesContext*)framesContext->hwctx;
|
||||||
|
|
||||||
// We need to override the default D3D11VA bind flags to bind the textures as shader resources
|
d3d11vaFramesContext->BindFlags = D3D11_BIND_DECODER;
|
||||||
d3d11vaFramesContext->BindFlags = D3D11_BIND_DECODER | D3D11_BIND_SHADER_RESOURCE;
|
if (m_BindDecoderOutputTextures) {
|
||||||
|
// We need to override the default D3D11VA bind flags to bind the textures as shader resources
|
||||||
|
d3d11vaFramesContext->BindFlags |= D3D11_BIND_SHADER_RESOURCE;
|
||||||
|
}
|
||||||
|
|
||||||
int err = av_hwframe_ctx_init(m_HwFramesContext);
|
int err = av_hwframe_ctx_init(m_HwFramesContext);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
@ -504,9 +524,17 @@ bool D3D11VARenderer::initialize(PDECODER_PARAMETERS params)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create SRVs for all textures in the decoder pool
|
if (m_BindDecoderOutputTextures) {
|
||||||
if (!setupTexturePoolViews(d3d11vaFramesContext)) {
|
// Create SRVs for all textures in the decoder pool
|
||||||
return false;
|
if (!setupTexturePoolViews(d3d11vaFramesContext)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Create our internal texture to copy and render
|
||||||
|
if (!setupVideoTexture()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -751,25 +779,46 @@ void D3D11VARenderer::renderVideo(AVFrame* frame)
|
|||||||
UINT offset = 0;
|
UINT offset = 0;
|
||||||
m_DeviceContext->IASetVertexBuffers(0, 1, &m_VideoVertexBuffer, &stride, &offset);
|
m_DeviceContext->IASetVertexBuffers(0, 1, &m_VideoVertexBuffer, &stride, &offset);
|
||||||
|
|
||||||
// Our indexing logic depends on a direct mapping into m_VideoTextureResourceViews
|
UINT srvIndex;
|
||||||
// based on the texture index provided by FFmpeg.
|
if (m_BindDecoderOutputTextures) {
|
||||||
UINT textureIndex = (uintptr_t)frame->data[1];
|
// Our indexing logic depends on a direct mapping into m_VideoTextureResourceViews
|
||||||
SDL_assert(textureIndex < DECODER_BUFFER_POOL_SIZE);
|
// based on the texture index provided by FFmpeg.
|
||||||
if (textureIndex >= DECODER_BUFFER_POOL_SIZE) {
|
srvIndex = (uintptr_t)frame->data[1];
|
||||||
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
|
SDL_assert(srvIndex < DECODER_BUFFER_POOL_SIZE);
|
||||||
"Unexpected texture index: %u",
|
if (srvIndex >= DECODER_BUFFER_POOL_SIZE) {
|
||||||
textureIndex);
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
|
||||||
return;
|
"Unexpected texture index: %u",
|
||||||
|
srvIndex);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Copy this frame (minus alignment padding) into our video texture
|
||||||
|
D3D11_BOX srcBox;
|
||||||
|
srcBox.left = 0;
|
||||||
|
srcBox.top = 0;
|
||||||
|
srcBox.right = m_DecoderParams.width;
|
||||||
|
srcBox.bottom = m_DecoderParams.height;
|
||||||
|
srcBox.front = 0;
|
||||||
|
srcBox.back = 1;
|
||||||
|
m_DeviceContext->CopySubresourceRegion(m_VideoTexture, 0, 0, 0, 0, (ID3D11Resource*)frame->data[0], (int)(intptr_t)frame->data[1], &srcBox);
|
||||||
|
|
||||||
|
// SRV 0 is always mapped to the video texture
|
||||||
|
srvIndex = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bind our CSC shader (and constant buffer, if required)
|
// Bind our CSC shader (and constant buffer, if required)
|
||||||
bindColorConversion(frame);
|
bindColorConversion(frame);
|
||||||
|
|
||||||
// Bind SRVs for this frame
|
// Bind SRVs for this frame
|
||||||
m_DeviceContext->PSSetShaderResources(0, 2, m_VideoTextureResourceViews[textureIndex]);
|
m_DeviceContext->PSSetShaderResources(0, 2, m_VideoTextureResourceViews[srvIndex]);
|
||||||
|
|
||||||
// Draw the video
|
// Draw the video
|
||||||
m_DeviceContext->DrawIndexed(6, 0, 0);
|
m_DeviceContext->DrawIndexed(6, 0, 0);
|
||||||
|
|
||||||
|
// Unbind SRVs for this frame
|
||||||
|
ID3D11ShaderResourceView* nullSrvs[2] = {};
|
||||||
|
m_DeviceContext->PSSetShaderResources(0, 2, nullSrvs);
|
||||||
}
|
}
|
||||||
|
|
||||||
// This function must NOT use any DXGI or ID3D11DeviceContext methods
|
// This function must NOT use any DXGI or ID3D11DeviceContext methods
|
||||||
@ -1281,10 +1330,10 @@ bool D3D11VARenderer::setupRenderingResources()
|
|||||||
SDL_FRect renderRect;
|
SDL_FRect renderRect;
|
||||||
StreamUtils::screenSpaceToNormalizedDeviceCoords(&dst, &renderRect, m_DisplayWidth, m_DisplayHeight);
|
StreamUtils::screenSpaceToNormalizedDeviceCoords(&dst, &renderRect, m_DisplayWidth, m_DisplayHeight);
|
||||||
|
|
||||||
// Don't sample from the alignment padding area since that's not part of the video
|
// If we're binding the decoder output textures directly, don't sample from the alignment padding area
|
||||||
SDL_assert(m_TextureAlignment != 0);
|
SDL_assert(m_TextureAlignment != 0);
|
||||||
float uMax = (float)m_DecoderParams.width / FFALIGN(m_DecoderParams.width, m_TextureAlignment);
|
float uMax = m_BindDecoderOutputTextures ? ((float)m_DecoderParams.width / FFALIGN(m_DecoderParams.width, m_TextureAlignment)) : 1.0f;
|
||||||
float vMax = (float)m_DecoderParams.height / FFALIGN(m_DecoderParams.height, m_TextureAlignment);
|
float vMax = m_BindDecoderOutputTextures ? ((float)m_DecoderParams.height / FFALIGN(m_DecoderParams.height, m_TextureAlignment)) : 1.0f;
|
||||||
|
|
||||||
VERTEX verts[] =
|
VERTEX verts[] =
|
||||||
{
|
{
|
||||||
@ -1323,12 +1372,12 @@ bool D3D11VARenderer::setupRenderingResources()
|
|||||||
constDesc.CPUAccessFlags = 0;
|
constDesc.CPUAccessFlags = 0;
|
||||||
constDesc.MiscFlags = 0;
|
constDesc.MiscFlags = 0;
|
||||||
|
|
||||||
int alignedWidth = FFALIGN(m_DecoderParams.width, m_TextureAlignment);
|
int textureWidth = m_BindDecoderOutputTextures ? FFALIGN(m_DecoderParams.width, m_TextureAlignment) : m_DecoderParams.width;
|
||||||
int alignedHeight = FFALIGN(m_DecoderParams.height, m_TextureAlignment);
|
int textureHeight = m_BindDecoderOutputTextures ? FFALIGN(m_DecoderParams.height, m_TextureAlignment) : m_DecoderParams.height;
|
||||||
|
|
||||||
float chromaUVMax[3] = {};
|
float chromaUVMax[3] = {};
|
||||||
chromaUVMax[0] = m_DecoderParams.width != alignedWidth ? ((float)(m_DecoderParams.width - 1) / alignedWidth) : 1.0f;
|
chromaUVMax[0] = m_DecoderParams.width != textureWidth ? ((float)(m_DecoderParams.width - 1) / textureWidth) : 1.0f;
|
||||||
chromaUVMax[1] = m_DecoderParams.height != alignedHeight ? ((float)(m_DecoderParams.height - 1) / alignedHeight) : 1.0f;
|
chromaUVMax[1] = m_DecoderParams.height != textureHeight ? ((float)(m_DecoderParams.height - 1) / textureHeight) : 1.0f;
|
||||||
|
|
||||||
D3D11_SUBRESOURCE_DATA constData = {};
|
D3D11_SUBRESOURCE_DATA constData = {};
|
||||||
constData.pSysMem = chromaUVMax;
|
constData.pSysMem = chromaUVMax;
|
||||||
@ -1392,8 +1441,66 @@ bool D3D11VARenderer::setupRenderingResources()
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool D3D11VARenderer::setupVideoTexture()
|
||||||
|
{
|
||||||
|
SDL_assert(!m_BindDecoderOutputTextures);
|
||||||
|
|
||||||
|
HRESULT hr;
|
||||||
|
D3D11_TEXTURE2D_DESC texDesc = {};
|
||||||
|
|
||||||
|
texDesc.Width = m_DecoderParams.width;
|
||||||
|
texDesc.Height = m_DecoderParams.height;
|
||||||
|
texDesc.MipLevels = 1;
|
||||||
|
texDesc.ArraySize = 1;
|
||||||
|
texDesc.Format = (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_10BIT) ? DXGI_FORMAT_P010 : DXGI_FORMAT_NV12;
|
||||||
|
texDesc.SampleDesc.Quality = 0;
|
||||||
|
texDesc.SampleDesc.Count = 1;
|
||||||
|
texDesc.Usage = D3D11_USAGE_DEFAULT;
|
||||||
|
texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||||
|
texDesc.CPUAccessFlags = 0;
|
||||||
|
texDesc.MiscFlags = 0;
|
||||||
|
|
||||||
|
hr = m_Device->CreateTexture2D(&texDesc, nullptr, &m_VideoTexture);
|
||||||
|
if (FAILED(hr)) {
|
||||||
|
m_VideoTexture = nullptr;
|
||||||
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
|
||||||
|
"ID3D11Device::CreateTexture2D() failed: %x",
|
||||||
|
hr);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create luminance and chrominance SRVs for each plane of the texture
|
||||||
|
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
|
||||||
|
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
|
||||||
|
srvDesc.Texture2D.MostDetailedMip = 0;
|
||||||
|
srvDesc.Texture2D.MipLevels = 1;
|
||||||
|
srvDesc.Format = (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_10BIT) ? DXGI_FORMAT_R16_UNORM : DXGI_FORMAT_R8_UNORM;
|
||||||
|
hr = m_Device->CreateShaderResourceView(m_VideoTexture, &srvDesc, &m_VideoTextureResourceViews[0][0]);
|
||||||
|
if (FAILED(hr)) {
|
||||||
|
m_VideoTextureResourceViews[0][0] = nullptr;
|
||||||
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
|
||||||
|
"ID3D11Device::CreateShaderResourceView() failed: %x",
|
||||||
|
hr);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
srvDesc.Format = (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_10BIT) ? DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM;
|
||||||
|
hr = m_Device->CreateShaderResourceView(m_VideoTexture, &srvDesc, &m_VideoTextureResourceViews[0][1]);
|
||||||
|
if (FAILED(hr)) {
|
||||||
|
m_VideoTextureResourceViews[0][1] = nullptr;
|
||||||
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
|
||||||
|
"ID3D11Device::CreateShaderResourceView() failed: %x",
|
||||||
|
hr);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool D3D11VARenderer::setupTexturePoolViews(AVD3D11VAFramesContext* frameContext)
|
bool D3D11VARenderer::setupTexturePoolViews(AVD3D11VAFramesContext* frameContext)
|
||||||
{
|
{
|
||||||
|
SDL_assert(m_BindDecoderOutputTextures);
|
||||||
|
|
||||||
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
|
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
|
||||||
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY;
|
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY;
|
||||||
srvDesc.Texture2DArray.MostDetailedMip = 0;
|
srvDesc.Texture2DArray.MostDetailedMip = 0;
|
||||||
|
@ -29,7 +29,8 @@ private:
|
|||||||
static void unlockContext(void* lock_ctx);
|
static void unlockContext(void* lock_ctx);
|
||||||
|
|
||||||
bool setupRenderingResources();
|
bool setupRenderingResources();
|
||||||
bool setupTexturePoolViews(AVD3D11VAFramesContext* frameContext);
|
bool setupVideoTexture(); // for !m_BindDecoderOutputTextures
|
||||||
|
bool setupTexturePoolViews(AVD3D11VAFramesContext* frameContext); // for m_BindDecoderOutputTextures
|
||||||
void renderOverlay(Overlay::OverlayType type);
|
void renderOverlay(Overlay::OverlayType type);
|
||||||
void bindColorConversion(AVFrame* frame);
|
void bindColorConversion(AVFrame* frame);
|
||||||
void renderVideo(AVFrame* frame);
|
void renderVideo(AVFrame* frame);
|
||||||
@ -46,6 +47,7 @@ private:
|
|||||||
ID3D11DeviceContext* m_DeviceContext;
|
ID3D11DeviceContext* m_DeviceContext;
|
||||||
ID3D11RenderTargetView* m_RenderTargetView;
|
ID3D11RenderTargetView* m_RenderTargetView;
|
||||||
SDL_mutex* m_ContextLock;
|
SDL_mutex* m_ContextLock;
|
||||||
|
bool m_BindDecoderOutputTextures;
|
||||||
|
|
||||||
DECODER_PARAMETERS m_DecoderParams;
|
DECODER_PARAMETERS m_DecoderParams;
|
||||||
int m_TextureAlignment;
|
int m_TextureAlignment;
|
||||||
@ -62,6 +64,10 @@ private:
|
|||||||
ID3D11PixelShader* m_VideoBt2020LimPixelShader;
|
ID3D11PixelShader* m_VideoBt2020LimPixelShader;
|
||||||
ID3D11Buffer* m_VideoVertexBuffer;
|
ID3D11Buffer* m_VideoVertexBuffer;
|
||||||
|
|
||||||
|
// Only valid if !m_BindDecoderOutputTextures
|
||||||
|
ID3D11Texture2D* m_VideoTexture;
|
||||||
|
|
||||||
|
// Only index 0 is valid if !m_BindDecoderOutputTextures
|
||||||
#define DECODER_BUFFER_POOL_SIZE 17
|
#define DECODER_BUFFER_POOL_SIZE 17
|
||||||
ID3D11ShaderResourceView* m_VideoTextureResourceViews[DECODER_BUFFER_POOL_SIZE][2];
|
ID3D11ShaderResourceView* m_VideoTextureResourceViews[DECODER_BUFFER_POOL_SIZE][2];
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user