Clamp chroma texcoords to avoid sampling alignment padding

This resolves #885 without the massive perf hit on lower-end Intel GPUs.
This commit is contained in:
Cameron Gutman 2024-06-22 16:19:26 -05:00
parent c3bd7edc4e
commit 94943d2865
7 changed files with 43 additions and 4 deletions

Binary file not shown.

Binary file not shown.

View File

@ -1,6 +1,6 @@
#include "d3d11_video_pixel_start.hlsli"
cbuffer CSC_CONST_BUF : register(b0)
cbuffer CSC_CONST_BUF : register(b1)
{
min16float3x3 cscMatrix;
min16float3 offsets;

View File

@ -1,7 +1,8 @@
min16float4 main(ShaderInput input) : SV_TARGET
{
// Clamp the chrominance texcoords to avoid sampling the row of texels adjacent to the alignment padding
min16float3 yuv = min16float3(luminancePlane.Sample(theSampler, input.tex),
chrominancePlane.Sample(theSampler, input.tex));
chrominancePlane.Sample(theSampler, min(input.tex, chromaTexMax.rg)));
// Subtract the YUV offset for limited vs full range
yuv -= offsets;

View File

@ -6,4 +6,9 @@ struct ShaderInput
{
float4 pos : SV_POSITION;
float2 tex : TEXCOORD0;
};
};
// Upper bound for the chrominance texture coordinates. The pixel shader clamps
// its chroma sample position with min(input.tex, chromaTexMax.rg) so that it
// does not sample the texels adjacent to the alignment padding.
// Bound to constant buffer slot b0; only the first two components are consumed.
cbuffer ChromaLimitBuf : register(b0)
{
min16float3 chromaTexMax;
};

View File

@ -728,7 +728,7 @@ void D3D11VARenderer::bindColorConversion(AVFrame* frame)
ID3D11Buffer* constantBuffer;
HRESULT hr = m_Device->CreateBuffer(&constDesc, &constData, &constantBuffer);
if (SUCCEEDED(hr)) {
m_DeviceContext->PSSetConstantBuffers(0, 1, &constantBuffer);
m_DeviceContext->PSSetConstantBuffers(1, 1, &constantBuffer);
constantBuffer->Release();
}
else {
@ -1313,6 +1313,39 @@ bool D3D11VARenderer::setupRenderingResources()
}
}
// Create our fixed constant buffer to limit chroma texcoords and avoid sampling from alignment texels.
{
    // Maximum U/V the pixel shader may use for chroma sampling. Only the first
    // two elements carry data; the array is padded to 4 floats because D3D11
    // requires a constant buffer's ByteWidth to be a multiple of 16 bytes.
    float chromaUVMax[4] = {};
    static_assert(sizeof(chromaUVMax) % 16 == 0,
                  "Constant buffer sizes must be a multiple of 16 bytes");

    const int alignedWidth = FFALIGN(m_DecoderParams.width, m_TextureAlignment);
    const int alignedHeight = FFALIGN(m_DecoderParams.height, m_TextureAlignment);

    // When a dimension is padded for alignment, clamp to the last real texel
    // of the frame. Otherwise the full [0, 1] range is safe to sample.
    chromaUVMax[0] = (m_DecoderParams.width != alignedWidth)
                         ? static_cast<float>(m_DecoderParams.width - 1) / alignedWidth
                         : 1.0f;
    chromaUVMax[1] = (m_DecoderParams.height != alignedHeight)
                         ? static_cast<float>(m_DecoderParams.height - 1) / alignedHeight
                         : 1.0f;

    D3D11_BUFFER_DESC constDesc = {};
    // CreateBuffer() reads ByteWidth bytes from pSysMem, so the size must match
    // the array we actually pass in. Using the (larger) CSC_CONST_BUF size here
    // would read past the end of chromaUVMax.
    constDesc.ByteWidth = sizeof(chromaUVMax);
    constDesc.Usage = D3D11_USAGE_IMMUTABLE;
    constDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
    constDesc.CPUAccessFlags = 0;
    constDesc.MiscFlags = 0;

    D3D11_SUBRESOURCE_DATA constData = {};
    constData.pSysMem = chromaUVMax;

    ID3D11Buffer* constantBuffer = nullptr;
    HRESULT hr = m_Device->CreateBuffer(&constDesc, &constData, &constantBuffer);
    if (SUCCEEDED(hr)) {
        // Slot 0 matches register(b0) of ChromaLimitBuf in the pixel shaders.
        // The device context holds its own reference once the buffer is bound.
        m_DeviceContext->PSSetConstantBuffers(0, 1, &constantBuffer);
        constantBuffer->Release();
    }
    else {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "ID3D11Device::CreateBuffer() failed: %x",
                     hr);
        return false;
    }
}
// Create our blend state
{
D3D11_BLEND_DESC blendDesc = {};