Revert "Use a separate texture for rendering to avoid blending in the alignment padding"

This kills performance on some Intel iGPUs (particularly Atom chips like N100),
so let's remove the copy and solve this issue a different way instead.

This reverts commit a6fccf93d149a8b67eeac0b0fe109a142f0937d8.
This commit is contained in:
Cameron Gutman 2024-06-22 12:35:00 -05:00
parent b6bb96223d
commit c3bd7edc4e
2 changed files with 62 additions and 70 deletions

View File

@ -90,7 +90,6 @@ D3D11VARenderer::D3D11VARenderer(int decoderSelectionPass)
m_VideoBt601LimPixelShader(nullptr),
m_VideoBt2020LimPixelShader(nullptr),
m_VideoVertexBuffer(nullptr),
m_VideoTexture(nullptr),
m_OverlayLock(0),
m_OverlayPixelShader(nullptr),
m_HwDeviceContext(nullptr),
@ -118,11 +117,10 @@ D3D11VARenderer::~D3D11VARenderer()
SAFE_COM_RELEASE(m_VideoGenericPixelShader);
for (int i = 0; i < ARRAYSIZE(m_VideoTextureResourceViews); i++) {
SAFE_COM_RELEASE(m_VideoTextureResourceViews[i]);
SAFE_COM_RELEASE(m_VideoTextureResourceViews[i][0]);
SAFE_COM_RELEASE(m_VideoTextureResourceViews[i][1]);
}
SAFE_COM_RELEASE(m_VideoTexture);
for (int i = 0; i < ARRAYSIZE(m_OverlayVertexBuffers); i++) {
SAFE_COM_RELEASE(m_OverlayVertexBuffers[i]);
}
@ -490,11 +488,12 @@ bool D3D11VARenderer::initialize(PDECODER_PARAMETERS params)
framesContext->height = FFALIGN(params->height, m_TextureAlignment);
// We can have up to 16 reference frames plus a working surface
framesContext->initial_pool_size = 17;
framesContext->initial_pool_size = DECODER_BUFFER_POOL_SIZE;
AVD3D11VAFramesContext* d3d11vaFramesContext = (AVD3D11VAFramesContext*)framesContext->hwctx;
d3d11vaFramesContext->BindFlags = D3D11_BIND_DECODER;
// We need to override the default D3D11VA bind flags to bind the textures as shader resources
d3d11vaFramesContext->BindFlags = D3D11_BIND_DECODER | D3D11_BIND_SHADER_RESOURCE;
int err = av_hwframe_ctx_init(m_HwFramesContext);
if (err < 0) {
@ -504,8 +503,8 @@ bool D3D11VARenderer::initialize(PDECODER_PARAMETERS params)
return false;
}
// Create our video texture and SRVs
if (!setupVideoTexture()) {
// Create SRVs for all textures in the decoder pool
if (!setupTexturePoolViews(d3d11vaFramesContext)) {
return false;
}
}
@ -751,21 +750,22 @@ void D3D11VARenderer::renderVideo(AVFrame* frame)
UINT offset = 0;
m_DeviceContext->IASetVertexBuffers(0, 1, &m_VideoVertexBuffer, &stride, &offset);
// Copy this frame (minus alignment padding) into our video texture
D3D11_BOX srcBox;
srcBox.left = 0;
srcBox.top = 0;
srcBox.right = m_DecoderParams.width;
srcBox.bottom = m_DecoderParams.height;
srcBox.front = 0;
srcBox.back = 1;
m_DeviceContext->CopySubresourceRegion(m_VideoTexture, 0, 0, 0, 0, (ID3D11Resource*)frame->data[0], (int)(intptr_t)frame->data[1], &srcBox);
// Our indexing logic depends on a direct mapping into m_VideoTextureResourceViews
// based on the texture index provided by FFmpeg.
UINT textureIndex = (uintptr_t)frame->data[1];
SDL_assert(textureIndex < DECODER_BUFFER_POOL_SIZE);
if (textureIndex >= DECODER_BUFFER_POOL_SIZE) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Unexpected texture index: %u",
textureIndex);
return;
}
// Bind our CSC shader (and constant buffer, if required)
bindColorConversion(frame);
// Bind SRVs for this frame
m_DeviceContext->PSSetShaderResources(0, 2, m_VideoTextureResourceViews);
m_DeviceContext->PSSetShaderResources(0, 2, m_VideoTextureResourceViews[textureIndex]);
// Draw the video
m_DeviceContext->DrawIndexed(6, 0, 0);
@ -1280,12 +1280,17 @@ bool D3D11VARenderer::setupRenderingResources()
SDL_FRect renderRect;
StreamUtils::screenSpaceToNormalizedDeviceCoords(&dst, &renderRect, m_DisplayWidth, m_DisplayHeight);
// Don't sample from the alignment padding area since that's not part of the video
SDL_assert(m_TextureAlignment != 0);
float uMax = (float)m_DecoderParams.width / FFALIGN(m_DecoderParams.width, m_TextureAlignment);
float vMax = (float)m_DecoderParams.height / FFALIGN(m_DecoderParams.height, m_TextureAlignment);
VERTEX verts[] =
{
{renderRect.x, renderRect.y, 0, 1.0f},
{renderRect.x, renderRect.y, 0, vMax},
{renderRect.x, renderRect.y+renderRect.h, 0, 0},
{renderRect.x+renderRect.w, renderRect.y, 1.0f, 1.0f},
{renderRect.x+renderRect.w, renderRect.y+renderRect.h, 1.0f, 0},
{renderRect.x+renderRect.w, renderRect.y, uMax, vMax},
{renderRect.x+renderRect.w, renderRect.y+renderRect.h, uMax, 0},
};
D3D11_BUFFER_DESC vbDesc = {};
@ -1353,55 +1358,42 @@ bool D3D11VARenderer::setupRenderingResources()
return true;
}
bool D3D11VARenderer::setupVideoTexture()
bool D3D11VARenderer::setupTexturePoolViews(AVD3D11VAFramesContext* frameContext)
{
HRESULT hr;
D3D11_TEXTURE2D_DESC texDesc = {};
texDesc.Width = m_DecoderParams.width;
texDesc.Height = m_DecoderParams.height;
texDesc.MipLevels = 1;
texDesc.ArraySize = 1;
texDesc.Format = (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_10BIT) ? DXGI_FORMAT_P010 : DXGI_FORMAT_NV12;
texDesc.SampleDesc.Quality = 0;
texDesc.SampleDesc.Count = 1;
texDesc.Usage = D3D11_USAGE_DEFAULT;
texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
texDesc.CPUAccessFlags = 0;
texDesc.MiscFlags = 0;
hr = m_Device->CreateTexture2D(&texDesc, nullptr, &m_VideoTexture);
if (FAILED(hr)) {
m_VideoTexture = nullptr;
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateTexture2D() failed: %x",
hr);
return false;
}
// Create luminance and chrominance SRVs for each plane of the texture
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
srvDesc.Texture2D.MostDetailedMip = 0;
srvDesc.Texture2D.MipLevels = 1;
srvDesc.Format = (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_10BIT) ? DXGI_FORMAT_R16_UNORM : DXGI_FORMAT_R8_UNORM;
hr = m_Device->CreateShaderResourceView(m_VideoTexture, &srvDesc, &m_VideoTextureResourceViews[0]);
if (FAILED(hr)) {
m_VideoTextureResourceViews[0] = nullptr;
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateShaderResourceView() failed: %x",
hr);
return false;
}
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY;
srvDesc.Texture2DArray.MostDetailedMip = 0;
srvDesc.Texture2DArray.MipLevels = 1;
srvDesc.Texture2DArray.ArraySize = 1;
srvDesc.Format = (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_10BIT) ? DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM;
hr = m_Device->CreateShaderResourceView(m_VideoTexture, &srvDesc, &m_VideoTextureResourceViews[1]);
if (FAILED(hr)) {
m_VideoTextureResourceViews[1] = nullptr;
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateShaderResourceView() failed: %x",
hr);
return false;
// Create luminance and chrominance SRVs for each texture in the pool
for (int i = 0; i < DECODER_BUFFER_POOL_SIZE; i++) {
HRESULT hr;
// Our rendering logic depends on each texture's index mapping directly into our SRV array
SDL_assert(i == frameContext->texture_infos[i].index);
srvDesc.Texture2DArray.FirstArraySlice = frameContext->texture_infos[i].index;
srvDesc.Format = (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_10BIT) ? DXGI_FORMAT_R16_UNORM : DXGI_FORMAT_R8_UNORM;
hr = m_Device->CreateShaderResourceView(frameContext->texture_infos[i].texture, &srvDesc, &m_VideoTextureResourceViews[i][0]);
if (FAILED(hr)) {
m_VideoTextureResourceViews[i][0] = nullptr;
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateShaderResourceView() failed: %x",
hr);
return false;
}
srvDesc.Format = (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_10BIT) ? DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM;
hr = m_Device->CreateShaderResourceView(frameContext->texture_infos[i].texture, &srvDesc, &m_VideoTextureResourceViews[i][1]);
if (FAILED(hr)) {
m_VideoTextureResourceViews[i][1] = nullptr;
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateShaderResourceView() failed: %x",
hr);
return false;
}
}
return true;

View File

@ -29,7 +29,7 @@ private:
static void unlockContext(void* lock_ctx);
bool setupRenderingResources();
bool setupVideoTexture();
bool setupTexturePoolViews(AVD3D11VAFramesContext* frameContext);
void renderOverlay(Overlay::OverlayType type);
void bindColorConversion(AVFrame* frame);
void renderVideo(AVFrame* frame);
@ -62,8 +62,8 @@ private:
ID3D11PixelShader* m_VideoBt2020LimPixelShader;
ID3D11Buffer* m_VideoVertexBuffer;
ID3D11Texture2D* m_VideoTexture;
ID3D11ShaderResourceView* m_VideoTextureResourceViews[2];
#define DECODER_BUFFER_POOL_SIZE 17
ID3D11ShaderResourceView* m_VideoTextureResourceViews[DECODER_BUFFER_POOL_SIZE][2];
SDL_SpinLock m_OverlayLock;
ID3D11Buffer* m_OverlayVertexBuffers[Overlay::OverlayMax];