Remove manual hwframes creation in D3D11VARenderer

This way we avoid always allocating the worst-case number of frames.
This commit is contained in:
Cameron Gutman
2026-01-10 16:36:35 -06:00
parent 8b6ad55c9b
commit be9f465008
2 changed files with 132 additions and 145 deletions

View File

@@ -64,8 +64,7 @@ D3D11VARenderer::D3D11VARenderer(int decoderSelectionPass)
m_LastColorTrc(AVCOL_TRC_UNSPECIFIED), m_LastColorTrc(AVCOL_TRC_UNSPECIFIED),
m_AllowTearing(false), m_AllowTearing(false),
m_OverlayLock(0), m_OverlayLock(0),
m_HwDeviceContext(nullptr), m_HwDeviceContext(nullptr)
m_HwFramesContext(nullptr)
{ {
m_ContextLock = SDL_CreateMutex(); m_ContextLock = SDL_CreateMutex();
@@ -114,7 +113,6 @@ D3D11VARenderer::~D3D11VARenderer()
m_RenderTargetView.Reset(); m_RenderTargetView.Reset();
m_SwapChain.Reset(); m_SwapChain.Reset();
av_buffer_unref(&m_HwFramesContext);
av_buffer_unref(&m_HwDeviceContext); av_buffer_unref(&m_HwDeviceContext);
// Force destruction of the swapchain immediately // Force destruction of the swapchain immediately
@@ -504,10 +502,6 @@ bool D3D11VARenderer::initialize(PDECODER_PARAMETERS params)
return false; return false;
} }
// Surfaces must be 16 pixel aligned for H.264 and 128 pixel aligned for everything else
// https://github.com/FFmpeg/FFmpeg/blob/a234e5cd80224c95a205c1f3e297d8c04a1374c3/libavcodec/dxva2.c#L609-L616
m_TextureAlignment = (params->videoFormat & VIDEO_FORMAT_MASK_H264) ? 16 : 128;
{ {
m_HwDeviceContext = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA); m_HwDeviceContext = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA);
if (!m_HwDeviceContext) { if (!m_HwDeviceContext) {
@@ -537,70 +531,8 @@ bool D3D11VARenderer::initialize(PDECODER_PARAMETERS params)
} }
} }
{ if (!setupRenderingResources()) {
m_HwFramesContext = av_hwframe_ctx_alloc(m_HwDeviceContext); return false;
if (!m_HwFramesContext) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to allocate D3D11VA frame context");
return false;
}
AVHWFramesContext* framesContext = (AVHWFramesContext*)m_HwFramesContext->data;
framesContext->format = AV_PIX_FMT_D3D11;
if (params->videoFormat & VIDEO_FORMAT_MASK_10BIT) {
framesContext->sw_format = (params->videoFormat & VIDEO_FORMAT_MASK_YUV444) ?
AV_PIX_FMT_XV30 : AV_PIX_FMT_P010;
}
else {
framesContext->sw_format = (params->videoFormat & VIDEO_FORMAT_MASK_YUV444) ?
AV_PIX_FMT_VUYX : AV_PIX_FMT_NV12;
}
framesContext->width = FFALIGN(params->width, m_TextureAlignment);
framesContext->height = FFALIGN(params->height, m_TextureAlignment);
// We can have up to 16 reference frames plus a working surface
framesContext->initial_pool_size = DECODER_BUFFER_POOL_SIZE;
AVD3D11VAFramesContext* d3d11vaFramesContext = (AVD3D11VAFramesContext*)framesContext->hwctx;
d3d11vaFramesContext->BindFlags = D3D11_BIND_DECODER;
if (m_BindDecoderOutputTextures) {
// We need to override the default D3D11VA bind flags to bind the textures as a shader resources
d3d11vaFramesContext->BindFlags |= D3D11_BIND_SHADER_RESOURCE;
}
int err = av_hwframe_ctx_init(m_HwFramesContext);
if (err < 0) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to initialize D3D11VA frame context: %d",
err);
return false;
}
D3D11_TEXTURE2D_DESC textureDesc;
d3d11vaFramesContext->texture_infos->texture->GetDesc(&textureDesc);
m_TextureFormat = textureDesc.Format;
m_TextureWidth = textureDesc.Width;
m_TextureHeight = textureDesc.Height;
if (!setupRenderingResources()) {
return false;
}
if (m_BindDecoderOutputTextures) {
// Create SRVs for all textures in the decoder pool
if (!setupTexturePoolViews(d3d11vaFramesContext)) {
return false;
}
}
else {
// Create our internal texture to copy and render
if (!setupVideoTexture()) {
return false;
}
}
} }
return true; return true;
@@ -616,10 +548,39 @@ bool D3D11VARenderer::prepareDecoderContext(AVCodecContext* context, AVDictionar
return true; return true;
} }
bool D3D11VARenderer::prepareDecoderContextInGetFormat(AVCodecContext *context, AVPixelFormat) bool D3D11VARenderer::prepareDecoderContextInGetFormat(AVCodecContext *context, AVPixelFormat pixelFormat)
{ {
// hw_frames_ctx must be initialized in ffGetFormat(). // Create a new hardware frames context suitable for decoding our specified format
context->hw_frames_ctx = av_buffer_ref(m_HwFramesContext); av_buffer_unref(&context->hw_frames_ctx);
int err = avcodec_get_hw_frames_parameters(context, m_HwDeviceContext, pixelFormat, &context->hw_frames_ctx);
if (err < 0) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to get hwframes context parameters: %d",
err);
return false;
}
auto framesContext = (AVHWFramesContext*)context->hw_frames_ctx->data;
auto d3d11vaFramesContext = (AVD3D11VAFramesContext*)framesContext->hwctx;
// If we're binding output textures directly, we need to add the SRV bind flag
if (m_BindDecoderOutputTextures) {
d3d11vaFramesContext->BindFlags |= D3D11_BIND_SHADER_RESOURCE;
}
err = av_hwframe_ctx_init(context->hw_frames_ctx);
if (err < 0) {
av_buffer_unref(&context->hw_frames_ctx);
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed initialize hwframes context: %d",
err);
return false;
}
if (!setupFrameRenderingResources(framesContext)) {
av_buffer_unref(&context->hw_frames_ctx);
return false;
}
return true; return true;
} }
@@ -750,6 +711,7 @@ void D3D11VARenderer::renderOverlay(Overlay::OverlayType type)
void D3D11VARenderer::bindColorConversion(AVFrame* frame) void D3D11VARenderer::bindColorConversion(AVFrame* frame)
{ {
bool yuv444 = (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_YUV444); bool yuv444 = (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_YUV444);
auto framesContext = (AVHWFramesContext*)frame->hw_frames_ctx->data;
if (yuv444) { if (yuv444) {
// We'll need to use one of the 4:4:4 shaders for this pixel format // We'll need to use one of the 4:4:4 shaders for this pixel format
@@ -799,14 +761,14 @@ void D3D11VARenderer::bindColorConversion(AVFrame* frame)
std::array<float, 2> chromaOffset; std::array<float, 2> chromaOffset;
getFrameChromaCositingOffsets(frame, chromaOffset); getFrameChromaCositingOffsets(frame, chromaOffset);
constBuf.chromaOffset[0] = chromaOffset[0] / m_TextureWidth; constBuf.chromaOffset[0] = chromaOffset[0] / framesContext->width;
constBuf.chromaOffset[1] = chromaOffset[1] / m_TextureHeight; constBuf.chromaOffset[1] = chromaOffset[1] / framesContext->height;
// Limit chroma texcoords to avoid sampling from alignment texels // Limit chroma texcoords to avoid sampling from alignment texels
constBuf.chromaUVMax[0] = m_DecoderParams.width != (int)m_TextureWidth ? constBuf.chromaUVMax[0] = frame->width != framesContext->width ?
((float)(m_DecoderParams.width - 1) / m_TextureWidth) : 1.0f; ((float)(frame->width - 1) / framesContext->width) : 1.0f;
constBuf.chromaUVMax[1] = m_DecoderParams.height != (int)m_TextureHeight ? constBuf.chromaUVMax[1] = frame->height != (int)framesContext->height ?
((float)(m_DecoderParams.height - 1) / m_TextureHeight) : 1.0f; ((float)(frame->height - 1) / framesContext->height) : 1.0f;
D3D11_SUBRESOURCE_DATA constData = {}; D3D11_SUBRESOURCE_DATA constData = {};
constData.pSysMem = &constBuf; constData.pSysMem = &constBuf;
@@ -1411,55 +1373,6 @@ bool D3D11VARenderer::setupRenderingResources()
} }
} }
// Create our fixed vertex buffer for video rendering
{
// Scale video to the window size while preserving aspect ratio
SDL_Rect src, dst;
src.x = src.y = 0;
src.w = m_DecoderParams.width;
src.h = m_DecoderParams.height;
dst.x = dst.y = 0;
dst.w = m_DisplayWidth;
dst.h = m_DisplayHeight;
StreamUtils::scaleSourceToDestinationSurface(&src, &dst);
// Convert screen space to normalized device coordinates
SDL_FRect renderRect;
StreamUtils::screenSpaceToNormalizedDeviceCoords(&dst, &renderRect, m_DisplayWidth, m_DisplayHeight);
// Don't sample from the alignment padding area
SDL_assert(m_TextureAlignment != 0);
float uMax = (float)m_DecoderParams.width / m_TextureWidth;
float vMax = (float)m_DecoderParams.height / m_TextureHeight;
VERTEX verts[] =
{
{renderRect.x, renderRect.y, 0, vMax},
{renderRect.x, renderRect.y+renderRect.h, 0, 0},
{renderRect.x+renderRect.w, renderRect.y, uMax, vMax},
{renderRect.x+renderRect.w, renderRect.y+renderRect.h, uMax, 0},
};
D3D11_BUFFER_DESC vbDesc = {};
vbDesc.ByteWidth = sizeof(verts);
vbDesc.Usage = D3D11_USAGE_IMMUTABLE;
vbDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
vbDesc.CPUAccessFlags = 0;
vbDesc.MiscFlags = 0;
vbDesc.StructureByteStride = sizeof(VERTEX);
D3D11_SUBRESOURCE_DATA vbData = {};
vbData.pSysMem = verts;
hr = m_Device->CreateBuffer(&vbDesc, &vbData, &m_VideoVertexBuffer);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateBuffer() failed: %x",
hr);
return false;
}
}
// Create our overlay blend state // Create our overlay blend state
{ {
D3D11_BLEND_DESC blendDesc = {}; D3D11_BLEND_DESC blendDesc = {};
@@ -1520,6 +1433,80 @@ bool D3D11VARenderer::setupRenderingResources()
return true; return true;
} }
// NB: This can be called more than once (and with different frame dimensions!)
bool D3D11VARenderer::setupFrameRenderingResources(AVHWFramesContext* framesContext)
{
auto d3d11vaFramesContext = (AVD3D11VAFramesContext*)framesContext->hwctx;
// Query the format of the underlying texture array
D3D11_TEXTURE2D_DESC textureDesc;
d3d11vaFramesContext->texture_infos->texture->GetDesc(&textureDesc);
m_TextureFormat = textureDesc.Format;
// Create our fixed vertex buffer for video rendering
{
// Scale video to the window size while preserving aspect ratio
SDL_Rect src, dst;
src.x = src.y = 0;
src.w = m_DecoderParams.width;
src.h = m_DecoderParams.height;
dst.x = dst.y = 0;
dst.w = m_DisplayWidth;
dst.h = m_DisplayHeight;
StreamUtils::scaleSourceToDestinationSurface(&src, &dst);
// Convert screen space to normalized device coordinates
SDL_FRect renderRect;
StreamUtils::screenSpaceToNormalizedDeviceCoords(&dst, &renderRect, m_DisplayWidth, m_DisplayHeight);
// Don't sample from the alignment padding area
float uMax = (float)m_DecoderParams.width / framesContext->width;
float vMax = (float)m_DecoderParams.height / framesContext->height;
VERTEX verts[] =
{
{renderRect.x, renderRect.y, 0, vMax},
{renderRect.x, renderRect.y+renderRect.h, 0, 0},
{renderRect.x+renderRect.w, renderRect.y, uMax, vMax},
{renderRect.x+renderRect.w, renderRect.y+renderRect.h, uMax, 0},
};
D3D11_BUFFER_DESC vbDesc = {};
vbDesc.ByteWidth = sizeof(verts);
vbDesc.Usage = D3D11_USAGE_IMMUTABLE;
vbDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
vbDesc.CPUAccessFlags = 0;
vbDesc.MiscFlags = 0;
vbDesc.StructureByteStride = sizeof(VERTEX);
D3D11_SUBRESOURCE_DATA vbData = {};
vbData.pSysMem = verts;
HRESULT hr = m_Device->CreateBuffer(&vbDesc, &vbData, &m_VideoVertexBuffer);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreateBuffer() failed: %x",
hr);
return false;
}
}
if (m_BindDecoderOutputTextures) {
// Create SRVs for all textures in the decoder pool
if (!setupTexturePoolViews(framesContext)) {
return false;
}
}
else {
// Create our internal texture to copy and render
if (!setupVideoTexture(framesContext)) {
return false;
}
}
return true;
}
std::vector<DXGI_FORMAT> D3D11VARenderer::getVideoTextureSRVFormats() std::vector<DXGI_FORMAT> D3D11VARenderer::getVideoTextureSRVFormats()
{ {
if (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_YUV444) { if (m_DecoderParams.videoFormat & VIDEO_FORMAT_MASK_YUV444) {
@@ -1535,15 +1522,15 @@ std::vector<DXGI_FORMAT> D3D11VARenderer::getVideoTextureSRVFormats()
} }
} }
bool D3D11VARenderer::setupVideoTexture() bool D3D11VARenderer::setupVideoTexture(AVHWFramesContext* framesContext)
{ {
SDL_assert(!m_BindDecoderOutputTextures); SDL_assert(!m_BindDecoderOutputTextures);
HRESULT hr; HRESULT hr;
D3D11_TEXTURE2D_DESC texDesc = {}; D3D11_TEXTURE2D_DESC texDesc = {};
texDesc.Width = m_TextureWidth; texDesc.Width = framesContext->width;
texDesc.Height = m_TextureHeight; texDesc.Height = framesContext->height;
texDesc.MipLevels = 1; texDesc.MipLevels = 1;
texDesc.ArraySize = 1; texDesc.ArraySize = 1;
texDesc.Format = m_TextureFormat; texDesc.Format = m_TextureFormat;
@@ -1586,8 +1573,10 @@ bool D3D11VARenderer::setupVideoTexture()
return true; return true;
} }
bool D3D11VARenderer::setupTexturePoolViews(AVD3D11VAFramesContext* frameContext) bool D3D11VARenderer::setupTexturePoolViews(AVHWFramesContext* framesContext)
{ {
AVD3D11VAFramesContext* d3d11vaFramesContext = (AVD3D11VAFramesContext*)framesContext->hwctx;
SDL_assert(m_BindDecoderOutputTextures); SDL_assert(m_BindDecoderOutputTextures);
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
@@ -1596,21 +1585,23 @@ bool D3D11VARenderer::setupTexturePoolViews(AVD3D11VAFramesContext* frameContext
srvDesc.Texture2DArray.MipLevels = 1; srvDesc.Texture2DArray.MipLevels = 1;
srvDesc.Texture2DArray.ArraySize = 1; srvDesc.Texture2DArray.ArraySize = 1;
m_VideoTextureResourceViews.resize(framesContext->initial_pool_size);
// Create luminance and chrominance SRVs for each texture in the pool // Create luminance and chrominance SRVs for each texture in the pool
for (size_t i = 0; i < m_VideoTextureResourceViews.size(); i++) { for (int i = 0; i < framesContext->initial_pool_size; i++) {
HRESULT hr; HRESULT hr;
// Our rendering logic depends on the texture index working to map into our SRV array // Our rendering logic depends on the texture index working to map into our SRV array
SDL_assert(i == (size_t)frameContext->texture_infos[i].index); SDL_assert(i == d3d11vaFramesContext->texture_infos[i].index);
srvDesc.Texture2DArray.FirstArraySlice = frameContext->texture_infos[i].index; srvDesc.Texture2DArray.FirstArraySlice = d3d11vaFramesContext->texture_infos[i].index;
size_t srvIndex = 0; size_t srvIndex = 0;
for (DXGI_FORMAT srvFormat : getVideoTextureSRVFormats()) { for (DXGI_FORMAT srvFormat : getVideoTextureSRVFormats()) {
SDL_assert(srvIndex < m_VideoTextureResourceViews[i].size()); SDL_assert(srvIndex < m_VideoTextureResourceViews[i].size());
srvDesc.Format = srvFormat; srvDesc.Format = srvFormat;
hr = m_Device->CreateShaderResourceView(frameContext->texture_infos[i].texture, hr = m_Device->CreateShaderResourceView(d3d11vaFramesContext->texture_infos[i].texture,
&srvDesc, &srvDesc,
&m_VideoTextureResourceViews[i][srvIndex]); &m_VideoTextureResourceViews[i][srvIndex]);
if (FAILED(hr)) { if (FAILED(hr)) {

View File

@@ -40,8 +40,9 @@ private:
bool setupRenderingResources(); bool setupRenderingResources();
std::vector<DXGI_FORMAT> getVideoTextureSRVFormats(); std::vector<DXGI_FORMAT> getVideoTextureSRVFormats();
bool setupVideoTexture(); // for !m_BindDecoderOutputTextures bool setupFrameRenderingResources(AVHWFramesContext* framesContext);
bool setupTexturePoolViews(AVD3D11VAFramesContext* frameContext); // for m_BindDecoderOutputTextures bool setupVideoTexture(AVHWFramesContext* framesContext); // for !m_BindDecoderOutputTextures
bool setupTexturePoolViews(AVHWFramesContext* framesContext); // for m_BindDecoderOutputTextures
void renderOverlay(Overlay::OverlayType type); void renderOverlay(Overlay::OverlayType type);
void bindColorConversion(AVFrame* frame); void bindColorConversion(AVFrame* frame);
void renderVideo(AVFrame* frame); void renderVideo(AVFrame* frame);
@@ -76,10 +77,7 @@ private:
bool m_BindDecoderOutputTextures; bool m_BindDecoderOutputTextures;
DECODER_PARAMETERS m_DecoderParams; DECODER_PARAMETERS m_DecoderParams;
int m_TextureAlignment;
DXGI_FORMAT m_TextureFormat; DXGI_FORMAT m_TextureFormat;
UINT m_TextureWidth;
UINT m_TextureHeight;
int m_DisplayWidth; int m_DisplayWidth;
int m_DisplayHeight; int m_DisplayHeight;
AVColorTransferCharacteristic m_LastColorTrc; AVColorTransferCharacteristic m_LastColorTrc;
@@ -93,8 +91,7 @@ private:
Microsoft::WRL::ComPtr<ID3D11Texture2D> m_VideoTexture; Microsoft::WRL::ComPtr<ID3D11Texture2D> m_VideoTexture;
// Only index 0 is valid if !m_BindDecoderOutputTextures // Only index 0 is valid if !m_BindDecoderOutputTextures
#define DECODER_BUFFER_POOL_SIZE 17 std::vector<std::array<Microsoft::WRL::ComPtr<ID3D11ShaderResourceView>, 2>> m_VideoTextureResourceViews;
std::array<std::array<Microsoft::WRL::ComPtr<ID3D11ShaderResourceView>, 2>, DECODER_BUFFER_POOL_SIZE> m_VideoTextureResourceViews;
SDL_SpinLock m_OverlayLock; SDL_SpinLock m_OverlayLock;
std::array<Microsoft::WRL::ComPtr<ID3D11Buffer>, Overlay::OverlayMax> m_OverlayVertexBuffers; std::array<Microsoft::WRL::ComPtr<ID3D11Buffer>, Overlay::OverlayMax> m_OverlayVertexBuffers;
@@ -103,6 +100,5 @@ private:
Microsoft::WRL::ComPtr<ID3D11PixelShader> m_OverlayPixelShader; Microsoft::WRL::ComPtr<ID3D11PixelShader> m_OverlayPixelShader;
AVBufferRef* m_HwDeviceContext; AVBufferRef* m_HwDeviceContext;
AVBufferRef* m_HwFramesContext;
}; };