diff --git a/app/resources.qrc b/app/resources.qrc
index 9401fcd5..a56a6fa0 100644
--- a/app/resources.qrc
+++ b/app/resources.qrc
@@ -62,5 +62,7 @@
shaders/d3d11_vertex.fxc
shaders/d3d11_overlay_pixel.fxc
shaders/d3d11_video_pixel.fxc
+ shaders/d3d11_bt601lim_pixel.fxc
+ shaders/d3d11_bt2020lim_pixel.fxc
diff --git a/app/shaders/build_hlsl.bat b/app/shaders/build_hlsl.bat
index 2398134e..bf915204 100644
--- a/app/shaders/build_hlsl.bat
+++ b/app/shaders/build_hlsl.bat
@@ -1,4 +1,6 @@
fxc /T vs_4_0_level_9_3 /Fo d3d11_vertex.fxc d3d11_vertex.hlsl
fxc /T ps_4_0_level_9_3 /Fo d3d11_overlay_pixel.fxc d3d11_overlay_pixel.hlsl
-fxc /T ps_4_0_level_9_3 /Fo d3d11_video_pixel.fxc d3d11_video_pixel.hlsl
\ No newline at end of file
+fxc /T ps_4_0_level_9_3 /Fo d3d11_video_pixel.fxc d3d11_video_pixel.hlsl
+fxc /T ps_4_0_level_9_3 /Fo d3d11_bt601lim_pixel.fxc d3d11_bt601lim_pixel.hlsl
+fxc /T ps_4_0_level_9_3 /Fo d3d11_bt2020lim_pixel.fxc d3d11_bt2020lim_pixel.hlsl
\ No newline at end of file
diff --git a/app/shaders/d3d11_bt2020lim_pixel.fxc b/app/shaders/d3d11_bt2020lim_pixel.fxc
new file mode 100644
index 00000000..3bef4a80
Binary files /dev/null and b/app/shaders/d3d11_bt2020lim_pixel.fxc differ
diff --git a/app/shaders/d3d11_bt2020lim_pixel.hlsl b/app/shaders/d3d11_bt2020lim_pixel.hlsl
new file mode 100644
index 00000000..2fefd187
--- /dev/null
+++ b/app/shaders/d3d11_bt2020lim_pixel.hlsl
@@ -0,0 +1,35 @@
+Texture2D<min16float> luminancePlane : register(t0);    // Y plane: one channel per texel
+Texture2D<min16float2> chrominancePlane : register(t1); // U/V (Cb/Cr) plane: two channels per texel
+SamplerState theSampler : register(s0);                 // Sampler shared by both planes
+// Limited-range BT.2020 YUV -> RGB weights. Rows scale Y, U, V; columns produce R, G, B.
+static const min16float3x3 cscMatrix =
+{
+    1.1644, 1.1644, 1.1644,  // Y: 1.1644 ~= 255/219 stretches limited-range luma
+    0.0, -0.1874, 2.1418,    // U (Cb): contributes to G and B only
+    1.6781, -0.6505, 0.0,    // V (Cr): contributes to R and G only
+};
+// Zero points removed before the matrix multiply, written as normalized 8-bit code values.
+static const min16float3 offsets =
+{
+    16.0 / 255.0, 128.0 / 255.0, 128.0 / 255.0  // Y black level, U midpoint, V midpoint
+};
+// Pixel shader input: the position plus the texture coordinate.
+struct ShaderInput
+{
+    min16float4 pos : SV_POSITION;
+    min16float2 tex : TEXCOORD0;  // Coordinate used to sample both planes
+};
+// Samples Y and U/V at input.tex and returns the BT.2020 limited-range RGB color with alpha 1.0.
+min16float4 main(ShaderInput input) : SV_TARGET
+{
+    min16float3 yuv = min16float3(luminancePlane.Sample(theSampler, input.tex),
+                                  chrominancePlane.Sample(theSampler, input.tex));
+
+    // Remove the limited-range offsets so Y starts at 0 and U/V are centered on 0
+    yuv -= offsets;
+
+    // mul() treats yuv as a row vector, so each cscMatrix row scales one of Y, U, V
+    yuv = mul(yuv, cscMatrix);
+    // The converted color is returned as-is; no saturate() is applied here
+    return min16float4(yuv, 1.0);
+}
\ No newline at end of file
diff --git a/app/shaders/d3d11_bt601lim_pixel.fxc b/app/shaders/d3d11_bt601lim_pixel.fxc
new file mode 100644
index 00000000..530f4e9a
Binary files /dev/null and b/app/shaders/d3d11_bt601lim_pixel.fxc differ
diff --git a/app/shaders/d3d11_bt601lim_pixel.hlsl b/app/shaders/d3d11_bt601lim_pixel.hlsl
new file mode 100644
index 00000000..723fa311
--- /dev/null
+++ b/app/shaders/d3d11_bt601lim_pixel.hlsl
@@ -0,0 +1,35 @@
+Texture2D<min16float> luminancePlane : register(t0);    // Y plane: one channel per texel
+Texture2D<min16float2> chrominancePlane : register(t1); // U/V (Cb/Cr) plane: two channels per texel
+SamplerState theSampler : register(s0);                 // Sampler shared by both planes
+// Limited-range BT.601 YUV -> RGB weights. Rows scale Y, U, V; columns produce R, G, B.
+static const min16float3x3 cscMatrix =
+{
+    1.1644, 1.1644, 1.1644,  // Y: 1.1644 ~= 255/219 stretches limited-range luma
+    0.0, -0.3917, 2.0172,    // U (Cb): contributes to G and B only
+    1.5960, -0.8129, 0.0,    // V (Cr): contributes to R and G only
+};
+// Zero points removed before the matrix multiply, written as normalized 8-bit code values.
+static const min16float3 offsets =
+{
+    16.0 / 255.0, 128.0 / 255.0, 128.0 / 255.0  // Y black level, U midpoint, V midpoint
+};
+// Pixel shader input: the position plus the texture coordinate.
+struct ShaderInput
+{
+    min16float4 pos : SV_POSITION;
+    min16float2 tex : TEXCOORD0;  // Coordinate used to sample both planes
+};
+// Samples Y and U/V at input.tex and returns the BT.601 limited-range RGB color with alpha 1.0.
+min16float4 main(ShaderInput input) : SV_TARGET
+{
+    min16float3 yuv = min16float3(luminancePlane.Sample(theSampler, input.tex),
+                                  chrominancePlane.Sample(theSampler, input.tex));
+
+    // Remove the limited-range offsets so Y starts at 0 and U/V are centered on 0
+    yuv -= offsets;
+
+    // mul() treats yuv as a row vector, so each cscMatrix row scales one of Y, U, V
+    yuv = mul(yuv, cscMatrix);
+    // The converted color is returned as-is; no saturate() is applied here
+    return min16float4(yuv, 1.0);
+}
\ No newline at end of file
diff --git a/app/shaders/d3d11_video_pixel.fxc b/app/shaders/d3d11_video_pixel.fxc
index 126e9474..ecfd94ba 100644
Binary files a/app/shaders/d3d11_video_pixel.fxc and b/app/shaders/d3d11_video_pixel.fxc differ
diff --git a/app/shaders/d3d11_video_pixel.hlsl b/app/shaders/d3d11_video_pixel.hlsl
index f2b1f9df..350ed400 100644
--- a/app/shaders/d3d11_video_pixel.hlsl
+++ b/app/shaders/d3d11_video_pixel.hlsl
@@ -25,5 +25,5 @@ min16float4 main(ShaderInput input) : SV_TARGET
// Multiply by the conversion matrix for this colorspace
yuv = mul(yuv, cscMatrix);
- return min16float4(saturate(yuv), 1.0);
+ return min16float4(yuv, 1.0);
}
\ No newline at end of file
diff --git a/app/streaming/video/ffmpeg-renderers/d3d11va.cpp b/app/streaming/video/ffmpeg-renderers/d3d11va.cpp
index ec85fde8..3b546e71 100644
--- a/app/streaming/video/ffmpeg-renderers/d3d11va.cpp
+++ b/app/streaming/video/ffmpeg-renderers/d3d11va.cpp
@@ -81,7 +81,9 @@ D3D11VARenderer::D3D11VARenderer()
m_LastColorRange(AVCOL_RANGE_UNSPECIFIED),
m_AllowTearing(false),
m_FrameWaitableObject(nullptr),
- m_VideoPixelShader(nullptr),
+ m_VideoGenericPixelShader(nullptr),
+ m_VideoBt601LimPixelShader(nullptr),
+ m_VideoBt2020LimPixelShader(nullptr),
m_VideoVertexBuffer(nullptr),
m_OverlayLock(0),
m_OverlayPixelShader(nullptr),
@@ -101,7 +103,9 @@ D3D11VARenderer::~D3D11VARenderer()
SDL_DestroyMutex(m_ContextLock);
SAFE_COM_RELEASE(m_VideoVertexBuffer);
- SAFE_COM_RELEASE(m_VideoPixelShader);
+ SAFE_COM_RELEASE(m_VideoBt2020LimPixelShader);
+ SAFE_COM_RELEASE(m_VideoBt601LimPixelShader);
+ SAFE_COM_RELEASE(m_VideoGenericPixelShader);
for (int i = 0; i < ARRAYSIZE(m_VideoTextureResourceViews); i++) {
SAFE_COM_RELEASE(m_VideoTextureResourceViews[i][0]);
@@ -734,71 +738,88 @@ void D3D11VARenderer::renderOverlay(Overlay::OverlayType type)
overlayVertexBuffer->Release();
}
-void D3D11VARenderer::updateColorConversionConstants(AVFrame* frame)
+void D3D11VARenderer::bindColorConversion(AVFrame* frame)
{
- // If nothing has changed since last frame, we're done
- if (frame->colorspace == m_LastColorSpace && frame->color_range == m_LastColorRange) {
- return;
+ // We have purpose-built shaders for the common Rec 601 (SDR) and Rec 2020 (HDR) cases
+ if (frame->color_range == AVCOL_RANGE_MPEG && frame->colorspace == AVCOL_SPC_SMPTE170M) {
+ m_DeviceContext->PSSetShader(m_VideoBt601LimPixelShader, nullptr, 0);
}
-
- D3D11_BUFFER_DESC constDesc = {};
- constDesc.ByteWidth = sizeof(CSC_CONST_BUF);
- constDesc.Usage = D3D11_USAGE_IMMUTABLE;
- constDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
- constDesc.CPUAccessFlags = 0;
- constDesc.MiscFlags = 0;
-
- // This handles the case where the color range is unknown,
- // so that we use Limited color range which is the default
- // behavior for Moonlight.
- CSC_CONST_BUF constBuf = {};
- bool fullRange = (frame->color_range == AVCOL_RANGE_JPEG);
- const float* rawCscMatrix;
- switch (frame->colorspace) {
- case AVCOL_SPC_SMPTE170M:
- case AVCOL_SPC_BT470BG:
- rawCscMatrix = fullRange ? k_CscMatrix_Bt601Full : k_CscMatrix_Bt601Lim;
- break;
- case AVCOL_SPC_BT709:
- rawCscMatrix = fullRange ? k_CscMatrix_Bt709Full : k_CscMatrix_Bt709Lim;
- break;
- case AVCOL_SPC_BT2020_NCL:
- case AVCOL_SPC_BT2020_CL:
- rawCscMatrix = fullRange ? k_CscMatrix_Bt2020Full : k_CscMatrix_Bt2020Lim;
- break;
- default:
- SDL_assert(false);
- return;
- }
-
- // We need to adjust our raw CSC matrix to be column-major and with float3 vectors
- // padded with a float in between each of them to adhere to HLSL requirements.
- for (int i = 0; i < 3; i++) {
- for (int j = 0; j < 3; j++) {
- constBuf.cscMatrix[i * 4 + j] = rawCscMatrix[j * 3 + i];
- }
- }
-
- // No adjustments are needed to the float[3] array of offsets, so it can just
- // be copied with memcpy().
- memcpy(constBuf.offsets,
- fullRange ? k_Offsets_Full : k_Offsets_Lim,
- sizeof(constBuf.offsets));
-
- D3D11_SUBRESOURCE_DATA constData = {};
- constData.pSysMem = &constBuf;
-
- ID3D11Buffer* constantBuffer;
- HRESULT hr = m_Device->CreateBuffer(&constDesc, &constData, &constantBuffer);
- if (SUCCEEDED(hr)) {
- m_DeviceContext->PSSetConstantBuffers(0, 1, &constantBuffer);
- constantBuffer->Release();
+ else if (frame->color_range == AVCOL_RANGE_MPEG && frame->colorspace == AVCOL_SPC_BT2020_NCL) {
+ m_DeviceContext->PSSetShader(m_VideoBt2020LimPixelShader, nullptr, 0);
}
else {
- SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
- "ID3D11Device::CreateBuffer() failed: %x",
- hr);
- return;
+ // We'll need to use the generic shader for this colorspace and color range combo
+ m_DeviceContext->PSSetShader(m_VideoGenericPixelShader, nullptr, 0);
+
+ // If nothing has changed since last frame, we're done
+ if (frame->colorspace == m_LastColorSpace && frame->color_range == m_LastColorRange) {
+ return;
+ }
+
+ SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
+ "Falling back to generic video pixel shader for %d:%d",
+ frame->colorspace,
+ frame->color_range);
+
+ D3D11_BUFFER_DESC constDesc = {};
+ constDesc.ByteWidth = sizeof(CSC_CONST_BUF);
+ constDesc.Usage = D3D11_USAGE_IMMUTABLE;
+ constDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
+ constDesc.CPUAccessFlags = 0;
+ constDesc.MiscFlags = 0;
+
+ // This handles the case where the color range is unknown,
+ // so that we use Limited color range which is the default
+ // behavior for Moonlight.
+ CSC_CONST_BUF constBuf = {};
+ bool fullRange = (frame->color_range == AVCOL_RANGE_JPEG);
+ const float* rawCscMatrix;
+ switch (frame->colorspace) {
+ case AVCOL_SPC_SMPTE170M:
+ case AVCOL_SPC_BT470BG:
+ rawCscMatrix = fullRange ? k_CscMatrix_Bt601Full : k_CscMatrix_Bt601Lim;
+ break;
+ case AVCOL_SPC_BT709:
+ rawCscMatrix = fullRange ? k_CscMatrix_Bt709Full : k_CscMatrix_Bt709Lim;
+ break;
+ case AVCOL_SPC_BT2020_NCL:
+ case AVCOL_SPC_BT2020_CL:
+ rawCscMatrix = fullRange ? k_CscMatrix_Bt2020Full : k_CscMatrix_Bt2020Lim;
+ break;
+ default:
+ SDL_assert(false);
+ return;
+ }
+
+ // We need to adjust our raw CSC matrix to be column-major and with float3 vectors
+ // padded with a float in between each of them to adhere to HLSL requirements.
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 3; j++) {
+ constBuf.cscMatrix[i * 4 + j] = rawCscMatrix[j * 3 + i];
+ }
+ }
+
+ // No adjustments are needed to the float[3] array of offsets, so it can just
+ // be copied with memcpy().
+ memcpy(constBuf.offsets,
+ fullRange ? k_Offsets_Full : k_Offsets_Lim,
+ sizeof(constBuf.offsets));
+
+ D3D11_SUBRESOURCE_DATA constData = {};
+ constData.pSysMem = &constBuf;
+
+ ID3D11Buffer* constantBuffer;
+ HRESULT hr = m_Device->CreateBuffer(&constDesc, &constData, &constantBuffer);
+ if (SUCCEEDED(hr)) {
+ m_DeviceContext->PSSetConstantBuffers(0, 1, &constantBuffer);
+ constantBuffer->Release();
+ }
+ else {
+ SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+ "ID3D11Device::CreateBuffer() failed: %x",
+ hr);
+ return;
+ }
}
m_LastColorSpace = frame->colorspace;
@@ -807,9 +828,6 @@ void D3D11VARenderer::updateColorConversionConstants(AVFrame* frame)
void D3D11VARenderer::renderVideo(AVFrame* frame)
{
- // Update our CSC constants if the colorspace has changed
- updateColorConversionConstants(frame);
-
// Bind video rendering vertex buffer
UINT stride = sizeof(VERTEX);
UINT offset = 0;
@@ -826,8 +844,10 @@ void D3D11VARenderer::renderVideo(AVFrame* frame)
return;
}
- // Bind video pixel shader and SRVs for this frame
- m_DeviceContext->PSSetShader(m_VideoPixelShader, nullptr, 0);
+ // Bind our CSC shader (and constant buffer, if required)
+ bindColorConversion(frame);
+
+ // Bind SRVs for this frame
m_DeviceContext->PSSetShaderResources(0, 2, m_VideoTextureResourceViews[textureIndex]);
// Draw the video
@@ -1140,7 +1160,31 @@ bool D3D11VARenderer::setupRenderingResources()
{
QByteArray videoPixelShaderBytecode = Path::readDataFile("d3d11_video_pixel.fxc");
- hr = m_Device->CreatePixelShader(videoPixelShaderBytecode.constData(), videoPixelShaderBytecode.length(), nullptr, &m_VideoPixelShader);
+ hr = m_Device->CreatePixelShader(videoPixelShaderBytecode.constData(), videoPixelShaderBytecode.length(), nullptr, &m_VideoGenericPixelShader);
+ if (FAILED(hr)) {
+ SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+ "ID3D11Device::CreatePixelShader() failed: %x",
+ hr);
+ return false;
+ }
+ }
+
+ {
+ QByteArray videoPixelShaderBytecode = Path::readDataFile("d3d11_bt601lim_pixel.fxc");
+
+ hr = m_Device->CreatePixelShader(videoPixelShaderBytecode.constData(), videoPixelShaderBytecode.length(), nullptr, &m_VideoBt601LimPixelShader);
+ if (FAILED(hr)) {
+ SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+ "ID3D11Device::CreatePixelShader() failed: %x",
+ hr);
+ return false;
+ }
+ }
+
+ {
+ QByteArray videoPixelShaderBytecode = Path::readDataFile("d3d11_bt2020lim_pixel.fxc");
+
+ hr = m_Device->CreatePixelShader(videoPixelShaderBytecode.constData(), videoPixelShaderBytecode.length(), nullptr, &m_VideoBt2020LimPixelShader);
if (FAILED(hr)) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"ID3D11Device::CreatePixelShader() failed: %x",
diff --git a/app/streaming/video/ffmpeg-renderers/d3d11va.h b/app/streaming/video/ffmpeg-renderers/d3d11va.h
index 195d2501..0c746672 100644
--- a/app/streaming/video/ffmpeg-renderers/d3d11va.h
+++ b/app/streaming/video/ffmpeg-renderers/d3d11va.h
@@ -30,7 +30,7 @@ private:
bool setupRenderingResources();
bool setupTexturePoolViews(AVD3D11VAFramesContext* frameContext);
void renderOverlay(Overlay::OverlayType type);
- void updateColorConversionConstants(AVFrame* frame);
+ void bindColorConversion(AVFrame* frame);
void renderVideo(AVFrame* frame);
bool checkDecoderSupport(IDXGIAdapter* adapter);
@@ -52,7 +52,9 @@ private:
bool m_AllowTearing;
HANDLE m_FrameWaitableObject;
- ID3D11PixelShader* m_VideoPixelShader;
+ ID3D11PixelShader* m_VideoGenericPixelShader;
+ ID3D11PixelShader* m_VideoBt601LimPixelShader;
+ ID3D11PixelShader* m_VideoBt2020LimPixelShader;
ID3D11Buffer* m_VideoVertexBuffer;
#define DECODER_BUFFER_POOL_SIZE 17