From 9a64c026ea5580c23a0e5ef79ab66f8ceabd935d Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Wed, 9 Feb 2022 20:46:02 -0600 Subject: [PATCH] Use optimized pixel shaders for the normal SDR and HDR colorspaces --- app/resources.qrc | 2 + app/shaders/build_hlsl.bat | 4 +- app/shaders/d3d11_bt2020lim_pixel.fxc | Bin 0 -> 1352 bytes app/shaders/d3d11_bt2020lim_pixel.hlsl | 35 ++++ app/shaders/d3d11_bt601lim_pixel.fxc | Bin 0 -> 1352 bytes app/shaders/d3d11_bt601lim_pixel.hlsl | 35 ++++ app/shaders/d3d11_video_pixel.fxc | Bin 1456 -> 1456 bytes app/shaders/d3d11_video_pixel.hlsl | 2 +- .../video/ffmpeg-renderers/d3d11va.cpp | 182 +++++++++++------- .../video/ffmpeg-renderers/d3d11va.h | 6 +- 10 files changed, 193 insertions(+), 73 deletions(-) create mode 100644 app/shaders/d3d11_bt2020lim_pixel.fxc create mode 100644 app/shaders/d3d11_bt2020lim_pixel.hlsl create mode 100644 app/shaders/d3d11_bt601lim_pixel.fxc create mode 100644 app/shaders/d3d11_bt601lim_pixel.hlsl diff --git a/app/resources.qrc b/app/resources.qrc index 9401fcd5..a56a6fa0 100644 --- a/app/resources.qrc +++ b/app/resources.qrc @@ -62,5 +62,7 @@ shaders/d3d11_vertex.fxc shaders/d3d11_overlay_pixel.fxc shaders/d3d11_video_pixel.fxc + shaders/d3d11_bt601lim_pixel.fxc + shaders/d3d11_bt2020lim_pixel.fxc diff --git a/app/shaders/build_hlsl.bat b/app/shaders/build_hlsl.bat index 2398134e..bf915204 100644 --- a/app/shaders/build_hlsl.bat +++ b/app/shaders/build_hlsl.bat @@ -1,4 +1,6 @@ fxc /T vs_4_0_level_9_3 /Fo d3d11_vertex.fxc d3d11_vertex.hlsl fxc /T ps_4_0_level_9_3 /Fo d3d11_overlay_pixel.fxc d3d11_overlay_pixel.hlsl -fxc /T ps_4_0_level_9_3 /Fo d3d11_video_pixel.fxc d3d11_video_pixel.hlsl \ No newline at end of file +fxc /T ps_4_0_level_9_3 /Fo d3d11_video_pixel.fxc d3d11_video_pixel.hlsl +fxc /T ps_4_0_level_9_3 /Fo d3d11_bt601lim_pixel.fxc d3d11_bt601lim_pixel.hlsl +fxc /T ps_4_0_level_9_3 /Fo d3d11_bt2020lim_pixel.fxc d3d11_bt2020lim_pixel.hlsl \ No newline at end of file diff --git a/app/shaders/d3d11_bt2020lim_pixel.fxc b/app/shaders/d3d11_bt2020lim_pixel.fxc new file mode 100644 index 0000000000000000000000000000000000000000..3bef4a80a5b3e00b5c456aeda5e84f3ac8ef4991 GIT binary patch literal 1352 zcmZuwziU%b6#njeX`9+Yf-Xe_gNRft(uxRznAbK*6R1tidzIi~(;7)7O-hWL$g7}r z5(jl~${+}4=^x;ax#-ZLI6FiT>D1LBe&_vYZbNT4x##=Ncg{Wc<|I{|nLRPI=kC1! zyZ`IW^T}8Hrxz4}>;N#t{RZtN3CjdN(|*uCM1WDor|Y%rDaOQLbh~5RC+Pa$;!VK7 z`2Yh-KGfX?m>p|vy6%@v7vBIkDOtiR^0AS($$#H}dR)jG0l)J*`SsG)1?KpAGvKXL z!m?;pv5=Z1pJkEOZ&On(x9wdBBhzjq+dms&r9H7T%w4BUS zzkYb8u9ceoTH!+NHIB=QR(DYKdZB8xp~W&r?|j1o!a3OVW`UDI{YkC&7h109`{#pQ zkDk-?{q;q!&yCcd(e=mF*XN0M8+@ke<&9-#Z}O>aXWD5J4b$X(Wy#LJ$l>zd=@1=t z!W^AFj??3zZ)b9ooI3=+8Vad&W;YCS=U)|P1V=bwY~mx`Cp6*vm-HyMMU(RfXmW=x z7dI~^@prWTc$h0W%}UuWt#4Gz4OAbmuhmMm)$(GsR4Ze((x@LHZmq30>W}IVnu+nk z#YA@Ao=@17(*1HHF luminancePlane : register(t0); +Texture2D chrominancePlane : register(t1); +SamplerState theSampler : register(s0); + +static const min16float3x3 cscMatrix = +{ + 1.1644, 1.1644, 1.1644, + 0.0, -0.1874, 2.1418, + 1.6781, -0.6505, 0.0, +}; + +static const min16float3 offsets = +{ + 16.0 / 255.0, 128.0 / 255.0, 128.0 / 255.0 +}; + +struct ShaderInput +{ + min16float4 pos : SV_POSITION; + min16float2 tex : TEXCOORD0; +}; + +min16float4 main(ShaderInput input) : SV_TARGET +{ + min16float3 yuv = min16float3(luminancePlane.Sample(theSampler, input.tex), + chrominancePlane.Sample(theSampler, input.tex)); + + // Subtract the YUV offset for limited vs full range + yuv -= offsets; + + // Multiply by the conversion matrix for this colorspace + yuv = mul(yuv, cscMatrix); + + return min16float4(yuv, 1.0); +} \ No newline at end of file diff --git a/app/shaders/d3d11_bt601lim_pixel.fxc b/app/shaders/d3d11_bt601lim_pixel.fxc new file mode 100644 index 0000000000000000000000000000000000000000..530f4e9af39e98f59c9c9450b69c450b8359aebc GIT binary patch literal 1352 zcmZuw&ubG=5dPk7+D2MP@RW*RP!AP~wjzom?Y2$Q1geSItpqP^T2oTVj}q%mWa~ja zc<`dF!P5ln~^)>Dyz%*cl!|# zQI?|rPcU-7w4>xG%OKWu4Prhg;>9LolN`Til$h@$8}oYhdVrlxmv%|80Mw{YqDrxc ziPbnGpp-xaTfV=%+>coTiPz$YT#v`uPo6VS;fzN~KW2z3@56f_4U8V4k7u-+%nQGN zc&5&ko`YP$Lhenj%bM1ASoLQiYpkW^GDiP=!vexR*ztCOn?e3rZT44Mt?&o;gHw-P z)cAw_g|F|8^q<%Hr}WqNiFX@*sqy_yb7yDxsBLG?*#>ZiD$iF$EI*NB@t)IhbkYfH zbo4Y#&xgL9$xU(Z5WF=MQ|ZiMn55;Wit`djxM3XPGu`J@!3RZV6n#yV^M|N%haQE- z%SrwNbub^+O3ik)VwdY%wMq-MXZ6iSxv^d;)XI$t)~l`N3EJebh4Z vSo`5hA#dlLT>d_sbZH@ luminancePlane : register(t0); +Texture2D chrominancePlane : register(t1); +SamplerState theSampler : register(s0); + +static const min16float3x3 cscMatrix = +{ + 1.1644, 1.1644, 1.1644, + 0.0, -0.3917, 2.0172, + 1.5960, -0.8129, 0.0, +}; + +static const min16float3 offsets = +{ + 16.0 / 255.0, 128.0 / 255.0, 128.0 / 255.0 +}; + +struct ShaderInput +{ + min16float4 pos : SV_POSITION; + min16float2 tex : TEXCOORD0; +}; + +min16float4 main(ShaderInput input) : SV_TARGET +{ + min16float3 yuv = min16float3(luminancePlane.Sample(theSampler, input.tex), + chrominancePlane.Sample(theSampler, input.tex)); + + // Subtract the YUV offset for limited vs full range + yuv -= offsets; + + // Multiply by the conversion matrix for this colorspace + yuv = mul(yuv, cscMatrix); + + return min16float4(yuv, 1.0); +} \ No newline at end of file diff --git a/app/shaders/d3d11_video_pixel.fxc b/app/shaders/d3d11_video_pixel.fxc index 126e947417b14d8bd3206a3c542ea6d97e0284ff..ecfd94ba4428fd67929a49acff4fe4e7549a0809 100644 GIT binary patch delta 100 zcmdnMy@6ZACBn)1o8h+^1|L(E<7Q+$4Sjf|dZNfHMaBjOhbJJsfP;a7nbCm>%x8r2 oSvJcv+A%UROpasHW?^99R+^m1q&#^86Dx@01mbMI!*qoS0OcqicmMzZ delta 100 zcmdnMy@6ZACBn%$xkdCCXEwJ7XKmu^>^XZ2CyKmM6l`E{cml!;I2ah186AYcd`385 nWV1Y@9V4T{Release(); } -void D3D11VARenderer::updateColorConversionConstants(AVFrame* frame) +void D3D11VARenderer::bindColorConversion(AVFrame* frame) { - // If nothing has changed since last frame, we're done - if (frame->colorspace == m_LastColorSpace && frame->color_range == m_LastColorRange) { - return; + // We have purpose-built shaders for the common Rec 601 (SDR) and Rec 2020 (HDR) cases + if (frame->color_range == AVCOL_RANGE_MPEG && frame->colorspace == AVCOL_SPC_SMPTE170M) { + m_DeviceContext->PSSetShader(m_VideoBt601LimPixelShader, nullptr, 0); } - - D3D11_BUFFER_DESC constDesc = {}; - constDesc.ByteWidth = sizeof(CSC_CONST_BUF); - constDesc.Usage = D3D11_USAGE_IMMUTABLE; - constDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - constDesc.CPUAccessFlags = 0; - constDesc.MiscFlags = 0; - - // This handles the case where the color range is unknown, - // so that we use Limited color range which is the default - // behavior for Moonlight. - CSC_CONST_BUF constBuf = {}; - bool fullRange = (frame->color_range == AVCOL_RANGE_JPEG); - const float* rawCscMatrix; - switch (frame->colorspace) { - case AVCOL_SPC_SMPTE170M: - case AVCOL_SPC_BT470BG: - rawCscMatrix = fullRange ? k_CscMatrix_Bt601Full : k_CscMatrix_Bt601Lim; - break; - case AVCOL_SPC_BT709: - rawCscMatrix = fullRange ? k_CscMatrix_Bt709Full : k_CscMatrix_Bt709Lim; - break; - case AVCOL_SPC_BT2020_NCL: - case AVCOL_SPC_BT2020_CL: - rawCscMatrix = fullRange ? k_CscMatrix_Bt2020Full : k_CscMatrix_Bt2020Lim; - break; - default: - SDL_assert(false); - return; - } - - // We need to adjust our raw CSC matrix to be column-major and with float3 vectors - // padded with a float in between each of them to adhere to HLSL requirements. - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - constBuf.cscMatrix[i * 4 + j] = rawCscMatrix[j * 3 + i]; - } - } - - // No adjustments are needed to the float[3] array of offsets, so it can just - // be copied with memcpy(). - memcpy(constBuf.offsets, - fullRange ? k_Offsets_Full : k_Offsets_Lim, - sizeof(constBuf.offsets)); - - D3D11_SUBRESOURCE_DATA constData = {}; - constData.pSysMem = &constBuf; - - ID3D11Buffer* constantBuffer; - HRESULT hr = m_Device->CreateBuffer(&constDesc, &constData, &constantBuffer); - if (SUCCEEDED(hr)) { - m_DeviceContext->PSSetConstantBuffers(0, 1, &constantBuffer); - constantBuffer->Release(); + else if (frame->color_range == AVCOL_RANGE_MPEG && frame->colorspace == AVCOL_SPC_BT2020_NCL) { + m_DeviceContext->PSSetShader(m_VideoBt2020LimPixelShader, nullptr, 0); } else { - SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, - "ID3D11Device::CreateBuffer() failed: %x", - hr); - return; + // We'll need to use the generic shader for this colorspace and color range combo + m_DeviceContext->PSSetShader(m_VideoGenericPixelShader, nullptr, 0); + + // If nothing has changed since last frame, we're done + if (frame->colorspace == m_LastColorSpace && frame->color_range == m_LastColorRange) { + return; + } + + SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, + "Falling back to generic video pixel shader for %d:%d", + frame->colorspace, + frame->color_range); + + D3D11_BUFFER_DESC constDesc = {}; + constDesc.ByteWidth = sizeof(CSC_CONST_BUF); + constDesc.Usage = D3D11_USAGE_IMMUTABLE; + constDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + constDesc.CPUAccessFlags = 0; + constDesc.MiscFlags = 0; + + // This handles the case where the color range is unknown, + // so that we use Limited color range which is the default + // behavior for Moonlight. + CSC_CONST_BUF constBuf = {}; + bool fullRange = (frame->color_range == AVCOL_RANGE_JPEG); + const float* rawCscMatrix; + switch (frame->colorspace) { + case AVCOL_SPC_SMPTE170M: + case AVCOL_SPC_BT470BG: + rawCscMatrix = fullRange ? k_CscMatrix_Bt601Full : k_CscMatrix_Bt601Lim; + break; + case AVCOL_SPC_BT709: + rawCscMatrix = fullRange ? k_CscMatrix_Bt709Full : k_CscMatrix_Bt709Lim; + break; + case AVCOL_SPC_BT2020_NCL: + case AVCOL_SPC_BT2020_CL: + rawCscMatrix = fullRange ? k_CscMatrix_Bt2020Full : k_CscMatrix_Bt2020Lim; + break; + default: + SDL_assert(false); + return; + } + + // We need to adjust our raw CSC matrix to be column-major and with float3 vectors + // padded with a float in between each of them to adhere to HLSL requirements. + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + constBuf.cscMatrix[i * 4 + j] = rawCscMatrix[j * 3 + i]; + } + } + + // No adjustments are needed to the float[3] array of offsets, so it can just + // be copied with memcpy(). + memcpy(constBuf.offsets, + fullRange ? k_Offsets_Full : k_Offsets_Lim, + sizeof(constBuf.offsets)); + + D3D11_SUBRESOURCE_DATA constData = {}; + constData.pSysMem = &constBuf; + + ID3D11Buffer* constantBuffer; + HRESULT hr = m_Device->CreateBuffer(&constDesc, &constData, &constantBuffer); + if (SUCCEEDED(hr)) { + m_DeviceContext->PSSetConstantBuffers(0, 1, &constantBuffer); + constantBuffer->Release(); + } + else { + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, + "ID3D11Device::CreateBuffer() failed: %x", + hr); + return; + } } m_LastColorSpace = frame->colorspace; @@ -807,9 +828,6 @@ void D3D11VARenderer::updateColorConversionConstants(AVFrame* frame) void D3D11VARenderer::renderVideo(AVFrame* frame) { - // Update our CSC constants if the colorspace has changed - updateColorConversionConstants(frame); - // Bind video rendering vertex buffer UINT stride = sizeof(VERTEX); UINT offset = 0; @@ -826,8 +844,10 @@ void D3D11VARenderer::renderVideo(AVFrame* frame) return; } - // Bind video pixel shader and SRVs for this frame - m_DeviceContext->PSSetShader(m_VideoPixelShader, nullptr, 0); + // Bind our CSC shader (and constant buffer, if required) + bindColorConversion(frame); + + // Bind SRVs for this frame m_DeviceContext->PSSetShaderResources(0, 2, m_VideoTextureResourceViews[textureIndex]); // Draw the video @@ -1140,7 +1160,31 @@ bool D3D11VARenderer::setupRenderingResources() { QByteArray videoPixelShaderBytecode = Path::readDataFile("d3d11_video_pixel.fxc"); - hr = m_Device->CreatePixelShader(videoPixelShaderBytecode.constData(), videoPixelShaderBytecode.length(), nullptr, &m_VideoPixelShader); + hr = m_Device->CreatePixelShader(videoPixelShaderBytecode.constData(), videoPixelShaderBytecode.length(), nullptr, &m_VideoGenericPixelShader); + if (FAILED(hr)) { + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, + "ID3D11Device::CreatePixelShader() failed: %x", + hr); + return false; + } + } + + { + QByteArray videoPixelShaderBytecode = Path::readDataFile("d3d11_bt601lim_pixel.fxc"); + + hr = m_Device->CreatePixelShader(videoPixelShaderBytecode.constData(), videoPixelShaderBytecode.length(), nullptr, &m_VideoBt601LimPixelShader); + if (FAILED(hr)) { + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, + "ID3D11Device::CreatePixelShader() failed: %x", + hr); + return false; + } + } + + { + QByteArray videoPixelShaderBytecode = Path::readDataFile("d3d11_bt2020lim_pixel.fxc"); + + hr = m_Device->CreatePixelShader(videoPixelShaderBytecode.constData(), videoPixelShaderBytecode.length(), nullptr, &m_VideoBt2020LimPixelShader); if (FAILED(hr)) { SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "ID3D11Device::CreatePixelShader() failed: %x", diff --git a/app/streaming/video/ffmpeg-renderers/d3d11va.h b/app/streaming/video/ffmpeg-renderers/d3d11va.h index 195d2501..0c746672 100644 --- a/app/streaming/video/ffmpeg-renderers/d3d11va.h +++ b/app/streaming/video/ffmpeg-renderers/d3d11va.h @@ -30,7 +30,7 @@ private: bool setupRenderingResources(); bool setupTexturePoolViews(AVD3D11VAFramesContext* frameContext); void renderOverlay(Overlay::OverlayType type); - void updateColorConversionConstants(AVFrame* frame); + void bindColorConversion(AVFrame* frame); void renderVideo(AVFrame* frame); bool checkDecoderSupport(IDXGIAdapter* adapter); @@ -52,7 +52,9 @@ private: bool m_AllowTearing; HANDLE m_FrameWaitableObject; - ID3D11PixelShader* m_VideoPixelShader; + ID3D11PixelShader* m_VideoGenericPixelShader; + ID3D11PixelShader* m_VideoBt601LimPixelShader; + ID3D11PixelShader* m_VideoBt2020LimPixelShader; ID3D11Buffer* m_VideoVertexBuffer; #define DECODER_BUFFER_POOL_SIZE 17