From 94943d2865897896e453ceb8b62b51a6558132a4 Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Sat, 22 Jun 2024 16:19:26 -0500 Subject: [PATCH] Clamp chroma texcoords to avoid sampling alignment padding This resolves #885 without the massive perf hit on lower end Intel GPUs. --- app/shaders/d3d11_bt2020lim_pixel.fxc | Bin 1340 -> 1548 bytes app/shaders/d3d11_bt601lim_pixel.fxc | Bin 1340 -> 1548 bytes app/shaders/d3d11_genyuv_pixel.fxc | Bin 1444 -> 1632 bytes app/shaders/d3d11_genyuv_pixel.hlsl | 2 +- app/shaders/d3d11_video_pixel_end.hlsli | 3 +- app/shaders/d3d11_video_pixel_start.hlsli | 7 +++- .../video/ffmpeg-renderers/d3d11va.cpp | 35 +++++++++++++++++- 7 files changed, 43 insertions(+), 4 deletions(-) diff --git a/app/shaders/d3d11_bt2020lim_pixel.fxc b/app/shaders/d3d11_bt2020lim_pixel.fxc index d557a9edf09d6e7b1d6de091587613464411d27c..b015bbebb85ddb1f9bc696672ee1558510da44ad 100644 GIT binary patch delta 717 zcmYjPO-NNi6#nMt>GgHt-3+oI>Mha=gP=k!gLi%U9xgHr4YZYyFf>pp`f(GE5VVQl zF$6AL+&~C?n;>Y_53Qt~MGF@#Z_qM=cG2ydxsT{@zcc52^UawvGxtsLNw_vPQyu;M z=hyV@S6>IWH^TD8sq@zafUpDT0LAgiR10q@lNQ*+=R@IW>hNNvb676<>R-?yZ*<0 z6y%e-`C)La2(U3_5T^-{zN|74amlzCL7KQ6d2R%AZY@RJLH(|gUfXFzoDyG>>+XQD zVEo-k)|1#y0>55*jUtz8w9j>%))GyTD~dQE*^bBA0E=cBFg0GA*`pu^aG$sp2rQBqe^1HB`Vshjh&~+A(C+ktXnpUX4!pS%}pct5X@?dRskf}KzgdMDX4KkN~FavUL`c_NbToi~{( z+fWDWZxx6awxExR*S42_$2L`uW9^vkpBr7cv$E8Li)B@CaQtb{lN!mJx}4cXd%_Cmpr{Mzm8@4wm8ja zN=5NXk<(JquwBxyjG#J{pcEpwjiv!3fso%hFA{u@ym&&j zmvD^u@t)-}Fg}u>EHDrNm?u7YBHTk`rj82sM)H|FkbXcNXPom@y|lBc>6JUFN(#L^ z;gA0lw5!A(s78T3R{gB^>K*HA?dq8BudiW5xp7`@HFjSwwqM6ODf~5Pa@6o8`J`{8 iON6$Ey1|YbPuA>&X^|bfU|Q^v>99fSt^BZaslq?S4r-$~?2;o*Jfp7WjWo_GJ=<+lr)Q*))s@4sfc zcbbo%z4^9}p}zjA08nTH+Bse$_DDQc3^WiD%Q>J)Osub6ooCGfGEFaV9AzkQiQYw{ zkT~Cp9q)v0%uLAiYs_Z5i^XFv#(!*o{xEX7Ny4(=dY!R+Nf_D>9y@fL<#XVzT-9+3C2H- zWIu`hIPmkO*C=wCM*CbxNjcUOxu%E$lC5}}3t-bU1#V84=ME`|06Zk_214BCk1Ef5 zm}O^ScSUvL3ea&Gp(iV0=ntu4df3!c-j#v;AB6w8N=x7OgnHW{`)wc#ox{Aj&LgTk z>~(ok2)ii9=wHe~`Y+xKHFlgw$mM6|#+;p1k9yrB;Dilxe~09eQXS+^2TXOwQue{_nv$1oj0#dl@iIPiT&3f z^V3(>-|QdWc>DcOA)>VrqEXm8>?5EBiT1F+vJDeWAs=rwZ%-n}n*xKu0ZfWrx+c98 z{3+=ZDfr5jhJkQ9drRB!Wc2UFpA~NVMnq3j8r7@#~P*?-BKP z699@9MNNN~BK;m?64+6IF+)JnM!^5(KpfmrN74FDNbA3kxLn1>i1h>xxHvGxAr}XM zS&OuY6V^!h8T5tLU_fMK(xn3sG$MM2{Uo9&4~RngOBG0*frmk(5cWgvzZT;(l`bH( zA^eqoE$>#bWL$+9*F#W0;F8iK>_j??yL?&drg~&?Q%Q)FF?#X`-u z;PSglO_zJ=3|!{3ZsH7FK0mz6$hMCw&evJEi`Ir2H@o2A{^1gc#TI`e!{yC44EP+c z#upmu_+zK}hMt|l-IQO7sF+IYztD^|L$ExLeKFUS3UGWnF{aen_N$DD4}Hmt@*-ELETvr%hW&9c2*x0*JUtL@fbfg}g4 zT&+>-CN?XSG?J^y!jfUGCRTKJ4%VB;L*7icGxz8Bt0S(t5%I_|8S6YY=O0PzcC}1< zD{pPl&d!d4+o1SP@SWuGe>|@HAn_;I8Q_2PI_138ZP!>V2(6$`t5vCV>~4qhwQ{@F zX;r%Fbn&X1$r(A-s9KNhwwi1;HflH(-%LOBAW(!MXrc22}p_t-#E}+l6y_$@(_dd6 zw&(wpzFvR#ey5KFfI$LCF_(!N3pWzjcYqE7_I8L@Z`;pd#0D~oE|^M|7UNk75|84! zkr^SN>_{%7WYnOY>h+#=D}Vz(8vx<4aKpagBIY3HS}FWw0RJlBW~faC!Vd$-4}Z8; zE7%JS`XeqarzYmga!Ee&as%bV&askRux3#NG*{~D11fv~yTqO=#0Gc0{CO78pTyc^ z9hmr!99OWdU&?8{BnwaM;kI%t0Z(~*+hYZJQjFAA1Ek-Ld-{W%63~az(;uaq7&)HZ zW4HI%jI)@3Ku2DI4U?7LaNf^#Xm0%fnhH@Mpm-t{Tf{eg>v-a)&MQxx=*Jw-bwhdj zKouL!>Sp!L>t<`Svbv1naCpw8#`%WuiE+%D4=~IFn|?9oW#%RFOkA(&-Na&6@E!!r KA7ex7#N{1~lWkD| diff --git a/app/shaders/d3d11_genyuv_pixel.hlsl b/app/shaders/d3d11_genyuv_pixel.hlsl index b2f8a150..ea66df5d 100644 --- a/app/shaders/d3d11_genyuv_pixel.hlsl +++ b/app/shaders/d3d11_genyuv_pixel.hlsl @@ -1,6 +1,6 @@ #include "d3d11_video_pixel_start.hlsli" -cbuffer CSC_CONST_BUF : register(b0) +cbuffer CSC_CONST_BUF : register(b1) { min16float3x3 cscMatrix; min16float3 offsets; diff --git a/app/shaders/d3d11_video_pixel_end.hlsli b/app/shaders/d3d11_video_pixel_end.hlsli index db62c66a..6b7cc26a 100644 --- a/app/shaders/d3d11_video_pixel_end.hlsli +++ b/app/shaders/d3d11_video_pixel_end.hlsli @@ -1,7 +1,8 @@ min16float4 main(ShaderInput input) : SV_TARGET { + // Clamp the chrominance texcoords to avoid sampling the row of texels adjacent to the alignment padding min16float3 yuv = min16float3(luminancePlane.Sample(theSampler, input.tex), - chrominancePlane.Sample(theSampler, input.tex)); + chrominancePlane.Sample(theSampler, min(input.tex, chromaTexMax.rg))); // Subtract the YUV offset for limited vs full range yuv -= offsets; diff --git a/app/shaders/d3d11_video_pixel_start.hlsli b/app/shaders/d3d11_video_pixel_start.hlsli index 863d9d43..a148ff98 100644 --- a/app/shaders/d3d11_video_pixel_start.hlsli +++ b/app/shaders/d3d11_video_pixel_start.hlsli @@ -6,4 +6,9 @@ struct ShaderInput { float4 pos : SV_POSITION; float2 tex : TEXCOORD0; -}; \ No newline at end of file +}; + +cbuffer ChromaLimitBuf : register(b0) +{ + min16float3 chromaTexMax; +}; diff --git a/app/streaming/video/ffmpeg-renderers/d3d11va.cpp b/app/streaming/video/ffmpeg-renderers/d3d11va.cpp index 4b209a4c..2fd792e5 100644 --- a/app/streaming/video/ffmpeg-renderers/d3d11va.cpp +++ b/app/streaming/video/ffmpeg-renderers/d3d11va.cpp @@ -728,7 +728,7 @@ void D3D11VARenderer::bindColorConversion(AVFrame* frame) ID3D11Buffer* constantBuffer; HRESULT hr = m_Device->CreateBuffer(&constDesc, &constData, &constantBuffer); if (SUCCEEDED(hr)) { - m_DeviceContext->PSSetConstantBuffers(0, 1, &constantBuffer); + m_DeviceContext->PSSetConstantBuffers(1, 1, &constantBuffer); constantBuffer->Release(); } else { @@ -1313,6 +1313,39 @@ bool D3D11VARenderer::setupRenderingResources() } } + // Create our fixed constant buffer to limit chroma texcoords and avoid sampling from alignment texels. + { + D3D11_BUFFER_DESC constDesc = {}; + constDesc.ByteWidth = sizeof(CSC_CONST_BUF); + constDesc.Usage = D3D11_USAGE_IMMUTABLE; + constDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + constDesc.CPUAccessFlags = 0; + constDesc.MiscFlags = 0; + + int alignedWidth = FFALIGN(m_DecoderParams.width, m_TextureAlignment); + int alignedHeight = FFALIGN(m_DecoderParams.height, m_TextureAlignment); + + float chromaUVMax[3] = {}; + chromaUVMax[0] = m_DecoderParams.width != alignedWidth ? ((float)(m_DecoderParams.width - 1) / alignedWidth) : 1.0f; + chromaUVMax[1] = m_DecoderParams.height != alignedHeight ? ((float)(m_DecoderParams.height - 1) / alignedHeight) : 1.0f; + + D3D11_SUBRESOURCE_DATA constData = {}; + constData.pSysMem = chromaUVMax; + + ID3D11Buffer* constantBuffer; + HRESULT hr = m_Device->CreateBuffer(&constDesc, &constData, &constantBuffer); + if (SUCCEEDED(hr)) { + m_DeviceContext->PSSetConstantBuffers(0, 1, &constantBuffer); + constantBuffer->Release(); + } + else { + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, + "ID3D11Device::CreateBuffer() failed: %x", + hr); + return false; + } + } + // Create our blend state { D3D11_BLEND_DESC blendDesc = {};