From df814fef4a96bfcf8e670c2204a4ad34e323d1a8 Mon Sep 17 00:00:00 2001
From: Cameron Gutman
Date: Thu, 22 Aug 2024 00:05:26 -0500
Subject: [PATCH] Implement support for HDR with software decoding on macOS

Allow VTMetalRenderer to be created without VideoToolbox hardware
acceleration so that frames produced by software decoders can be drawn
through Metal:

- Planes of software frames are uploaded into per-plane Metal textures
  and drawn with ps_draw_biplanar or the new ps_draw_triplanar shader,
  selected by the frame's plane count.
- A bitnessScaleFactor is passed to the shaders next to the CSC
  parameters to rescale samples whose component depth is narrower than
  their storage size.
- On macOS the Metal renderer is tried for unknown decoders before
  falling back to SdlRenderer.
---
 app/shaders/vt_renderer.metal                 |  22 +-
 app/streaming/video/ffmpeg-renderers/vt.h     |   2 +-
 .../video/ffmpeg-renderers/vt_metal.mm        | 286 ++++++++++++++----
 app/streaming/video/ffmpeg.cpp                |   9 +-
 4 files changed, 259 insertions(+), 60 deletions(-)

diff --git a/app/shaders/vt_renderer.metal b/app/shaders/vt_renderer.metal
index b87d2758..5bfb6b6d 100644
--- a/app/shaders/vt_renderer.metal
+++ b/app/shaders/vt_renderer.metal
@@ -10,6 +10,7 @@ struct CscParams
 {
     float3 matrix[3];
     float3 offsets;
+    float bitnessScaleFactor;
 };
 
 constexpr sampler s(coord::normalized, address::clamp_to_edge, filter::linear);
@@ -24,7 +25,26 @@ fragment float4 ps_draw_biplanar(Vertex v [[ stage_in ]],
                                  texture2d<float> luminancePlane [[ texture(0) ]],
                                  texture2d<float> chrominancePlane [[ texture(1) ]])
 {
-    float3 yuv = float3(luminancePlane.sample(s, v.texCoords).r, chrominancePlane.sample(s, v.texCoords).rg);
+    float3 yuv = float3(luminancePlane.sample(s, v.texCoords).r * cscParams.bitnessScaleFactor,
+                        chrominancePlane.sample(s, v.texCoords).rg * cscParams.bitnessScaleFactor);
+    yuv -= cscParams.offsets;
+
+    float3 rgb;
+    rgb.r = dot(yuv, cscParams.matrix[0]);
+    rgb.g = dot(yuv, cscParams.matrix[1]);
+    rgb.b = dot(yuv, cscParams.matrix[2]);
+    return float4(rgb, 1.0f);
+}
+
+fragment float4 ps_draw_triplanar(Vertex v [[ stage_in ]],
+                                  constant CscParams &cscParams [[ buffer(0) ]],
+                                  texture2d<float> luminancePlane [[ texture(0) ]],
+                                  texture2d<float> chrominancePlaneU [[ texture(1) ]],
+                                  texture2d<float> chrominancePlaneV [[ texture(2) ]])
+{
+    float3 yuv = float3(luminancePlane.sample(s, v.texCoords).r * cscParams.bitnessScaleFactor,
+                        chrominancePlaneU.sample(s, v.texCoords).r * cscParams.bitnessScaleFactor,
+                        chrominancePlaneV.sample(s, v.texCoords).r * cscParams.bitnessScaleFactor);
     yuv -= cscParams.offsets;
 
     float3 rgb;
diff --git a/app/streaming/video/ffmpeg-renderers/vt.h b/app/streaming/video/ffmpeg-renderers/vt.h
index 06674080..10c7c033 100644
--- a/app/streaming/video/ffmpeg-renderers/vt.h
+++ b/app/streaming/video/ffmpeg-renderers/vt.h
@@ -16,7 +16,7 @@ public:
 class VTMetalRendererFactory {
 public:
     static
-    IFFmpegRenderer* createRenderer();
+    IFFmpegRenderer* createRenderer(bool hwAccel);
 };
 
 class VTRendererFactory {
diff --git a/app/streaming/video/ffmpeg-renderers/vt_metal.mm b/app/streaming/video/ffmpeg-renderers/vt_metal.mm
index 6fcca427..ed5d3c23 100644
--- a/app/streaming/video/ffmpeg-renderers/vt_metal.mm
+++ b/app/streaming/video/ffmpeg-renderers/vt_metal.mm
@@ -18,12 +18,22 @@
 #import <Metal/Metal.h>
 #import <MetalKit/MetalKit.h>
 
+extern "C" {
+    #include <libavutil/pixdesc.h>
+}
+
 struct CscParams
 {
     vector_float3 matrix[3];
     vector_float3 offsets;
 };
 
+struct ParamBuffer
+{
+    CscParams cscParams;
+    float bitnessScaleFactor;
+};
+
 static const CscParams k_CscParams_Bt601Lim = {
     // CSC Matrix
     {
@@ -97,11 +107,14 @@ struct Vertex
     vector_float2 texCoord;
 };
 
+#define MAX_VIDEO_PLANES 3
+
 class VTMetalRenderer : public VTBaseRenderer
 {
 public:
-    VTMetalRenderer()
-        : m_Window(nullptr),
+    VTMetalRenderer(bool hwAccel)
+        : m_HwAccel(hwAccel),
+          m_Window(nullptr),
           m_HwContext(nullptr),
           m_MetalLayer(nullptr),
           m_TextureCache(nullptr),
@@ -114,6 +127,7 @@ public:
           m_ShaderLibrary(nullptr),
           m_CommandQueue(nullptr),
           m_NextDrawable(nullptr),
+          m_SwMappingTextures{},
           m_MetalView(nullptr),
           m_LastColorSpace(-1),
           m_LastFullRange(false),
@@ -159,6 +173,12 @@ public:
             }
         }
 
+        for (int i = 0; i < MAX_VIDEO_PLANES; i++) {
+            if (m_SwMappingTextures[i] != nullptr) {
+                [m_SwMappingTextures[i] release];
+            }
+        }
+
         if (m_OverlayPipelineState != nullptr) {
             [m_OverlayPipelineState release];
         }
@@ -271,13 +291,43 @@ public:
         return true;
     }
 
+    int getFramePlaneCount(AVFrame* frame)
+    {
+        if (frame->format == AV_PIX_FMT_VIDEOTOOLBOX) {
+            return CVPixelBufferGetPlaneCount((CVPixelBufferRef)frame->data[3]);
+        }
+        else {
+            return av_pix_fmt_count_planes((AVPixelFormat)frame->format);
+        }
+    }
+
+    int getBitnessScaleFactor(AVFrame* frame)
+    {
+        if (frame->format == AV_PIX_FMT_VIDEOTOOLBOX) {
+            // VideoToolbox frames never require scaling
+            return 1;
+        }
+        else {
+            const AVPixFmtDescriptor* formatDesc = av_pix_fmt_desc_get((AVPixelFormat)frame->format);
+            if (!formatDesc) {
+                // This shouldn't be possible but handle it anyway
+                SDL_assert(formatDesc);
+                return 1;
+            }
+
+            // Plane 0 must hold only Y. Exclude the low `shift` padding bits so MSB-aligned formats (e.g. P010) aren't over-scaled.
+            SDL_assert(formatDesc->comp[0].step == 1 || formatDesc->comp[0].step == 2);
+            return 1 << ((formatDesc->comp[0].step * 8) - formatDesc->comp[0].depth - formatDesc->comp[0].shift);
+        }
+    }
+
     bool updateColorSpaceForFrame(AVFrame* frame)
     {
         int colorspace = getFrameColorspace(frame);
         bool fullRange = isFrameFullRange(frame);
         if (colorspace != m_LastColorSpace || fullRange != m_LastFullRange) {
             CGColorSpaceRef newColorSpace;
-            void* paramBuffer;
+            ParamBuffer paramBuffer;
 
             // Free any unpresented drawable since we're changing pixel formats
             discardNextDrawable();
@@ -286,7 +336,7 @@ public:
             case COLORSPACE_REC_709:
                 m_MetalLayer.colorspace = newColorSpace = CGColorSpaceCreateWithName(kCGColorSpaceITUR_709);
                 m_MetalLayer.pixelFormat = MTLPixelFormatBGRA8Unorm;
-                paramBuffer = (void*)(fullRange ? &k_CscParams_Bt709Full : &k_CscParams_Bt709Lim);
+                paramBuffer.cscParams = (fullRange ? k_CscParams_Bt709Full : k_CscParams_Bt709Lim);
                 break;
             case COLORSPACE_REC_2020:
                 // https://developer.apple.com/documentation/metal/hdr_content/using_color_spaces_to_display_hdr_content
@@ -298,32 +348,37 @@ public:
                     m_MetalLayer.colorspace = newColorSpace = CGColorSpaceCreateWithName(kCGColorSpaceITUR_2020);
                     m_MetalLayer.pixelFormat = MTLPixelFormatBGRA8Unorm;
                 }
-                paramBuffer = (void*)(fullRange ? &k_CscParams_Bt2020Full : &k_CscParams_Bt2020Lim);
+                paramBuffer.cscParams = (fullRange ? k_CscParams_Bt2020Full : k_CscParams_Bt2020Lim);
                 break;
             default:
             case COLORSPACE_REC_601:
                 m_MetalLayer.colorspace = newColorSpace = CGColorSpaceCreateWithName(kCGColorSpaceSRGB);
                 m_MetalLayer.pixelFormat = MTLPixelFormatBGRA8Unorm;
-                paramBuffer = (void*)(fullRange ? &k_CscParams_Bt601Full : &k_CscParams_Bt601Lim);
+                paramBuffer.cscParams = (fullRange ? k_CscParams_Bt601Full : k_CscParams_Bt601Lim);
                 break;
             }
 
+            paramBuffer.bitnessScaleFactor = getBitnessScaleFactor(frame);
+
             // The CAMetalLayer retains the CGColorSpace
             CGColorSpaceRelease(newColorSpace);
 
             // Create the new colorspace parameter buffer for our fragment shader
             [m_CscParamsBuffer release];
             auto bufferOptions = MTLCPUCacheModeWriteCombined | MTLResourceStorageModeManaged;
-            m_CscParamsBuffer = [m_MetalLayer.device newBufferWithBytes:paramBuffer length:sizeof(CscParams) options:bufferOptions];
+            m_CscParamsBuffer = [m_MetalLayer.device newBufferWithBytes:(void*)&paramBuffer length:sizeof(paramBuffer) options:bufferOptions];
             if (!m_CscParamsBuffer) {
                 SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                              "Failed to create CSC parameters buffer");
                 return false;
             }
 
+            int planes = getFramePlaneCount(frame);
+            SDL_assert(planes == 2 || planes == 3);
+
             MTLRenderPipelineDescriptor *pipelineDesc = [[MTLRenderPipelineDescriptor new] autorelease];
             pipelineDesc.vertexFunction = [[m_ShaderLibrary newFunctionWithName:@"vs_draw"] autorelease];
-            pipelineDesc.fragmentFunction = [[m_ShaderLibrary newFunctionWithName:@"ps_draw_biplanar"] autorelease];
+            pipelineDesc.fragmentFunction = [[m_ShaderLibrary newFunctionWithName:planes == 2 ? @"ps_draw_biplanar" : @"ps_draw_triplanar"] autorelease];
             pipelineDesc.colorAttachments[0].pixelFormat = m_MetalLayer.pixelFormat;
             [m_VideoPipelineState release];
             m_VideoPipelineState = [m_MetalLayer.device newRenderPipelineStateWithDescriptor:pipelineDesc error:nullptr];
@@ -359,11 +414,73 @@ public:
         return true;
     }
 
+    id<MTLTexture> mapPlaneForSoftwareFrame(AVFrame* frame, int planeIndex)
+    {
+        const AVPixFmtDescriptor* formatDesc = av_pix_fmt_desc_get((AVPixelFormat)frame->format);
+        if (!formatDesc) {
+            // This shouldn't be possible but handle it anyway
+            SDL_assert(formatDesc);
+            return nil;
+        }
+
+        SDL_assert(planeIndex < MAX_VIDEO_PLANES);
+
+        NSUInteger planeWidth = planeIndex ? AV_CEIL_RSHIFT(frame->width, formatDesc->log2_chroma_w) : frame->width;
+        NSUInteger planeHeight = planeIndex ? AV_CEIL_RSHIFT(frame->height, formatDesc->log2_chroma_h) : frame->height;
+
+        // Recreate the texture if the plane size changes
+        if (m_SwMappingTextures[planeIndex] && (m_SwMappingTextures[planeIndex].width != planeWidth ||
+                                                m_SwMappingTextures[planeIndex].height != planeHeight)) {
+            [m_SwMappingTextures[planeIndex] release];
+            m_SwMappingTextures[planeIndex] = nil;
+        }
+
+        if (!m_SwMappingTextures[planeIndex]) {
+            MTLPixelFormat metalFormat;
+
+            switch (formatDesc->comp[planeIndex].step) {
+            case 1:
+                metalFormat = MTLPixelFormatR8Unorm;
+                break;
+            case 2:
+                metalFormat = MTLPixelFormatR16Unorm;
+                break;
+            default:
+                SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                             "Unhandled plane step: %d (plane: %d)",
+                             formatDesc->comp[planeIndex].step,
+                             planeIndex);
+                SDL_assert(false);
+                return nil;
+            }
+
+            auto texDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:metalFormat
+                                                                              width:planeWidth
+                                                                             height:planeHeight
+                                                                          mipmapped:NO];
+            texDesc.cpuCacheMode = MTLCPUCacheModeWriteCombined;
+            texDesc.storageMode = MTLStorageModeManaged;
+            texDesc.usage = MTLTextureUsageShaderRead;
+
+            m_SwMappingTextures[planeIndex] = [m_MetalLayer.device newTextureWithDescriptor:texDesc];
+            if (!m_SwMappingTextures[planeIndex]) {
+                SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                             "Failed to allocate software frame texture");
+                return nil;
+            }
+        }
+
+        [m_SwMappingTextures[planeIndex] replaceRegion:MTLRegionMake2D(0, 0, planeWidth, planeHeight)
+                                           mipmapLevel:0
+                                             withBytes:frame->data[planeIndex]
+                                           bytesPerRow:frame->linesize[planeIndex]];
+
+        return m_SwMappingTextures[planeIndex];
+    }
+
     // Caller frees frame after we return
     virtual void renderFrame(AVFrame* frame) override
     { @autoreleasepool {
-        CVPixelBufferRef pixBuf = reinterpret_cast<CVPixelBufferRef>(frame->data[3]);
-
         // Handle changes to the frame's colorspace from last time we rendered
         if (!updateColorSpaceForFrame(frame)) {
             // Trigger the main thread to recreate the decoder
@@ -387,43 +504,50 @@ public:
             return;
         }
 
-        // Create Metal textures for the planes of the CVPixelBuffer
-        std::array<CVMetalTextureRef, 2> textures;
-        for (size_t i = 0; i < textures.size(); i++) {
-            MTLPixelFormat fmt;
+        std::array<CVMetalTextureRef, MAX_VIDEO_PLANES> cvMetalTextures;
+        size_t planes = getFramePlaneCount(frame);
+        SDL_assert(planes <= MAX_VIDEO_PLANES);
 
-            switch (CVPixelBufferGetPixelFormatType(pixBuf)) {
-            case kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange:
-            case kCVPixelFormatType_444YpCbCr8BiPlanarVideoRange:
-            case kCVPixelFormatType_420YpCbCr8BiPlanarFullRange:
-            case kCVPixelFormatType_444YpCbCr8BiPlanarFullRange:
-                fmt = (i == 0) ? MTLPixelFormatR8Unorm : MTLPixelFormatRG8Unorm;
-                break;
+        if (frame->format == AV_PIX_FMT_VIDEOTOOLBOX) {
+            CVPixelBufferRef pixBuf = reinterpret_cast<CVPixelBufferRef>(frame->data[3]);
 
-            case kCVPixelFormatType_420YpCbCr10BiPlanarFullRange:
-            case kCVPixelFormatType_444YpCbCr10BiPlanarFullRange:
-            case kCVPixelFormatType_420YpCbCr10BiPlanarVideoRange:
-            case kCVPixelFormatType_444YpCbCr10BiPlanarVideoRange:
-                fmt = (i == 0) ? MTLPixelFormatR16Unorm : MTLPixelFormatRG16Unorm;
-                break;
+            // Create Metal textures for the planes of the CVPixelBuffer
+            for (size_t i = 0; i < planes; i++) {
+                MTLPixelFormat fmt;
 
-            default:
-                SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
-                             "Unknown pixel format: %x",
-                             CVPixelBufferGetPixelFormatType(pixBuf));
-                return;
-            }
+                switch (CVPixelBufferGetPixelFormatType(pixBuf)) {
+                case kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange:
+                case kCVPixelFormatType_444YpCbCr8BiPlanarVideoRange:
+                case kCVPixelFormatType_420YpCbCr8BiPlanarFullRange:
+                case kCVPixelFormatType_444YpCbCr8BiPlanarFullRange:
+                    fmt = (i == 0) ? MTLPixelFormatR8Unorm : MTLPixelFormatRG8Unorm;
+                    break;
 
-            CVReturn err = CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault, m_TextureCache, pixBuf, nullptr, fmt,
-                                                                     CVPixelBufferGetWidthOfPlane(pixBuf, i),
-                                                                     CVPixelBufferGetHeightOfPlane(pixBuf, i),
-                                                                     i,
-                                                                     &textures[i]);
-            if (err != kCVReturnSuccess) {
-                SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
-                             "CVMetalTextureCacheCreateTextureFromImage() failed: %d",
-                             err);
-                return;
+                case kCVPixelFormatType_420YpCbCr10BiPlanarFullRange:
+                case kCVPixelFormatType_444YpCbCr10BiPlanarFullRange:
+                case kCVPixelFormatType_420YpCbCr10BiPlanarVideoRange:
+                case kCVPixelFormatType_444YpCbCr10BiPlanarVideoRange:
+                    fmt = (i == 0) ? MTLPixelFormatR16Unorm : MTLPixelFormatRG16Unorm;
+                    break;
+
+                default:
+                    SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                                 "Unknown pixel format: %x",
+                                 CVPixelBufferGetPixelFormatType(pixBuf));
+                    return;
+                }
+
+                CVReturn err = CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault, m_TextureCache, pixBuf, nullptr, fmt,
+                                                                         CVPixelBufferGetWidthOfPlane(pixBuf, i),
+                                                                         CVPixelBufferGetHeightOfPlane(pixBuf, i),
+                                                                         i,
+                                                                         &cvMetalTextures[i]);
+                if (err != kCVReturnSuccess) {
+                    SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                                 "CVMetalTextureCacheCreateTextureFromImage() failed: %d",
+                                 err);
+                    return;
+                }
             }
         }
 
@@ -438,15 +562,22 @@ public:
 
         // Bind textures and buffers then draw the video region
         [renderEncoder setRenderPipelineState:m_VideoPipelineState];
-        for (size_t i = 0; i < textures.size(); i++) {
-            [renderEncoder setFragmentTexture:CVMetalTextureGetTexture(textures[i]) atIndex:i];
-        }
-        [commandBuffer addCompletedHandler:^(id<MTLCommandBuffer>) {
-            // Free textures after completion of rendering per CVMetalTextureCache requirements
-            for (const CVMetalTextureRef &tex : textures) {
-                CFRelease(tex);
+        if (frame->format == AV_PIX_FMT_VIDEOTOOLBOX) {
+            for (size_t i = 0; i < planes; i++) {
+                [renderEncoder setFragmentTexture:CVMetalTextureGetTexture(cvMetalTextures[i]) atIndex:i];
             }
-        }];
+            [commandBuffer addCompletedHandler:^(id<MTLCommandBuffer>) {
+                // Free textures after completion of rendering per CVMetalTextureCache requirements
+                for (size_t i = 0; i < planes; i++) {
+                    CFRelease(cvMetalTextures[i]);
+                }
+            }];
+        }
+        else {
+            for (size_t i = 0; i < planes; i++) {
+                [renderEncoder setFragmentTexture:mapPlaneForSoftwareFrame(frame, i) atIndex:i];
+            }
+        }
         [renderEncoder setFragmentBuffer:m_CscParamsBuffer offset:0 atIndex:0];
         [renderEncoder setVertexBuffer:m_VideoVertexBuffer offset:0 atIndex:0];
         [renderEncoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
@@ -542,7 +673,11 @@ public:
             }
         }
 
-        if (qgetenv("VT_FORCE_METAL") == "1") {
+        if (!m_HwAccel) {
+            // Metal software decoding is always available
+            return [MTLCreateSystemDefaultDevice() autorelease];
+        }
+        else if (qgetenv("VT_FORCE_METAL") == "1") {
             SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
                         "Using Metal renderer due to VT_FORCE_METAL=1 override.");
             return [MTLCreateSystemDefaultDevice() autorelease];
@@ -566,7 +701,7 @@ public:
             return false;
         }
 
-        if (!checkDecoderCapabilities(device, params)) {
+        if (m_HwAccel && !checkDecoderCapabilities(device, params)) {
             return false;
         }
 
@@ -690,10 +825,13 @@ public:
 
     virtual bool prepareDecoderContext(AVCodecContext* context, AVDictionary**) override
     {
-        context->hw_device_ctx = av_buffer_ref(m_HwContext);
+        if (m_HwAccel) {
+            context->hw_device_ctx = av_buffer_ref(m_HwContext);
+        }
 
         SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
-                    "Using VideoToolbox Metal renderer");
+                    "Using Metal renderer with %s decoding",
+                    m_HwAccel ? "hardware" : "software");
 
         return true;
     }
@@ -725,6 +863,38 @@ public:
         return RENDERER_ATTRIBUTE_HDR_SUPPORT;
     }
 
+    bool isPixelFormatSupported(int videoFormat, AVPixelFormat pixelFormat) override
+    {
+        if (m_HwAccel) {
+            return pixelFormat == AV_PIX_FMT_VIDEOTOOLBOX;
+        }
+        else {
+            if (pixelFormat == AV_PIX_FMT_VIDEOTOOLBOX) {
+                // VideoToolbox frames are always supported
+                return true;
+            }
+            else {
+                // Otherwise it's supported if we can map it (NOTE(review): verify 2-plane interleaved-chroma layouts map correctly)
+                const int expectedPixelDepth = (videoFormat & VIDEO_FORMAT_MASK_10BIT) ? 10 : 8;
+                const int expectedLog2ChromaW = (videoFormat & VIDEO_FORMAT_MASK_YUV444) ? 0 : 1;
+                const int expectedLog2ChromaH = (videoFormat & VIDEO_FORMAT_MASK_YUV444) ? 0 : 1;
+
+                const AVPixFmtDescriptor* formatDesc = av_pix_fmt_desc_get(pixelFormat);
+                if (!formatDesc) {
+                    // This shouldn't be possible but handle it anyway
+                    SDL_assert(formatDesc);
+                    return false;
+                }
+
+                int planes = av_pix_fmt_count_planes(pixelFormat);
+                return (planes == 2 || planes == 3) &&
+                       formatDesc->comp[0].depth == expectedPixelDepth &&
+                       formatDesc->log2_chroma_w == expectedLog2ChromaW &&
+                       formatDesc->log2_chroma_h == expectedLog2ChromaH;
+            }
+        }
+    }
+
     bool notifyWindowChanged(PWINDOW_STATE_CHANGE_INFO info) override
     {
         auto unhandledStateFlags = info->stateChangeFlags;
@@ -740,6 +910,7 @@ public:
     }
 
 private:
+    bool m_HwAccel;
     SDL_Window* m_Window;
     AVBufferRef* m_HwContext;
     CAMetalLayer* m_MetalLayer;
@@ -753,6 +924,7 @@ private:
     id<MTLLibrary> m_ShaderLibrary;
     id<MTLCommandQueue> m_CommandQueue;
     id<CAMetalDrawable> m_NextDrawable;
+    id<MTLTexture> m_SwMappingTextures[MAX_VIDEO_PLANES];
     SDL_MetalView m_MetalView;
     int m_LastColorSpace;
     bool m_LastFullRange;
@@ -765,6 +937,6 @@ private:
     int m_PendingPresentationCount;
 };
 
-IFFmpegRenderer* VTMetalRendererFactory::createRenderer() {
-    return new VTMetalRenderer();
+IFFmpegRenderer* VTMetalRendererFactory::createRenderer(bool hwAccel) {
+    return new VTMetalRenderer(hwAccel);
 }
diff --git a/app/streaming/video/ffmpeg.cpp b/app/streaming/video/ffmpeg.cpp
index 1fadeca1..6e1f8065 100644
--- a/app/streaming/video/ffmpeg.cpp
+++ b/app/streaming/video/ffmpeg.cpp
@@ -887,7 +887,7 @@ IFFmpegRenderer* FFmpegVideoDecoder::createHwAccelRenderer(const AVCodecHWConfig
 #ifdef Q_OS_DARWIN
         case AV_HWDEVICE_TYPE_VIDEOTOOLBOX:
             // Prefer the Metal renderer if hardware is compatible
-            return VTMetalRendererFactory::createRenderer();
+            return VTMetalRendererFactory::createRenderer(true);
 #endif
 #ifdef HAVE_LIBVA
         case AV_HWDEVICE_TYPE_VAAPI:
@@ -1158,6 +1158,13 @@ bool FFmpegVideoDecoder::tryInitializeRendererForUnknownDecoder(const AVCodec* d
     }
 #endif
 
+#ifdef Q_OS_DARWIN
+    if (tryInitializeRenderer(decoder, AV_PIX_FMT_NONE, params, nullptr, nullptr,
+                              []() -> IFFmpegRenderer* { return VTMetalRendererFactory::createRenderer(false); })) {
+        return true;
+    }
+#endif
+
     if (tryInitializeRenderer(decoder, AV_PIX_FMT_NONE, params, nullptr, nullptr,
                               []() -> IFFmpegRenderer* { return new SdlRenderer(); })) {
         return true;