From 57db20016a2edfaad540693cf57a7621bd5815f2 Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Thu, 23 Oct 2025 19:43:10 -0500 Subject: [PATCH] Rewrite frame pacing mechanism for VTMetalRenderer This should hopefully improve performance and avoid performance issues on Tahoe. --- .../video/ffmpeg-renderers/vt_metal.mm | 270 +++++++++++------- 1 file changed, 162 insertions(+), 108 deletions(-) diff --git a/app/streaming/video/ffmpeg-renderers/vt_metal.mm b/app/streaming/video/ffmpeg-renderers/vt_metal.mm index 8fcb9df8..38fd4f39 100644 --- a/app/streaming/video/ffmpeg-renderers/vt_metal.mm +++ b/app/streaming/video/ffmpeg-renderers/vt_metal.mm @@ -109,6 +109,14 @@ struct Vertex #define MAX_VIDEO_PLANES 3 +class VTMetalRenderer; + +@interface DisplayLinkDelegate : NSObject + +- (id)initWithRenderer:(VTMetalRenderer *)renderer; + +@end + class VTMetalRenderer : public VTBaseRenderer { public: @@ -118,6 +126,10 @@ public: m_Window(nullptr), m_HwContext(nullptr), m_MetalLayer(nullptr), + m_MetalDisplayLink(nullptr), + m_LatestUnrenderedFrame(nullptr), + m_FrameLock(SDL_CreateMutex()), + m_FrameReady(SDL_CreateCond()), m_TextureCache(nullptr), m_CscParamsBuffer(nullptr), m_VideoVertexBuffer(nullptr), @@ -127,7 +139,6 @@ public: m_OverlayPipelineState(nullptr), m_ShaderLibrary(nullptr), m_CommandQueue(nullptr), - m_NextDrawable(nullptr), m_SwMappingTextures{}, m_MetalView(nullptr), m_LastColorSpace(-1), @@ -135,22 +146,17 @@ public: m_LastFrameWidth(-1), m_LastFrameHeight(-1), m_LastDrawableWidth(-1), - m_LastDrawableHeight(-1), - m_PresentationMutex(SDL_CreateMutex()), - m_PresentationCond(SDL_CreateCond()), - m_PendingPresentationCount(0) + m_LastDrawableHeight(-1) { } virtual ~VTMetalRenderer() override { @autoreleasepool { - if (m_PresentationCond != nullptr) { - SDL_DestroyCond(m_PresentationCond); - } - - if (m_PresentationMutex != nullptr) { - SDL_DestroyMutex(m_PresentationMutex); - } + // Stop the display link and free associated state + stopDisplayLink(); + av_frame_free(&m_LatestUnrenderedFrame); + SDL_DestroyCond(m_FrameReady); + SDL_DestroyMutex(m_FrameLock); if (m_HwContext != nullptr) { av_buffer_unref(&m_HwContext); @@ -201,45 +207,6 @@ public: } }} - void discardNextDrawable() - { @autoreleasepool { - if (!m_NextDrawable) { - return; - } - - [m_NextDrawable release]; - m_NextDrawable = nullptr; - }} - - virtual void waitToRender() override - { @autoreleasepool { - if (!m_NextDrawable) { - // Wait for the next available drawable before latching the frame to render - m_NextDrawable = [[m_MetalLayer nextDrawable] retain]; - if (m_NextDrawable == nullptr) { - return; - } - - if (m_MetalLayer.displaySyncEnabled) { - // Pace ourselves by waiting if too many frames are pending presentation - SDL_LockMutex(m_PresentationMutex); - if (m_PendingPresentationCount > 2) { - if (SDL_CondWaitTimeout(m_PresentationCond, m_PresentationMutex, 100) == SDL_MUTEX_TIMEDOUT) { - SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, - "Presentation wait timed out after 100 ms"); - } - } - SDL_UnlockMutex(m_PresentationMutex); - } - } - }} - - virtual void cleanupRenderContext() override - { - // Free any unused drawable - discardNextDrawable(); - } - bool updateVideoRegionSizeForFrame(AVFrame* frame) { int drawableWidth, drawableHeight; @@ -330,8 +297,8 @@ public: CGColorSpaceRef newColorSpace; ParamBuffer paramBuffer; - // Free any unpresented drawable since we're changing pixel formats - discardNextDrawable(); + // Stop the display link before changing the Metal layer + stopDisplayLink(); switch (colorspace) { case COLORSPACE_REC_709: @@ -490,31 +457,8 @@ public: } // Caller frees frame after we return - virtual void renderFrame(AVFrame* frame) override + virtual void renderFrameIntoDrawable(AVFrame* frame, id drawable) { @autoreleasepool { - // Handle changes to the frame's colorspace from last time we rendered - if (!updateColorSpaceForFrame(frame)) { - // Trigger the main thread to recreate the decoder - SDL_Event event; - event.type = SDL_RENDER_DEVICE_RESET; - SDL_PushEvent(&event); - return; - } - - // Handle changes to the video size or drawable size - if (!updateVideoRegionSizeForFrame(frame)) { - // Trigger the main thread to recreate the decoder - SDL_Event event; - event.type = SDL_RENDER_DEVICE_RESET; - SDL_PushEvent(&event); - return; - } - - // Don't proceed with rendering if we don't have a drawable - if (m_NextDrawable == nullptr) { - return; - } - std::array cvMetalTextures; size_t planes = getFramePlaneCount(frame); SDL_assert(planes <= MAX_VIDEO_PLANES); @@ -564,7 +508,7 @@ public: // Prepare a render pass to render into the next drawable auto renderPassDescriptor = [MTLRenderPassDescriptor renderPassDescriptor]; - renderPassDescriptor.colorAttachments[0].texture = m_NextDrawable.texture; + renderPassDescriptor.colorAttachments[0].texture = drawable.texture; renderPassDescriptor.colorAttachments[0].loadAction = MTLLoadActionClear; renderPassDescriptor.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.0, 0.0, 0.0); renderPassDescriptor.colorAttachments[0].storeAction = MTLStoreActionStore; @@ -640,28 +584,65 @@ public: [renderEncoder endEncoding]; - if (m_MetalLayer.displaySyncEnabled) { - // Queue a completion callback on the drawable to pace our rendering - SDL_LockMutex(m_PresentationMutex); - m_PendingPresentationCount++; - SDL_UnlockMutex(m_PresentationMutex); - [m_NextDrawable addPresentedHandler:^(id) { - SDL_LockMutex(m_PresentationMutex); - m_PendingPresentationCount--; - SDL_CondSignal(m_PresentationCond); - SDL_UnlockMutex(m_PresentationMutex); - }]; - } - // Flip to the newly rendered buffer - [commandBuffer presentDrawable:m_NextDrawable]; + [commandBuffer presentDrawable:drawable]; [commandBuffer commit]; // Wait for the command buffer to complete and free our CVMetalTextureCache references [commandBuffer waitUntilCompleted]; + }} - [m_NextDrawable release]; - m_NextDrawable = nullptr; + // Caller frees frame after we return + virtual void renderFrame(AVFrame* frame) override + { @autoreleasepool { + // Handle changes to the frame's colorspace from last time we rendered + if (!updateColorSpaceForFrame(frame)) { + // Trigger the main thread to recreate the decoder + SDL_Event event; + event.type = SDL_RENDER_DEVICE_RESET; + SDL_PushEvent(&event); + return; + } + + // Handle changes to the video size or drawable size + if (!updateVideoRegionSizeForFrame(frame)) { + // Trigger the main thread to recreate the decoder + SDL_Event event; + event.type = SDL_RENDER_DEVICE_RESET; + SDL_PushEvent(&event); + return; + } + + // Start the display link if necessary + startDisplayLink(); + + if (hasDisplayLink()) { + // Move the buffers into a new AVFrame + AVFrame* newFrame = av_frame_alloc(); + av_frame_move_ref(newFrame, frame); + + // Replace any existing unrendered frame with this new one + // and signal the CAMetalDisplayLink callback + AVFrame* oldFrame = nullptr; + SDL_LockMutex(m_FrameLock); + if (m_LatestUnrenderedFrame != nullptr) { + oldFrame = m_LatestUnrenderedFrame; + } + m_LatestUnrenderedFrame = newFrame; + SDL_UnlockMutex(m_FrameLock); + SDL_CondSignal(m_FrameReady); + + av_frame_free(&oldFrame); + } + else { + // Render to the next drawable right now when CAMetalDisplayLink is not in use + id drawable = [m_MetalLayer nextDrawable]; + if (drawable == nullptr) { + return; + } + + renderFrameIntoDrawable(frame, drawable); + } }} id getMetalDevice() { @@ -706,6 +687,7 @@ public: int err; m_Window = params->window; + m_FrameRateRange = CAFrameRateRangeMake(params->frameRate, params->frameRate, params->frameRate); id device = getMetalDevice(); if (!device) { @@ -750,16 +732,6 @@ public: // Allow EDR content if we're streaming in a 10-bit format m_MetalLayer.wantsExtendedDynamicRangeContent = !!(params->videoFormat & VIDEO_FORMAT_MASK_10BIT); - // Ideally, we don't actually want triple buffering due to increased - // display latency, since our render time is very short. However, we - // *need* 3 drawables in order to hit the offloaded "direct" display - // path for our Metal layer. - // - // If we only use 2 drawables, we'll be stuck in the composited path - // (particularly for windowed mode) and our latency will actually be - // higher than opting for triple buffering. - m_MetalLayer.maximumDrawableCount = 3; - // Allow tearing if V-Sync is off (also requires direct display path) m_MetalLayer.displaySyncEnabled = params->enableVsync; @@ -853,6 +825,44 @@ public: return true; } + void startDisplayLink() + { + if (@available(macOS 14, *)) { + if (m_MetalDisplayLink != nullptr || !m_MetalLayer.displaySyncEnabled) { + return; + } + + m_MetalDisplayLink = [[CAMetalDisplayLink alloc] initWithMetalLayer:m_MetalLayer]; + m_MetalDisplayLink.preferredFrameLatency = 1.0f; + m_MetalDisplayLink.preferredFrameRateRange = m_FrameRateRange; + m_MetalDisplayLink.delegate = [[DisplayLinkDelegate alloc] initWithRenderer:this]; + [m_MetalDisplayLink addToRunLoop:[NSRunLoop mainRunLoop] forMode:NSRunLoopCommonModes]; + } + } + + void stopDisplayLink() + { + if (@available(macOS 14, *)) { + if (m_MetalDisplayLink == nullptr) { + return; + } + + [m_MetalDisplayLink invalidate]; + m_MetalDisplayLink = nullptr; + } + } + + bool hasDisplayLink() + { + if (@available(macOS 14, *)) { + if (m_MetalDisplayLink != nullptr) { + return true; + } + } + + return false; + } + virtual bool needsTestFrame() override { // We used to trust VT to tell us whether decode will work, but @@ -926,11 +936,43 @@ public: return unhandledStateFlags == 0; } + void renderLatestFrameOnDrawable(id drawable, CFTimeInterval targetTimestamp) + { + AVFrame* frame = nullptr; + + // Determine how long we can wait depending on how long our CAMetalDisplayLink + // says we have until the next frame needs to be rendered. We will wait up to + // half the per-frame interval for a new frame to become available. + int waitTimeMs = ((targetTimestamp - CACurrentMediaTime()) * 1000) / 2; + if (waitTimeMs < 0) { + return; + } + + // Wait for a new frame to be ready + SDL_LockMutex(m_FrameLock); + if (m_LatestUnrenderedFrame != nullptr || SDL_CondWaitTimeout(m_FrameReady, m_FrameLock, waitTimeMs) == 0) { + frame = m_LatestUnrenderedFrame; + m_LatestUnrenderedFrame = nullptr; + } + SDL_UnlockMutex(m_FrameLock); + + // Render a frame if we got one in time + if (frame != nullptr) { + renderFrameIntoDrawable(frame, drawable); + av_frame_free(&frame); + } + } + private: bool m_HwAccel; SDL_Window* m_Window; AVBufferRef* m_HwContext; CAMetalLayer* m_MetalLayer; + CAMetalDisplayLink* m_MetalDisplayLink API_AVAILABLE(macos(14.0)); + CAFrameRateRange m_FrameRateRange; + AVFrame* m_LatestUnrenderedFrame; + SDL_mutex* m_FrameLock; + SDL_cond* m_FrameReady; CVMetalTextureCacheRef m_TextureCache; id m_CscParamsBuffer; id m_VideoVertexBuffer; @@ -940,7 +982,6 @@ private: id m_OverlayPipelineState; id m_ShaderLibrary; id m_CommandQueue; - id m_NextDrawable; id m_SwMappingTextures[MAX_VIDEO_PLANES]; SDL_MetalView m_MetalView; int m_LastColorSpace; @@ -949,11 +990,24 @@ private: int m_LastFrameHeight; int m_LastDrawableWidth; int m_LastDrawableHeight; - SDL_mutex* m_PresentationMutex; - SDL_cond* m_PresentationCond; - int m_PendingPresentationCount; }; +@implementation DisplayLinkDelegate { + VTMetalRenderer* _renderer; +} + +- (id)initWithRenderer:(VTMetalRenderer *)renderer { + _renderer = renderer; + return self; +} + +- (void)metalDisplayLink:(CAMetalDisplayLink *)link + needsUpdate:(CAMetalDisplayLinkUpdate *)update API_AVAILABLE(macos(14.0)) { + _renderer->renderLatestFrameOnDrawable(update.drawable, update.targetTimestamp); +} + +@end + IFFmpegRenderer* VTMetalRendererFactory::createRenderer(bool hwAccel) { return new VTMetalRenderer(hwAccel); }