Rewrite frame pacing mechanism for VTMetalRenderer

This should hopefully improve performance and avoid performance issues on Tahoe.
This commit is contained in:
Cameron Gutman
2025-10-23 19:43:10 -05:00
parent bdb6d02dac
commit 57db20016a

View File

@@ -109,6 +109,14 @@ struct Vertex
#define MAX_VIDEO_PLANES 3
class VTMetalRenderer;
@interface DisplayLinkDelegate : NSObject <CAMetalDisplayLinkDelegate>
- (id)initWithRenderer:(VTMetalRenderer *)renderer;
@end
class VTMetalRenderer : public VTBaseRenderer
{
public:
@@ -118,6 +126,10 @@ public:
m_Window(nullptr),
m_HwContext(nullptr),
m_MetalLayer(nullptr),
m_MetalDisplayLink(nullptr),
m_LatestUnrenderedFrame(nullptr),
m_FrameLock(SDL_CreateMutex()),
m_FrameReady(SDL_CreateCond()),
m_TextureCache(nullptr),
m_CscParamsBuffer(nullptr),
m_VideoVertexBuffer(nullptr),
@@ -127,7 +139,6 @@ public:
m_OverlayPipelineState(nullptr),
m_ShaderLibrary(nullptr),
m_CommandQueue(nullptr),
m_NextDrawable(nullptr),
m_SwMappingTextures{},
m_MetalView(nullptr),
m_LastColorSpace(-1),
@@ -135,22 +146,17 @@ public:
m_LastFrameWidth(-1),
m_LastFrameHeight(-1),
m_LastDrawableWidth(-1),
m_LastDrawableHeight(-1),
m_PresentationMutex(SDL_CreateMutex()),
m_PresentationCond(SDL_CreateCond()),
m_PendingPresentationCount(0)
m_LastDrawableHeight(-1)
{
}
virtual ~VTMetalRenderer() override
{ @autoreleasepool {
if (m_PresentationCond != nullptr) {
SDL_DestroyCond(m_PresentationCond);
}
if (m_PresentationMutex != nullptr) {
SDL_DestroyMutex(m_PresentationMutex);
}
// Stop the display link and free associated state
stopDisplayLink();
av_frame_free(&m_LatestUnrenderedFrame);
SDL_DestroyCond(m_FrameReady);
SDL_DestroyMutex(m_FrameLock);
if (m_HwContext != nullptr) {
av_buffer_unref(&m_HwContext);
@@ -201,45 +207,6 @@ public:
}
}}
void discardNextDrawable()
{ @autoreleasepool {
if (!m_NextDrawable) {
return;
}
[m_NextDrawable release];
m_NextDrawable = nullptr;
}}
virtual void waitToRender() override
{ @autoreleasepool {
if (!m_NextDrawable) {
// Wait for the next available drawable before latching the frame to render
m_NextDrawable = [[m_MetalLayer nextDrawable] retain];
if (m_NextDrawable == nullptr) {
return;
}
if (m_MetalLayer.displaySyncEnabled) {
// Pace ourselves by waiting if too many frames are pending presentation
SDL_LockMutex(m_PresentationMutex);
if (m_PendingPresentationCount > 2) {
if (SDL_CondWaitTimeout(m_PresentationCond, m_PresentationMutex, 100) == SDL_MUTEX_TIMEDOUT) {
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Presentation wait timed out after 100 ms");
}
}
SDL_UnlockMutex(m_PresentationMutex);
}
}
}}
virtual void cleanupRenderContext() override
{
// Free any unused drawable
discardNextDrawable();
}
bool updateVideoRegionSizeForFrame(AVFrame* frame)
{
int drawableWidth, drawableHeight;
@@ -330,8 +297,8 @@ public:
CGColorSpaceRef newColorSpace;
ParamBuffer paramBuffer;
// Free any unpresented drawable since we're changing pixel formats
discardNextDrawable();
// Stop the display link before changing the Metal layer
stopDisplayLink();
switch (colorspace) {
case COLORSPACE_REC_709:
@@ -490,31 +457,8 @@ public:
}
// Caller frees frame after we return
virtual void renderFrame(AVFrame* frame) override
virtual void renderFrameIntoDrawable(AVFrame* frame, id<CAMetalDrawable> drawable)
{ @autoreleasepool {
// Handle changes to the frame's colorspace from last time we rendered
if (!updateColorSpaceForFrame(frame)) {
// Trigger the main thread to recreate the decoder
SDL_Event event;
event.type = SDL_RENDER_DEVICE_RESET;
SDL_PushEvent(&event);
return;
}
// Handle changes to the video size or drawable size
if (!updateVideoRegionSizeForFrame(frame)) {
// Trigger the main thread to recreate the decoder
SDL_Event event;
event.type = SDL_RENDER_DEVICE_RESET;
SDL_PushEvent(&event);
return;
}
// Don't proceed with rendering if we don't have a drawable
if (m_NextDrawable == nullptr) {
return;
}
std::array<CVMetalTextureRef, MAX_VIDEO_PLANES> cvMetalTextures;
size_t planes = getFramePlaneCount(frame);
SDL_assert(planes <= MAX_VIDEO_PLANES);
@@ -564,7 +508,7 @@ public:
// Prepare a render pass to render into the next drawable
auto renderPassDescriptor = [MTLRenderPassDescriptor renderPassDescriptor];
renderPassDescriptor.colorAttachments[0].texture = m_NextDrawable.texture;
renderPassDescriptor.colorAttachments[0].texture = drawable.texture;
renderPassDescriptor.colorAttachments[0].loadAction = MTLLoadActionClear;
renderPassDescriptor.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.0, 0.0, 0.0);
renderPassDescriptor.colorAttachments[0].storeAction = MTLStoreActionStore;
@@ -640,28 +584,65 @@ public:
[renderEncoder endEncoding];
if (m_MetalLayer.displaySyncEnabled) {
// Queue a completion callback on the drawable to pace our rendering
SDL_LockMutex(m_PresentationMutex);
m_PendingPresentationCount++;
SDL_UnlockMutex(m_PresentationMutex);
[m_NextDrawable addPresentedHandler:^(id<MTLDrawable>) {
SDL_LockMutex(m_PresentationMutex);
m_PendingPresentationCount--;
SDL_CondSignal(m_PresentationCond);
SDL_UnlockMutex(m_PresentationMutex);
}];
}
// Flip to the newly rendered buffer
[commandBuffer presentDrawable:m_NextDrawable];
[commandBuffer presentDrawable:drawable];
[commandBuffer commit];
// Wait for the command buffer to complete and free our CVMetalTextureCache references
[commandBuffer waitUntilCompleted];
}}
[m_NextDrawable release];
m_NextDrawable = nullptr;
// Caller frees frame after we return
virtual void renderFrame(AVFrame* frame) override
{ @autoreleasepool {
// Handle changes to the frame's colorspace from last time we rendered
if (!updateColorSpaceForFrame(frame)) {
// Trigger the main thread to recreate the decoder
SDL_Event event;
event.type = SDL_RENDER_DEVICE_RESET;
SDL_PushEvent(&event);
return;
}
// Handle changes to the video size or drawable size
if (!updateVideoRegionSizeForFrame(frame)) {
// Trigger the main thread to recreate the decoder
SDL_Event event;
event.type = SDL_RENDER_DEVICE_RESET;
SDL_PushEvent(&event);
return;
}
// Start the display link if necessary
startDisplayLink();
if (hasDisplayLink()) {
// Move the buffers into a new AVFrame
AVFrame* newFrame = av_frame_alloc();
av_frame_move_ref(newFrame, frame);
// Replace any existing unrendered frame with this new one
// and signal the CAMetalDisplayLink callback
AVFrame* oldFrame = nullptr;
SDL_LockMutex(m_FrameLock);
if (m_LatestUnrenderedFrame != nullptr) {
oldFrame = m_LatestUnrenderedFrame;
}
m_LatestUnrenderedFrame = newFrame;
SDL_UnlockMutex(m_FrameLock);
SDL_CondSignal(m_FrameReady);
av_frame_free(&oldFrame);
}
else {
// Render to the next drawable right now when CAMetalDisplayLink is not in use
id<CAMetalDrawable> drawable = [m_MetalLayer nextDrawable];
if (drawable == nullptr) {
return;
}
renderFrameIntoDrawable(frame, drawable);
}
}}
id<MTLDevice> getMetalDevice() {
@@ -706,6 +687,7 @@ public:
int err;
m_Window = params->window;
m_FrameRateRange = CAFrameRateRangeMake(params->frameRate, params->frameRate, params->frameRate);
id<MTLDevice> device = getMetalDevice();
if (!device) {
@@ -750,16 +732,6 @@ public:
// Allow EDR content if we're streaming in a 10-bit format
m_MetalLayer.wantsExtendedDynamicRangeContent = !!(params->videoFormat & VIDEO_FORMAT_MASK_10BIT);
// Ideally, we don't actually want triple buffering due to increased
// display latency, since our render time is very short. However, we
// *need* 3 drawables in order to hit the offloaded "direct" display
// path for our Metal layer.
//
// If we only use 2 drawables, we'll be stuck in the composited path
// (particularly for windowed mode) and our latency will actually be
// higher than opting for triple buffering.
m_MetalLayer.maximumDrawableCount = 3;
// Allow tearing if V-Sync is off (also requires direct display path)
m_MetalLayer.displaySyncEnabled = params->enableVsync;
@@ -853,6 +825,44 @@ public:
return true;
}
void startDisplayLink()
{
if (@available(macOS 14, *)) {
if (m_MetalDisplayLink != nullptr || !m_MetalLayer.displaySyncEnabled) {
return;
}
m_MetalDisplayLink = [[CAMetalDisplayLink alloc] initWithMetalLayer:m_MetalLayer];
m_MetalDisplayLink.preferredFrameLatency = 1.0f;
m_MetalDisplayLink.preferredFrameRateRange = m_FrameRateRange;
m_MetalDisplayLink.delegate = [[DisplayLinkDelegate alloc] initWithRenderer:this];
[m_MetalDisplayLink addToRunLoop:[NSRunLoop mainRunLoop] forMode:NSRunLoopCommonModes];
}
}
void stopDisplayLink()
{
if (@available(macOS 14, *)) {
if (m_MetalDisplayLink == nullptr) {
return;
}
[m_MetalDisplayLink invalidate];
m_MetalDisplayLink = nullptr;
}
}
bool hasDisplayLink()
{
if (@available(macOS 14, *)) {
if (m_MetalDisplayLink != nullptr) {
return true;
}
}
return false;
}
virtual bool needsTestFrame() override
{
// We used to trust VT to tell us whether decode will work, but
@@ -926,11 +936,43 @@ public:
return unhandledStateFlags == 0;
}
void renderLatestFrameOnDrawable(id<CAMetalDrawable> drawable, CFTimeInterval targetTimestamp)
{
AVFrame* frame = nullptr;
// Determine how long we can wait depending on how long our CAMetalDisplayLink
// says we have until the next frame needs to be rendered. We will wait up to
// half the per-frame interval for a new frame to become available.
int waitTimeMs = ((targetTimestamp - CACurrentMediaTime()) * 1000) / 2;
if (waitTimeMs < 0) {
return;
}
// Wait for a new frame to be ready
SDL_LockMutex(m_FrameLock);
if (m_LatestUnrenderedFrame != nullptr || SDL_CondWaitTimeout(m_FrameReady, m_FrameLock, waitTimeMs) == 0) {
frame = m_LatestUnrenderedFrame;
m_LatestUnrenderedFrame = nullptr;
}
SDL_UnlockMutex(m_FrameLock);
// Render a frame if we got one in time
if (frame != nullptr) {
renderFrameIntoDrawable(frame, drawable);
av_frame_free(&frame);
}
}
private:
bool m_HwAccel;
SDL_Window* m_Window;
AVBufferRef* m_HwContext;
CAMetalLayer* m_MetalLayer;
CAMetalDisplayLink* m_MetalDisplayLink API_AVAILABLE(macos(14.0));
CAFrameRateRange m_FrameRateRange;
AVFrame* m_LatestUnrenderedFrame;
SDL_mutex* m_FrameLock;
SDL_cond* m_FrameReady;
CVMetalTextureCacheRef m_TextureCache;
id<MTLBuffer> m_CscParamsBuffer;
id<MTLBuffer> m_VideoVertexBuffer;
@@ -940,7 +982,6 @@ private:
id<MTLRenderPipelineState> m_OverlayPipelineState;
id<MTLLibrary> m_ShaderLibrary;
id<MTLCommandQueue> m_CommandQueue;
id<CAMetalDrawable> m_NextDrawable;
id<MTLTexture> m_SwMappingTextures[MAX_VIDEO_PLANES];
SDL_MetalView m_MetalView;
int m_LastColorSpace;
@@ -949,11 +990,24 @@ private:
int m_LastFrameHeight;
int m_LastDrawableWidth;
int m_LastDrawableHeight;
SDL_mutex* m_PresentationMutex;
SDL_cond* m_PresentationCond;
int m_PendingPresentationCount;
};
@implementation DisplayLinkDelegate {
VTMetalRenderer* _renderer;
}
- (id)initWithRenderer:(VTMetalRenderer *)renderer {
_renderer = renderer;
return self;
}
- (void)metalDisplayLink:(CAMetalDisplayLink *)link
needsUpdate:(CAMetalDisplayLinkUpdate *)update API_AVAILABLE(macos(14.0)) {
_renderer->renderLatestFrameOnDrawable(update.drawable, update.targetTimestamp);
}
@end
IFFmpegRenderer* VTMetalRendererFactory::createRenderer(bool hwAccel) {
return new VTMetalRenderer(hwAccel);
}