mirror of
https://github.com/moonlight-stream/moonlight-qt.git
synced 2026-02-16 02:30:52 +00:00
Rewrite frame pacing mechanism for VTMetalRenderer
This should hopefully improve performance and avoid performance issues on Tahoe.
This commit is contained in:
@@ -109,6 +109,14 @@ struct Vertex
|
||||
|
||||
#define MAX_VIDEO_PLANES 3
|
||||
|
||||
class VTMetalRenderer;
|
||||
|
||||
@interface DisplayLinkDelegate : NSObject <CAMetalDisplayLinkDelegate>
|
||||
|
||||
- (id)initWithRenderer:(VTMetalRenderer *)renderer;
|
||||
|
||||
@end
|
||||
|
||||
class VTMetalRenderer : public VTBaseRenderer
|
||||
{
|
||||
public:
|
||||
@@ -118,6 +126,10 @@ public:
|
||||
m_Window(nullptr),
|
||||
m_HwContext(nullptr),
|
||||
m_MetalLayer(nullptr),
|
||||
m_MetalDisplayLink(nullptr),
|
||||
m_LatestUnrenderedFrame(nullptr),
|
||||
m_FrameLock(SDL_CreateMutex()),
|
||||
m_FrameReady(SDL_CreateCond()),
|
||||
m_TextureCache(nullptr),
|
||||
m_CscParamsBuffer(nullptr),
|
||||
m_VideoVertexBuffer(nullptr),
|
||||
@@ -127,7 +139,6 @@ public:
|
||||
m_OverlayPipelineState(nullptr),
|
||||
m_ShaderLibrary(nullptr),
|
||||
m_CommandQueue(nullptr),
|
||||
m_NextDrawable(nullptr),
|
||||
m_SwMappingTextures{},
|
||||
m_MetalView(nullptr),
|
||||
m_LastColorSpace(-1),
|
||||
@@ -135,22 +146,17 @@ public:
|
||||
m_LastFrameWidth(-1),
|
||||
m_LastFrameHeight(-1),
|
||||
m_LastDrawableWidth(-1),
|
||||
m_LastDrawableHeight(-1),
|
||||
m_PresentationMutex(SDL_CreateMutex()),
|
||||
m_PresentationCond(SDL_CreateCond()),
|
||||
m_PendingPresentationCount(0)
|
||||
m_LastDrawableHeight(-1)
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~VTMetalRenderer() override
|
||||
{ @autoreleasepool {
|
||||
if (m_PresentationCond != nullptr) {
|
||||
SDL_DestroyCond(m_PresentationCond);
|
||||
}
|
||||
|
||||
if (m_PresentationMutex != nullptr) {
|
||||
SDL_DestroyMutex(m_PresentationMutex);
|
||||
}
|
||||
// Stop the display link and free associated state
|
||||
stopDisplayLink();
|
||||
av_frame_free(&m_LatestUnrenderedFrame);
|
||||
SDL_DestroyCond(m_FrameReady);
|
||||
SDL_DestroyMutex(m_FrameLock);
|
||||
|
||||
if (m_HwContext != nullptr) {
|
||||
av_buffer_unref(&m_HwContext);
|
||||
@@ -201,45 +207,6 @@ public:
|
||||
}
|
||||
}}
|
||||
|
||||
void discardNextDrawable()
|
||||
{ @autoreleasepool {
|
||||
if (!m_NextDrawable) {
|
||||
return;
|
||||
}
|
||||
|
||||
[m_NextDrawable release];
|
||||
m_NextDrawable = nullptr;
|
||||
}}
|
||||
|
||||
virtual void waitToRender() override
|
||||
{ @autoreleasepool {
|
||||
if (!m_NextDrawable) {
|
||||
// Wait for the next available drawable before latching the frame to render
|
||||
m_NextDrawable = [[m_MetalLayer nextDrawable] retain];
|
||||
if (m_NextDrawable == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_MetalLayer.displaySyncEnabled) {
|
||||
// Pace ourselves by waiting if too many frames are pending presentation
|
||||
SDL_LockMutex(m_PresentationMutex);
|
||||
if (m_PendingPresentationCount > 2) {
|
||||
if (SDL_CondWaitTimeout(m_PresentationCond, m_PresentationMutex, 100) == SDL_MUTEX_TIMEDOUT) {
|
||||
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
|
||||
"Presentation wait timed out after 100 ms");
|
||||
}
|
||||
}
|
||||
SDL_UnlockMutex(m_PresentationMutex);
|
||||
}
|
||||
}
|
||||
}}
|
||||
|
||||
virtual void cleanupRenderContext() override
|
||||
{
|
||||
// Free any unused drawable
|
||||
discardNextDrawable();
|
||||
}
|
||||
|
||||
bool updateVideoRegionSizeForFrame(AVFrame* frame)
|
||||
{
|
||||
int drawableWidth, drawableHeight;
|
||||
@@ -330,8 +297,8 @@ public:
|
||||
CGColorSpaceRef newColorSpace;
|
||||
ParamBuffer paramBuffer;
|
||||
|
||||
// Free any unpresented drawable since we're changing pixel formats
|
||||
discardNextDrawable();
|
||||
// Stop the display link before changing the Metal layer
|
||||
stopDisplayLink();
|
||||
|
||||
switch (colorspace) {
|
||||
case COLORSPACE_REC_709:
|
||||
@@ -490,31 +457,8 @@ public:
|
||||
}
|
||||
|
||||
// Caller frees frame after we return
|
||||
virtual void renderFrame(AVFrame* frame) override
|
||||
virtual void renderFrameIntoDrawable(AVFrame* frame, id<CAMetalDrawable> drawable)
|
||||
{ @autoreleasepool {
|
||||
// Handle changes to the frame's colorspace from last time we rendered
|
||||
if (!updateColorSpaceForFrame(frame)) {
|
||||
// Trigger the main thread to recreate the decoder
|
||||
SDL_Event event;
|
||||
event.type = SDL_RENDER_DEVICE_RESET;
|
||||
SDL_PushEvent(&event);
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle changes to the video size or drawable size
|
||||
if (!updateVideoRegionSizeForFrame(frame)) {
|
||||
// Trigger the main thread to recreate the decoder
|
||||
SDL_Event event;
|
||||
event.type = SDL_RENDER_DEVICE_RESET;
|
||||
SDL_PushEvent(&event);
|
||||
return;
|
||||
}
|
||||
|
||||
// Don't proceed with rendering if we don't have a drawable
|
||||
if (m_NextDrawable == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::array<CVMetalTextureRef, MAX_VIDEO_PLANES> cvMetalTextures;
|
||||
size_t planes = getFramePlaneCount(frame);
|
||||
SDL_assert(planes <= MAX_VIDEO_PLANES);
|
||||
@@ -564,7 +508,7 @@ public:
|
||||
|
||||
// Prepare a render pass to render into the next drawable
|
||||
auto renderPassDescriptor = [MTLRenderPassDescriptor renderPassDescriptor];
|
||||
renderPassDescriptor.colorAttachments[0].texture = m_NextDrawable.texture;
|
||||
renderPassDescriptor.colorAttachments[0].texture = drawable.texture;
|
||||
renderPassDescriptor.colorAttachments[0].loadAction = MTLLoadActionClear;
|
||||
renderPassDescriptor.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.0, 0.0, 0.0);
|
||||
renderPassDescriptor.colorAttachments[0].storeAction = MTLStoreActionStore;
|
||||
@@ -640,28 +584,65 @@ public:
|
||||
|
||||
[renderEncoder endEncoding];
|
||||
|
||||
if (m_MetalLayer.displaySyncEnabled) {
|
||||
// Queue a completion callback on the drawable to pace our rendering
|
||||
SDL_LockMutex(m_PresentationMutex);
|
||||
m_PendingPresentationCount++;
|
||||
SDL_UnlockMutex(m_PresentationMutex);
|
||||
[m_NextDrawable addPresentedHandler:^(id<MTLDrawable>) {
|
||||
SDL_LockMutex(m_PresentationMutex);
|
||||
m_PendingPresentationCount--;
|
||||
SDL_CondSignal(m_PresentationCond);
|
||||
SDL_UnlockMutex(m_PresentationMutex);
|
||||
}];
|
||||
}
|
||||
|
||||
// Flip to the newly rendered buffer
|
||||
[commandBuffer presentDrawable:m_NextDrawable];
|
||||
[commandBuffer presentDrawable:drawable];
|
||||
[commandBuffer commit];
|
||||
|
||||
// Wait for the command buffer to complete and free our CVMetalTextureCache references
|
||||
[commandBuffer waitUntilCompleted];
|
||||
}}
|
||||
|
||||
[m_NextDrawable release];
|
||||
m_NextDrawable = nullptr;
|
||||
// Caller frees frame after we return
|
||||
virtual void renderFrame(AVFrame* frame) override
|
||||
{ @autoreleasepool {
|
||||
// Handle changes to the frame's colorspace from last time we rendered
|
||||
if (!updateColorSpaceForFrame(frame)) {
|
||||
// Trigger the main thread to recreate the decoder
|
||||
SDL_Event event;
|
||||
event.type = SDL_RENDER_DEVICE_RESET;
|
||||
SDL_PushEvent(&event);
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle changes to the video size or drawable size
|
||||
if (!updateVideoRegionSizeForFrame(frame)) {
|
||||
// Trigger the main thread to recreate the decoder
|
||||
SDL_Event event;
|
||||
event.type = SDL_RENDER_DEVICE_RESET;
|
||||
SDL_PushEvent(&event);
|
||||
return;
|
||||
}
|
||||
|
||||
// Start the display link if necessary
|
||||
startDisplayLink();
|
||||
|
||||
if (hasDisplayLink()) {
|
||||
// Move the buffers into a new AVFrame
|
||||
AVFrame* newFrame = av_frame_alloc();
|
||||
av_frame_move_ref(newFrame, frame);
|
||||
|
||||
// Replace any existing unrendered frame with this new one
|
||||
// and signal the CAMetalDisplayLink callback
|
||||
AVFrame* oldFrame = nullptr;
|
||||
SDL_LockMutex(m_FrameLock);
|
||||
if (m_LatestUnrenderedFrame != nullptr) {
|
||||
oldFrame = m_LatestUnrenderedFrame;
|
||||
}
|
||||
m_LatestUnrenderedFrame = newFrame;
|
||||
SDL_UnlockMutex(m_FrameLock);
|
||||
SDL_CondSignal(m_FrameReady);
|
||||
|
||||
av_frame_free(&oldFrame);
|
||||
}
|
||||
else {
|
||||
// Render to the next drawable right now when CAMetalDisplayLink is not in use
|
||||
id<CAMetalDrawable> drawable = [m_MetalLayer nextDrawable];
|
||||
if (drawable == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
renderFrameIntoDrawable(frame, drawable);
|
||||
}
|
||||
}}
|
||||
|
||||
id<MTLDevice> getMetalDevice() {
|
||||
@@ -706,6 +687,7 @@ public:
|
||||
int err;
|
||||
|
||||
m_Window = params->window;
|
||||
m_FrameRateRange = CAFrameRateRangeMake(params->frameRate, params->frameRate, params->frameRate);
|
||||
|
||||
id<MTLDevice> device = getMetalDevice();
|
||||
if (!device) {
|
||||
@@ -750,16 +732,6 @@ public:
|
||||
// Allow EDR content if we're streaming in a 10-bit format
|
||||
m_MetalLayer.wantsExtendedDynamicRangeContent = !!(params->videoFormat & VIDEO_FORMAT_MASK_10BIT);
|
||||
|
||||
// Ideally, we don't actually want triple buffering due to increased
|
||||
// display latency, since our render time is very short. However, we
|
||||
// *need* 3 drawables in order to hit the offloaded "direct" display
|
||||
// path for our Metal layer.
|
||||
//
|
||||
// If we only use 2 drawables, we'll be stuck in the composited path
|
||||
// (particularly for windowed mode) and our latency will actually be
|
||||
// higher than opting for triple buffering.
|
||||
m_MetalLayer.maximumDrawableCount = 3;
|
||||
|
||||
// Allow tearing if V-Sync is off (also requires direct display path)
|
||||
m_MetalLayer.displaySyncEnabled = params->enableVsync;
|
||||
|
||||
@@ -853,6 +825,44 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
void startDisplayLink()
|
||||
{
|
||||
if (@available(macOS 14, *)) {
|
||||
if (m_MetalDisplayLink != nullptr || !m_MetalLayer.displaySyncEnabled) {
|
||||
return;
|
||||
}
|
||||
|
||||
m_MetalDisplayLink = [[CAMetalDisplayLink alloc] initWithMetalLayer:m_MetalLayer];
|
||||
m_MetalDisplayLink.preferredFrameLatency = 1.0f;
|
||||
m_MetalDisplayLink.preferredFrameRateRange = m_FrameRateRange;
|
||||
m_MetalDisplayLink.delegate = [[DisplayLinkDelegate alloc] initWithRenderer:this];
|
||||
[m_MetalDisplayLink addToRunLoop:[NSRunLoop mainRunLoop] forMode:NSRunLoopCommonModes];
|
||||
}
|
||||
}
|
||||
|
||||
void stopDisplayLink()
|
||||
{
|
||||
if (@available(macOS 14, *)) {
|
||||
if (m_MetalDisplayLink == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
[m_MetalDisplayLink invalidate];
|
||||
m_MetalDisplayLink = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
bool hasDisplayLink()
|
||||
{
|
||||
if (@available(macOS 14, *)) {
|
||||
if (m_MetalDisplayLink != nullptr) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual bool needsTestFrame() override
|
||||
{
|
||||
// We used to trust VT to tell us whether decode will work, but
|
||||
@@ -926,11 +936,43 @@ public:
|
||||
return unhandledStateFlags == 0;
|
||||
}
|
||||
|
||||
void renderLatestFrameOnDrawable(id<CAMetalDrawable> drawable, CFTimeInterval targetTimestamp)
|
||||
{
|
||||
AVFrame* frame = nullptr;
|
||||
|
||||
// Determine how long we can wait depending on how long our CAMetalDisplayLink
|
||||
// says we have until the next frame needs to be rendered. We will wait up to
|
||||
// half the per-frame interval for a new frame to become available.
|
||||
int waitTimeMs = ((targetTimestamp - CACurrentMediaTime()) * 1000) / 2;
|
||||
if (waitTimeMs < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Wait for a new frame to be ready
|
||||
SDL_LockMutex(m_FrameLock);
|
||||
if (m_LatestUnrenderedFrame != nullptr || SDL_CondWaitTimeout(m_FrameReady, m_FrameLock, waitTimeMs) == 0) {
|
||||
frame = m_LatestUnrenderedFrame;
|
||||
m_LatestUnrenderedFrame = nullptr;
|
||||
}
|
||||
SDL_UnlockMutex(m_FrameLock);
|
||||
|
||||
// Render a frame if we got one in time
|
||||
if (frame != nullptr) {
|
||||
renderFrameIntoDrawable(frame, drawable);
|
||||
av_frame_free(&frame);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
bool m_HwAccel;
|
||||
SDL_Window* m_Window;
|
||||
AVBufferRef* m_HwContext;
|
||||
CAMetalLayer* m_MetalLayer;
|
||||
CAMetalDisplayLink* m_MetalDisplayLink API_AVAILABLE(macos(14.0));
|
||||
CAFrameRateRange m_FrameRateRange;
|
||||
AVFrame* m_LatestUnrenderedFrame;
|
||||
SDL_mutex* m_FrameLock;
|
||||
SDL_cond* m_FrameReady;
|
||||
CVMetalTextureCacheRef m_TextureCache;
|
||||
id<MTLBuffer> m_CscParamsBuffer;
|
||||
id<MTLBuffer> m_VideoVertexBuffer;
|
||||
@@ -940,7 +982,6 @@ private:
|
||||
id<MTLRenderPipelineState> m_OverlayPipelineState;
|
||||
id<MTLLibrary> m_ShaderLibrary;
|
||||
id<MTLCommandQueue> m_CommandQueue;
|
||||
id<CAMetalDrawable> m_NextDrawable;
|
||||
id<MTLTexture> m_SwMappingTextures[MAX_VIDEO_PLANES];
|
||||
SDL_MetalView m_MetalView;
|
||||
int m_LastColorSpace;
|
||||
@@ -949,11 +990,24 @@ private:
|
||||
int m_LastFrameHeight;
|
||||
int m_LastDrawableWidth;
|
||||
int m_LastDrawableHeight;
|
||||
SDL_mutex* m_PresentationMutex;
|
||||
SDL_cond* m_PresentationCond;
|
||||
int m_PendingPresentationCount;
|
||||
};
|
||||
|
||||
@implementation DisplayLinkDelegate {
|
||||
VTMetalRenderer* _renderer;
|
||||
}
|
||||
|
||||
- (id)initWithRenderer:(VTMetalRenderer *)renderer {
|
||||
_renderer = renderer;
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)metalDisplayLink:(CAMetalDisplayLink *)link
|
||||
needsUpdate:(CAMetalDisplayLinkUpdate *)update API_AVAILABLE(macos(14.0)) {
|
||||
_renderer->renderLatestFrameOnDrawable(update.drawable, update.targetTimestamp);
|
||||
}
|
||||
|
||||
@end
|
||||
|
||||
IFFmpegRenderer* VTMetalRendererFactory::createRenderer(bool hwAccel) {
|
||||
return new VTMetalRenderer(hwAccel);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user