diff --git a/app/streaming/session.cpp b/app/streaming/session.cpp index 0c507d82..cffa8f0e 100644 --- a/app/streaming/session.cpp +++ b/app/streaming/session.cpp @@ -357,6 +357,13 @@ bool Session::populateDecoderProperties(SDL_Window* window) } m_VideoCallbacks.capabilities = decoder->getDecoderCapabilities(); + if (m_VideoCallbacks.capabilities & CAPABILITY_PULL_RENDERER) { + // It is an error to pass a push callback when in pull mode + m_VideoCallbacks.submitDecodeUnit = nullptr; + } + else { + m_VideoCallbacks.submitDecodeUnit = drSubmitDecodeUnit; + } m_StreamConfig.colorSpace = decoder->getDecoderColorspace(); @@ -439,7 +446,6 @@ bool Session::initialize() LiInitializeVideoCallbacks(&m_VideoCallbacks); m_VideoCallbacks.setup = drSetup; - m_VideoCallbacks.submitDecodeUnit = drSubmitDecodeUnit; LiInitializeStreamConfiguration(&m_StreamConfig); m_StreamConfig.width = m_Preferences->width; diff --git a/app/streaming/video/ffmpeg.cpp b/app/streaming/video/ffmpeg.cpp index 62dbac14..ac9ccd3a 100644 --- a/app/streaming/video/ffmpeg.cpp +++ b/app/streaming/video/ffmpeg.cpp @@ -46,8 +46,6 @@ #define FAILED_DECODES_RESET_THRESHOLD 20 -#define MAX_RECV_FRAME_RETRIES 100 - bool FFmpegVideoDecoder::isHardwareAccelerated() { return m_HwDecodeCfg != nullptr || @@ -72,6 +70,9 @@ int FFmpegVideoDecoder::getDecoderCapabilities() capabilities |= CAPABILITY_SLICES_PER_FRAME(slices); } + // We use our own decoder thread with the "pull" model + capabilities |= CAPABILITY_PULL_RENDERER; + return capabilities; } @@ -137,12 +138,14 @@ FFmpegVideoDecoder::FFmpegVideoDecoder(bool testOnly) m_VideoFormat(0), m_NeedsSpsFixup(false), m_TestOnly(testOnly), - m_CanRetryReceiveFrame(RRF_UNKNOWN) + m_DecoderThread(nullptr) { SDL_zero(m_ActiveWndVideoStats); SDL_zero(m_LastWndVideoStats); SDL_zero(m_GlobalVideoStats); + SDL_AtomicSet(&m_DecoderThreadShouldQuit, 0); + // Use linear filtering when renderer scaling is required SDL_SetHint(SDL_HINT_RENDER_SCALE_QUALITY, "1"); } @@ -167,6 +170,19 
@@ IFFmpegRenderer* FFmpegVideoDecoder::getBackendRenderer() void FFmpegVideoDecoder::reset() { + // Terminate the decoder thread before doing anything else. + // It might be touching things we're about to free. + if (m_DecoderThread != nullptr) { + SDL_AtomicSet(&m_DecoderThreadShouldQuit, 1); + LiWakeWaitForVideoFrame(); + SDL_WaitThread(m_DecoderThread, NULL); + SDL_AtomicSet(&m_DecoderThreadShouldQuit, 0); + m_DecoderThread = nullptr; + } + + m_FramesIn = m_FramesOut = 0; + m_FrameInfoQueue.clear(); + delete m_Pacer; m_Pacer = nullptr; @@ -410,6 +426,13 @@ bool FFmpegVideoDecoder::completeInitialization(const AVCodec* decoder, PDECODER Session::get()->getOverlayManager().setOverlayRenderer(m_FrontendRenderer); } + m_DecoderThread = SDL_CreateThread(FFmpegVideoDecoder::decoderThreadProcThunk, "FFDecoder", (void*)this); + if (m_DecoderThread == nullptr) { + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, + "Failed to create decoder thread: %s", SDL_GetError()); + return false; + } + return true; } @@ -924,14 +947,128 @@ void FFmpegVideoDecoder::writeBuffer(PLENTRY entry, int& offset) } } +int FFmpegVideoDecoder::decoderThreadProcThunk(void *context) +{ + ((FFmpegVideoDecoder*)context)->decoderThreadProc(); + return 0; +} + +void FFmpegVideoDecoder::decoderThreadProc() +{ + while (!SDL_AtomicGet(&m_DecoderThreadShouldQuit)) { + if (m_FramesIn == m_FramesOut) { + VIDEO_FRAME_HANDLE handle; + PDECODE_UNIT du; + + // Waiting for input. All output frames have been received. + // Block until we receive a new frame from the host. + if (!LiWaitForNextVideoFrame(&handle, &du)) { + // This might be a signal from the main thread to exit + continue; + } + + LiCompleteVideoFrame(handle, submitDecodeUnit(du)); + } + + if (m_FramesIn != m_FramesOut) { + SDL_assert(m_FramesIn > m_FramesOut); + + // We have output frames to receive. Let's poll until we get one, + // and submit new input data if/when we get it. 
+ AVFrame* frame = av_frame_alloc(); + if (!frame) { + // Failed to allocate a frame. The input is already submitted, + // so just log it and retry the allocation on the next pass + SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, + "Failed to allocate frame"); + continue; + } + + int err; + do { + err = avcodec_receive_frame(m_VideoDecoderCtx, frame); + if (err == 0) { + SDL_assert(m_FrameInfoQueue.size() == m_FramesIn - m_FramesOut); + m_FramesOut++; + + // Reset failed decodes count if we reached this far + m_ConsecutiveFailedDecodes = 0; + + // Restore default log level after a successful decode + av_log_set_level(AV_LOG_INFO); + + // Capture a frame timestamp to measure pacing delay + frame->pkt_dts = SDL_GetTicks(); + + if (!m_FrameInfoQueue.isEmpty()) { + FrameInfoTuple infoTuple = m_FrameInfoQueue.dequeue(); + + // Count time in avcodec_send_packet() and avcodec_receive_frame() + // as time spent decoding. Also count time spent in the decode unit + // queue because that's directly caused by decoder latency. + m_ActiveWndVideoStats.totalDecodeTime += LiGetMillis() - infoTuple.enqueueTimeMs; + + // Store the presentation time + frame->pts = infoTuple.presentationTimeMs; + } + + m_ActiveWndVideoStats.decodedFrames++; + + // Queue the frame for rendering (or render now if pacer is disabled) + m_Pacer->submitFrame(frame); + } + else if (err == AVERROR(EAGAIN)) { + VIDEO_FRAME_HANDLE handle; + PDECODE_UNIT du; + + // No output data, so let's try to submit more input data, + // while we're waiting for this frame to come back. + if (LiPollNextVideoFrame(&handle, &du)) { + // FIXME: Handle EAGAIN on avcodec_send_packet() properly? + LiCompleteVideoFrame(handle, submitDecodeUnit(du)); + } + else { + // No output data or input data. Let's wait a little bit. 
+ SDL_Delay(2); + } + } + else { + char errorstring[512]; + av_strerror(err, errorstring, sizeof(errorstring)); + SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, + "avcodec_receive_frame() failed: %s", errorstring); + + if (++m_ConsecutiveFailedDecodes == FAILED_DECODES_RESET_THRESHOLD) { + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, + "Resetting decoder due to consistent failure"); + + SDL_Event event; + event.type = SDL_RENDER_DEVICE_RESET; + SDL_PushEvent(&event); + } + } + } while (err == AVERROR(EAGAIN) && !SDL_AtomicGet(&m_DecoderThreadShouldQuit)); + + if (err != 0) { + // Free the frame if we failed to submit it + av_frame_free(&frame); + } + } + } +} + int FFmpegVideoDecoder::submitDecodeUnit(PDECODE_UNIT du) { PLENTRY entry = du->bufferList; int err; - bool submittedFrame = false; SDL_assert(!m_TestOnly); + // Bail immediately if we need an IDR frame to continue + if (Session::get()->getAndClearPendingIdrFrameStatus()) { + return DR_NEED_IDR; + } + if (!m_LastFrameNumber) { m_ActiveWndVideoStats.measurementStartTimestamp = SDL_GetTicks(); m_LastFrameNumber = du->frameNumber; @@ -1016,115 +1153,10 @@ int FFmpegVideoDecoder::submitDecodeUnit(PDECODE_UNIT du) return DR_NEED_IDR; } + m_FrameInfoQueue.enqueue({.enqueueTimeMs = du->enqueueTimeMs, + .presentationTimeMs = du->presentationTimeMs}); + m_FramesIn++; - - // We can receive 0 or more frames after submission of a packet, so we must - // try to read until we get EAGAIN to ensure the queue is drained. Some decoders - // run asynchronously and may return several frames at once after warming up. - // - // Some decoders support calling avcodec_receive_frame() without queuing a packet. - // This allows us to drain excess frames and reduce latency. We will try to learn - // if a decoder is capable of this by trying it and seeing if it works. 
- int receiveRetries = 0; - do { - AVFrame* frame = av_frame_alloc(); - if (!frame) { - // Failed to allocate a frame but we did submit, - // so we can return DR_OK - SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, - "Failed to allocate frame"); - return DR_OK; - } - - err = avcodec_receive_frame(m_VideoDecoderCtx, frame); - if (err == 0) { - m_FramesOut++; - - // Reset failed decodes count if we reached this far - m_ConsecutiveFailedDecodes = 0; - - // Restore default log level after a successful decode - av_log_set_level(AV_LOG_INFO); - - // Store the presentation time - // FIXME: This is wrong when reading a batch of frames - frame->pts = du->presentationTimeMs; - - // Capture a frame timestamp to measuring pacing delay - frame->pkt_dts = SDL_GetTicks(); - - // Count time in avcodec_send_packet() and avcodec_receive_frame() - // as time spent decoding. Also count time spent in the decode unit - // queue because that's directly caused by decoder latency. - m_ActiveWndVideoStats.totalDecodeTime += LiGetMillis() - du->enqueueTimeMs; - - // Also count the frame-to-frame delay if the decoder is delaying frames - // until a subsequent frame is submitted. - m_ActiveWndVideoStats.totalDecodeTime += (m_FramesIn - m_FramesOut) * (1000 / m_StreamFps); - - m_ActiveWndVideoStats.decodedFrames++; - - // Queue the frame for rendering (or render now if pacer is disabled) - m_Pacer->submitFrame(frame); - submittedFrame = true; - - // Once we receive a frame, transition out of the Unknown state by determining - // whether a receive frame retry was needed to get this frame. We assume that - // any asynchronous decoder is going to return EAGAIN on the first frame. - if (m_CanRetryReceiveFrame == RRF_UNKNOWN) { - SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION, "RRF mode: %s", receiveRetries > 0 ? "YES" : "NO"); - m_CanRetryReceiveFrame = receiveRetries > 0 ? 
RRF_YES : RRF_NO; - } - } - else { - av_frame_free(&frame); - - if (err == AVERROR(EAGAIN)) { - // Break out if we can't retry or we successfully received a frame. We only want - // to retry if we haven't gotten a frame back for this input packet. - if (m_CanRetryReceiveFrame == RRF_NO || receiveRetries == MAX_RECV_FRAME_RETRIES || submittedFrame) { - // We will transition from Unknown -> No if we exceed the maximum retries. - if (m_CanRetryReceiveFrame == RRF_UNKNOWN) { - SDL_assert(!submittedFrame); - SDL_assert(receiveRetries == MAX_RECV_FRAME_RETRIES); - - SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, "RRF mode: NO (timeout)"); - m_CanRetryReceiveFrame = RRF_NO; - } - - break; - } - else { - SDL_Delay(1); - } - } - } - } while (err == 0 || (err == AVERROR(EAGAIN) && receiveRetries++ < MAX_RECV_FRAME_RETRIES)); - - // Treat this as a failed decode if we don't manage to receive a single frame or - // if we finish the loop above with an error other than EAGAIN. Note that some - // limited number of "failed decodes" with EAGAIN are expected for asynchronous - // decoders, so we only reset the decoder if we get a ton of them in a row. - if (!submittedFrame || err != AVERROR(EAGAIN)) { - // Don't spam EAGAIN log messages for asynchronous decoders as long as - // they produce a frame for at least every other submitted packet. 
- if (m_ConsecutiveFailedDecodes > 0 || err != AVERROR(EAGAIN)) { - char errorstring[512]; - av_strerror(err, errorstring, sizeof(errorstring)); - SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, - "avcodec_receive_frame() failed: %s", errorstring); - } - - if (++m_ConsecutiveFailedDecodes == FAILED_DECODES_RESET_THRESHOLD) { - SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, - "Resetting decoder due to consistent failure"); - - SDL_Event event; - event.type = SDL_RENDER_DEVICE_RESET; - SDL_PushEvent(&event); - } - } - return DR_OK; } diff --git a/app/streaming/video/ffmpeg.h b/app/streaming/video/ffmpeg.h index a51990f3..89f19d4d 100644 --- a/app/streaming/video/ffmpeg.h +++ b/app/streaming/video/ffmpeg.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "decoder.h" #include "ffmpeg-renderers/renderer.h" @@ -54,6 +55,10 @@ private: enum AVPixelFormat ffGetFormat(AVCodecContext* context, const enum AVPixelFormat* pixFmts); + void decoderThreadProc(); + + static int decoderThreadProcThunk(void* context); + AVPacket* m_Pkt; AVCodecContext* m_VideoDecoderCtx; QByteArray m_DecodeBuffer; @@ -74,11 +79,14 @@ private: int m_VideoFormat; bool m_NeedsSpsFixup; bool m_TestOnly; - enum { - RRF_UNKNOWN, - RRF_YES, - RRF_NO - } m_CanRetryReceiveFrame; + SDL_Thread* m_DecoderThread; + SDL_atomic_t m_DecoderThreadShouldQuit; + + typedef struct { + uint64_t enqueueTimeMs; + uint32_t presentationTimeMs; + } FrameInfoTuple; + QQueue m_FrameInfoQueue; static const uint8_t k_H264TestFrame[]; static const uint8_t k_HEVCMainTestFrame[]; diff --git a/moonlight-common-c/moonlight-common-c b/moonlight-common-c/moonlight-common-c index 6001ece0..921b59c4 160000 --- a/moonlight-common-c/moonlight-common-c +++ b/moonlight-common-c/moonlight-common-c @@ -1 +1 @@ -Subproject commit 6001ece0b8bfcea6a8122a3e56f48f515e1aaaf5 +Subproject commit 921b59c467ac78ef2a770ad1bb3e61fbef51bd09