Convert all time-based stats to be microsecond-based, improving accuracy of very fast events. Add bitrate average/peak.

This commit is contained in:
Andy Grundman
2024-09-13 13:56:10 -04:00
committed by Cameron Gutman
parent 1160f40655
commit 66b96e75af
11 changed files with 278 additions and 52 deletions

1
.gitignore vendored
View File

@@ -2,6 +2,7 @@
**/.vs/
.vscode/
build/
config.tests/*/.qmake.stash
config.tests/*/Makefile

View File

@@ -202,6 +202,7 @@ SOURCES += \
streaming/audio/renderers/sdlaud.cpp \
gui/computermodel.cpp \
gui/appmodel.cpp \
streaming/bandwidth.cpp \
streaming/streamutils.cpp \
backend/autoupdatechecker.cpp \
path.cpp \
@@ -239,6 +240,7 @@ HEADERS += \
gui/computermodel.h \
gui/appmodel.h \
streaming/video/decoder.h \
streaming/bandwidth.h \
streaming/streamutils.h \
backend/autoupdatechecker.h \
path.h \

104
app/streaming/bandwidth.cpp Normal file
View File

@@ -0,0 +1,104 @@
#include "bandwidth.h"
using namespace std::chrono;
// Constructs the tracker: the window is split into a fixed-size ring of
// buckets, each covering bucketIntervalMs of wall-clock time.
//
// BUG FIX: the const member bucketIntervalMs is initialized in the init
// list, so the 0 -> 250 fallback must happen there too. The previous guard
// ran in the body and only mutated the (already-consumed) parameter,
// leaving the member 0 and causing division/modulo by zero later in
// getBucketMbps() and updateBucket(). Note that on uint32_t, "<= 0" can
// only mean "== 0".
BandwidthTracker::BandwidthTracker(uint32_t windowSeconds, uint32_t bucketIntervalMs)
    : windowSeconds(seconds(windowSeconds)),
      bucketIntervalMs(bucketIntervalMs != 0 ? bucketIntervalMs : 250)
{
    // Use the sanitized member value, not the raw parameter
    bucketCount = (windowSeconds * 1000) / this->bucketIntervalMs;
    if (bucketCount == 0) {
        // Guard against windowSeconds == 0 (or a window shorter than one
        // bucket interval) so "% bucketCount" ring indexing never divides
        // by zero.
        bucketCount = 1;
    }
    buckets.resize(bucketCount);
}
// Thread-safe entry point: attribute 'bytes' to the bucket covering the
// current instant. Callers submit counts as data arrives; historical
// buckets cannot be back-filled.
void BandwidthTracker::AddBytes(size_t bytes) {
    std::lock_guard<std::mutex> guard(mtx);
    const auto timestamp = steady_clock::now();
    updateBucket(bytes, timestamp);
}
// We don't want to average the entire window used for peak,
// so average only the newest 25% of complete buckets
double BandwidthTracker::GetAverageMbps() {
    std::lock_guard<std::mutex> lock(mtx);
    auto now = steady_clock::now();
    // Locate the in-progress bucket using the same epoch-aligned indexing
    // scheme as updateBucket(), so "current" matches where AddBytes() writes.
    auto ms = duration_cast<milliseconds>(now.time_since_epoch());
    int currentIndex = (ms.count() / bucketIntervalMs) % bucketCount;
    int maxBuckets = bucketCount / 4;
    size_t totalBytes = 0;
    steady_clock::time_point oldestBucket = now;
    // Sum bytes from 25% most recent buckets as long as they are completed
    for (int i = 0; i < maxBuckets; i++) {
        // Walk backwards through the ring; "+ bucketCount" keeps the index
        // non-negative before the modulo wrap.
        int idx = (currentIndex - i + bucketCount) % bucketCount;
        const Bucket &bucket = buckets[idx];
        // Count only buckets that are still inside the window AND whose full
        // interval has elapsed — this skips the partially-filled current
        // bucket, which would otherwise drag the average down.
        if (isValid(bucket, now) && (now - bucket.start >= milliseconds(bucketIntervalMs))) {
            totalBytes += bucket.bytes;
            if (bucket.start < oldestBucket) {
                oldestBucket = bucket.start;
            }
        }
    }
    // Average over the real elapsed span from the oldest counted bucket to
    // now. If no completed bucket qualified, oldestBucket is still 'now' and
    // we report 0 rather than divide by zero.
    double elapsed = duration<double>(now - oldestBucket).count();
    if (elapsed <= 0.0) {
        return 0.0;
    }
    // bytes -> bits (*8), bits -> megabits (/1e6), then per second
    return totalBytes * 8.0 / 1000000.0 / elapsed;
}
double BandwidthTracker::GetPeakMbps() {
std::lock_guard<std::mutex> lock(mtx);
auto now = steady_clock::now();
double peak = 0.0;
for (const auto& bucket : buckets) {
if (isValid(bucket, now)) {
double throughput = getBucketMbps(bucket);
if (throughput > peak) {
peak = throughput;
}
}
}
return peak;
}
// Length of the sliding window in whole seconds, for display alongside
// GetPeakMbps(). windowSeconds is const, so no lock is needed.
unsigned int BandwidthTracker::GetWindowSeconds() {
    return static_cast<unsigned int>(windowSeconds.count());
}
/// private methods
// Throughput of a single bucket in Mbps: bytes -> megabits, divided by the
// bucket's fixed interval expressed in seconds.
inline double BandwidthTracker::getBucketMbps(const Bucket &bucket) const {
    const double megabits = bucket.bytes * 8.0 / 1000000.0;
    const double intervalSecs = bucketIntervalMs / 1000.0;
    return megabits / intervalSecs;
}
// A bucket's data counts toward stats only while its age is within the
// tracking window; older slots are stale leftovers awaiting reuse.
inline bool BandwidthTracker::isValid(const Bucket &bucket, steady_clock::time_point now) const {
    const auto age = now - bucket.start;
    return age <= windowSeconds;
}
void BandwidthTracker::updateBucket(size_t bytes, steady_clock::time_point now) {
auto ms = duration_cast<milliseconds>(now.time_since_epoch()).count();
int bucketIndex = (ms / bucketIntervalMs) % bucketCount;
auto aligned_ms = ms - (ms % bucketIntervalMs);
auto bucketStart = steady_clock::time_point(milliseconds(aligned_ms));
Bucket &bucket = buckets[bucketIndex];
if (now - bucket.start > windowSeconds) {
bucket.bytes = 0;
bucket.start = bucketStart;
}
if (bucket.start != bucketStart) {
bucket.bytes = bytes;
bucket.start = bucketStart;
}
else {
bucket.bytes += bytes;
}
}

107
app/streaming/bandwidth.h Normal file
View File

@@ -0,0 +1,107 @@
#pragma once
#include <chrono>
#include <cstdint>
#include <mutex>
#include <vector>
/**
 * @brief The BandwidthTracker class tracks network bandwidth usage over a sliding time window (default 10s).
 *
 * Byte totals are grouped into fixed time interval buckets (default 250ms). This provides an element of smoothing
 * and deals well with spikes.
 *
 * GetAverageMbps() is calculated using the 25% most recent fully completed buckets. The default settings will
 * return an average of the past 2.5s of data, ignoring the in-progress bucket. Using only 2.5s of data for the
 * average provides a good balance of reactivity and smoothness.
 *
 * GetPeakMbps() returns the peak bandwidth seen during any one bucket interval across the full time window.
 *
 * All public methods are thread safe. A typical use case is calling AddBytes() in a data processing thread while
 * calling GetAverageMbps() from a UI thread.
 *
 * Example usage:
 * @code
 * BandwidthTracker bwTracker(10, 250); // 10-second window, 250ms buckets
 * bwTracker.AddBytes(64000);
 * bwTracker.AddBytes(128000);
 * double avg = bwTracker.GetAverageMbps();
 * double peak = bwTracker.GetPeakMbps();
 * @endcode
 */
class BandwidthTracker
{
public:
    /**
     * @brief Constructs a new BandwidthTracker object.
     *
     * Initializes the tracker to maintain statistics over a sliding window of time.
     * The window is divided into buckets of fixed duration (bucketIntervalMs).
     *
     * @param windowSeconds The duration of the tracking window in seconds. Default is 10 seconds.
     * @param bucketIntervalMs The interval for each bucket in milliseconds. Default is 250 ms.
     */
    BandwidthTracker(std::uint32_t windowSeconds = 10, std::uint32_t bucketIntervalMs = 250);
    /**
     * @brief Record bytes that were received or sent.
     *
     * This method updates the corresponding bucket for the current time interval with the new data.
     * It is thread-safe. Bytes are associated with the bucket for "now" and it is not possible to
     * submit data for old buckets. This function should be called as needed at the time the bytes
     * were received. Callers should not maintain their own byte totals.
     *
     * @param bytes The number of bytes to add.
     */
    void AddBytes(size_t bytes);
    /**
     * @brief Computes and returns the average bandwidth in Mbps for the most recent 25% of buckets.
     *
     * Only fully completed buckets are considered; the in-progress bucket is excluded.
     *
     * @return The average bandwidth in megabits per second.
     */
    double GetAverageMbps();
    /**
     * @brief Returns the peak bandwidth in Mbps observed in any single bucket within the current window.
     *
     * This value represents the highest instantaneous throughput measured over one bucket interval.
     *
     * @return The peak bandwidth in megabits per second.
     */
    double GetPeakMbps();
    /**
     * @brief Retrieves the duration of the tracking window.
     *
     * This is useful when displaying the length of the peak, e.g.
     * @code
     * printf("Bitrate: %.1f Mbps  Peak (%us): %.1f\n",
     *        bw.GetAverageMbps(), bw.GetWindowSeconds(), bw.GetPeakMbps());
     * @endcode
     *
     * @return The window duration in seconds.
     */
    unsigned int GetWindowSeconds();
private:
    /**
     * @brief A structure representing a single time bucket.
     *
     * Each bucket holds the start time of the interval and the total number of bytes recorded during that interval.
     */
    struct Bucket {
        std::chrono::steady_clock::time_point start{}; ///< The start time of the bucket's interval.
        size_t bytes = 0;                              ///< The number of bytes recorded in this bucket.
    };
    const std::chrono::seconds windowSeconds; ///< The duration of the tracking window.
    const int bucketIntervalMs;               ///< The duration of each bucket (in milliseconds).
    std::uint32_t bucketCount;                ///< The total number of buckets covering the window.
    std::vector<Bucket> buckets;              ///< Fixed-size circular buffer of buckets.
    std::mutex mtx;                           ///< Mutex to ensure thread-safe access.
    // Returns true while (now - bucket.start) is within the window.
    bool isValid(const Bucket &bucket, std::chrono::steady_clock::time_point now) const;
    // Folds bytes into the ring slot covering 'now'; caller must hold mtx.
    void updateBucket(size_t bytes, std::chrono::steady_clock::time_point now);
    // Throughput of one bucket over its fixed interval, in Mbps.
    double getBucketMbps(const Bucket &bucket) const;
};

View File

@@ -220,7 +220,7 @@ bool StreamUtils::getNativeDesktopMode(int displayIndex, SDL_DisplayMode* mode,
CGDirectDisplayID displayIds[MAX_DISPLAYS];
uint32_t displayCount = 0;
CGGetActiveDisplayList(MAX_DISPLAYS, displayIds, &displayCount);
if (displayIndex >= displayCount) {
if (displayIndex >= (int)displayCount) {
return false;
}

View File

@@ -15,21 +15,22 @@ typedef struct _VIDEO_STATS {
uint32_t totalFrames;
uint32_t networkDroppedFrames;
uint32_t pacerDroppedFrames;
uint16_t minHostProcessingLatency;
uint16_t maxHostProcessingLatency;
uint32_t totalHostProcessingLatency;
uint32_t framesWithHostProcessingLatency;
uint32_t totalReassemblyTime;
uint32_t totalDecodeTime;
uint32_t totalPacerTime;
uint32_t totalRenderTime;
uint32_t lastRtt;
uint32_t lastRttVariance;
float totalFps;
float receivedFps;
float decodedFps;
float renderedFps;
uint32_t measurementStartTimestamp;
uint16_t minHostProcessingLatency; // low-res from RTP
uint16_t maxHostProcessingLatency; // low-res from RTP
uint32_t totalHostProcessingLatency; // low-res from RTP
uint32_t framesWithHostProcessingLatency; // low-res from RTP
uint64_t totalReassemblyTimeUs; // high-res (1us)
uint64_t totalDecodeTimeUs; // high-res (1us)
uint64_t totalPacerTimeUs; // high-res (1us)
uint64_t totalRenderTimeUs; // high-res (1us)
uint32_t lastRtt; // low-res from enet (1ms)
uint32_t lastRttVariance; // low-res from enet (1ms)
double totalFps; // high-res
double receivedFps; // high-res
double decodedFps; // high-res
double renderedFps; // high-res
double videoMegabitsPerSec; // current video bitrate in Mbps, not including FEC overhead
uint64_t measurementStartUs; // microseconds
} VIDEO_STATS, *PVIDEO_STATS;
typedef struct _DECODER_PARAMETERS {

View File

@@ -333,14 +333,14 @@ void Pacer::signalVsync()
void Pacer::renderFrame(AVFrame* frame)
{
// Count time spent in Pacer's queues
Uint32 beforeRender = SDL_GetTicks();
m_VideoStats->totalPacerTime += beforeRender - frame->pkt_dts;
uint64_t beforeRender = LiGetMicroseconds();
m_VideoStats->totalPacerTimeUs += (beforeRender - (uint64_t)frame->pkt_dts);
// Render it
m_VsyncRenderer->renderFrame(frame);
Uint32 afterRender = SDL_GetTicks();
uint64_t afterRender = LiGetMicroseconds();
m_VideoStats->totalRenderTime += afterRender - beforeRender;
m_VideoStats->totalRenderTimeUs += (afterRender - beforeRender);
m_VideoStats->renderedFrames++;
av_frame_free(&frame);

View File

@@ -227,6 +227,7 @@ FFmpegVideoDecoder::FFmpegVideoDecoder(bool testOnly)
m_FrontendRenderer(nullptr),
m_ConsecutiveFailedDecodes(0),
m_Pacer(nullptr),
m_BwTracker(10, 250),
m_FramesIn(0),
m_FramesOut(0),
m_LastFrameNumber(0),
@@ -520,6 +521,8 @@ bool FFmpegVideoDecoder::completeInitialization(const AVCodec* decoder, enum AVP
m_VideoDecoderCtx->width = params->width;
m_VideoDecoderCtx->height = params->height;
m_VideoDecoderCtx->get_format = ffGetFormat;
m_VideoDecoderCtx->pkt_timebase.num = 1;
m_VideoDecoderCtx->pkt_timebase.den = 90000;
// For non-hwaccel decoders, set the pix_fmt to hint to the decoder which
// format should be used. This is necessary for certain decoders like the
@@ -713,10 +716,10 @@ void FFmpegVideoDecoder::addVideoStats(VIDEO_STATS& src, VIDEO_STATS& dst)
dst.totalFrames += src.totalFrames;
dst.networkDroppedFrames += src.networkDroppedFrames;
dst.pacerDroppedFrames += src.pacerDroppedFrames;
dst.totalReassemblyTime += src.totalReassemblyTime;
dst.totalDecodeTime += src.totalDecodeTime;
dst.totalPacerTime += src.totalPacerTime;
dst.totalRenderTime += src.totalRenderTime;
dst.totalReassemblyTimeUs += src.totalReassemblyTimeUs;
dst.totalDecodeTimeUs += src.totalDecodeTimeUs;
dst.totalPacerTimeUs += src.totalPacerTimeUs;
dst.totalRenderTimeUs += src.totalRenderTimeUs;
if (dst.minHostProcessingLatency == 0) {
dst.minHostProcessingLatency = src.minHostProcessingLatency;
@@ -738,20 +741,19 @@ void FFmpegVideoDecoder::addVideoStats(VIDEO_STATS& src, VIDEO_STATS& dst)
SDL_assert(dst.lastRtt > 0);
}
Uint32 now = SDL_GetTicks();
// Initialize the measurement start point if this is the first video stat window
if (!dst.measurementStartTimestamp) {
dst.measurementStartTimestamp = src.measurementStartTimestamp;
if (!dst.measurementStartUs) {
dst.measurementStartUs = src.measurementStartUs;
}
// The following code assumes the global measure was already started first
SDL_assert(dst.measurementStartTimestamp <= src.measurementStartTimestamp);
SDL_assert(dst.measurementStartUs <= src.measurementStartUs);
dst.totalFps = (float)dst.totalFrames / ((float)(now - dst.measurementStartTimestamp) / 1000);
dst.receivedFps = (float)dst.receivedFrames / ((float)(now - dst.measurementStartTimestamp) / 1000);
dst.decodedFps = (float)dst.decodedFrames / ((float)(now - dst.measurementStartTimestamp) / 1000);
dst.renderedFps = (float)dst.renderedFrames / ((float)(now - dst.measurementStartTimestamp) / 1000);
double timeDiffSecs = (double)(LiGetMicroseconds() - dst.measurementStartUs) / 1000000.0;
dst.totalFps = (double)dst.totalFrames / timeDiffSecs;
dst.receivedFps = (double)dst.receivedFrames / timeDiffSecs;
dst.decodedFps = (double)dst.decodedFrames / timeDiffSecs;
dst.renderedFps = (double)dst.renderedFrames / timeDiffSecs;
}
void FFmpegVideoDecoder::stringifyVideoStats(VIDEO_STATS& stats, char* output, int length)
@@ -833,13 +835,20 @@ void FFmpegVideoDecoder::stringifyVideoStats(VIDEO_STATS& stats, char* output, i
if (stats.receivedFps > 0) {
if (m_VideoDecoderCtx != nullptr) {
double avgVideoMbps = m_BwTracker.GetAverageMbps();
double peakVideoMbps = m_BwTracker.GetPeakMbps();
ret = snprintf(&output[offset],
length - offset,
"Video stream: %dx%d %.2f FPS (Codec: %s)\n",
"Video stream: %dx%d %.2f FPS (Codec: %s)\n"
"Bitrate: %.1f Mbps, Peak (%us): %.1f\n",
m_VideoDecoderCtx->width,
m_VideoDecoderCtx->height,
stats.totalFps,
codecString);
codecString,
avgVideoMbps,
m_BwTracker.GetWindowSeconds(),
peakVideoMbps);
if (ret < 0 || ret >= length - offset) {
SDL_assert(false);
return;
@@ -900,9 +909,9 @@ void FFmpegVideoDecoder::stringifyVideoStats(VIDEO_STATS& stats, char* output, i
(float)stats.networkDroppedFrames / stats.totalFrames * 100,
(float)stats.pacerDroppedFrames / stats.decodedFrames * 100,
rttString,
(float)stats.totalDecodeTime / stats.decodedFrames,
(float)stats.totalPacerTime / stats.renderedFrames,
(float)stats.totalRenderTime / stats.renderedFrames);
(double)(stats.totalDecodeTimeUs / 1000.0) / stats.decodedFrames,
(double)(stats.totalPacerTimeUs / 1000.0) / stats.renderedFrames,
(double)(stats.totalRenderTimeUs / 1000.0) / stats.renderedFrames);
if (ret < 0 || ret >= length - offset) {
SDL_assert(false);
return;
@@ -919,10 +928,8 @@ void FFmpegVideoDecoder::logVideoStats(VIDEO_STATS& stats, const char* title)
stringifyVideoStats(stats, videoStatsStr, sizeof(videoStatsStr));
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"%s", title);
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"----------------------------------------------------------\n%s",
videoStatsStr);
"\n%s\n------------------\n%s",
title, videoStatsStr);
}
}
@@ -1744,7 +1751,7 @@ void FFmpegVideoDecoder::decoderThreadProc()
av_log_set_level(AV_LOG_INFO);
// Capture a frame timestamp to measuring pacing delay
frame->pkt_dts = SDL_GetTicks();
frame->pkt_dts = LiGetMicroseconds();
if (!m_FrameInfoQueue.isEmpty()) {
// Data buffers in the DU are not valid here!
@@ -1753,10 +1760,10 @@ void FFmpegVideoDecoder::decoderThreadProc()
// Count time in avcodec_send_packet() and avcodec_receive_frame()
// as time spent decoding. Also count time spent in the decode unit
// queue because that's directly caused by decoder latency.
m_ActiveWndVideoStats.totalDecodeTime += LiGetMillis() - du.enqueueTimeMs;
m_ActiveWndVideoStats.totalDecodeTimeUs += (LiGetMicroseconds() - du.enqueueTimeUs);
// Store the presentation time
frame->pts = du.presentationTimeMs;
// Store the presentation time (90 kHz timebase)
frame->pts = (int64_t)du.rtpTimestamp;
}
m_ActiveWndVideoStats.decodedFrames++;
@@ -1829,7 +1836,7 @@ int FFmpegVideoDecoder::submitDecodeUnit(PDECODE_UNIT du)
}
if (!m_LastFrameNumber) {
m_ActiveWndVideoStats.measurementStartTimestamp = SDL_GetTicks();
m_ActiveWndVideoStats.measurementStartUs = LiGetMicroseconds();
m_LastFrameNumber = du->frameNumber;
}
else {
@@ -1839,8 +1846,10 @@ int FFmpegVideoDecoder::submitDecodeUnit(PDECODE_UNIT du)
m_LastFrameNumber = du->frameNumber;
}
m_BwTracker.AddBytes(du->fullLength);
// Flip stats windows roughly every second
if (SDL_TICKS_PASSED(SDL_GetTicks(), m_ActiveWndVideoStats.measurementStartTimestamp + 1000)) {
if (LiGetMicroseconds() > m_ActiveWndVideoStats.measurementStartUs + 1000000) {
// Update overlay stats if it's enabled
if (Session::get()->getOverlayManager().isOverlayEnabled(Overlay::OverlayDebug)) {
VIDEO_STATS lastTwoWndStats = {};
@@ -1859,7 +1868,7 @@ int FFmpegVideoDecoder::submitDecodeUnit(PDECODE_UNIT du)
// Move this window into the last window slot and clear it for next window
SDL_memcpy(&m_LastWndVideoStats, &m_ActiveWndVideoStats, sizeof(m_ActiveWndVideoStats));
SDL_zero(m_ActiveWndVideoStats);
m_ActiveWndVideoStats.measurementStartTimestamp = SDL_GetTicks();
m_ActiveWndVideoStats.measurementStartUs = LiGetMicroseconds();
}
if (du->frameHostProcessingLatency != 0) {
@@ -1902,7 +1911,7 @@ int FFmpegVideoDecoder::submitDecodeUnit(PDECODE_UNIT du)
m_Pkt->flags = 0;
}
m_ActiveWndVideoStats.totalReassemblyTime += du->enqueueTimeMs - du->receiveTimeMs;
m_ActiveWndVideoStats.totalReassemblyTimeUs += (du->enqueueTimeUs - du->receiveTimeUs);
err = avcodec_send_packet(m_VideoDecoderCtx, m_Pkt);
if (err < 0) {

View File

@@ -4,6 +4,7 @@
#include <QQueue>
#include <set>
#include "../bandwidth.h"
#include "decoder.h"
#include "ffmpeg-renderers/renderer.h"
#include "ffmpeg-renderers/pacer/pacer.h"
@@ -99,6 +100,7 @@ private:
IFFmpegRenderer* m_FrontendRenderer;
int m_ConsecutiveFailedDecodes;
Pacer* m_Pacer;
BandwidthTracker m_BwTracker;
VIDEO_STATS m_ActiveWndVideoStats;
VIDEO_STATS m_LastWndVideoStats;
VIDEO_STATS m_GlobalVideoStats;

View File

@@ -46,7 +46,7 @@ private:
bool enabled;
int fontSize;
SDL_Color color;
char text[512];
char text[1024];
TTF_Font* font;
SDL_Surface* surface;