From 3ce2d6825095c79a2d46e0e2a13bc9722d8ed447 Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Thu, 18 Feb 2016 12:27:12 -0500 Subject: [PATCH] Use a wait-free algorithm for audio queueing to minimize audio hiccups. This closes #8 --- auddec.cpp | 142 +++++++++++++++++------------------------------------ 1 file changed, 44 insertions(+), 98 deletions(-) diff --git a/auddec.cpp b/auddec.cpp index 467ec02..0c19f14 100644 --- a/auddec.cpp +++ b/auddec.cpp @@ -1,138 +1,84 @@ #include "moonlight.hpp" -#define MAX_CHANNEL_COUNT 6 +#define MAX_CHANNEL_COUNT 2 #define FRAME_SIZE 240 -typedef struct decoded_sample_entry { - struct decoded_sample_entry *next; - int sampleLength; - short sampleBuffer[1]; -} decoded_sample_entry_t; +#define CIRCULAR_BUFFER_SIZE 32 -#define MAX_QUEUE_LENGTH 14 -#define QUEUE_PRUNING_LENGTH 7 +// This code uses volatiles for synchronization between the producer and consumer side. This is +// only safe because this code executes under very specific conditions, namely that the framework +// ensures AudioPlayerSampleCallback and AudDecDecodeAndPlaySample are each only active on one thread +// at a time. 
-static int s_OpusChannelCount;
-static decoded_sample_entry_t* s_SampleQueueHead;
-static decoded_sample_entry_t* s_SampleQueueTail;
-static int s_SampleQueueLength;
-static pthread_mutex_t s_SampleQueueLock;
-
-static void ReapSampleQueue() {
-    decoded_sample_entry_t *entry;
-
-    while (s_SampleQueueHead) {
-        entry = s_SampleQueueHead->next;
-        free(s_SampleQueueHead);
-        s_SampleQueueHead = entry;
-    }
-
-    s_SampleQueueTail = NULL;
-
-    s_SampleQueueLength = 0;
-}
+static short s_CircularBuffer[CIRCULAR_BUFFER_SIZE][FRAME_SIZE * MAX_CHANNEL_COUNT];
+static volatile int s_ReadIndex = 0;
+static volatile int s_WriteIndex = 0;

 static void AudioPlayerSampleCallback(void* samples, uint32_t buffer_size, void* data) {
-    unsigned char* buffer = (unsigned char *)samples;
-    int offset = 0;
-
-    pthread_mutex_lock(&s_SampleQueueLock);
-
-    while (s_SampleQueueHead && s_SampleQueueHead->sampleLength <= buffer_size - offset) {
-        decoded_sample_entry_t* lastEnt;
+    // The player is configured for FRAME_SIZE frames, so it should only ask us for one complete slot
+    assert(buffer_size == FRAME_SIZE * MAX_CHANNEL_COUNT * sizeof(short));

-        memcpy(&buffer[offset], s_SampleQueueHead->sampleBuffer, s_SampleQueueHead->sampleLength);
-        offset += s_SampleQueueHead->sampleLength;
+    // Unequal indexes mean a sample is queued. NOTE(review): no barrier orders this index read before the slot read.
+    if (s_WriteIndex != s_ReadIndex) {
+        memcpy(samples, s_CircularBuffer[s_ReadIndex], buffer_size);

-        lastEnt = s_SampleQueueHead;
-        s_SampleQueueHead = s_SampleQueueHead->next;
-        free(lastEnt);
-        s_SampleQueueLength--;
+        // Use a full memory barrier to ensure the circular buffer is read before incrementing the index
+        __sync_synchronize();

-        // Remove another sample if we're in pruning mode
-        if (s_SampleQueueLength > QUEUE_PRUNING_LENGTH) {
-            lastEnt = s_SampleQueueHead;
-            s_SampleQueueHead = s_SampleQueueHead->next;
-            free(lastEnt);
-            s_SampleQueueLength--;
-        }
+        // This can race with the reader in the AudDecDecodeAndPlaySample function. This is
+        // not a problem because at worst, it just won't see that we've consumed this sample yet.
+        s_ReadIndex = (s_ReadIndex + 1) % CIRCULAR_BUFFER_SIZE;
     }
-
-    if (!s_SampleQueueHead) {
-        s_SampleQueueTail = NULL;
-    }
-
-    pthread_mutex_unlock(&s_SampleQueueLock);
-
-    // Zero the remaining portion of the sample buffer to reduce noise when underflowing
-    if (buffer_size != offset) {
-        memset(&buffer[offset], 0, buffer_size - offset);
+    else {
+        memset(samples, 0, buffer_size);
     }
 }

 void MoonlightInstance::AudDecInit(int audioConfiguration, POPUS_MULTISTREAM_CONFIGURATION opusConfig) {
     int rc;

-    pthread_mutex_init(&s_SampleQueueLock, NULL);
-
-    s_OpusChannelCount = opusConfig->channelCount;
     g_Instance->m_OpusDecoder = opus_multistream_decoder_create(opusConfig->sampleRate,
                                                                 opusConfig->channelCount,
                                                                 opusConfig->streams,
                                                                 opusConfig->coupledStreams,
                                                                 opusConfig->mapping,
                                                                 &rc);
-
-    pp::AudioConfig audioConfig = pp::AudioConfig(g_Instance, PP_AUDIOSAMPLERATE_48000, FRAME_SIZE * 3);
-    g_Instance->m_AudioPlayer = pp::Audio(g_Instance, audioConfig, AudioPlayerSampleCallback, NULL);
+    g_Instance->m_AudioPlayer = pp::Audio(g_Instance, pp::AudioConfig(g_Instance, PP_AUDIOSAMPLERATE_48000, FRAME_SIZE),
+                                          AudioPlayerSampleCallback, NULL);

     // Start playback now
     g_Instance->m_AudioPlayer.StartPlayback();
 }

-void MoonlightInstance::AudDecCleanup(void) {
-    pthread_mutex_destroy(&s_SampleQueueLock);
+void MoonlightInstance::AudDecCleanup(void) {
+    // Stop playback before the decoder is destroyed below
+    g_Instance->m_AudioPlayer.StopPlayback();

     if (g_Instance->m_OpusDecoder) {
         opus_multistream_decoder_destroy(g_Instance->m_OpusDecoder);
     }
-
-    ReapSampleQueue();
 }

 void MoonlightInstance::AudDecDecodeAndPlaySample(char* sampleData, int sampleLength) {
-    decoded_sample_entry_t* entry = (decoded_sample_entry_t*)malloc(sizeof(decoded_sample_entry_t) +
-        (s_OpusChannelCount * FRAME_SIZE * sizeof(short)));
-    if (entry) {
-        int decodeLen = opus_multistream_decode(g_Instance->m_OpusDecoder, (unsigned char *)sampleData, sampleLength,
-            entry->sampleBuffer, FRAME_SIZE, 0);
-        if (decodeLen > 0) {
-            entry->sampleLength = decodeLen * s_OpusChannelCount * sizeof(short);
-            entry->next = NULL;
-
-            pthread_mutex_lock(&s_SampleQueueLock);
-
-            if (s_SampleQueueLength == MAX_QUEUE_LENGTH) {
-                printf("Reaped sample queue\n");
-                ReapSampleQueue();
-            }
-
-            if (!s_SampleQueueTail) {
-                s_SampleQueueHead = s_SampleQueueTail = entry;
-            }
-            else {
-                s_SampleQueueTail->next = entry;
-                s_SampleQueueTail = entry;
-            }
-
-            s_SampleQueueLength++;
-
-            pthread_mutex_unlock(&s_SampleQueueLock);
-        }
-        else {
-            free(entry);
-        }
+    int decodeLen;
+
+    // Drop this sample if the buffer is full (one slot always stays unused). Again, this can race
+    // but in the worst case, we'll not see the sample callback having consumed a sample.
+    if (((s_WriteIndex + 1) % CIRCULAR_BUFFER_SIZE) == s_ReadIndex) {
+        return;
+    }
+
+    decodeLen = opus_multistream_decode(g_Instance->m_OpusDecoder, (unsigned char *)sampleData, sampleLength,
+                                        s_CircularBuffer[s_WriteIndex], FRAME_SIZE, 0);
+    if (decodeLen > 0) {
+        // Use a full memory barrier to ensure the circular buffer is written before incrementing the index
+        __sync_synchronize();
+
+        // This can race with the reader in the sample callback, however this is a benign
+        // race since we'll either read the original value of s_WriteIndex (which is safe,
+        // we just won't consider this sample) or the new value of s_WriteIndex
+        s_WriteIndex = (s_WriteIndex + 1) % CIRCULAR_BUFFER_SIZE;
     }
 }