diff --git a/README.md b/README.md index e316878c..c2876a7a 100644 --- a/README.md +++ b/README.md @@ -53,8 +53,8 @@ Hosting for Moonlight's Debian and L4T package repositories is graciously provid * Qt 5.9 SDK or later. Qt 6 is also supported. * GCC or Clang * Install the required packages: - * Debian/Ubuntu: `libegl1-mesa-dev libgl1-mesa-dev libopus-dev libqt5svg5-dev libsdl2-dev libsdl2-ttf-dev libssl-dev libavcodec-dev libva-dev libvdpau-dev libxkbcommon-dev qtwayland5 qt5-qmake qtbase5-dev qtdeclarative5-dev qtquickcontrols2-5-dev wayland-protocols qml-module-qtquick-controls2 qml-module-qtquick-layouts qml-module-qtquick-window2 qml-module-qtquick2` - * RedHat/Fedora: `openssl-devel SDL2-devel SDL2_ttf-devel ffmpeg-devel qt5-qtsvg-devel qt5-qtquickcontrols2-devel libva-devel libvdpau-devel opus-devel pulseaudio-libs-devel alsa-lib-devel` + * Debian/Ubuntu: `libegl1-mesa-dev libgl1-mesa-dev libopus-dev libqt5svg5-dev libsdl2-dev libsdl2-ttf-dev libplacebo-dev libssl-dev libavcodec-dev libva-dev libvdpau-dev libxkbcommon-dev qtwayland5 qt5-qmake qtbase5-dev qtdeclarative5-dev qtquickcontrols2-5-dev wayland-protocols qml-module-qtquick-controls2 qml-module-qtquick-layouts qml-module-qtquick-window2 qml-module-qtquick2` + * RedHat/Fedora: `openssl-devel SDL2-devel SDL2_ttf-devel libplacebo-devel ffmpeg-devel qt5-qtsvg-devel qt5-qtquickcontrols2-devel libva-devel libvdpau-devel opus-devel pulseaudio-libs-devel alsa-lib-devel` * FFmpeg 4.0 or later is required to build. If your distro doesn't package FFmpeg 4.0 or later, you can build and install it from source on https://ffmpeg.org/ ### Steam Link Build Requirements @@ -75,7 +75,7 @@ Hosting for Moonlight's Debian and L4T package repositories is graciously provid * To build from the command line for development use on macOS or Linux, run `qmake moonlight-qt.pro` then `make debug` or `make release` * To create an embedded build for a single-purpose device, use `qmake "CONFIG+=embedded" moonlight-qt.pro` and build normally. * This build will lack windowed mode, Discord/Help links, and other features that don't make sense on an embedded device. - * For platforms with poor GL performance, add `"CONFIG+=glslow"` to prefer direct KMSDRM rendering over EGL/GLES renderers. Direct KMSDRM rendering can use dedicated YUV/RGB conversion and scaling hardware rather than slower GPU shaders for these operations. + * For platforms with poor GPU performance, add `"CONFIG+=gpuslow"` to prefer direct KMSDRM rendering over GL/Vulkan renderers. Direct KMSDRM rendering can use dedicated YUV/RGB conversion and scaling hardware rather than slower GPU shaders for these operations. ## Contribute 1. Fork us diff --git a/app/app.pro b/app/app.pro index 91eb5fea..dbaf9d29 100644 --- a/app/app.pro +++ b/app/app.pro @@ -120,6 +120,13 @@ unix:!macx { CONFIG += cuda } } + + !disable-libplacebo { + packagesExist(libplacebo) { + PKGCONFIG += libplacebo + CONFIG += libplacebo + } + } } !disable-wayland { @@ -291,7 +298,7 @@ mmal { # significantly better performance than EGL on the Pi. Setting # this option allows EGL usage even if built with MMAL support. # - # It is highly recommended to also build with 'glslow' to avoid + # It is highly recommended to also build with 'gpuslow' to avoid # EGL being preferred if direct DRM rendering is available. 
allow-egl-with-mmal { message(Allowing EGL usage with MMAL enabled) @@ -322,6 +329,16 @@ cuda { # ffnvcodec uses libdl in cuda_load_functions()/cuda_free_functions() LIBS += -ldl } +libplacebo { + message(Vulkan support enabled via libplacebo) + + DEFINES += HAVE_LIBPLACEBO_VULKAN + SOURCES += \ + streaming/video/ffmpeg-renderers/plvk.cpp \ + streaming/video/ffmpeg-renderers/plvk_c.c + HEADERS += \ + streaming/video/ffmpeg-renderers/plvk.h +} config_EGL { message(EGL renderer selected) @@ -396,6 +413,16 @@ glslow { DEFINES += GL_IS_SLOW } +vkslow { + message(Vulkan slow build) + + DEFINES += VULKAN_IS_SLOW +} +gpuslow { + message(GPU slow build) + + DEFINES += GL_IS_SLOW VULKAN_IS_SLOW +} wayland { message(Wayland extensions enabled) diff --git a/app/streaming/streamutils.cpp b/app/streaming/streamutils.cpp index a178480a..15a38980 100644 --- a/app/streaming/streamutils.cpp +++ b/app/streaming/streamutils.cpp @@ -8,8 +8,11 @@ Uint32 StreamUtils::getPlatformWindowFlags() { -#ifdef Q_OS_DARWIN +#if defined(Q_OS_DARWIN) return SDL_WINDOW_METAL; +#elif defined(HAVE_LIBPLACEBO_VULKAN) + // We'll fall back to GL if Vulkan fails + return SDL_WINDOW_VULKAN; #else return 0; #endif diff --git a/app/streaming/video/ffmpeg-renderers/eglvid.cpp b/app/streaming/video/ffmpeg-renderers/eglvid.cpp index 90b7d390..db71d170 100644 --- a/app/streaming/video/ffmpeg-renderers/eglvid.cpp +++ b/app/streaming/video/ffmpeg-renderers/eglvid.cpp @@ -452,13 +452,19 @@ bool EGLRenderer::initialize(PDECODER_PARAMETERS params) return false; } + // This renderer doesn't support HDR, so pick a different one. + // HACK: This avoids a deadlock in SDL_CreateRenderer() if + // Vulkan was used before and SDL is trying to load EGL. + if (params->videoFormat & VIDEO_FORMAT_MASK_10BIT) { + EGL_LOG(Info, "EGL doesn't support HDR rendering"); + return false; + } + // HACK: Work around bug where renderer will repeatedly fail with: // SDL_CreateRenderer() failed: Could not create GLES window surface // Don't retry if we've already failed to create a renderer for this // window *unless* the format has changed from 10-bit to 8-bit. 
- if (m_Window == s_LastFailedWindow && - !!(params->videoFormat & VIDEO_FORMAT_MASK_10BIT) == - !!(s_LastFailedVideoFormat & VIDEO_FORMAT_MASK_10BIT)) { + if (m_Window == s_LastFailedWindow) { EGL_LOG(Error, "SDL_CreateRenderer() already failed on this window!"); return false; } diff --git a/app/streaming/video/ffmpeg-renderers/plvk.cpp b/app/streaming/video/ffmpeg-renderers/plvk.cpp new file mode 100644 index 00000000..0f90cbda --- /dev/null +++ b/app/streaming/video/ffmpeg-renderers/plvk.cpp @@ -0,0 +1,671 @@ +#include "plvk.h" + +#include "streaming/session.h" +#include "streaming/streamutils.h" + +// Implementation in plvk_c.c +#define PL_LIBAV_IMPLEMENTATION 0 +#include + +#include + +#include + +#include + +// Keep these in sync with hwcontext_vulkan.c +static const char *k_OptionalDeviceExtensions[] = { + /* Misc or required by other extensions */ + //VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME, + VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, + VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, + VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, + VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, + VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, + VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, + + /* Imports/exports */ + VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, + VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, + VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, + VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, +#ifdef _WIN32 + VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, +#endif + + /* Video encoding/decoding */ + VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, + VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, + VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, + VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, + "VK_MESA_video_decode_av1", +}; + +static void pl_log_cb(void*, enum pl_log_level level, const char *msg) +{ + switch (level) { + case PL_LOG_FATAL: + SDL_LogCritical(SDL_LOG_CATEGORY_APPLICATION, "%s", msg); + break; + case PL_LOG_ERR: + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "%s", msg); + break; + case PL_LOG_WARN: + SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION, "%s", msg); + break; + case PL_LOG_INFO: + SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION, "%s", msg); + break; + case PL_LOG_DEBUG: + SDL_LogDebug(SDL_LOG_CATEGORY_APPLICATION, "%s", msg); + break; + case PL_LOG_NONE: + case PL_LOG_TRACE: + SDL_LogVerbose(SDL_LOG_CATEGORY_APPLICATION, "%s", msg); + break; + } +} + +void PlVkRenderer::lockQueue(struct AVHWDeviceContext *dev_ctx, uint32_t queue_family, uint32_t index) +{ + auto me = (PlVkRenderer*)dev_ctx->user_opaque; + me->m_Vulkan->lock_queue(me->m_Vulkan, queue_family, index); +} + +void PlVkRenderer::unlockQueue(struct AVHWDeviceContext *dev_ctx, uint32_t queue_family, uint32_t index) +{ + auto me = (PlVkRenderer*)dev_ctx->user_opaque; + me->m_Vulkan->unlock_queue(me->m_Vulkan, queue_family, index); +} + +void PlVkRenderer::overlayUploadComplete(void* opaque) +{ + SDL_FreeSurface((SDL_Surface*)opaque); +} + +PlVkRenderer::PlVkRenderer(IFFmpegRenderer* backendRenderer) : + m_Backend(backendRenderer) +{ + pl_log_params logParams = pl_log_default_params; + logParams.log_cb = pl_log_cb; + logParams.log_level = PL_LOG_DEBUG; + m_Log = pl_log_create(PL_API_VER, &logParams); +} + +PlVkRenderer::~PlVkRenderer() +{ + for (int i = 0; i < (int)SDL_arraysize(m_Overlays); i++) { + if (m_Overlays[i].hasOverlay) { + pl_tex_destroy(m_Vulkan->gpu, &m_Overlays[i].overlay.tex); + } + if (m_Overlays[i].hasStagingOverlay) { + pl_tex_destroy(m_Vulkan->gpu, 
&m_Overlays[i].stagingOverlay.tex);
+        }
+    }
+
+    for (int i = 0; i < (int)SDL_arraysize(m_Textures); i++) {
+        pl_tex_destroy(m_Vulkan->gpu, &m_Textures[i]);
+    }
+
+    pl_renderer_destroy(&m_Renderer);
+    pl_swapchain_destroy(&m_Swapchain);
+    pl_vulkan_destroy(&m_Vulkan);
+
+    // This surface was created by SDL, so there's no libplacebo API to destroy it
+    if (fn_vkDestroySurfaceKHR && m_VkSurface) {
+        fn_vkDestroySurfaceKHR(m_PlVkInstance->instance, m_VkSurface, nullptr);
+    }
+
+    if (m_HwDeviceCtx != nullptr) {
+        av_buffer_unref(&m_HwDeviceCtx);
+    }
+
+    pl_vk_inst_destroy(&m_PlVkInstance);
+
+    // m_Log must always be the last object destroyed
+    pl_log_destroy(&m_Log);
+}
+
+#define POPULATE_FUNCTION(name) \
+    fn_##name = (PFN_##name)vkInstParams.get_proc_addr(m_PlVkInstance->instance, #name); \
+    if (fn_##name == nullptr) { \
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, \
+                     "Missing required Vulkan function: " #name); \
+        return false; \
+    }
+
+bool PlVkRenderer::initialize(PDECODER_PARAMETERS params)
+{
+    unsigned int instanceExtensionCount = 0;
+    if (!SDL_Vulkan_GetInstanceExtensions(params->window, &instanceExtensionCount, nullptr)) {
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                     "SDL_Vulkan_GetInstanceExtensions() #1 failed: %s",
+                     SDL_GetError());
+        return false;
+    }
+
+    std::vector<const char*> instanceExtensions(instanceExtensionCount);
+    if (!SDL_Vulkan_GetInstanceExtensions(params->window, &instanceExtensionCount, instanceExtensions.data())) {
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                     "SDL_Vulkan_GetInstanceExtensions() #2 failed: %s",
+                     SDL_GetError());
+        return false;
+    }
+
+    pl_vk_inst_params vkInstParams = pl_vk_inst_default_params;
+#ifdef QT_DEBUG
+    vkInstParams.debug = true;
+#endif
+    vkInstParams.get_proc_addr = (PFN_vkGetInstanceProcAddr)SDL_Vulkan_GetVkGetInstanceProcAddr();
+    vkInstParams.extensions = instanceExtensions.data();
+    vkInstParams.num_extensions = instanceExtensions.size();
+    m_PlVkInstance = pl_vk_inst_create(m_Log, &vkInstParams);
+    if (m_PlVkInstance == nullptr) {
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                     "pl_vk_inst_create() failed");
+        return false;
+    }
+
+    // Lookup all Vulkan functions we require
+    POPULATE_FUNCTION(vkDestroySurfaceKHR);
+    POPULATE_FUNCTION(vkGetPhysicalDeviceQueueFamilyProperties);
+    POPULATE_FUNCTION(vkGetPhysicalDeviceSurfacePresentModesKHR);
+    POPULATE_FUNCTION(vkGetPhysicalDeviceSurfaceFormatsKHR);
+
+    if (!SDL_Vulkan_CreateSurface(params->window, m_PlVkInstance->instance, &m_VkSurface)) {
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                     "SDL_Vulkan_CreateSurface() failed: %s",
+                     SDL_GetError());
+        return false;
+    }
+
+    pl_vulkan_params vkParams = pl_vulkan_default_params;
+    vkParams.instance = m_PlVkInstance->instance;
+    vkParams.get_proc_addr = m_PlVkInstance->get_proc_addr;
+    vkParams.surface = m_VkSurface;
+    vkParams.allow_software = false;
+    vkParams.opt_extensions = k_OptionalDeviceExtensions;
+    vkParams.num_opt_extensions = SDL_arraysize(k_OptionalDeviceExtensions);
+    vkParams.extra_queues = VK_QUEUE_VIDEO_DECODE_BIT_KHR;
+    m_Vulkan = pl_vulkan_create(m_Log, &vkParams);
+    if (m_Vulkan == nullptr) {
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                     "pl_vulkan_create() failed");
+        return false;
+    }
+
+    VkPresentModeKHR presentMode;
+    if (params->enableVsync) {
+        // We will use mailbox mode if present, otherwise libplacebo will fall back to FIFO
+        presentMode = VK_PRESENT_MODE_MAILBOX_KHR;
+    }
+    else {
+        // We want immediate mode for V-Sync disabled if possible
+        if (isPresentModeSupported(VK_PRESENT_MODE_IMMEDIATE_KHR)) {
+            SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
+                        "Using Immediate present mode with V-Sync disabled");
+            presentMode = VK_PRESENT_MODE_IMMEDIATE_KHR;
+        }
+        else {
+            SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
+                        "Immediate present mode is not supported by the Vulkan driver. Latency may be higher than normal with V-Sync disabled.");
+
+            // FIFO Relaxed can tear if the frame is running late
+            if (isPresentModeSupported(VK_PRESENT_MODE_FIFO_RELAXED_KHR)) {
+                SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
+                            "Using FIFO Relaxed present mode with V-Sync disabled");
+                presentMode = VK_PRESENT_MODE_FIFO_RELAXED_KHR;
+            }
+            // Mailbox at least provides non-blocking behavior
+            else if (isPresentModeSupported(VK_PRESENT_MODE_MAILBOX_KHR)) {
+                SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
+                            "Using Mailbox present mode with V-Sync disabled");
+                presentMode = VK_PRESENT_MODE_MAILBOX_KHR;
+            }
+            // FIFO is always supported
+            else {
+                SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
+                            "Using FIFO present mode with V-Sync disabled");
+                presentMode = VK_PRESENT_MODE_FIFO_KHR;
+            }
+        }
+    }
+
+    pl_vulkan_swapchain_params vkSwapchainParams = {};
+    vkSwapchainParams.surface = m_VkSurface;
+    vkSwapchainParams.present_mode = presentMode;
+    vkSwapchainParams.swapchain_depth = 1; // No queued frames
+    m_Swapchain = pl_vulkan_create_swapchain(m_Vulkan, &vkSwapchainParams);
+    if (m_Swapchain == nullptr) {
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                     "pl_vulkan_create_swapchain() failed");
+        return false;
+    }
+
+    int vkDrawableW, vkDrawableH;
+    SDL_Vulkan_GetDrawableSize(params->window, &vkDrawableW, &vkDrawableH);
+    pl_swapchain_resize(m_Swapchain, &vkDrawableW, &vkDrawableH);
+
+    m_Renderer = pl_renderer_create(m_Log, m_Vulkan->gpu);
+    if (m_Renderer == nullptr) {
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                     "pl_renderer_create() failed");
+        return false;
+    }
+
+    // We only need an hwaccel device context if we're going to act as the backend renderer too
+    if (m_Backend == nullptr) {
+        m_HwDeviceCtx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VULKAN);
+        if (m_HwDeviceCtx == nullptr) {
+            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                         "av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VULKAN) failed");
+            return false;
+        }
+
+        auto hwDeviceContext = ((AVHWDeviceContext *)m_HwDeviceCtx->data);
+        hwDeviceContext->user_opaque = this; // Used by lockQueue()/unlockQueue()
+
+        auto vkDeviceContext = (AVVulkanDeviceContext*)((AVHWDeviceContext *)m_HwDeviceCtx->data)->hwctx;
+        vkDeviceContext->get_proc_addr = m_PlVkInstance->get_proc_addr;
+        vkDeviceContext->inst = m_PlVkInstance->instance;
+        vkDeviceContext->phys_dev = m_Vulkan->phys_device;
+        vkDeviceContext->act_dev = m_Vulkan->device;
+        vkDeviceContext->device_features = *m_Vulkan->features;
+        vkDeviceContext->enabled_inst_extensions = m_PlVkInstance->extensions;
+        vkDeviceContext->nb_enabled_inst_extensions = m_PlVkInstance->num_extensions;
+        vkDeviceContext->enabled_dev_extensions = m_Vulkan->extensions;
+        vkDeviceContext->nb_enabled_dev_extensions = m_Vulkan->num_extensions;
+        vkDeviceContext->queue_family_index = m_Vulkan->queue_graphics.index;
+        vkDeviceContext->nb_graphics_queues = m_Vulkan->queue_graphics.count;
+        vkDeviceContext->queue_family_tx_index = m_Vulkan->queue_transfer.index;
+        vkDeviceContext->nb_tx_queues = m_Vulkan->queue_transfer.count;
+        vkDeviceContext->queue_family_comp_index = m_Vulkan->queue_compute.index;
+        vkDeviceContext->nb_comp_queues = m_Vulkan->queue_compute.count;
+#if LIBAVUTIL_VERSION_INT > AV_VERSION_INT(58, 9, 100)
+        vkDeviceContext->lock_queue = lockQueue;
+        vkDeviceContext->unlock_queue = unlockQueue;
+#endif
+
+        static_assert(sizeof(vkDeviceContext->queue_family_decode_index) == sizeof(uint32_t), "sizeof(int) != sizeof(uint32_t)");
+        static_assert(sizeof(vkDeviceContext->nb_decode_queues) == sizeof(uint32_t), "sizeof(int) != sizeof(uint32_t)");
+        if (!getQueue(VK_QUEUE_VIDEO_DECODE_BIT_KHR, (uint32_t*)&vkDeviceContext->queue_family_decode_index, (uint32_t*)&vkDeviceContext->nb_decode_queues)) {
+            SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
+                        "Vulkan video decoding is not supported by the Vulkan device");
+            return false;
+        }
+
+        int err = av_hwdevice_ctx_init(m_HwDeviceCtx);
+        if (err < 0) {
+            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                         "av_hwdevice_ctx_init() failed: %d",
+                         err);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+bool PlVkRenderer::prepareDecoderContext(AVCodecContext *context, AVDictionary **)
+{
+    SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
+                "Using libplacebo Vulkan renderer");
+
+    if (m_HwDeviceCtx != nullptr) {
+        context->hw_device_ctx = av_buffer_ref(m_HwDeviceCtx);
+    }
+    return true;
+}
+
+bool PlVkRenderer::mapAvFrameToPlacebo(const AVFrame *frame, pl_frame* mappedFrame)
+{
+    pl_avframe_params mapParams = {};
+    mapParams.frame = frame;
+    mapParams.tex = m_Textures;
+    if (!pl_map_avframe_ex(m_Vulkan->gpu, mappedFrame, &mapParams)) {
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                     "pl_map_avframe_ex() failed");
+        return false;
+    }
+
+    return true;
+}
+
+bool PlVkRenderer::getQueue(VkQueueFlags requiredFlags, uint32_t *queueIndex, uint32_t *queueCount)
+{
+    uint32_t queueFamilyCount = 0;
+    fn_vkGetPhysicalDeviceQueueFamilyProperties(m_Vulkan->phys_device, &queueFamilyCount, nullptr);
+
+    std::vector<VkQueueFamilyProperties> queueFamilies(queueFamilyCount);
+    fn_vkGetPhysicalDeviceQueueFamilyProperties(m_Vulkan->phys_device, &queueFamilyCount, queueFamilies.data());
+
+    for (uint32_t i = 0; i < queueFamilyCount; i++) {
+        if ((queueFamilies[i].queueFlags & requiredFlags) == requiredFlags) {
+            *queueIndex = i;
+            *queueCount = queueFamilies[i].queueCount;
+            return true;
+        }
+    }
+
+    return false;
+}
+
+bool PlVkRenderer::isPresentModeSupported(VkPresentModeKHR presentMode)
+{
+    uint32_t presentModeCount = 0;
+    fn_vkGetPhysicalDeviceSurfacePresentModesKHR(m_Vulkan->phys_device, m_VkSurface, &presentModeCount, nullptr);
+
+    std::vector<VkPresentModeKHR> presentModes(presentModeCount);
+    fn_vkGetPhysicalDeviceSurfacePresentModesKHR(m_Vulkan->phys_device, m_VkSurface, &presentModeCount, presentModes.data());
+
+    for (uint32_t i = 0; i < presentModeCount; i++) {
+        if (presentModes[i] == presentMode) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+bool PlVkRenderer::isColorSpaceSupported(VkColorSpaceKHR colorSpace)
+{
+    uint32_t formatCount = 0;
+    fn_vkGetPhysicalDeviceSurfaceFormatsKHR(m_Vulkan->phys_device, m_VkSurface, &formatCount, nullptr);
+
+    std::vector<VkSurfaceFormatKHR> formats(formatCount);
+    fn_vkGetPhysicalDeviceSurfaceFormatsKHR(m_Vulkan->phys_device, m_VkSurface, &formatCount, formats.data());
+
+    for (uint32_t i = 0; i < formatCount; i++) {
+        if (formats[i].colorSpace == colorSpace) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+void PlVkRenderer::renderFrame(AVFrame *frame)
+{
+    pl_frame mappedFrame, targetFrame;
+    pl_swapchain_frame swapchainFrame;
+
+    if (!mapAvFrameToPlacebo(frame, &mappedFrame)) {
+        // This function logs internally
+        return;
+    }
+
+    // Reserve enough space to avoid allocating under the overlay lock
+    pl_overlay_part overlayParts[Overlay::OverlayMax] = {};
+    std::vector<pl_tex> texturesToDestroy;
+    std::vector<pl_overlay> overlays;
+    texturesToDestroy.reserve(Overlay::OverlayMax);
+    overlays.reserve(Overlay::OverlayMax);
+
+    // Get the next swapchain buffer for rendering
+    //
+    // NB: After calling this successfully, we *MUST* call pl_swapchain_submit_frame()!
+    if (!pl_swapchain_start_frame(m_Swapchain, &swapchainFrame)) {
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                     "pl_swapchain_start_frame() failed");
+
+        // Recreate the renderer
+        SDL_Event event;
+        event.type = SDL_RENDER_TARGETS_RESET;
+        SDL_PushEvent(&event);
+        goto UnmapExit;
+    }
+
+    pl_frame_from_swapchain(&targetFrame, &swapchainFrame);
+
+    // We perform minimal processing under the overlay lock to avoid blocking threads updating the overlay
+    SDL_AtomicLock(&m_OverlayLock);
+    for (int i = 0; i < Overlay::OverlayMax; i++) {
+        // If we have a staging overlay, we need to transfer ownership to us
+        if (m_Overlays[i].hasStagingOverlay) {
+            if (m_Overlays[i].hasOverlay) {
+                texturesToDestroy.push_back(m_Overlays[i].overlay.tex);
+            }
+
+            // Copy the overlay fields from the staging area
+            m_Overlays[i].overlay = m_Overlays[i].stagingOverlay;
+
+            // We now own the staging overlay
+            m_Overlays[i].hasStagingOverlay = false;
+            SDL_zero(m_Overlays[i].stagingOverlay);
+            m_Overlays[i].hasOverlay = true;
+        }
+
+        // If we have an overlay but it's been disabled, free the overlay texture
+        if (m_Overlays[i].hasOverlay && !Session::get()->getOverlayManager().isOverlayEnabled((Overlay::OverlayType)i)) {
+            texturesToDestroy.push_back(m_Overlays[i].overlay.tex);
+            m_Overlays[i].hasOverlay = false;
+        }
+
+        // We have an overlay to draw
+        if (m_Overlays[i].hasOverlay) {
+            // Position the overlay
+            overlayParts[i].src = { 0, 0, (float)m_Overlays[i].overlay.tex->params.w, (float)m_Overlays[i].overlay.tex->params.h };
+            if (i == Overlay::OverlayStatusUpdate) {
+                // Bottom Left
+                overlayParts[i].dst.x0 = 0;
+                overlayParts[i].dst.y0 = SDL_max(0, targetFrame.crop.y1 - overlayParts[i].src.y1);
+            }
+            else if (i == Overlay::OverlayDebug) {
+                // Top left
+                overlayParts[i].dst.x0 = 0;
+                overlayParts[i].dst.y0 = 0;
+            }
+            overlayParts[i].dst.x1 = overlayParts[i].dst.x0 + overlayParts[i].src.x1;
+            overlayParts[i].dst.y1 = overlayParts[i].dst.y0 + overlayParts[i].src.y1;
+
+            m_Overlays[i].overlay.parts = &overlayParts[i];
+            m_Overlays[i].overlay.num_parts = 1;
+
+            overlays.push_back(m_Overlays[i].overlay);
+        }
+    }
+    SDL_AtomicUnlock(&m_OverlayLock);
+
+    SDL_Rect src;
+    src.x = 0;
+    src.y = 0;
+    src.w = frame->width;
+    src.h = frame->height;
+
+    SDL_Rect dst;
+    dst.x = targetFrame.crop.x0;
+    dst.y = targetFrame.crop.y0;
+    dst.w = targetFrame.crop.x1 - targetFrame.crop.x0;
+    dst.h = targetFrame.crop.y1 - targetFrame.crop.y0;
+
+    // Scale the video to the surface size while preserving the aspect ratio
+    StreamUtils::scaleSourceToDestinationSurface(&src, &dst);
+
+    targetFrame.crop.x0 = dst.x;
+    targetFrame.crop.y0 = dst.y;
+    targetFrame.crop.x1 = dst.x + dst.w;
+    targetFrame.crop.y1 = dst.y + dst.h;
+
+    // Render the video image and overlays into the swapchain buffer
+    targetFrame.num_overlays = overlays.size();
+    targetFrame.overlays = overlays.data();
+    if (!pl_render_image(m_Renderer, &mappedFrame, &targetFrame, &pl_render_fast_params)) {
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                     "pl_render_image() failed");
+    }
+
+    // Submit the frame for display and swap buffers
+    if (!pl_swapchain_submit_frame(m_Swapchain)) {
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                     "pl_swapchain_submit_frame() failed");
+
+        // Recreate the renderer
+        SDL_Event event;
+        event.type = SDL_RENDER_TARGETS_RESET;
+        SDL_PushEvent(&event);
+        goto UnmapExit;
+    }
pl_swapchain_swap_buffers(m_Swapchain); + +UnmapExit: + // Delete any textures that need to be destroyed + for (pl_tex texture : texturesToDestroy) { + pl_tex_destroy(m_Vulkan->gpu, &texture); + } + + pl_unmap_avframe(m_Vulkan->gpu, &mappedFrame); +} + +bool PlVkRenderer::testRenderFrame(AVFrame *frame) +{ + // Test if the frame can be mapped to libplacebo + pl_frame mappedFrame; + if (!mapAvFrameToPlacebo(frame, &mappedFrame)) { + return false; + } + + pl_unmap_avframe(m_Vulkan->gpu, &mappedFrame); + return true; +} + +void PlVkRenderer::notifyOverlayUpdated(Overlay::OverlayType type) +{ + SDL_Surface* newSurface = Session::get()->getOverlayManager().getUpdatedOverlaySurface(type); + if (newSurface == nullptr && Session::get()->getOverlayManager().isOverlayEnabled(type)) { + // The overlay is enabled and there is no new surface. Leave the old texture alone. + return; + } + + // If there's a staging texture already, free it + SDL_AtomicLock(&m_OverlayLock); + if (m_Overlays[type].hasStagingOverlay) { + pl_tex_destroy(m_Vulkan->gpu, &m_Overlays[type].stagingOverlay.tex); + SDL_zero(m_Overlays[type].stagingOverlay); + m_Overlays[type].hasStagingOverlay = false; + } + SDL_AtomicUnlock(&m_OverlayLock); + + // If there's no new surface, we're done now + if (newSurface == nullptr) { + return; + } + + SDL_assert(!SDL_ISPIXELFORMAT_INDEXED(newSurface->format->format)); + pl_plane_data planeData = {}; + planeData.type = PL_FMT_UNORM; + planeData.width = newSurface->w; + planeData.height = newSurface->h; + planeData.pixel_stride = newSurface->format->BytesPerPixel; + planeData.row_stride = (size_t)newSurface->pitch; + planeData.pixels = newSurface->pixels; + uint64_t formatMasks[4] = { newSurface->format->Rmask, newSurface->format->Gmask, newSurface->format->Bmask, newSurface->format->Amask }; + pl_plane_data_from_mask(&planeData, formatMasks); + + // This callback frees the surface after the upload completes + planeData.callback = overlayUploadComplete; + planeData.priv = newSurface; + + m_Overlays[type].stagingOverlay.mode = PL_OVERLAY_NORMAL; + m_Overlays[type].stagingOverlay.coords = PL_OVERLAY_COORDS_DST_FRAME; + m_Overlays[type].stagingOverlay.repr = pl_color_repr_rgb; + m_Overlays[type].stagingOverlay.color = pl_color_space_srgb; + + // Upload the surface to a new texture + bool success = pl_upload_plane(m_Vulkan->gpu, nullptr, &m_Overlays[type].stagingOverlay.tex, &planeData); + if (!success) { + SDL_FreeSurface(newSurface); + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, + "pl_upload_plane() failed"); + return; + } + + // newSurface is now owned by the plane upload process. 
It will be freed in overlayUploadComplete() + newSurface = nullptr; + + // Make this staging overlay visible to the render thread + SDL_AtomicLock(&m_OverlayLock); + SDL_assert(!m_Overlays[type].hasStagingOverlay); + m_Overlays[type].hasStagingOverlay = true; + SDL_AtomicUnlock(&m_OverlayLock); +} + +void PlVkRenderer::setHdrMode(bool enabled) +{ + pl_color_space csp = {}; + + if (enabled) { + csp.primaries = PL_COLOR_PRIM_BT_2020; + csp.transfer = PL_COLOR_TRC_PQ; + + // Use the host's provided HDR metadata if present + SS_HDR_METADATA hdrMetadata; + if (LiGetHdrMetadata(&hdrMetadata)) { + csp.hdr.prim.red.x = hdrMetadata.displayPrimaries[0].x / 50000.f; + csp.hdr.prim.red.y = hdrMetadata.displayPrimaries[0].y / 50000.f; + csp.hdr.prim.green.x = hdrMetadata.displayPrimaries[1].x / 50000.f; + csp.hdr.prim.green.y = hdrMetadata.displayPrimaries[1].y / 50000.f; + csp.hdr.prim.blue.x = hdrMetadata.displayPrimaries[2].x / 50000.f; + csp.hdr.prim.blue.y = hdrMetadata.displayPrimaries[2].y / 50000.f; + csp.hdr.prim.white.x = hdrMetadata.whitePoint.x / 50000.f; + csp.hdr.prim.white.y = hdrMetadata.whitePoint.y / 50000.f; + csp.hdr.min_luma = hdrMetadata.minDisplayLuminance / 10000.f; + csp.hdr.max_luma = hdrMetadata.maxDisplayLuminance; + csp.hdr.max_cll = hdrMetadata.maxContentLightLevel; + csp.hdr.max_fall = hdrMetadata.maxFrameAverageLightLevel; + } + else { + // Use the generic HDR10 metadata if the host doesn't provide HDR metadata + csp.hdr = pl_hdr_metadata_hdr10; + } + } + else { + csp.primaries = PL_COLOR_PRIM_UNKNOWN; + csp.transfer = PL_COLOR_TRC_UNKNOWN; + } + + pl_swapchain_colorspace_hint(m_Swapchain, &csp); +} + +int PlVkRenderer::getRendererAttributes() +{ + int attributes = 0; + + if (isColorSpaceSupported(VK_COLOR_SPACE_HDR10_ST2084_EXT)) { + attributes |= RENDERER_ATTRIBUTE_HDR_SUPPORT; + } + + return attributes; +} + +int PlVkRenderer::getDecoderCapabilities() +{ + return CAPABILITY_REFERENCE_FRAME_INVALIDATION_HEVC | + CAPABILITY_REFERENCE_FRAME_INVALIDATION_AV1; +} + +bool PlVkRenderer::isPixelFormatSupported(int videoFormat, AVPixelFormat pixelFormat) +{ + if (m_Backend) { + return m_Backend->isPixelFormatSupported(videoFormat, pixelFormat); + } + else { + return IFFmpegRenderer::isPixelFormatSupported(videoFormat, pixelFormat); + } +} + +AVPixelFormat PlVkRenderer::getPreferredPixelFormat(int videoFormat) +{ + if (m_Backend) { + return m_Backend->getPreferredPixelFormat(videoFormat); + } + else { + return AV_PIX_FMT_VULKAN; + } +} + +IFFmpegRenderer::RendererType PlVkRenderer::getRendererType() +{ + return IFFmpegRenderer::RendererType::Vulkan; +} diff --git a/app/streaming/video/ffmpeg-renderers/plvk.h b/app/streaming/video/ffmpeg-renderers/plvk.h new file mode 100644 index 00000000..d86b6f70 --- /dev/null +++ b/app/streaming/video/ffmpeg-renderers/plvk.h @@ -0,0 +1,67 @@ +#pragma once + +#include "renderer.h" + +#include +#include +#include + +class PlVkRenderer : public IFFmpegRenderer { +public: + PlVkRenderer(IFFmpegRenderer* backendRenderer); + virtual ~PlVkRenderer() override; + virtual bool initialize(PDECODER_PARAMETERS params) override; + virtual bool prepareDecoderContext(AVCodecContext* context, AVDictionary** options) override; + virtual void renderFrame(AVFrame* frame) override; + virtual bool testRenderFrame(AVFrame* frame) override; + virtual void notifyOverlayUpdated(Overlay::OverlayType) override; + virtual void setHdrMode(bool enabled) override; + virtual int getRendererAttributes() override; + virtual int getDecoderCapabilities() override; + virtual 
bool isPixelFormatSupported(int videoFormat, enum AVPixelFormat pixelFormat) override; + virtual AVPixelFormat getPreferredPixelFormat(int videoFormat) override; + virtual RendererType getRendererType() override; + +private: + static void lockQueue(AVHWDeviceContext *dev_ctx, uint32_t queue_family, uint32_t index); + static void unlockQueue(AVHWDeviceContext *dev_ctx, uint32_t queue_family, uint32_t index); + static void overlayUploadComplete(void* opaque); + + bool mapAvFrameToPlacebo(const AVFrame *frame, pl_frame* mappedFrame); + bool getQueue(VkQueueFlags requiredFlags, uint32_t* queueIndex, uint32_t* queueCount); + bool isPresentModeSupported(VkPresentModeKHR presentMode); + bool isColorSpaceSupported(VkColorSpaceKHR colorSpace); + + // The backend renderer if we're frontend-only + IFFmpegRenderer* m_Backend; + + // The libplacebo rendering state + pl_log m_Log = nullptr; + pl_vk_inst m_PlVkInstance = nullptr; + VkSurfaceKHR m_VkSurface = nullptr; + pl_vulkan m_Vulkan = nullptr; + pl_swapchain m_Swapchain = nullptr; + pl_renderer m_Renderer = nullptr; + pl_tex m_Textures[4] = {}; + + // Overlay state + SDL_SpinLock m_OverlayLock = 0; + struct { + // The staging overlay state is copied here under the overlay lock in the render thread + bool hasOverlay; + pl_overlay overlay; + + // This state is written by the overlay update thread + bool hasStagingOverlay; + pl_overlay stagingOverlay; + } m_Overlays[Overlay::OverlayMax] = {}; + + // Device context used for hwaccel decoders + AVBufferRef* m_HwDeviceCtx = nullptr; + + // Vulkan functions we call directly + PFN_vkDestroySurfaceKHR fn_vkDestroySurfaceKHR = nullptr; + PFN_vkGetPhysicalDeviceQueueFamilyProperties fn_vkGetPhysicalDeviceQueueFamilyProperties = nullptr; + PFN_vkGetPhysicalDeviceSurfacePresentModesKHR fn_vkGetPhysicalDeviceSurfacePresentModesKHR = nullptr; + PFN_vkGetPhysicalDeviceSurfaceFormatsKHR fn_vkGetPhysicalDeviceSurfaceFormatsKHR = nullptr; +}; diff --git a/app/streaming/video/ffmpeg-renderers/plvk_c.c b/app/streaming/video/ffmpeg-renderers/plvk_c.c new file mode 100644 index 00000000..da2ad81d --- /dev/null +++ b/app/streaming/video/ffmpeg-renderers/plvk_c.c @@ -0,0 +1,14 @@ +// This compilation unit contains the implementations of libplacebo header-only libraries. +// These must be compiled as C code, so they cannot be placed inside plvk.cpp. 
+ +#define PL_LIBAV_IMPLEMENTATION 1 +#include + +// Provide a dummy implementation of av_stream_get_side_data() to avoid having to link with libavformat +uint8_t *av_stream_get_side_data(const AVStream *stream, enum AVPacketSideDataType type, size_t *size) +{ + (void)stream; + (void)type; + (void)size; + return NULL; +} diff --git a/app/streaming/video/ffmpeg-renderers/renderer.h b/app/streaming/video/ffmpeg-renderers/renderer.h index 31ff91cf..add15639 100644 --- a/app/streaming/video/ffmpeg-renderers/renderer.h +++ b/app/streaming/video/ffmpeg-renderers/renderer.h @@ -233,6 +233,15 @@ public: return true; } + // Allow renderers to expose their type + enum class RendererType { + Unknown, + Vulkan + }; + virtual RendererType getRendererType() { + return RendererType::Unknown; + } + // IOverlayRenderer virtual void notifyOverlayUpdated(Overlay::OverlayType) override { // Nothing diff --git a/app/streaming/video/ffmpeg.cpp b/app/streaming/video/ffmpeg.cpp index 314cb2eb..b0d54588 100644 --- a/app/streaming/video/ffmpeg.cpp +++ b/app/streaming/video/ffmpeg.cpp @@ -40,6 +40,10 @@ #include "ffmpeg-renderers/cuda.h" #endif +#ifdef HAVE_LIBPLACEBO_VULKAN +#include "ffmpeg-renderers/plvk.h" +#endif + // This is gross but it allows us to use sizeof() #include "ffmpeg_videosamples.cpp" @@ -288,21 +292,47 @@ void FFmpegVideoDecoder::reset() bool FFmpegVideoDecoder::createFrontendRenderer(PDECODER_PARAMETERS params, bool useAlternateFrontend) { if (useAlternateFrontend) { -#ifdef HAVE_DRM - // If we're trying to stream HDR, we need to use the DRM renderer in direct - // rendering mode so it can set the HDR metadata on the display. EGL does - // not currently support this (and even if it did, Mesa and Wayland don't - // currently have protocols to actually get that metadata to the display). - if ((params->videoFormat & VIDEO_FORMAT_MASK_10BIT) && m_BackendRenderer->canExportDrmPrime()) { - m_FrontendRenderer = new DrmRenderer(false, m_BackendRenderer); - if (m_FrontendRenderer->initialize(params)) { - return true; + if (params->videoFormat & VIDEO_FORMAT_MASK_10BIT) { +#if defined(HAVE_LIBPLACEBO_VULKAN) && !defined(VULKAN_IS_SLOW) + // The Vulkan renderer can also handle HDR with a supported compositor. We prefer + // rendering HDR with Vulkan if possible since it's more fully featured than DRM. + if (m_BackendRenderer->getRendererType() != IFFmpegRenderer::RendererType::Vulkan) { + m_FrontendRenderer = new PlVkRenderer(m_BackendRenderer); + if (m_FrontendRenderer->initialize(params) && (m_FrontendRenderer->getRendererAttributes() & RENDERER_ATTRIBUTE_HDR_SUPPORT)) { + return true; + } + delete m_FrontendRenderer; + m_FrontendRenderer = nullptr; } - delete m_FrontendRenderer; - m_FrontendRenderer = nullptr; - } #endif +#ifdef HAVE_DRM + // If we're trying to stream HDR, we need to use the DRM renderer in direct + // rendering mode so it can set the HDR metadata on the display. EGL does + // not currently support this (and even if it did, Mesa and Wayland don't + // currently have protocols to actually get that metadata to the display). 
+ if (m_BackendRenderer->canExportDrmPrime()) { + m_FrontendRenderer = new DrmRenderer(false, m_BackendRenderer); + if (m_FrontendRenderer->initialize(params) && (m_FrontendRenderer->getRendererAttributes() & RENDERER_ATTRIBUTE_HDR_SUPPORT)) { + return true; + } + delete m_FrontendRenderer; + m_FrontendRenderer = nullptr; + } +#endif + +#if defined(HAVE_LIBPLACEBO_VULKAN) && defined(VULKAN_IS_SLOW) + if (m_BackendRenderer->getRendererType() != IFFmpegRenderer::RendererType::Vulkan) { + m_FrontendRenderer = new PlVkRenderer(m_BackendRenderer); + if (m_FrontendRenderer->initialize(params) && (m_FrontendRenderer->getRendererAttributes() & RENDERER_ATTRIBUTE_HDR_SUPPORT)) { + return true; + } + delete m_FrontendRenderer; + m_FrontendRenderer = nullptr; + } +#endif + } + #if defined(HAVE_EGL) && !defined(GL_IS_SLOW) if (m_BackendRenderer->canExportEGL()) { m_FrontendRenderer = new EGLRenderer(m_BackendRenderer); @@ -313,6 +343,7 @@ bool FFmpegVideoDecoder::createFrontendRenderer(PDECODER_PARAMETERS params, bool m_FrontendRenderer = nullptr; } #endif + // If we made it here, we failed to create the EGLRenderer return false; } @@ -783,6 +814,10 @@ IFFmpegRenderer* FFmpegVideoDecoder::createHwAccelRenderer(const AVCodecHWConfig #ifdef HAVE_DRM case AV_HWDEVICE_TYPE_DRM: return new DrmRenderer(true); +#endif +#ifdef HAVE_LIBPLACEBO_VULKAN + case AV_HWDEVICE_TYPE_VULKAN: + return new PlVkRenderer(nullptr); #endif default: return nullptr;
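For quick reference, the V-Sync-off path in `PlVkRenderer::initialize()` reduces to the fallback chain below. This is an illustrative standalone sketch, not part of the patch: `choosePresentMode()` is a hypothetical helper, and `supported` stands in for the list returned by `vkGetPhysicalDeviceSurfacePresentModesKHR()` for the target surface.

```cpp
#include <algorithm>
#include <vector>
#include <vulkan/vulkan_core.h>

// Sketch of the present mode selection performed in PlVkRenderer::initialize()
static VkPresentModeKHR choosePresentMode(const std::vector<VkPresentModeKHR>& supported,
                                          bool enableVsync)
{
    auto has = [&supported](VkPresentModeKHR mode) {
        return std::find(supported.begin(), supported.end(), mode) != supported.end();
    };

    if (enableVsync) {
        // Request mailbox; libplacebo falls back to FIFO on its own if unavailable
        return VK_PRESENT_MODE_MAILBOX_KHR;
    }

    // With V-Sync disabled, prefer the lowest-latency modes first
    if (has(VK_PRESENT_MODE_IMMEDIATE_KHR)) {
        return VK_PRESENT_MODE_IMMEDIATE_KHR;
    }
    if (has(VK_PRESENT_MODE_FIFO_RELAXED_KHR)) {
        // May tear if a frame arrives late
        return VK_PRESENT_MODE_FIFO_RELAXED_KHR;
    }
    if (has(VK_PRESENT_MODE_MAILBOX_KHR)) {
        // At least non-blocking
        return VK_PRESENT_MODE_MAILBOX_KHR;
    }
    // FIFO is always supported
    return VK_PRESENT_MODE_FIFO_KHR;
}
```

When immediate mode is unavailable, the renderer logs a warning that latency may be higher than normal, mirroring the fallback order above.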
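The overlay path in `plvk.cpp` uses a staging/active handoff guarded by `m_OverlayLock`: the overlay update thread uploads a texture and publishes it as `stagingOverlay`, and the render thread adopts it on the next frame, deferring destruction of the texture it replaces until after it has left the lock. The sketch below shows the same pattern in isolation; `OverlaySlot`, the `Tex` placeholder, and the use of `std::mutex`/`std::optional` are illustrative stand-ins, not Moonlight or libplacebo APIs (the real code uses `SDL_SpinLock` and `pl_tex`).

```cpp
#include <mutex>
#include <optional>
#include <utility>

// Generic sketch of the staging/active overlay handoff used by PlVkRenderer
template <typename Tex>
struct OverlaySlot {
    std::mutex lock;            // SDL_SpinLock in the real renderer
    std::optional<Tex> staging; // written by the overlay update thread
    std::optional<Tex> active;  // consumed by the render thread

    // Overlay thread: publish a fully-uploaded texture. A staging texture that
    // was never picked up is released here, under the lock.
    void publish(Tex uploaded) {
        std::lock_guard<std::mutex> guard(lock);
        staging = std::move(uploaded);
    }

    // Render thread: adopt the pending texture, returning the retired active
    // texture so the caller can destroy it after rendering, outside the lock.
    std::optional<Tex> adopt() {
        std::lock_guard<std::mutex> guard(lock);
        if (!staging) {
            return std::nullopt;
        }
        std::optional<Tex> retired = std::move(active);
        active = std::move(staging);
        staging.reset();
        return retired;
    }
};
```

Deferring destruction of retired textures is why `renderFrame()` only queues them in `texturesToDestroy` while holding the spinlock and frees them at `UnmapExit`, keeping the critical section minimal.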