diff --git a/app/app.pro b/app/app.pro
index afadc1ca..3e3710aa 100644
--- a/app/app.pro
+++ b/app/app.pro
@@ -88,6 +88,11 @@ unix:!macx {
         PKGCONFIG += libdrm
         CONFIG += libdrm
     }
+
+    packagesExist(ffnvcodec) {
+        PKGCONFIG += ffnvcodec
+        CONFIG += cuda
+    }
 }
 
 packagesExist(wayland-client) {
@@ -196,7 +201,6 @@ ffmpeg {
     SOURCES += \
         streaming/video/ffmpeg.cpp \
         streaming/video/ffmpeg-renderers/sdlvid.cpp \
-        streaming/video/ffmpeg-renderers/cuda.cpp \
         streaming/video/ffmpeg-renderers/pacer/pacer.cpp \
         streaming/video/ffmpeg-renderers/pacer/nullthreadedvsyncsource.cpp
 
@@ -204,7 +208,6 @@ ffmpeg {
         streaming/video/ffmpeg.h \
         streaming/video/ffmpeg-renderers/renderer.h \
         streaming/video/ffmpeg-renderers/sdlvid.h \
-        streaming/video/ffmpeg-renderers/cuda.h \
         streaming/video/ffmpeg-renderers/pacer/pacer.h \
         streaming/video/ffmpeg-renderers/pacer/nullthreadedvsyncsource.h
 }
@@ -261,6 +264,13 @@ libdrm {
         LIBS += -ldl
     }
 }
+cuda {
+    message(CUDA support enabled)
+
+    DEFINES += HAVE_CUDA
+    SOURCES += streaming/video/ffmpeg-renderers/cuda.cpp
+    HEADERS += streaming/video/ffmpeg-renderers/cuda.h
+}
 
 config_EGL {
     message(EGL renderer selected)
diff --git a/app/streaming/video/ffmpeg-renderers/cuda.cpp b/app/streaming/video/ffmpeg-renderers/cuda.cpp
index 56f52d7c..c1679301 100644
--- a/app/streaming/video/ffmpeg-renderers/cuda.cpp
+++ b/app/streaming/video/ffmpeg-renderers/cuda.cpp
@@ -1,5 +1,13 @@
 #include "cuda.h"
 
+#include <SDL_opengl.h>
+
+#include <ffnvcodec/dynlink_loader.h>
+
+extern "C" {
+    #include <libavutil/hwcontext_cuda.h>
+}
+
 CUDARenderer::CUDARenderer()
     : m_HwContext(nullptr)
 {
@@ -55,3 +63,96 @@ bool CUDARenderer::isDirectRenderingSupported()
     return false;
 }
 
+bool CUDARenderer::copyCudaFrameToBoundTexture(AVFrame* frame)
+{
+    static CudaFunctions* funcs;
+    CUresult err;
+    AVCUDADeviceContext* devCtx = (AVCUDADeviceContext*)(((AVHWFramesContext*)frame->hw_frames_ctx->data)->device_ctx->hwctx);
+    bool ret = false;
+
+    if (!funcs) {
+        // One-time init of CUDA library
+        cuda_load_functions(&funcs, nullptr);
+        if (!funcs) {
+            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Failed to initialize CUDA library");
+            return false;
+        }
+    }
+
+    SDL_assert(frame->format == AV_PIX_FMT_CUDA);
+
+    // Push FFmpeg's CUDA context to use for our CUDA operations
+    err = funcs->cuCtxPushCurrent(devCtx->cuda_ctx);
+    if (err != CUDA_SUCCESS) {
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuCtxPushCurrent() failed: %d", err);
+        return false;
+    }
+
+    // NV12 has 2 planes
+    for (int i = 0; i < 2; i++) {
+        CUgraphicsResource cudaResource;
+        CUarray cudaArray;
+        GLint tex;
+
+        // Get the ID of this plane's texture
+        glActiveTexture(GL_TEXTURE0 + i);
+        glGetIntegerv(GL_TEXTURE_BINDING_2D, &tex);
+
+        // Register it with CUDA
+        err = funcs->cuGraphicsGLRegisterImage(&cudaResource, tex, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD);
+        if (err != CUDA_SUCCESS) {
+            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsGLRegisterImage() failed: %d", err);
+            goto Exit;
+        }
+
+        // Map it to allow us to use it as a copy destination
+        err = funcs->cuGraphicsMapResources(1, &cudaResource, devCtx->stream);
+        if (err != CUDA_SUCCESS) {
+            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsMapResources() failed: %d", err);
+            funcs->cuGraphicsUnregisterResource(cudaResource);
+            goto Exit;
+        }
+
+        // Get a pointer to the mapped array
+        err = funcs->cuGraphicsSubResourceGetMappedArray(&cudaArray, cudaResource, 0, 0);
+        if (err != CUDA_SUCCESS) {
+            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsSubResourceGetMappedArray() failed: %d", err);
+            funcs->cuGraphicsUnmapResources(1, &cudaResource, devCtx->stream);
+            funcs->cuGraphicsUnregisterResource(cudaResource);
+            goto Exit;
+        }
+
+        CUDA_MEMCPY2D cu2d = {
+            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
+            .srcDevice = (CUdeviceptr)frame->data[i],
+            .srcPitch = (size_t)frame->linesize[i],
+            .dstMemoryType = CU_MEMORYTYPE_ARRAY,
+            .dstArray = cudaArray,
+            .dstPitch = (size_t)frame->width >> i,
+            .WidthInBytes = (size_t)frame->width,
+            .Height = (size_t)frame->height >> i
+        };
+
+        // Do the copy
+        err = funcs->cuMemcpy2D(&cu2d);
+        if (err != CUDA_SUCCESS) {
+            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuMemcpy2D() failed: %d", err);
+            funcs->cuGraphicsUnmapResources(1, &cudaResource, devCtx->stream);
+            funcs->cuGraphicsUnregisterResource(cudaResource);
+            goto Exit;
+        }
+
+        funcs->cuGraphicsUnmapResources(1, &cudaResource, devCtx->stream);
+        funcs->cuGraphicsUnregisterResource(cudaResource);
+    }
+
+    ret = true;
+
+Exit:
+    {
+        CUcontext dummy;
+        funcs->cuCtxPopCurrent(&dummy);
+    }
+    return ret;
+}
+
diff --git a/app/streaming/video/ffmpeg-renderers/cuda.h b/app/streaming/video/ffmpeg-renderers/cuda.h
index 401b76f7..7a0f5af7 100644
--- a/app/streaming/video/ffmpeg-renderers/cuda.h
+++ b/app/streaming/video/ffmpeg-renderers/cuda.h
@@ -12,6 +12,9 @@ public:
     virtual bool needsTestFrame() override;
     virtual bool isDirectRenderingSupported() override;
 
+    // Helper function used by SdlRenderer to read our CUDA frame
+    static bool copyCudaFrameToBoundTexture(AVFrame* frame);
+
 private:
     AVBufferRef* m_HwContext;
 };
diff --git a/app/streaming/video/ffmpeg-renderers/sdlvid.cpp b/app/streaming/video/ffmpeg-renderers/sdlvid.cpp
index 19c68f4a..3048d8d2 100644
--- a/app/streaming/video/ffmpeg-renderers/sdlvid.cpp
+++ b/app/streaming/video/ffmpeg-renderers/sdlvid.cpp
@@ -5,6 +5,10 @@
 
 #include
 
+#ifdef HAVE_CUDA
+#include "cuda.h"
+#endif
+
 SdlRenderer::SdlRenderer()
     : m_Renderer(nullptr),
       m_Texture(nullptr),
@@ -203,7 +207,7 @@ void SdlRenderer::renderFrame(AVFrame* frame)
         return;
     }
 
-    if (frame->hw_frames_ctx != nullptr) {
+    if (frame->hw_frames_ctx != nullptr && frame->format != AV_PIX_FMT_CUDA) {
         // If we are acting as the frontend for a hardware
         // accelerated decoder, we'll need to read the frame
         // back to render it.
@@ -254,6 +258,7 @@ void SdlRenderer::renderFrame(AVFrame* frame)
     case AV_PIX_FMT_YUV420P:
         sdlFormat = SDL_PIXELFORMAT_YV12;
         break;
+    case AV_PIX_FMT_CUDA:
     case AV_PIX_FMT_NV12:
         sdlFormat = SDL_PIXELFORMAT_NV12;
         break;
@@ -290,7 +295,18 @@ void SdlRenderer::renderFrame(AVFrame* frame)
         }
     }
-    if (frame->format == AV_PIX_FMT_YUV420P) {
+    if (frame->format == AV_PIX_FMT_CUDA) {
+#ifdef HAVE_CUDA
+        SDL_GL_BindTexture(m_Texture, nullptr, nullptr);
+        CUDARenderer::copyCudaFrameToBoundTexture(frame);
+        SDL_GL_UnbindTexture(m_Texture);
+#else
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                     "Got CUDA frame, but not built with CUDA support!");
+        goto Exit;
+#endif
+    }
+    else if (frame->format == AV_PIX_FMT_YUV420P) {
         SDL_UpdateYUVTexture(m_Texture,
                              nullptr,
                              frame->data[0],
                              frame->linesize[0],
diff --git a/app/streaming/video/ffmpeg.cpp b/app/streaming/video/ffmpeg.cpp
index 8c3205ff..ed8717e6 100644
--- a/app/streaming/video/ffmpeg.cpp
+++ b/app/streaming/video/ffmpeg.cpp
@@ -6,7 +6,6 @@
 #include
 
 #include "ffmpeg-renderers/sdlvid.h"
-#include "ffmpeg-renderers/cuda.h"
 
 #ifdef Q_OS_WIN32
 #include "ffmpeg-renderers/dxva2.h"
@@ -36,6 +35,10 @@
 #include "ffmpeg-renderers/eglvid.h"
 #endif
 
+#ifdef HAVE_CUDA
+#include "ffmpeg-renderers/cuda.h"
+#endif
+
 // This is gross but it allows us to use sizeof()
 #include "ffmpeg_videosamples.cpp"
 
@@ -567,11 +570,11 @@ IFFmpegRenderer* FFmpegVideoDecoder::createHwAccelRenderer(const AVCodecHWConfig
     // Second pass for our second-tier hwaccel implementations
     else if (pass == 1) {
        switch (hwDecodeCfg->device_type) {
+#ifdef HAVE_CUDA
        case AV_HWDEVICE_TYPE_CUDA:
-            // CUDA should only be used if all other options fail, since it requires
-            // read-back of frames. This should only be used for the NVIDIA+Wayland case
-            // with VDPAU covering the NVIDIA+X11 scenario.
+            // CUDA should only be used to cover the NVIDIA+Wayland case
            return new CUDARenderer();
+#endif
        default:
            return nullptr;
        }
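
Note on the copy geometry in copyCudaFrameToBoundTexture() above: NV12 stores a full-resolution Y plane followed by a half-resolution interleaved UV plane, so both planes span frame->width bytes per row (the UV plane packs width/2 chroma pairs at 2 bytes each), while only the chroma plane halves its row count and texture width. The standalone sketch below is illustrative only and not part of the patch; the frame dimensions are made up, and it simply prints the extents the cuMemcpy2D() loop would use.

#include <cstdio>
#include <cstddef>

int main()
{
    // Hypothetical frame dimensions, for illustration only
    const int width = 1920;
    const int height = 1080;

    // Plane 0 = Y (1 byte per pixel), plane 1 = interleaved UV (2 bytes per half-res sample pair)
    for (int i = 0; i < 2; i++) {
        size_t widthInBytes = (size_t)width;   // same byte width for both planes
        size_t rows = (size_t)height >> i;     // chroma plane has half the rows
        size_t texWidth = (size_t)width >> i;  // matches the GL texture width in texels

        printf("Plane %d: %zu bytes/row x %zu rows (texture %zu texels wide)\n",
               i, widthInBytes, rows, texWidth);
    }
    return 0;
}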