diff --git a/app/app.pro b/app/app.pro
index afadc1ca..3e3710aa 100644
--- a/app/app.pro
+++ b/app/app.pro
@@ -88,6 +88,11 @@ unix:!macx {
         PKGCONFIG += libdrm
         CONFIG += libdrm
     }
+
+    packagesExist(ffnvcodec) {
+        PKGCONFIG += ffnvcodec
+        CONFIG += cuda
+    }
 }
 
 packagesExist(wayland-client) {
@@ -196,7 +201,6 @@ ffmpeg {
     SOURCES += \
         streaming/video/ffmpeg.cpp \
         streaming/video/ffmpeg-renderers/sdlvid.cpp \
-        streaming/video/ffmpeg-renderers/cuda.cpp \
         streaming/video/ffmpeg-renderers/pacer/pacer.cpp \
         streaming/video/ffmpeg-renderers/pacer/nullthreadedvsyncsource.cpp
 
@@ -204,7 +208,6 @@ ffmpeg {
         streaming/video/ffmpeg.h \
         streaming/video/ffmpeg-renderers/renderer.h \
         streaming/video/ffmpeg-renderers/sdlvid.h \
-        streaming/video/ffmpeg-renderers/cuda.h \
         streaming/video/ffmpeg-renderers/pacer/pacer.h \
         streaming/video/ffmpeg-renderers/pacer/nullthreadedvsyncsource.h
 }
@@ -261,6 +264,13 @@ libdrm {
         LIBS += -ldl
     }
 }
+cuda {
+    message(CUDA support enabled)
+
+    DEFINES += HAVE_CUDA
+    SOURCES += streaming/video/ffmpeg-renderers/cuda.cpp
+    HEADERS += streaming/video/ffmpeg-renderers/cuda.h
+}
 
 config_EGL {
     message(EGL renderer selected)
diff --git a/app/streaming/video/ffmpeg-renderers/cuda.cpp b/app/streaming/video/ffmpeg-renderers/cuda.cpp
index 56f52d7c..c1679301 100644
--- a/app/streaming/video/ffmpeg-renderers/cuda.cpp
+++ b/app/streaming/video/ffmpeg-renderers/cuda.cpp
@@ -1,5 +1,13 @@
 #include "cuda.h"
 
+#include <SDL_opengl.h>
+
+#include <ffnvcodec/dynlink_loader.h>
+
+extern "C" {
+    #include <libavutil/hwcontext_cuda.h>
+}
+
 CUDARenderer::CUDARenderer()
     : m_HwContext(nullptr)
 {
@@ -55,3 +63,96 @@ bool CUDARenderer::isDirectRenderingSupported()
     return false;
 }
 
+bool CUDARenderer::copyCudaFrameToBoundTexture(AVFrame* frame)
+{
+    static CudaFunctions* funcs;
+    CUresult err;
+    AVCUDADeviceContext* devCtx = (AVCUDADeviceContext*)(((AVHWFramesContext*)frame->hw_frames_ctx->data)->device_ctx->hwctx);
+    bool ret = false;
+
+    if (!funcs) {
+        // One-time init of CUDA library
+        cuda_load_functions(&funcs, nullptr);
+        if (!funcs) {
+            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Failed to initialize CUDA library");
+            return false;
+        }
+    }
+
+    SDL_assert(frame->format == AV_PIX_FMT_CUDA);
+
+    // Push FFmpeg's CUDA context to use for our CUDA operations
+    err = funcs->cuCtxPushCurrent(devCtx->cuda_ctx);
+    if (err != CUDA_SUCCESS) {
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuCtxPushCurrent() failed: %d", err);
+        return false;
+    }
+
+    // NV12 has 2 planes
+    for (int i = 0; i < 2; i++) {
+        CUgraphicsResource cudaResource;
+        CUarray cudaArray;
+        GLint tex;
+
+        // Get the ID of this plane's texture
+        glActiveTexture(GL_TEXTURE0 + i);
+        glGetIntegerv(GL_TEXTURE_BINDING_2D, &tex);
+
+        // Register it with CUDA
+        err = funcs->cuGraphicsGLRegisterImage(&cudaResource, tex, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD);
+        if (err != CUDA_SUCCESS) {
+            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsGLRegisterImage() failed: %d", err);
+            goto Exit;
+        }
+
+        // Map it to allow us to use it as a copy destination
+        err = funcs->cuGraphicsMapResources(1, &cudaResource, devCtx->stream);
+        if (err != CUDA_SUCCESS) {
+            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsMapResources() failed: %d", err);
+            funcs->cuGraphicsUnregisterResource(cudaResource);
+            goto Exit;
+        }
+
+        // Get a pointer to the mapped array
+        err = funcs->cuGraphicsSubResourceGetMappedArray(&cudaArray, cudaResource, 0, 0);
+        if (err != CUDA_SUCCESS) {
+            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsSubResourceGetMappedArray() failed: %d", err);
+            funcs->cuGraphicsUnmapResources(1, &cudaResource, devCtx->stream);
+            funcs->cuGraphicsUnregisterResource(cudaResource);
+            goto Exit;
+        }
+
+        CUDA_MEMCPY2D cu2d = {
+            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
+            .srcDevice = (CUdeviceptr)frame->data[i],
+            .srcPitch = (size_t)frame->linesize[i],
+            .dstMemoryType = CU_MEMORYTYPE_ARRAY,
+            .dstArray = cudaArray,
+            .dstPitch = (size_t)frame->width >> i,
+            .WidthInBytes = (size_t)frame->width,
+            .Height = (size_t)frame->height >> i
+        };
+
+        // Do the copy
+        err = funcs->cuMemcpy2D(&cu2d);
+        if (err != CUDA_SUCCESS) {
+            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuMemcpy2D() failed: %d", err);
+            funcs->cuGraphicsUnmapResources(1, &cudaResource, devCtx->stream);
+            funcs->cuGraphicsUnregisterResource(cudaResource);
+            goto Exit;
+        }
+
+        funcs->cuGraphicsUnmapResources(1, &cudaResource, devCtx->stream);
+        funcs->cuGraphicsUnregisterResource(cudaResource);
+    }
+
+    ret = true;
+
+Exit:
+    {
+        CUcontext dummy;
+        funcs->cuCtxPopCurrent(&dummy);
+    }
+    return ret;
+}
+
diff --git a/app/streaming/video/ffmpeg-renderers/cuda.h b/app/streaming/video/ffmpeg-renderers/cuda.h
index 401b76f7..7a0f5af7 100644
--- a/app/streaming/video/ffmpeg-renderers/cuda.h
+++ b/app/streaming/video/ffmpeg-renderers/cuda.h
@@ -12,6 +12,9 @@ public:
     virtual bool needsTestFrame() override;
     virtual bool isDirectRenderingSupported() override;
 
+    // Helper function used by SdlRenderer to read our CUDA frame
+    static bool copyCudaFrameToBoundTexture(AVFrame* frame);
+
 private:
     AVBufferRef* m_HwContext;
 };
diff --git a/app/streaming/video/ffmpeg-renderers/sdlvid.cpp b/app/streaming/video/ffmpeg-renderers/sdlvid.cpp
index 19c68f4a..3048d8d2 100644
--- a/app/streaming/video/ffmpeg-renderers/sdlvid.cpp
+++ b/app/streaming/video/ffmpeg-renderers/sdlvid.cpp
@@ -5,6 +5,10 @@
 
 #include
 
+#ifdef HAVE_CUDA
+#include "cuda.h"
+#endif
+
 SdlRenderer::SdlRenderer()
     : m_Renderer(nullptr),
       m_Texture(nullptr),
@@ -203,7 +207,7 @@ void SdlRenderer::renderFrame(AVFrame* frame)
         return;
     }
 
-    if (frame->hw_frames_ctx != nullptr) {
+    if (frame->hw_frames_ctx != nullptr && frame->format != AV_PIX_FMT_CUDA) {
         // If we are acting as the frontend for a hardware
         // accelerated decoder, we'll need to read the frame
         // back to render it.
@@ -254,6 +258,7 @@ void SdlRenderer::renderFrame(AVFrame* frame)
     case AV_PIX_FMT_YUV420P:
         sdlFormat = SDL_PIXELFORMAT_YV12;
         break;
+    case AV_PIX_FMT_CUDA:
     case AV_PIX_FMT_NV12:
         sdlFormat = SDL_PIXELFORMAT_NV12;
         break;
@@ -290,7 +295,18 @@ void SdlRenderer::renderFrame(AVFrame* frame)
         }
     }
-    if (frame->format == AV_PIX_FMT_YUV420P) {
+    if (frame->format == AV_PIX_FMT_CUDA) {
+#ifdef HAVE_CUDA
+        SDL_GL_BindTexture(m_Texture, nullptr, nullptr);
+        CUDARenderer::copyCudaFrameToBoundTexture(frame);
+        SDL_GL_UnbindTexture(m_Texture);
+#else
+        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                     "Got CUDA frame, but not built with CUDA support!");
+        goto Exit;
+#endif
+    }
+    else if (frame->format == AV_PIX_FMT_YUV420P) {
         SDL_UpdateYUVTexture(m_Texture,
                              nullptr,
                              frame->data[0],
                              frame->linesize[0],
diff --git a/app/streaming/video/ffmpeg.cpp b/app/streaming/video/ffmpeg.cpp
index 8c3205ff..ed8717e6 100644
--- a/app/streaming/video/ffmpeg.cpp
+++ b/app/streaming/video/ffmpeg.cpp
@@ -6,7 +6,6 @@
 #include
 
 #include "ffmpeg-renderers/sdlvid.h"
-#include "ffmpeg-renderers/cuda.h"
 
 #ifdef Q_OS_WIN32
 #include "ffmpeg-renderers/dxva2.h"
@@ -36,6 +35,10 @@
 #include "ffmpeg-renderers/eglvid.h"
 #endif
 
+#ifdef HAVE_CUDA
+#include "ffmpeg-renderers/cuda.h"
+#endif
+
 // This is gross but it allows us to use sizeof()
 #include "ffmpeg_videosamples.cpp"
 
@@ -567,11 +570,11 @@ IFFmpegRenderer* FFmpegVideoDecoder::createHwAccelRenderer(const AVCodecHWConfig
     // Second pass for our second-tier hwaccel implementations
     else if (pass == 1) {
        switch (hwDecodeCfg->device_type) {
+#ifdef HAVE_CUDA
        case AV_HWDEVICE_TYPE_CUDA:
-            // CUDA should only be used if all other options fail, since it requires
-            // read-back of frames. This should only be used for the NVIDIA+Wayland case
-            // with VDPAU covering the NVIDIA+X11 scenario.
+            // CUDA should only be used to cover the NVIDIA+Wayland case
            return new CUDARenderer();
+#endif
        default:
            return nullptr;
        }
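
Note on the copy geometry in copyCudaFrameToBoundTexture() above: NV12 stores a full-resolution Y plane followed by a half-resolution interleaved UV plane, so both planes span frame->width bytes per row (the UV plane packs width/2 chroma pairs at 2 bytes each), while only the chroma plane halves its row count and texture width. The standalone sketch below is illustrative only and not part of the patch; the frame dimensions are made up, and it simply prints the extents the cuMemcpy2D() loop would use.

#include <cstdio>
#include <cstddef>

int main()
{
    // Hypothetical frame dimensions, for illustration only
    const int width = 1920;
    const int height = 1080;

    // Plane 0 = Y (1 byte per pixel), plane 1 = interleaved UV (2 bytes per half-res sample pair)
    for (int i = 0; i < 2; i++) {
        size_t widthInBytes = (size_t)width;   // same byte width for both planes
        size_t rows = (size_t)height >> i;     // chroma plane has half the rows
        size_t texWidth = (size_t)width >> i;  // matches the GL texture width in texels

        printf("Plane %d: %zu bytes/row x %zu rows (texture %zu texels wide)\n",
               i, widthInBytes, rows, texWidth);
    }
    return 0;
}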