#include "cuda.h"

// NOTE(review): the three angle-bracket header names below were missing from
// this copy of the file (bare "#include" directives). They have been restored
// from the symbols this file uses (CudaFunctions / cuda_load_functions,
// GLint / glActiveTexture, AVCUDADeviceContext) - confirm against the build.
#include <ffnvcodec/dynlink_loader.h>

#include <SDL_opengl.h>

extern "C" {
    #include <libavutil/hwcontext_cuda.h>
}

// Constructs a renderer with no hardware device context; initialize() creates it.
CUDARenderer::CUDARenderer()
    : m_HwContext(nullptr)
{

}

// Drops our reference to the FFmpeg CUDA device context, if one was created.
CUDARenderer::~CUDARenderer()
{
    if (m_HwContext != nullptr) {
        av_buffer_unref(&m_HwContext);
    }
}

// Creates the FFmpeg CUDA hardware device context.
// The decoder parameters are unused.
// Returns false (after logging the error code) if the device context could
// not be created.
bool CUDARenderer::initialize(PDECODER_PARAMETERS)
{
    int err;

    err = av_hwdevice_ctx_create(&m_HwContext, AV_HWDEVICE_TYPE_CUDA, nullptr, nullptr, 0);
    if (err != 0) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "av_hwdevice_ctx_create(CUDA) failed: %d",
                     err);
        return false;
    }

    return true;
}

// Attaches a new reference to our CUDA device context to the codec context.
// The options dictionary is unused.
// Returns false if no device context exists yet or if the reference could not
// be allocated; in both cases context->hw_device_ctx is left without a usable
// device context.
bool CUDARenderer::prepareDecoderContext(AVCodecContext* context, AVDictionary**)
{
    // av_buffer_ref() dereferences its argument, so it must not be handed a
    // null context (e.g. if initialize() failed or was never called).
    if (m_HwContext == nullptr) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "CUDA device context has not been initialized");
        return false;
    }

    context->hw_device_ctx = av_buffer_ref(m_HwContext);
    if (context->hw_device_ctx == nullptr) {
        // av_buffer_ref() returns NULL when the reference cannot be allocated
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "av_buffer_ref() failed");
        return false;
    }

    SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
                "Using CUDA accelerated decoder");

    return true;
}

// Direct rendering is not implemented by this renderer; reaching this
// function is a programming error.
void CUDARenderer::renderFrame(AVFrame*)
{
    // We only support indirect rendering
    SDL_assert(false);
}

// This renderer always requests a test frame.
bool CUDARenderer::needsTestFrame()
{
    return true;
}

// This renderer never renders directly.
bool CUDARenderer::isDirectRenderingSupported()
{
    // We only support rendering via SDL read-back
    return false;
}

// Copies one plane of a CUDA frame into the GL_TEXTURE_2D texture currently
// bound on texture unit GL_TEXTURE0 + plane.
//
// The caller must already have made devCtx->cuda_ctx current. The texture is
// registered with CUDA, mapped, written, unmapped and unregistered again on
// every call, and the active texture unit is left at GL_TEXTURE0 + plane.
//
// Returns true only if the copy succeeded and the texture was unmapped again.
static bool copyCudaPlaneToBoundTexture(CudaFunctions* funcs,
                                        AVCUDADeviceContext* devCtx,
                                        AVFrame* frame,
                                        int plane)
{
    CUgraphicsResource cudaResource;
    CUarray cudaArray;
    CUresult err;
    GLint tex = 0;
    bool copied = false;

    // Get the ID of this plane's texture
    glActiveTexture(GL_TEXTURE0 + plane);
    glGetIntegerv(GL_TEXTURE_BINDING_2D, &tex);

    // Register it with CUDA
    err = funcs->cuGraphicsGLRegisterImage(&cudaResource, tex, GL_TEXTURE_2D,
                                           CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD);
    if (err != CUDA_SUCCESS) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "cuGraphicsGLRegisterImage() failed: %d",
                     err);
        return false;
    }

    // Map it to allow us to use it as a copy destination
    err = funcs->cuGraphicsMapResources(1, &cudaResource, devCtx->stream);
    if (err != CUDA_SUCCESS) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "cuGraphicsMapResources() failed: %d",
                     err);
    }
    else {
        // Get a pointer to the mapped array
        err = funcs->cuGraphicsSubResourceGetMappedArray(&cudaArray, cudaResource, 0, 0);
        if (err != CUDA_SUCCESS) {
            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                         "cuGraphicsSubResourceGetMappedArray() failed: %d",
                         err);
        }
        else {
            // Both planes are copied with a row width of frame->width bytes;
            // the second plane has half the rows. No destination pitch is
            // given because it is ignored when the destination is a CUDA
            // array. Members not named here are zero-initialized.
            CUDA_MEMCPY2D cu2d = {
                .srcMemoryType = CU_MEMORYTYPE_DEVICE,
                .srcDevice = (CUdeviceptr)frame->data[plane],
                .srcPitch = (size_t)frame->linesize[plane],
                .dstMemoryType = CU_MEMORYTYPE_ARRAY,
                .dstArray = cudaArray,
                .WidthInBytes = (size_t)frame->width,
                .Height = (size_t)frame->height >> plane
            };

            // Do the copy
            err = funcs->cuMemcpy2D(&cu2d);
            if (err != CUDA_SUCCESS) {
                SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                             "cuMemcpy2D() failed: %d",
                             err);
            }
            else {
                copied = true;
            }
        }

        // Unmapping hands the texture back to OpenGL. If that fails, the
        // texture contents cannot be relied upon, so report the copy as failed.
        err = funcs->cuGraphicsUnmapResources(1, &cudaResource, devCtx->stream);
        if (err != CUDA_SUCCESS) {
            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                         "cuGraphicsUnmapResources() failed: %d",
                         err);
            copied = false;
        }
    }

    // Always drop the registration, whatever happened above
    err = funcs->cuGraphicsUnregisterResource(cudaResource);
    if (err != CUDA_SUCCESS) {
        SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
                    "cuGraphicsUnregisterResource() failed: %d",
                    err);
    }

    return copied;
}

// Copies both planes of a decoded CUDA (NV12) frame into the GL_TEXTURE_2D
// textures currently bound on texture units GL_TEXTURE0 and GL_TEXTURE1.
//
// FFmpeg's CUDA context is pushed for the duration of the copy and popped
// again before returning. Copying stops at the first plane that fails.
//
// Returns true if both planes were copied; false (after logging) if the frame
// is not a usable CUDA frame, the CUDA library could not be loaded, or any
// CUDA call failed.
bool CUDARenderer::copyCudaFrameToBoundTexture(AVFrame* frame)
{
    // NOTE(review): this lazy one-time initialization is not synchronized and
    // the loaded function table is never freed - presumably this is only ever
    // called from a single rendering thread; confirm against callers.
    static CudaFunctions* funcs;
    CUresult err;
    bool ret = true;

    // Validate the frame before touching hw_frames_ctx. SDL_assert() may be
    // compiled out, so it cannot be the only guard in front of the
    // dereferences below.
    SDL_assert(frame != nullptr && frame->format == AV_PIX_FMT_CUDA);
    if (frame == nullptr || frame->format != AV_PIX_FMT_CUDA || frame->hw_frames_ctx == nullptr) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "Frame is not a valid CUDA hardware frame");
        return false;
    }

    AVCUDADeviceContext* devCtx = (AVCUDADeviceContext*)(((AVHWFramesContext*)frame->hw_frames_ctx->data)->device_ctx->hwctx);

    if (!funcs) {
        // One-time init of CUDA library
        cuda_load_functions(&funcs, nullptr);
        if (!funcs) {
            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                         "Failed to initialize CUDA library");
            return false;
        }
    }

    // Push FFmpeg's CUDA context to use for our CUDA operations
    err = funcs->cuCtxPushCurrent(devCtx->cuda_ctx);
    if (err != CUDA_SUCCESS) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "cuCtxPushCurrent() failed: %d",
                     err);
        return false;
    }

    // NV12 has 2 planes
    for (int i = 0; i < 2; i++) {
        if (!copyCudaPlaneToBoundTexture(funcs, devCtx, frame, i)) {
            ret = false;
            break;
        }
    }

    // Restore whichever CUDA context was current before we pushed ours
    {
        CUcontext dummy;
        err = funcs->cuCtxPopCurrent(&dummy);
        if (err != CUDA_SUCCESS) {
            SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
                        "cuCtxPopCurrent() failed: %d",
                        err);
        }
    }

    return ret;
}