Implement support for HDR with software decoding on macOS

This commit is contained in:
Cameron Gutman 2024-08-22 00:05:26 -05:00
parent d2b3bc962f
commit df814fef4a
4 changed files with 259 additions and 60 deletions

View File

@ -10,6 +10,7 @@ struct CscParams
{
float3 matrix[3];
float3 offsets;
float bitnessScaleFactor;
};
constexpr sampler s(coord::normalized, address::clamp_to_edge, filter::linear);
@ -24,7 +25,26 @@ fragment float4 ps_draw_biplanar(Vertex v [[ stage_in ]],
texture2d<float> luminancePlane [[ texture(0) ]],
texture2d<float> chrominancePlane [[ texture(1) ]])
{
float3 yuv = float3(luminancePlane.sample(s, v.texCoords).r, chrominancePlane.sample(s, v.texCoords).rg);
float3 yuv = float3(luminancePlane.sample(s, v.texCoords).r * cscParams.bitnessScaleFactor,
chrominancePlane.sample(s, v.texCoords).rg * cscParams.bitnessScaleFactor);
yuv -= cscParams.offsets;
float3 rgb;
rgb.r = dot(yuv, cscParams.matrix[0]);
rgb.g = dot(yuv, cscParams.matrix[1]);
rgb.b = dot(yuv, cscParams.matrix[2]);
return float4(rgb, 1.0f);
}
fragment float4 ps_draw_triplanar(Vertex v [[ stage_in ]],
constant CscParams &cscParams [[ buffer(0) ]],
texture2d<float> luminancePlane [[ texture(0) ]],
texture2d<float> chrominancePlaneU [[ texture(1) ]],
texture2d<float> chrominancePlaneV [[ texture(2) ]])
{
float3 yuv = float3(luminancePlane.sample(s, v.texCoords).r * cscParams.bitnessScaleFactor,
chrominancePlaneU.sample(s, v.texCoords).r * cscParams.bitnessScaleFactor,
chrominancePlaneV.sample(s, v.texCoords).r * cscParams.bitnessScaleFactor);
yuv -= cscParams.offsets;
float3 rgb;

View File

@ -16,7 +16,7 @@ public:
class VTMetalRendererFactory {
public:
static
IFFmpegRenderer* createRenderer();
IFFmpegRenderer* createRenderer(bool hwAccel);
};
class VTRendererFactory {

View File

@ -18,12 +18,22 @@
#import <Metal/Metal.h>
#import <MetalKit/MetalKit.h>
extern "C" {
#include <libavutil/pixdesc.h>
}
// Host-side colorspace conversion (YUV -> RGB) constants. One of the
// k_CscParams_* presets is copied into ParamBuffer and uploaded for the
// video fragment shader.
// NOTE(review): the field order presumably has to match the shader-side
// CscParams struct, since the bytes are uploaded verbatim - confirm before
// reordering or inserting fields.
struct CscParams
{
// Three vectors; the fragment shader dots each with the offset-adjusted
// YUV sample to produce R, G and B respectively.
vector_float3 matrix[3];
// Subtracted from the YUV sample before the matrix is applied.
vector_float3 offsets;
};
// CPU-side image of the parameter buffer handed to the video fragment
// shader: updateColorSpaceForFrame() fills one of these in and uploads its
// raw bytes with newBufferWithBytes:.
struct ParamBuffer
{
// Colorspace conversion matrix and offsets for the current frame.
CscParams cscParams;
// Multiplier the shader applies to every sampled plane value; set from
// getBitnessScaleFactor() for the frame being rendered.
float bitnessScaleFactor;
};
static const CscParams k_CscParams_Bt601Lim = {
// CSC Matrix
{
@ -97,11 +107,14 @@ struct Vertex
vector_float2 texCoord;
};
#define MAX_VIDEO_PLANES 3
class VTMetalRenderer : public VTBaseRenderer
{
public:
VTMetalRenderer()
: m_Window(nullptr),
VTMetalRenderer(bool hwAccel)
: m_HwAccel(hwAccel),
m_Window(nullptr),
m_HwContext(nullptr),
m_MetalLayer(nullptr),
m_TextureCache(nullptr),
@ -114,6 +127,7 @@ public:
m_ShaderLibrary(nullptr),
m_CommandQueue(nullptr),
m_NextDrawable(nullptr),
m_SwMappingTextures{},
m_MetalView(nullptr),
m_LastColorSpace(-1),
m_LastFullRange(false),
@ -159,6 +173,12 @@ public:
}
}
for (int i = 0; i < MAX_VIDEO_PLANES; i++) {
if (m_SwMappingTextures[i] != nullptr) {
[m_SwMappingTextures[i] release];
}
}
if (m_OverlayPipelineState != nullptr) {
[m_OverlayPipelineState release];
}
@ -271,13 +291,43 @@ public:
return true;
}
// Returns how many data planes make up the given frame.
int getFramePlaneCount(AVFrame* frame)
{
    // Software frames: the plane count follows from the pixel format alone
    if (frame->format != AV_PIX_FMT_VIDEOTOOLBOX) {
        return av_pix_fmt_count_planes((AVPixelFormat)frame->format);
    }

    // VideoToolbox frames: data[3] holds the CVPixelBuffer, so ask CoreVideo
    CVPixelBufferRef pixelBuffer = (CVPixelBufferRef)frame->data[3];
    return CVPixelBufferGetPlaneCount(pixelBuffer);
}
// Returns the factor by which normalized texel values sampled from this
// frame's planes must be multiplied so that the maximum sample value maps
// to (approximately) 1.0.
//
// Software formats such as yuv420p10 store a 10-bit sample in the low bits
// of a 16-bit word, so a 16-bit unorm texture reads it 2^6 times too small.
// Formats that store the sample in the high bits (a non-zero component
// shift, e.g. P010) are already at full scale and need no correction, so
// the shift is subtracted from the count of unused bits.
//
// Always returns 1 for VideoToolbox frames or if the format is unknown.
int getBitnessScaleFactor(AVFrame* frame)
{
    if (frame->format == AV_PIX_FMT_VIDEOTOOLBOX) {
        // VideoToolbox frames never require scaling
        return 1;
    }

    const AVPixFmtDescriptor* formatDesc = av_pix_fmt_desc_get((AVPixelFormat)frame->format);
    if (!formatDesc) {
        // This shouldn't be possible but handle it anyway
        SDL_assert(formatDesc);
        return 1;
    }

    // This assumes plane 0 is exclusively the Y component
    const AVComponentDescriptor& luma = formatDesc->comp[0];
    SDL_assert(luma.step == 1 || luma.step == 2);

    // Bits above the sample's most significant bit that are always zero
    const int unusedHighBits = (luma.step * 8) - luma.depth - luma.shift;
    if (unusedHighBits <= 0) {
        // Sample already occupies the top of the word
        return 1;
    }

    // Integer shift instead of pow() keeps the result exact
    return 1 << unusedHighBits;
}
bool updateColorSpaceForFrame(AVFrame* frame)
{
int colorspace = getFrameColorspace(frame);
bool fullRange = isFrameFullRange(frame);
if (colorspace != m_LastColorSpace || fullRange != m_LastFullRange) {
CGColorSpaceRef newColorSpace;
void* paramBuffer;
ParamBuffer paramBuffer;
// Free any unpresented drawable since we're changing pixel formats
discardNextDrawable();
@ -286,7 +336,7 @@ public:
case COLORSPACE_REC_709:
m_MetalLayer.colorspace = newColorSpace = CGColorSpaceCreateWithName(kCGColorSpaceITUR_709);
m_MetalLayer.pixelFormat = MTLPixelFormatBGRA8Unorm;
paramBuffer = (void*)(fullRange ? &k_CscParams_Bt709Full : &k_CscParams_Bt709Lim);
paramBuffer.cscParams = (fullRange ? k_CscParams_Bt709Full : k_CscParams_Bt709Lim);
break;
case COLORSPACE_REC_2020:
// https://developer.apple.com/documentation/metal/hdr_content/using_color_spaces_to_display_hdr_content
@ -298,32 +348,37 @@ public:
m_MetalLayer.colorspace = newColorSpace = CGColorSpaceCreateWithName(kCGColorSpaceITUR_2020);
m_MetalLayer.pixelFormat = MTLPixelFormatBGRA8Unorm;
}
paramBuffer = (void*)(fullRange ? &k_CscParams_Bt2020Full : &k_CscParams_Bt2020Lim);
paramBuffer.cscParams = (fullRange ? k_CscParams_Bt2020Full : k_CscParams_Bt2020Lim);
break;
default:
case COLORSPACE_REC_601:
m_MetalLayer.colorspace = newColorSpace = CGColorSpaceCreateWithName(kCGColorSpaceSRGB);
m_MetalLayer.pixelFormat = MTLPixelFormatBGRA8Unorm;
paramBuffer = (void*)(fullRange ? &k_CscParams_Bt601Full : &k_CscParams_Bt601Lim);
paramBuffer.cscParams = (fullRange ? k_CscParams_Bt601Full : k_CscParams_Bt601Lim);
break;
}
paramBuffer.bitnessScaleFactor = getBitnessScaleFactor(frame);
// The CAMetalLayer retains the CGColorSpace
CGColorSpaceRelease(newColorSpace);
// Create the new colorspace parameter buffer for our fragment shader
[m_CscParamsBuffer release];
auto bufferOptions = MTLCPUCacheModeWriteCombined | MTLResourceStorageModeManaged;
m_CscParamsBuffer = [m_MetalLayer.device newBufferWithBytes:paramBuffer length:sizeof(CscParams) options:bufferOptions];
m_CscParamsBuffer = [m_MetalLayer.device newBufferWithBytes:(void*)&paramBuffer length:sizeof(paramBuffer) options:bufferOptions];
if (!m_CscParamsBuffer) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to create CSC parameters buffer");
return false;
}
int planes = getFramePlaneCount(frame);
SDL_assert(planes == 2 || planes == 3);
MTLRenderPipelineDescriptor *pipelineDesc = [[MTLRenderPipelineDescriptor new] autorelease];
pipelineDesc.vertexFunction = [[m_ShaderLibrary newFunctionWithName:@"vs_draw"] autorelease];
pipelineDesc.fragmentFunction = [[m_ShaderLibrary newFunctionWithName:@"ps_draw_biplanar"] autorelease];
pipelineDesc.fragmentFunction = [[m_ShaderLibrary newFunctionWithName:planes == 2 ? @"ps_draw_biplanar" : @"ps_draw_triplanar"] autorelease];
pipelineDesc.colorAttachments[0].pixelFormat = m_MetalLayer.pixelFormat;
[m_VideoPipelineState release];
m_VideoPipelineState = [m_MetalLayer.device newRenderPipelineStateWithDescriptor:pipelineDesc error:nullptr];
@ -359,11 +414,73 @@ public:
return true;
}
// Uploads one plane of a software-decoded frame into a cached Metal texture
// and returns that texture (owned by this renderer; do not release).
//
// frame      - software frame whose pixel data should be uploaded
// planeIndex - which plane of the frame to upload (0 = luma)
//
// Returns nil if the pixel format is unknown, the plane index is out of
// range, the plane layout cannot be represented, or texture allocation fails.
//
// The texture format is derived from how many components share the plane:
// a plane holding a single component maps to a one-channel texture, while
// an interleaved chroma plane (NV12/P010 style) maps to a two-channel
// texture so that the biplanar shader's ".rg" sample yields both chroma
// values.
id<MTLTexture> mapPlaneForSoftwareFrame(AVFrame* frame, int planeIndex)
{
    const AVPixFmtDescriptor* formatDesc = av_pix_fmt_desc_get((AVPixelFormat)frame->format);
    if (!formatDesc) {
        // This shouldn't be possible but handle it anyway
        SDL_assert(formatDesc);
        return nil;
    }

    SDL_assert(planeIndex < MAX_VIDEO_PLANES);
    if (planeIndex < 0 || planeIndex >= MAX_VIDEO_PLANES) {
        // Never index m_SwMappingTextures out of bounds in release builds
        return nil;
    }

    // Find out how many components live in this plane and their byte stride
    int componentsInPlane = 0;
    int planeStep = 0;
    for (int i = 0; i < formatDesc->nb_components; i++) {
        if (formatDesc->comp[i].plane == planeIndex) {
            componentsInPlane++;
            planeStep = formatDesc->comp[i].step;
        }
    }

    MTLPixelFormat metalFormat;
    if (componentsInPlane == 1 && planeStep == 1) {
        metalFormat = MTLPixelFormatR8Unorm;
    }
    else if (componentsInPlane == 1 && planeStep == 2) {
        metalFormat = MTLPixelFormatR16Unorm;
    }
    else if (componentsInPlane == 2 && planeStep == 2) {
        // Two interleaved 8-bit components (e.g. NV12 chroma)
        metalFormat = MTLPixelFormatRG8Unorm;
    }
    else if (componentsInPlane == 2 && planeStep == 4) {
        // Two interleaved 16-bit components (e.g. P010 chroma)
        metalFormat = MTLPixelFormatRG16Unorm;
    }
    else {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "Unhandled plane step: %d (plane: %d)",
                     planeStep,
                     planeIndex);
        SDL_assert(false);
        return nil;
    }

    // Chroma planes are subsampled according to the pixel format
    NSUInteger planeWidth = planeIndex ? AV_CEIL_RSHIFT(frame->width, formatDesc->log2_chroma_w) : frame->width;
    NSUInteger planeHeight = planeIndex ? AV_CEIL_RSHIFT(frame->height, formatDesc->log2_chroma_h) : frame->height;

    // Recreate the texture if the plane size or pixel layout changes
    id<MTLTexture> cachedTexture = m_SwMappingTextures[planeIndex];
    if (cachedTexture && (cachedTexture.width != planeWidth ||
                          cachedTexture.height != planeHeight ||
                          cachedTexture.pixelFormat != metalFormat)) {
        [m_SwMappingTextures[planeIndex] release];
        m_SwMappingTextures[planeIndex] = nil;
    }

    if (!m_SwMappingTextures[planeIndex]) {
        auto texDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:metalFormat
                                                                          width:planeWidth
                                                                         height:planeHeight
                                                                      mipmapped:NO];
        texDesc.cpuCacheMode = MTLCPUCacheModeWriteCombined;
        texDesc.storageMode = MTLStorageModeManaged;
        texDesc.usage = MTLTextureUsageShaderRead;

        m_SwMappingTextures[planeIndex] = [m_MetalLayer.device newTextureWithDescriptor:texDesc];
        if (!m_SwMappingTextures[planeIndex]) {
            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                         "Failed to allocate software frame texture");
            return nil;
        }
    }

    // Copy the plane's pixels from the AVFrame into the texture
    [m_SwMappingTextures[planeIndex] replaceRegion:MTLRegionMake2D(0, 0, planeWidth, planeHeight)
                                       mipmapLevel:0
                                         withBytes:frame->data[planeIndex]
                                       bytesPerRow:frame->linesize[planeIndex]];

    return m_SwMappingTextures[planeIndex];
}
// Caller frees frame after we return
virtual void renderFrame(AVFrame* frame) override
{ @autoreleasepool {
CVPixelBufferRef pixBuf = reinterpret_cast<CVPixelBufferRef>(frame->data[3]);
// Handle changes to the frame's colorspace from last time we rendered
if (!updateColorSpaceForFrame(frame)) {
// Trigger the main thread to recreate the decoder
@ -387,9 +504,15 @@ public:
return;
}
std::array<CVMetalTextureRef, MAX_VIDEO_PLANES> cvMetalTextures;
size_t planes = getFramePlaneCount(frame);
SDL_assert(planes <= MAX_VIDEO_PLANES);
if (frame->format == AV_PIX_FMT_VIDEOTOOLBOX) {
CVPixelBufferRef pixBuf = reinterpret_cast<CVPixelBufferRef>(frame->data[3]);
// Create Metal textures for the planes of the CVPixelBuffer
std::array<CVMetalTextureRef, 2> textures;
for (size_t i = 0; i < textures.size(); i++) {
for (size_t i = 0; i < planes; i++) {
MTLPixelFormat fmt;
switch (CVPixelBufferGetPixelFormatType(pixBuf)) {
@ -418,7 +541,7 @@ public:
CVPixelBufferGetWidthOfPlane(pixBuf, i),
CVPixelBufferGetHeightOfPlane(pixBuf, i),
i,
&textures[i]);
&cvMetalTextures[i]);
if (err != kCVReturnSuccess) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"CVMetalTextureCacheCreateTextureFromImage() failed: %d",
@ -426,6 +549,7 @@ public:
return;
}
}
}
// Prepare a render pass to render into the next drawable
auto renderPassDescriptor = [MTLRenderPassDescriptor renderPassDescriptor];
@ -438,15 +562,22 @@ public:
// Bind textures and buffers then draw the video region
[renderEncoder setRenderPipelineState:m_VideoPipelineState];
for (size_t i = 0; i < textures.size(); i++) {
[renderEncoder setFragmentTexture:CVMetalTextureGetTexture(textures[i]) atIndex:i];
if (frame->format == AV_PIX_FMT_VIDEOTOOLBOX) {
for (size_t i = 0; i < planes; i++) {
[renderEncoder setFragmentTexture:CVMetalTextureGetTexture(cvMetalTextures[i]) atIndex:i];
}
[commandBuffer addCompletedHandler:^(id<MTLCommandBuffer>) {
// Free textures after completion of rendering per CVMetalTextureCache requirements
for (const CVMetalTextureRef &tex : textures) {
CFRelease(tex);
for (size_t i = 0; i < planes; i++) {
CFRelease(cvMetalTextures[i]);
}
}];
}
else {
for (size_t i = 0; i < planes; i++) {
[renderEncoder setFragmentTexture:mapPlaneForSoftwareFrame(frame, i) atIndex:i];
}
}
[renderEncoder setFragmentBuffer:m_CscParamsBuffer offset:0 atIndex:0];
[renderEncoder setVertexBuffer:m_VideoVertexBuffer offset:0 atIndex:0];
[renderEncoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
@ -542,7 +673,11 @@ public:
}
}
if (qgetenv("VT_FORCE_METAL") == "1") {
if (!m_HwAccel) {
// Metal software decoding is always available
return [MTLCreateSystemDefaultDevice() autorelease];
}
else if (qgetenv("VT_FORCE_METAL") == "1") {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Using Metal renderer due to VT_FORCE_METAL=1 override.");
return [MTLCreateSystemDefaultDevice() autorelease];
@ -566,7 +701,7 @@ public:
return false;
}
if (!checkDecoderCapabilities(device, params)) {
if (m_HwAccel && !checkDecoderCapabilities(device, params)) {
return false;
}
@ -690,10 +825,13 @@ public:
virtual bool prepareDecoderContext(AVCodecContext* context, AVDictionary**) override
{
if (m_HwAccel) {
context->hw_device_ctx = av_buffer_ref(m_HwContext);
}
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Using VideoToolbox Metal renderer");
"Using Metal renderer with %s decoding",
m_HwAccel ? "hardware" : "software");
return true;
}
@ -725,6 +863,38 @@ public:
return RENDERER_ATTRIBUTE_HDR_SUPPORT;
}
// Reports whether this renderer can display frames of the given pixel
// format for a stream of the given video format.
//
// videoFormat - stream format flags; the 10-bit and YUV 4:4:4 masks select
//               the expected sample depth and chroma subsampling
// pixelFormat - candidate decoder output format
//
// VideoToolbox frames are accepted in both modes. Software formats are only
// accepted when hardware acceleration is off, and only if they are 2- or
// 3-plane native-endian YUV formats whose depth and chroma subsampling
// match the stream.
bool isPixelFormatSupported(int videoFormat, AVPixelFormat pixelFormat) override
{
    if (pixelFormat == AV_PIX_FMT_VIDEOTOOLBOX) {
        // VideoToolbox frames are always supported
        return true;
    }

    if (m_HwAccel) {
        // The hardware path only ever handles VideoToolbox frames
        return false;
    }

    // Otherwise it's supported if we can map it
    const int expectedPixelDepth = (videoFormat & VIDEO_FORMAT_MASK_10BIT) ? 10 : 8;
    const int expectedLog2ChromaW = (videoFormat & VIDEO_FORMAT_MASK_YUV444) ? 0 : 1;
    const int expectedLog2ChromaH = (videoFormat & VIDEO_FORMAT_MASK_YUV444) ? 0 : 1;

    const AVPixFmtDescriptor* formatDesc = av_pix_fmt_desc_get(pixelFormat);
    if (!formatDesc) {
        // This shouldn't be possible but handle it anyway
        SDL_assert(formatDesc);
        return false;
    }

    // The shaders treat the planes as Y/U/V and the upload path copies
    // samples verbatim, so planar RGB (e.g. GBRP, which otherwise looks like
    // an 8-bit 4:4:4 format) and big-endian layouts would render incorrectly.
    if (formatDesc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_BE)) {
        return false;
    }

    int planes = av_pix_fmt_count_planes(pixelFormat);
    return (planes == 2 || planes == 3) &&
           formatDesc->comp[0].depth == expectedPixelDepth &&
           formatDesc->log2_chroma_w == expectedLog2ChromaW &&
           formatDesc->log2_chroma_h == expectedLog2ChromaH;
}
bool notifyWindowChanged(PWINDOW_STATE_CHANGE_INFO info) override
{
auto unhandledStateFlags = info->stateChangeFlags;
@ -740,6 +910,7 @@ public:
}
private:
bool m_HwAccel;
SDL_Window* m_Window;
AVBufferRef* m_HwContext;
CAMetalLayer* m_MetalLayer;
@ -753,6 +924,7 @@ private:
id<MTLLibrary> m_ShaderLibrary;
id<MTLCommandQueue> m_CommandQueue;
id<CAMetalDrawable> m_NextDrawable;
id<MTLTexture> m_SwMappingTextures[MAX_VIDEO_PLANES];
SDL_MetalView m_MetalView;
int m_LastColorSpace;
bool m_LastFullRange;
@ -765,6 +937,6 @@ private:
int m_PendingPresentationCount;
};
IFFmpegRenderer* VTMetalRendererFactory::createRenderer() {
return new VTMetalRenderer();
IFFmpegRenderer* VTMetalRendererFactory::createRenderer(bool hwAccel) {
return new VTMetalRenderer(hwAccel);
}

View File

@ -887,7 +887,7 @@ IFFmpegRenderer* FFmpegVideoDecoder::createHwAccelRenderer(const AVCodecHWConfig
#ifdef Q_OS_DARWIN
case AV_HWDEVICE_TYPE_VIDEOTOOLBOX:
// Prefer the Metal renderer if hardware is compatible
return VTMetalRendererFactory::createRenderer();
return VTMetalRendererFactory::createRenderer(true);
#endif
#ifdef HAVE_LIBVA
case AV_HWDEVICE_TYPE_VAAPI:
@ -1158,6 +1158,13 @@ bool FFmpegVideoDecoder::tryInitializeRendererForUnknownDecoder(const AVCodec* d
}
#endif
#ifdef Q_OS_DARWIN
if (tryInitializeRenderer(decoder, AV_PIX_FMT_NONE, params, nullptr, nullptr,
[]() -> IFFmpegRenderer* { return VTMetalRendererFactory::createRenderer(false); })) {
return true;
}
#endif
if (tryInitializeRenderer(decoder, AV_PIX_FMT_NONE, params, nullptr, nullptr,
[]() -> IFFmpegRenderer* { return new SdlRenderer(); })) {
return true;