Optimize EGLRenderer with overlay VAOs and reduced GL_BLEND usage

This commit is contained in:
Cameron Gutman
2025-12-28 13:14:16 -06:00
parent f1451a0db3
commit c5b7a9c679
2 changed files with 123 additions and 98 deletions

View File

@@ -18,11 +18,11 @@
#define GL_UNPACK_ROW_LENGTH_EXT 0x0CF2
#endif
typedef struct _OVERLAY_VERTEX
typedef struct _VERTEX
{
float x, y;
float u, v;
} OVERLAY_VERTEX, *POVERLAY_VERTEX;
} VERTEX, *PVERTEX;
/* TODO:
* - handle more pixel formats
@@ -54,14 +54,15 @@ EGLRenderer::EGLRenderer(IFFmpegRenderer *backendRenderer)
m_EGLDisplay(EGL_NO_DISPLAY),
m_Textures{0},
m_OverlayTextures{0},
m_OverlayVbos{0},
m_OverlayVBOs{0},
m_OverlayVAOs{0},
m_OverlayHasValidData{},
m_ShaderProgram(0),
m_OverlayShaderProgram(0),
m_Context(0),
m_Window(nullptr),
m_Backend(backendRenderer),
m_VAO(0),
m_VideoVAO(0),
m_BlockingSwapBuffers(false),
m_LastRenderSync(EGL_NO_SYNC),
m_LastFrame(av_frame_alloc()),
@@ -97,23 +98,18 @@ EGLRenderer::~EGLRenderer()
if (m_OverlayShaderProgram) {
glDeleteProgram(m_OverlayShaderProgram);
}
if (m_VAO) {
if (m_VideoVAO) {
SDL_assert(m_glDeleteVertexArraysOES != nullptr);
m_glDeleteVertexArraysOES(1, &m_VAO);
m_glDeleteVertexArraysOES(1, &m_VideoVAO);
}
for (int i = 0; i < EGL_MAX_PLANES; i++) {
if (m_Textures[i] != 0) {
glDeleteTextures(1, &m_Textures[i]);
}
}
for (int i = 0; i < Overlay::OverlayMax; i++) {
if (m_OverlayTextures[i] != 0) {
glDeleteTextures(1, &m_OverlayTextures[i]);
}
if (m_OverlayVbos[i] != 0) {
glDeleteBuffers(1, &m_OverlayVbos[i]);
}
glDeleteTextures(EGL_MAX_PLANES, m_Textures);
glDeleteTextures(Overlay::OverlayMax, m_OverlayTextures);
glDeleteBuffers(Overlay::OverlayMax, m_OverlayVBOs);
if (m_glDeleteVertexArraysOES) {
m_glDeleteVertexArraysOES(Overlay::OverlayMax, m_OverlayVAOs);
}
SDL_GL_DeleteContext(m_Context);
}
@@ -179,24 +175,27 @@ void EGLRenderer::renderOverlay(Overlay::OverlayType type, int viewportWidth, in
glBindTexture(GL_TEXTURE_2D, m_OverlayTextures[type]);
// If the pixel data isn't tightly packed, it requires special handling
void* packedPixelData = nullptr;
if (m_GlesMajorVersion >= 3 || m_HasExtUnpackSubimage) {
// If we are GLES 3.0+ or have GL_EXT_unpack_subimage, GL can handle any pitch
SDL_assert(newSurface->pitch % newSurface->format->BytesPerPixel == 0);
glPixelStorei(GL_UNPACK_ROW_LENGTH_EXT, newSurface->pitch / newSurface->format->BytesPerPixel);
}
else if (newSurface->pitch != newSurface->w * newSurface->format->BytesPerPixel) {
// If we can't use GL_UNPACK_ROW_LENGTH and the surface isn't tightly packed,
// we must allocate a tightly packed buffer and copy our pixels there.
packedPixelData = malloc(newSurface->w * newSurface->h * newSurface->format->BytesPerPixel);
if (!packedPixelData) {
SDL_FreeSurface(newSurface);
return;
if (newSurface->pitch != newSurface->w * newSurface->format->BytesPerPixel) {
if (m_GlesMajorVersion >= 3 || m_HasExtUnpackSubimage) {
// If we are GLES 3.0+ or have GL_EXT_unpack_subimage, GL can handle any pitch
SDL_assert(newSurface->pitch % newSurface->format->BytesPerPixel == 0);
glPixelStorei(GL_UNPACK_ROW_LENGTH_EXT, newSurface->pitch / newSurface->format->BytesPerPixel);
}
else {
// If we can't use GL_UNPACK_ROW_LENGTH, we must allocate a tightly packed buffer
// and copy our pixels there.
packedPixelData = malloc(newSurface->w * newSurface->h * newSurface->format->BytesPerPixel);
if (!packedPixelData) {
SDL_FreeSurface(newSurface);
return;
}
SDL_ConvertPixels(newSurface->w, newSurface->h,
newSurface->format->format, newSurface->pixels, newSurface->pitch,
newSurface->format->format, packedPixelData, newSurface->w * newSurface->format->BytesPerPixel);
SDL_ConvertPixels(newSurface->w, newSurface->h,
newSurface->format->format, newSurface->pixels, newSurface->pitch,
newSurface->format->format, packedPixelData, newSurface->w * newSurface->format->BytesPerPixel);
}
}
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, newSurface->w, newSurface->h, 0, GL_RGBA, GL_UNSIGNED_BYTE,
@@ -205,6 +204,9 @@ void EGLRenderer::renderOverlay(Overlay::OverlayType type, int viewportWidth, in
if (packedPixelData) {
free(packedPixelData);
}
else if (newSurface->pitch != newSurface->w * newSurface->format->BytesPerPixel) {
glPixelStorei(GL_UNPACK_ROW_LENGTH_EXT, 0);
}
SDL_FRect overlayRect;
@@ -231,7 +233,7 @@ void EGLRenderer::renderOverlay(Overlay::OverlayType type, int viewportWidth, in
// Convert screen space to normalized device coordinates
StreamUtils::screenSpaceToNormalizedDeviceCoords(&overlayRect, viewportWidth, viewportHeight);
OVERLAY_VERTEX verts[] =
VERTEX verts[] =
{
{overlayRect.x + overlayRect.w, overlayRect.y + overlayRect.h, 1.0f, 0.0f},
{overlayRect.x, overlayRect.y + overlayRect.h, 0.0f, 0.0f},
@@ -241,7 +243,8 @@ void EGLRenderer::renderOverlay(Overlay::OverlayType type, int viewportWidth, in
{overlayRect.x + overlayRect.w, overlayRect.y + overlayRect.h, 1.0f, 0.0f}
};
glBindBuffer(GL_ARRAY_BUFFER, m_OverlayVbos[type]);
// Update the VBO for this overlay (already bound to a VAO)
glBindBuffer(GL_ARRAY_BUFFER, m_OverlayVBOs[type]);
glBufferData(GL_ARRAY_BUFFER, sizeof(verts), verts, GL_STATIC_DRAW);
SDL_AtomicSet(&m_OverlayHasValidData[type], 1);
@@ -257,17 +260,18 @@ void EGLRenderer::renderOverlay(Overlay::OverlayType type, int viewportWidth, in
glUseProgram(m_OverlayShaderProgram);
// compileShader() ensures that aPosition and aTexCoord are indexes 0 and 1 respectively
glBindBuffer(GL_ARRAY_BUFFER, m_OverlayVbos[type]);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(OVERLAY_VERTEX), (void*)offsetof(OVERLAY_VERTEX, x));
glEnableVertexAttribArray(0);
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(OVERLAY_VERTEX), (void*)offsetof(OVERLAY_VERTEX, u));
glEnableVertexAttribArray(1);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, m_OverlayTextures[type]);
// Temporarily enable blending to draw the overlays with alpha
glEnable(GL_BLEND);
// Draw the overlay
m_glBindVertexArrayOES(m_OverlayVAOs[type]);
glDrawArrays(GL_TRIANGLES, 0, 6);
m_glBindVertexArrayOES(0);
glDisable(GL_BLEND);
}
int EGLRenderer::loadAndBuildShader(int shaderType,
@@ -611,28 +615,10 @@ bool EGLRenderer::initialize(PDECODER_PARAMETERS params)
SDL_GL_SetSwapInterval(0);
}
glGenTextures(EGL_MAX_PLANES, m_Textures);
for (size_t i = 0; i < EGL_MAX_PLANES; ++i) {
glBindTexture(GL_TEXTURE_EXTERNAL_OES, m_Textures[i]);
glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
if (!setupVideoRenderingState() || !setupOverlayRenderingState()) {
return false;
}
glGenBuffers(Overlay::OverlayMax, m_OverlayVbos);
glGenTextures(Overlay::OverlayMax, m_OverlayTextures);
for (size_t i = 0; i < Overlay::OverlayMax; ++i) {
glBindTexture(GL_TEXTURE_2D, m_OverlayTextures[i]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
GLenum err = glGetError();
if (err != GL_NO_ERROR)
EGL_LOG(Error, "OpenGL error: %d", err);
@@ -643,52 +629,87 @@ bool EGLRenderer::initialize(PDECODER_PARAMETERS params)
return err == GL_NO_ERROR;
}
bool EGLRenderer::specialize() {
SDL_assert(!m_VAO);
if (!compileShaders())
return false;
bool EGLRenderer::setupVideoRenderingState() {
// Setup the video plane textures
glGenTextures(EGL_MAX_PLANES, m_Textures);
for (size_t i = 0; i < EGL_MAX_PLANES; ++i) {
glBindTexture(GL_TEXTURE_EXTERNAL_OES, m_Textures[i]);
glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
// The viewport should have the aspect ratio of the video stream
static const float vertices[] = {
static const VERTEX vertices[] = {
// pos .... // tex coords
1.0f, 1.0f, 1.0f, 0.0f,
1.0f, -1.0f, 1.0f, 1.0f,
-1.0f, -1.0f, 0.0f, 1.0f,
-1.0f, 1.0f, 0.0f, 0.0f,
};
static const unsigned int indices[] = {
0, 1, 3,
1, 2, 3,
{ 1.0f, 1.0f, 1.0f, 0.0f },
{ -1.0f, 1.0f, 0.0f, 0.0f },
{ -1.0f, -1.0f, 0.0f, 1.0f },
{ -1.0f, -1.0f, 0.0f, 1.0f },
{ 1.0f, -1.0f, 1.0f, 1.0f },
{ 1.0f, 1.0f, 1.0f, 0.0f },
};
glUseProgram(m_ShaderProgram);
unsigned int VBO, EBO;
m_glGenVertexArraysOES(1, &m_VAO);
// Setup the VAO and VBO
unsigned int VBO;
m_glGenVertexArraysOES(1, &m_VideoVAO);
glGenBuffers(1, &VBO);
glGenBuffers(1, &EBO);
m_glBindVertexArrayOES(m_VAO);
m_glBindVertexArrayOES(m_VideoVAO);
glBindBuffer(GL_ARRAY_BUFFER, VBO);
glBufferData(GL_ARRAY_BUFFER, sizeof (vertices), vertices, GL_STATIC_DRAW);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, EBO);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof (indices), indices, GL_STATIC_DRAW);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
// compileShader() ensures that aPosition and aTexCoord are indexes 0 and 1 respectively
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)0);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)offsetof(VERTEX, x));
glEnableVertexAttribArray(0);
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)(2 * sizeof (float)));
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)offsetof(VERTEX, u));
glEnableVertexAttribArray(1);
glBindBuffer(GL_ARRAY_BUFFER, 0);
m_glBindVertexArrayOES(0);
glDeleteBuffers(1, &VBO);
glDeleteBuffers(1, &EBO);
GLenum err = glGetError();
if (err != GL_NO_ERROR) {
EGL_LOG(Error, "OpenGL error: %d", err);
}
return err == GL_NO_ERROR;
}
bool EGLRenderer::setupOverlayRenderingState() {
// Create overlay textures, VBOs, and VAOs
glGenBuffers(Overlay::OverlayMax, m_OverlayVBOs);
glGenTextures(Overlay::OverlayMax, m_OverlayTextures);
m_glGenVertexArraysOES(Overlay::OverlayMax, m_OverlayVAOs);
for (size_t i = 0; i < Overlay::OverlayMax; ++i) {
// Set up the overlay texture
glBindTexture(GL_TEXTURE_2D, m_OverlayTextures[i]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
// Create the VAO for the overlay
m_glBindVertexArrayOES(m_OverlayVAOs[i]);
glBindBuffer(GL_ARRAY_BUFFER, m_OverlayVBOs[i]);
// compileShader() ensures that aPosition and aTexCoord are indexes 0 and 1 respectively
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)offsetof(VERTEX, x));
glEnableVertexAttribArray(0);
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)offsetof(VERTEX, u));
glEnableVertexAttribArray(1);
glBindBuffer(GL_ARRAY_BUFFER, 0);
m_glBindVertexArrayOES(0);
}
// Configure the alpha blend function (GL_BLEND itself is only enabled while drawing overlays)
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
GLenum err = glGetError();
if (err != GL_NO_ERROR) {
@@ -754,10 +775,11 @@ void EGLRenderer::renderFrame(AVFrame* frame)
SDL_assert(m_EGLImagePixelFormat != AV_PIX_FMT_NONE);
if (!specialize()) {
// Now that we know the image format, we can compile the shaders
if (!compileShaders()) {
m_EGLImagePixelFormat = AV_PIX_FMT_NONE;
// Failure to specialize is fatal. We must reset the renderer
// Failure to compile shaders is fatal. We must reset the renderer
// to recover successfully.
//
// Note: This seems to be easy to trigger when transitioning from
@@ -797,7 +819,6 @@ void EGLRenderer::renderFrame(AVFrame* frame)
glViewport(dst.x, dst.y, dst.w, dst.h);
glUseProgram(m_ShaderProgram);
m_glBindVertexArrayOES(m_VAO);
// If the frame format has changed, we'll need to recompute the constants
if (hasFrameFormatChanged(frame) && (m_EGLImagePixelFormat == AV_PIX_FMT_NV12 || m_EGLImagePixelFormat == AV_PIX_FMT_P010)) {
@@ -815,10 +836,12 @@ void EGLRenderer::renderFrame(AVFrame* frame)
glUniform2fv(m_ShaderProgramParams[NV12_PARAM_CHROMA_OFFSET], 1, chromaOffset.data());
}
glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0);
// Draw the video
m_glBindVertexArrayOES(m_VideoVAO);
glDrawArrays(GL_TRIANGLES, 0, 6);
m_glBindVertexArrayOES(0);
// Draw overlays on top
for (int i = 0; i < Overlay::OverlayMax; i++) {
renderOverlay((Overlay::OverlayType)i, drawableWidth, drawableHeight);
}

View File

@@ -27,21 +27,23 @@ private:
void renderOverlay(Overlay::OverlayType type, int viewportWidth, int viewportHeight);
unsigned compileShader(const char* vertexShaderSrc, const char* fragmentShaderSrc);
bool compileShaders();
bool specialize();
bool setupVideoRenderingState();
bool setupOverlayRenderingState();
static int loadAndBuildShader(int shaderType, const char *filename);
AVPixelFormat m_EGLImagePixelFormat;
void *m_EGLDisplay;
unsigned m_Textures[EGL_MAX_PLANES];
unsigned m_OverlayTextures[Overlay::OverlayMax];
unsigned m_OverlayVbos[Overlay::OverlayMax];
unsigned m_OverlayVBOs[Overlay::OverlayMax];
unsigned m_OverlayVAOs[Overlay::OverlayMax];
SDL_atomic_t m_OverlayHasValidData[Overlay::OverlayMax];
unsigned m_ShaderProgram;
unsigned m_OverlayShaderProgram;
SDL_GLContext m_Context;
SDL_Window *m_Window;
IFFmpegRenderer *m_Backend;
unsigned int m_VAO;
unsigned int m_VideoVAO;
bool m_BlockingSwapBuffers;
EGLSync m_LastRenderSync;
AVFrame* m_LastFrame;