Rework NALU parsing to handle 3-byte prefixes more robustly

This commit is contained in:
Cameron Gutman
2022-09-05 16:45:14 -05:00
parent ec420615a1
commit 5a2911ffe4
+67 -100
View File
@@ -135,26 +135,13 @@ void destroyVideoDepacketizer(void) {
cleanupFrameState(); cleanupFrameState();
} }
static bool isSeqFrameStart(PBUFFER_DESC candidate) { static bool getAnnexBStartSequence(PBUFFER_DESC current, PBUFFER_DESC candidate) {
return (candidate->length == 4 && candidate->data[candidate->offset + candidate->length - 1] == 1);
}
static bool isSeqAnnexBStart(PBUFFER_DESC candidate) {
return (candidate->data[candidate->offset + candidate->length - 1] == 1);
}
static bool isSeqPadding(PBUFFER_DESC candidate) {
return (candidate->data[candidate->offset + candidate->length - 1] == 0);
}
static bool getSpecialSeq(PBUFFER_DESC current, PBUFFER_DESC candidate) {
if (current->length < 3) { if (current->length < 3) {
return false; return false;
} }
if (current->data[current->offset] == 0 && if (current->data[current->offset] == 0 &&
current->data[current->offset + 1] == 0) { current->data[current->offset + 1] == 0) {
// Padding or frame start
if (current->data[current->offset + 2] == 0) { if (current->data[current->offset + 2] == 0) {
if (current->length >= 4 && current->data[current->offset + 3] == 1) { if (current->length >= 4 && current->data[current->offset + 3] == 1) {
// Frame start // Frame start
@@ -163,13 +150,6 @@ static bool getSpecialSeq(PBUFFER_DESC current, PBUFFER_DESC candidate) {
candidate->length = 4; candidate->length = 4;
return true; return true;
} }
else {
// Padding
candidate->data = current->data;
candidate->offset = current->offset;
candidate->length = 3;
return true;
}
} }
else if (current->data[current->offset + 2] == 1) { else if (current->data[current->offset + 2] == 1) {
// NAL start // NAL start
@@ -256,12 +236,12 @@ void LiCompleteVideoFrame(VIDEO_FRAME_HANDLE handle, int drStatus) {
} }
} }
static bool isSeqReferenceFrameStart(PBUFFER_DESC specialSeq) { static bool isSeqReferenceFrameStart(PBUFFER_DESC startSeq) {
if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H264) { if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H264) {
return H264_NAL_TYPE(specialSeq->data[specialSeq->offset + specialSeq->length]) == 5; return H264_NAL_TYPE(startSeq->data[startSeq->offset + startSeq->length]) == 5;
} }
else if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H265) { else if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H265) {
switch (HEVC_NAL_TYPE(specialSeq->data[specialSeq->offset + specialSeq->length])) { switch (HEVC_NAL_TYPE(startSeq->data[startSeq->offset + startSeq->length])) {
case 16: case 16:
case 17: case 17:
case 18: case 18:
@@ -281,17 +261,17 @@ static bool isSeqReferenceFrameStart(PBUFFER_DESC specialSeq) {
} }
static bool isAccessUnitDelimiter(PBUFFER_DESC buffer) { static bool isAccessUnitDelimiter(PBUFFER_DESC buffer) {
BUFFER_DESC specialSeq; BUFFER_DESC startSeq;
if (!getSpecialSeq(buffer, &specialSeq)) { if (!getAnnexBStartSequence(buffer, &startSeq)) {
return false; return false;
} }
if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H264) { if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H264) {
return H264_NAL_TYPE(specialSeq.data[specialSeq.offset + specialSeq.length]) == H264_NAL_TYPE_AUD; return H264_NAL_TYPE(startSeq.data[startSeq.offset + startSeq.length]) == H264_NAL_TYPE_AUD;
} }
else if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H265) { else if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H265) {
return HEVC_NAL_TYPE(specialSeq.data[specialSeq.offset + specialSeq.length]) == HEVC_NAL_TYPE_AUD; return HEVC_NAL_TYPE(startSeq.data[startSeq.offset + startSeq.length]) == HEVC_NAL_TYPE_AUD;
} }
else { else {
LC_ASSERT(false); LC_ASSERT(false);
@@ -300,17 +280,17 @@ static bool isAccessUnitDelimiter(PBUFFER_DESC buffer) {
} }
static bool isSeiNal(PBUFFER_DESC buffer) { static bool isSeiNal(PBUFFER_DESC buffer) {
BUFFER_DESC specialSeq; BUFFER_DESC startSeq;
if (!getSpecialSeq(buffer, &specialSeq)) { if (!getAnnexBStartSequence(buffer, &startSeq)) {
return false; return false;
} }
if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H264) { if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H264) {
return H264_NAL_TYPE(specialSeq.data[specialSeq.offset + specialSeq.length]) == H264_NAL_TYPE_SEI; return H264_NAL_TYPE(startSeq.data[startSeq.offset + startSeq.length]) == H264_NAL_TYPE_SEI;
} }
else if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H265) { else if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H265) {
return HEVC_NAL_TYPE(specialSeq.data[specialSeq.offset + specialSeq.length]) == HEVC_NAL_TYPE_SEI; return HEVC_NAL_TYPE(startSeq.data[startSeq.offset + startSeq.length]) == HEVC_NAL_TYPE_SEI;
} }
else { else {
LC_ASSERT(false); LC_ASSERT(false);
@@ -318,21 +298,20 @@ static bool isSeiNal(PBUFFER_DESC buffer) {
} }
} }
// Advance the buffer descriptor to the start of the next NAL // Advance the buffer descriptor to the start of the next NAL or end of buffer
static void skipToNextNal(PBUFFER_DESC buffer) { static void skipToNextNalOrEnd(PBUFFER_DESC buffer) {
BUFFER_DESC specialSeq; BUFFER_DESC startSeq;
// If we're starting on a NAL boundary, skip to the next one // If we're starting on a NAL boundary, skip to the next one
if (getSpecialSeq(buffer, &specialSeq) && isSeqAnnexBStart(&specialSeq)) { if (getAnnexBStartSequence(buffer, &startSeq)) {
buffer->offset += specialSeq.length; buffer->offset += startSeq.length;
buffer->length -= specialSeq.length; buffer->length -= startSeq.length;
} }
// Loop until we find an Annex B start sequence (3 or 4 byte) // Loop until we find an Annex B start sequence (3 or 4 byte)
while (!getSpecialSeq(buffer, &specialSeq) || !isSeqAnnexBStart(&specialSeq)) { while (!getAnnexBStartSequence(buffer, &startSeq)) {
if (buffer->length == 0) { if (buffer->length == 0) {
// If we skipped all the data, something has gone horribly wrong // Reached the end of the buffer
LC_ASSERT(buffer->length > 0);
return; return;
} }
@@ -341,18 +320,26 @@ static void skipToNextNal(PBUFFER_DESC buffer) {
} }
} }
static bool isIdrFrameStart(PBUFFER_DESC buffer) { // Advance the buffer descriptor to the start of the next NAL
BUFFER_DESC specialSeq; static void skipToNextNal(PBUFFER_DESC buffer) {
skipToNextNalOrEnd(buffer);
if (!getSpecialSeq(buffer, &specialSeq) || !isSeqFrameStart(&specialSeq)) { // If we skipped all the data, something has gone horribly wrong
LC_ASSERT(buffer->length > 0);
}
static bool isIdrFrameStart(PBUFFER_DESC buffer) {
BUFFER_DESC startSeq;
if (!getAnnexBStartSequence(buffer, &startSeq)) {
return false; return false;
} }
if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H264) { if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H264) {
return H264_NAL_TYPE(specialSeq.data[specialSeq.offset + specialSeq.length]) == H264_NAL_TYPE_SPS; return H264_NAL_TYPE(startSeq.data[startSeq.offset + startSeq.length]) == H264_NAL_TYPE_SPS;
} }
else if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H265) { else if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H265) {
return HEVC_NAL_TYPE(specialSeq.data[specialSeq.offset + specialSeq.length]) == HEVC_NAL_TYPE_VPS; return HEVC_NAL_TYPE(startSeq.data[startSeq.offset + startSeq.length]) == HEVC_NAL_TYPE_VPS;
} }
else { else {
LC_ASSERT(false); LC_ASSERT(false);
@@ -435,7 +422,7 @@ static int getBufferFlags(char* data, int length) {
buffer.length = (unsigned int)length; buffer.length = (unsigned int)length;
buffer.offset = 0; buffer.offset = 0;
if (!getSpecialSeq(&buffer, &candidate) || !isSeqFrameStart(&candidate)) { if (!getAnnexBStartSequence(&buffer, &candidate)) {
return BUFFER_TYPE_PICDATA; return BUFFER_TYPE_PICDATA;
} }
@@ -524,14 +511,19 @@ static void queueFragment(PLENTRY_INTERNAL* existingEntry, char* data, int offse
// Process an RTP Payload using the slow path that handles multiple NALUs per packet // Process an RTP Payload using the slow path that handles multiple NALUs per packet
static void processRtpPayloadSlow(PBUFFER_DESC currentPos, PLENTRY_INTERNAL* existingEntry) { static void processRtpPayloadSlow(PBUFFER_DESC currentPos, PLENTRY_INTERNAL* existingEntry) {
BUFFER_DESC specialSeq; BUFFER_DESC startSeq;
bool decodingVideo = false;
// We should not have any NALUs when processing the first packet in an IDR frame // We should not have any NALUs when processing the first packet in an IDR frame
LC_ASSERT(nalChainHead == NULL); LC_ASSERT(nalChainHead == NULL);
LC_ASSERT(nalChainTail == NULL); LC_ASSERT(nalChainTail == NULL);
while (currentPos->length != 0) { while (currentPos->length != 0) {
// Skip through any padding bytes
if (!getAnnexBStartSequence(currentPos, &startSeq)) {
skipToNextNal(currentPos);
getAnnexBStartSequence(currentPos, &startSeq);
}
// Skip any prepended AUD or SEI NALUs. We may have padding between // Skip any prepended AUD or SEI NALUs. We may have padding between
// these on IDR frames, so the check in processRtpPayload() is not // these on IDR frames, so the check in processRtpPayload() is not
// completely sufficient to handle that case. // completely sufficient to handle that case.
@@ -542,62 +534,37 @@ static void processRtpPayloadSlow(PBUFFER_DESC currentPos, PLENTRY_INTERNAL* exi
int start = currentPos->offset; int start = currentPos->offset;
bool containsPicData = false; bool containsPicData = false;
if (getSpecialSeq(currentPos, &specialSeq)) { // Now we're decoding a frame
if (isSeqAnnexBStart(&specialSeq)) { decodingFrame = true;
// Now we're decoding video
decodingVideo = true;
if (isSeqFrameStart(&specialSeq)) { if (isSeqReferenceFrameStart(&startSeq)) {
// Now we're working on a frame // No longer waiting for an IDR frame
decodingFrame = true; waitingForIdrFrame = false;
if (isSeqReferenceFrameStart(&specialSeq)) { // Cancel any pending IDR frame request
// No longer waiting for an IDR frame waitingForNextSuccessfulFrame = false;
waitingForIdrFrame = false;
// Cancel any pending IDR frame request // Use the cached LENTRY for this NALU since it will be
waitingForNextSuccessfulFrame = false; // the bulk of the data in this packet.
containsPicData = true;
}
// Use the cached LENTRY for this NALU since it will be // Move to the next NALU
// the bulk of the data in this packet. skipToNextNalOrEnd(currentPos);
containsPicData = true;
}
}
// Skip the start sequence // If this is the picture data, we expect it to extend to the end of the packet
currentPos->length -= specialSeq.length; if (containsPicData) {
currentPos->offset += specialSeq.length; while (currentPos->length != 0) {
} // Any NALUs we encounter on the way to the end of the packet must be reference frame slices
else { LC_ASSERT(getAnnexBStartSequence(currentPos, &startSeq) && isSeqReferenceFrameStart(&startSeq));
// Not decoding video skipToNextNalOrEnd(currentPos);
decodingVideo = false;
// Just skip this byte
currentPos->length--;
currentPos->offset++;
} }
} }
// Move to the next special sequence // To minimize copies, we'll allocate for SPS, PPS, and VPS to allow
while (currentPos->length != 0) { // us to reuse the packet buffer for the picture data in the I-frame.
// Check if this should end the current NAL queueFragment(containsPicData ? existingEntry : NULL,
if (getSpecialSeq(currentPos, &specialSeq)) { currentPos->data, start, currentPos->offset - start);
if (decodingVideo || !isSeqPadding(&specialSeq)) {
break;
}
}
// This byte is part of the NAL data
currentPos->offset++;
currentPos->length--;
}
if (decodingVideo) {
// To minimize copies, we'll use allocate for SPS, PPS, and VPS to allow
// us to reuse the packet buffer for the picture data in the I-frame.
queueFragment(containsPicData ? existingEntry : NULL,
currentPos->data, start, currentPos->offset - start);
}
} }
} }
@@ -752,11 +719,11 @@ static void processRtpPayload(PNV_VIDEO_PACKET videoPacket, int length,
// Other versions don't have a frame header at all // Other versions don't have a frame header at all
} }
// Assert that the frame start NALU prefix is next // Assert that the 3 or 4 byte Annex B NALU prefix is next
LC_ASSERT(currentPos.data[currentPos.offset + 0] == 0); LC_ASSERT(currentPos.data[currentPos.offset + 0] == 0);
LC_ASSERT(currentPos.data[currentPos.offset + 1] == 0); LC_ASSERT(currentPos.data[currentPos.offset + 1] == 0);
LC_ASSERT(currentPos.data[currentPos.offset + 2] == 0); LC_ASSERT(currentPos.data[currentPos.offset + 2] == 0 || currentPos.data[currentPos.offset + 2] == 1);
LC_ASSERT(currentPos.data[currentPos.offset + 3] == 1); LC_ASSERT(currentPos.data[currentPos.offset + 3] == 1 || currentPos.data[currentPos.offset + 2] == 1);
// If an AUD NAL is prepended to this frame data, remove it. // If an AUD NAL is prepended to this frame data, remove it.
// Other parts of this code are not prepared to deal with a // Other parts of this code are not prepared to deal with a