Reduce GC pressure and CPU usage by avoiding HashSet and LinkedList in the depacketizer. Also avoid atomic ref count operations for direct submit decoders.

Cameron Gutman 2015-03-25 00:14:48 -04:00
parent 1ac6439690
commit b3503cdede
5 changed files with 116 additions and 68 deletions
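The recurring pattern in this commit is the intrusive singly-linked list: instead of allocating LinkedList nodes and HashSet entries for every frame, each ByteBufferDescriptor and VideoPacket carries its own link field (nextDescriptor / nextPacket), so chaining an element allocates nothing and leaves nothing behind for the GC. A minimal sketch of the pattern follows; the Node and Chain names are illustrative, not part of this commit.

// Intrusive list: the element carries the link, so appending allocates
// nothing. Contrast with LinkedList.add(), which allocates a wrapper node
// per element, and HashSet.add(), which allocates a table entry -- all
// garbage once the frame is recycled.
class Node {
    int value;
    Node next; // intrusive link, analogous to nextDescriptor/nextPacket

    Node(int value) { this.value = value; }
}

class Chain {
    private Node head, tail;

    // O(1) allocation-free append, as in chainBufferToCurrentFrame()
    void append(Node n) {
        n.next = null;
        if (tail != null) {
            tail.next = n;
        } else {
            head = n;
        }
        tail = n;
    }

    // O(1) pop from the head, as in removeBackingPacketHead()
    Node removeHead() {
        Node n = head;
        if (n != null) {
            head = n.next;
            if (head == null) {
                tail = null;
            }
        }
        return n;
    }
}

The tradeoff is that an element can sit on only one chain at a time, an invariant the depacketizer has to maintain when it hands a finished frame's chains off to the decode unit.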

ByteBufferDescriptor.java

@@ -5,6 +5,8 @@ public class ByteBufferDescriptor {
public int offset;
public int length;
public ByteBufferDescriptor nextDescriptor;
public ByteBufferDescriptor(byte[] data, int offset, int length)
{
this.data = data;
@@ -24,6 +26,7 @@ public class ByteBufferDescriptor {
this.data = data;
this.offset = offset;
this.length = length;
this.nextDescriptor = null;
}
public void print()

DecodeUnit.java

@@ -1,8 +1,5 @@
package com.limelight.nvstream.av;
import java.util.HashSet;
import java.util.List;
import com.limelight.nvstream.av.video.VideoPacket;
public class DecodeUnit {
@@ -14,26 +11,26 @@ public class DecodeUnit {
public static final int DU_FLAG_SYNC_FRAME = 0x2;
private int type;
private List<ByteBufferDescriptor> bufferList;
private ByteBufferDescriptor bufferHead;
private int dataLength;
private int frameNumber;
private long receiveTimestamp;
private int flags;
private HashSet<VideoPacket> backingPackets;
private VideoPacket backingPacketHead;
public DecodeUnit() {
}
public void initialize(int type, List<ByteBufferDescriptor> bufferList, int dataLength,
int frameNumber, long receiveTimestamp, int flags, HashSet<VideoPacket> backingPackets)
public void initialize(int type, ByteBufferDescriptor bufferHead, int dataLength,
int frameNumber, long receiveTimestamp, int flags, VideoPacket backingPacketHead)
{
this.type = type;
this.bufferList = bufferList;
this.bufferHead = bufferHead;
this.dataLength = dataLength;
this.frameNumber = frameNumber;
this.receiveTimestamp = receiveTimestamp;
this.flags = flags;
this.backingPackets = backingPackets;
this.backingPacketHead = backingPacketHead;
}
public int getType()
@@ -46,9 +43,9 @@ public class DecodeUnit {
return receiveTimestamp;
}
public List<ByteBufferDescriptor> getBufferList()
public ByteBufferDescriptor getBufferHead()
{
return bufferList;
return bufferHead;
}
public int getDataLength()
@@ -67,12 +64,11 @@ public class DecodeUnit {
}
// Internal use only
public HashSet<VideoPacket> getBackingPackets() {
return backingPackets;
}
// Internal use only
public void clearBackingPackets() {
backingPackets.clear();
public VideoPacket removeBackingPacketHead() {
VideoPacket pkt = backingPacketHead;
if (pkt != null) {
backingPacketHead = pkt.nextPacket;
}
return pkt;
}
}
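Unlike the removed getBackingPackets(), removeBackingPacketHead() is destructive: each call pops one packet, so callers loop until it returns null. This is exactly how the depacketizer's decode-unit recycler (in VideoDepacketizer, the next file) releases a unit's packets, given a DecodeUnit du:

// Drain and dereference every packet backing this decode unit.
// Each pop is O(1) and frees no list nodes, because the links live
// inside the VideoPacket objects themselves.
VideoPacket pkt;
while ((pkt = du.removeBackingPacketHead()) != null) {
    pkt.dereferencePacket();
}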

VideoDepacketizer.java

@@ -1,8 +1,5 @@
package com.limelight.nvstream.av.video;
import java.util.HashSet;
import java.util.LinkedList;
import com.limelight.LimeLog;
import com.limelight.nvstream.av.ByteBufferDescriptor;
import com.limelight.nvstream.av.DecodeUnit;
@@ -15,9 +12,11 @@ import com.limelight.nvstream.av.buffer.UnsynchronizedPopulatedBufferList;
public class VideoDepacketizer {
// Current frame state
private LinkedList<ByteBufferDescriptor> avcFrameDataChain = null;
private int avcFrameDataLength = 0;
private HashSet<VideoPacket> packetSet = null;
private ByteBufferDescriptor frameDataChainHead;
private ByteBufferDescriptor frameDataChainTail;
private VideoPacket backingPacketHead;
private VideoPacket backingPacketTail;
// Sequencing state
private int lastPacketInStream = 0;
@@ -55,10 +54,10 @@ public class VideoDepacketizer {
DecodeUnit du = (DecodeUnit) o;
// Disassociate video packets from this DU
for (VideoPacket pkt : du.getBackingPackets()) {
pkt.decodeUnitRefCount.decrementAndGet();
VideoPacket pkt;
while ((pkt = du.removeBackingPacketHead()) != null) {
pkt.dereferencePacket();
}
du.clearBackingPackets();
}
};
@@ -94,22 +93,21 @@ public class VideoDepacketizer {
private void cleanupAvcFrameState()
{
if (packetSet != null) {
for (VideoPacket pkt : packetSet) {
pkt.decodeUnitRefCount.decrementAndGet();
}
packetSet = null;
backingPacketTail = null;
while (backingPacketHead != null) {
backingPacketHead.dereferencePacket();
backingPacketHead = backingPacketHead.nextPacket;
}
avcFrameDataChain = null;
frameDataChainHead = frameDataChainTail = null;
avcFrameDataLength = 0;
}
private void reassembleAvcFrame(int frameNumber)
{
// This is the start of a new frame
if (avcFrameDataChain != null && avcFrameDataLength != 0) {
ByteBufferDescriptor firstBuffer = avcFrameDataChain.getFirst();
if (frameDataChainHead != null) {
ByteBufferDescriptor firstBuffer = frameDataChainHead;
int flags = 0;
if (NAL.getSpecialSequenceDescriptor(firstBuffer, cachedSpecialDesc) && NAL.isAvcFrameStart(cachedSpecialDesc)) {
@@ -141,11 +139,11 @@
}
// Initialize the free DU
du.initialize(DecodeUnit.TYPE_H264, avcFrameDataChain,
avcFrameDataLength, frameNumber, frameStartTime, flags, packetSet);
du.initialize(DecodeUnit.TYPE_H264, frameDataChainHead,
avcFrameDataLength, frameNumber, frameStartTime, flags, backingPacketHead);
// Packets now owned by the DU
packetSet = null;
backingPacketTail = backingPacketHead = null;
controlListener.connectionReceivedFrame(frameNumber);
@@ -160,6 +158,39 @@
}
}
private void chainBufferToCurrentFrame(ByteBufferDescriptor desc) {
desc.nextDescriptor = null;
// Chain the packet
if (frameDataChainTail != null) {
frameDataChainTail.nextDescriptor = desc;
frameDataChainTail = desc;
}
else {
frameDataChainHead = frameDataChainTail = desc;
}
avcFrameDataLength += desc.length;
}
private void chainPacketToCurrentFrame(VideoPacket packet) {
// It's possible to get more than one NAL from a packet but we can cheaply
// check for this condition because all duplicates must be contiguous
if (backingPacketTail != packet) {
packet.referencePacket();
packet.nextPacket = null;
// Chain the packet
if (backingPacketTail != null) {
backingPacketTail.nextPacket = packet;
backingPacketTail = packet;
}
else {
backingPacketHead = backingPacketTail = packet;
}
}
}
private void addInputDataSlow(VideoPacket packet, ByteBufferDescriptor location)
{
boolean isDecodingH264 = false;
@@ -185,11 +216,6 @@
// Reassemble any pending AVC NAL
reassembleAvcFrame(packet.getFrameIndex());
// Setup state for the new NAL
avcFrameDataChain = new LinkedList<ByteBufferDescriptor>();
avcFrameDataLength = 0;
packetSet = new HashSet<VideoPacket>();
if (cachedSpecialDesc.data[cachedSpecialDesc.offset+cachedSpecialDesc.length] == 0x65) {
// This is the NALU code for I-frame data
@@ -241,17 +267,13 @@
location.length--;
}
if (isDecodingH264 && avcFrameDataChain != null)
if (isDecodingH264 && decodingFrame)
{
ByteBufferDescriptor data = new ByteBufferDescriptor(location.data, start, location.offset-start);
// Chain this packet to the current frame
chainPacketToCurrentFrame(packet);
if (packetSet.add(packet)) {
packet.decodeUnitRefCount.incrementAndGet();
}
// Add a buffer descriptor describing the NAL data in this packet
avcFrameDataChain.add(data);
avcFrameDataLength += location.offset-start;
chainBufferToCurrentFrame(new ByteBufferDescriptor(location.data, start, location.offset-start));
}
}
}
@@ -261,19 +283,13 @@
if (firstPacket) {
// Setup state for the new frame
frameStartTime = System.currentTimeMillis();
avcFrameDataChain = new LinkedList<ByteBufferDescriptor>();
avcFrameDataLength = 0;
packetSet = new HashSet<VideoPacket>();
}
// Add the payload data to the chain
avcFrameDataChain.add(new ByteBufferDescriptor(location));
avcFrameDataLength += location.length;
chainBufferToCurrentFrame(new ByteBufferDescriptor(location));
// The receive thread can't use this until we're done with it
if (packetSet.add(packet)) {
packet.decodeUnitRefCount.incrementAndGet();
}
chainPacketToCurrentFrame(packet);
}
private static boolean isFirstPacket(int flags) {
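The tail comparison in chainPacketToCurrentFrame() is what lets a plain chain replace the HashSet: a packet may contribute several NALs to one frame, but those contributions always arrive back-to-back, so a duplicate can only ever be the current tail. A standalone sketch of the invariant, with hypothetical Pkt and FrameChain names:

// Dedup by tail check: valid only because duplicates are contiguous.
class Pkt {
    int refs;
    Pkt next;
}

class FrameChain {
    Pkt head, tail;

    void chain(Pkt p) {
        if (tail == p) {
            return;       // contiguous duplicate: already chained
        }
        p.refs++;         // referenced exactly once per frame
        p.next = null;
        if (tail != null) {
            tail.next = p;
        } else {
            head = p;
        }
        tail = p;
    }
}

After chain(a); chain(a); chain(b); the counts are a.refs == 1 and b.refs == 1, with no hashing and no per-entry allocation. A non-contiguous sequence like chain(a); chain(b); chain(a); would double-count a, which is why this shortcut is only sound under the depacketizer's in-order, NAL-by-NAL parse.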

VideoPacket.java

@@ -9,8 +9,9 @@ import com.limelight.nvstream.av.RtpPacket;
import com.limelight.nvstream.av.RtpPacketFields;
public class VideoPacket implements RtpPacketFields {
private ByteBufferDescriptor buffer;
private ByteBuffer byteBuffer;
private final ByteBufferDescriptor buffer;
private final ByteBuffer byteBuffer;
private final boolean useAtomicRefCount;
private int dataOffset;
@@ -20,7 +21,11 @@ public class VideoPacket implements RtpPacketFields {
private short rtpSequenceNumber;
AtomicInteger decodeUnitRefCount = new AtomicInteger();
private AtomicInteger duAtomicRefCount = new AtomicInteger();
private int duRefCount;
// Only for use in DecodeUnit for packet queuing
public VideoPacket nextPacket;
public static final int FLAG_CONTAINS_PIC_DATA = 0x1;
public static final int FLAG_EOF = 0x2;
@@ -28,10 +33,11 @@
public static final int HEADER_SIZE = 16;
public VideoPacket(byte[] buffer)
public VideoPacket(byte[] buffer, boolean useAtomicRefCount)
{
this.buffer = new ByteBufferDescriptor(buffer, 0, buffer.length);
this.byteBuffer = ByteBuffer.wrap(buffer).order(ByteOrder.LITTLE_ENDIAN);
this.useAtomicRefCount = useAtomicRefCount;
}
public void initializeWithLengthNoRtpHeader(int length)
@@ -108,4 +114,31 @@
public short getRtpSequenceNumber() {
return rtpSequenceNumber;
}
int referencePacket() {
if (useAtomicRefCount) {
return duAtomicRefCount.incrementAndGet();
}
else {
return ++duRefCount;
}
}
int dereferencePacket() {
if (useAtomicRefCount) {
return duAtomicRefCount.decrementAndGet();
}
else {
return --duRefCount;
}
}
int getRefCount() {
if (useAtomicRefCount) {
return duAtomicRefCount.get();
}
else {
return duRefCount;
}
}
}
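The useAtomicRefCount switch above implements the second half of the commit message: when a direct-submit decoder consumes decode units on the receive thread, the count is never touched by a second thread, so a plain int increment can replace the atomic read-modify-write and its memory-ordering cost. A condensed sketch of the pattern; keeping the non-atomic mode single-threaded is the caller's obligation:

import java.util.concurrent.atomic.AtomicInteger;

// Pays for atomicity only when two threads can touch the count.
class RefCount {
    private final boolean atomic;
    private final AtomicInteger atomicCount = new AtomicInteger();
    private int plainCount; // safe only under single-threaded access

    RefCount(boolean atomic) { this.atomic = atomic; }

    int increment() {
        return atomic ? atomicCount.incrementAndGet() : ++plainCount;
    }

    int decrement() {
        return atomic ? atomicCount.decrementAndGet() : --plainCount;
    }

    int get() {
        return atomic ? atomicCount.get() : plainCount;
    }
}

VideoStream (next file) chooses the mode once per connection, passing !directSubmit so only threaded decoders pay for the AtomicInteger.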

VideoStream.java

@@ -189,14 +189,14 @@ public class VideoStream {
RtpReorderQueue rtpQueue = new RtpReorderQueue(16, MAX_RTP_QUEUE_DELAY_MS);
RtpReorderQueue.RtpQueueStatus queueStatus;
boolean directSubmit = (decRend != null && (decRend.getCapabilities() &
VideoDecoderRenderer.CAPABILITY_DIRECT_SUBMIT) != 0);
// Preinitialize the ring buffer
int requiredBufferSize = context.streamConfig.getMaxPacketSize() + RtpPacket.MAX_HEADER_SIZE;
for (int i = 0; i < VIDEO_RING_SIZE; i++) {
ring[i] = new VideoPacket(new byte[requiredBufferSize]);
ring[i] = new VideoPacket(new byte[requiredBufferSize], !directSubmit);
}
boolean directSubmit = (decRend != null && (decRend.getCapabilities() &
VideoDecoderRenderer.CAPABILITY_DIRECT_SUBMIT) != 0);
byte[] buffer;
DatagramPacket packet = new DatagramPacket(new byte[1], 1); // Placeholder array
@@ -243,11 +243,11 @@
// Reinitialize the video ring since they're all being used
LimeLog.warning("Packet ring wrapped around!");
for (int i = 0; i < VIDEO_RING_SIZE; i++) {
ring[i] = new VideoPacket(new byte[requiredBufferSize]);
ring[i] = new VideoPacket(new byte[requiredBufferSize], !directSubmit);
}
break;
}
} while (ring[ringIndex].decodeUnitRefCount.get() != 0);
} while (ring[ringIndex].getRefCount() != 0);
} catch (IOException e) {
context.connListener.connectionTerminated(e);
return;
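Why the non-atomic mode is safe on this path: with CAPABILITY_DIRECT_SUBMIT, decode units are submitted and released on the receive thread itself, so referencePacket(), dereferencePacket(), and the getRefCount() ring-reuse guard above all run on one thread. A sketch of the two threading modes; the thread labels are illustrative, not from the source:

// Direct submit:    [receive thread] parse -> chain -> submit -> deref
//                   getRefCount() reads a value this thread wrote.
// Threaded decoder: [receive thread] parse -> chain -> queue
//                   [decoder thread] render -> deref
//                   The count crosses threads, so AtomicInteger is needed
//                   both for atomicity and for visibility of the decrement.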