Draw directly to the surface buffer. Increase the amount of decoding and rendering that can run in parallel. Add performance levels and choose one based on cpuinfo. Significantly improves performance on Tegra 3.

commit 45664dac2a
parent 54839e672d
Author: Cameron Gutman
Date:   2013-11-21 08:38:49 -05:00

9 changed files with 218 additions and 120 deletions
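The headline change is the first item in the message: instead of copying decoded frames into a Java int[] and blitting them through a Canvas, the native code now writes converted RGBA pixels straight into the window's back buffer. Below is a minimal sketch of that NDK pattern, distilled from the nv_avc_redraw() function added in this commit; the helper name is hypothetical and error logging is trimmed:

    #include <string.h>
    #include <jni.h>
    #include <android/native_window_jni.h>

    // Hypothetical helper showing the direct-rendering pattern: resolve the
    // ANativeWindow behind a Java Surface, lock its back buffer, copy an
    // already-converted RGBA frame in, then post the buffer to the compositor.
    static void draw_rgba_to_surface(JNIEnv *env, jobject surface,
        const void *rgba, int width, int height) {
        ANativeWindow_Buffer buffer;
        ANativeWindow *window = ANativeWindow_fromSurface(env, surface);
        if (window == NULL) {
            return;
        }

        if (ANativeWindow_lock(window, &buffer, NULL) >= 0) {
            // Assumes buffer.stride == width; the commit relies on Game.java
            // pinning the surface to a fixed 1280x720 RGBA_8888 format.
            memcpy(buffer.bits, rgba, width * height * 4);

            // Posting the buffer is what actually puts the frame on screen
            ANativeWindow_unlockAndPost(window);
        }

        ANativeWindow_release(window);
    }

Skipping the Canvas round trip removes a full frame copy plus the JNI int-array pinning that the old getCurrentFrame() path paid on every redraw.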

View File: Android.mk

@@ -9,7 +9,7 @@ include $(CLEAR_VARS)
 LOCAL_MODULE := nv_avc_dec
 LOCAL_SRC_FILES := nv_avc_dec.c nv_avc_dec_jni.c
 LOCAL_C_INCLUDES := $(LOCAL_PATH)/ffmpeg/$(TARGET_ARCH_ABI)/include
-LOCAL_LDLIBS := -L$(SYSROOT)/usr/lib -llog
+LOCAL_LDLIBS := -L$(SYSROOT)/usr/lib -llog -landroid

 # Link to ffmpeg libraries
 LOCAL_SHARED_LIBRARIES := libavcodec libavformat libswscale libavutil libavfilter libwsresample

View File: nv_avc_dec.c

@@ -5,20 +5,31 @@
 #include <android/log.h>
 #include "nv_avc_dec.h"
+#include <jni.h>
+#include <android/native_window_jni.h>

 AVCodec* decoder;
 AVCodecContext* decoder_ctx;
 AVFrame* yuv_frame;
-AVFrame* tmp_frame;
 AVFrame* rgb_frame;
+AVFrame* rnd_frame;
+AVFrame* dec_frame;
 pthread_mutex_t mutex;
 char* rgb_frame_buf;
-int picture_valid;
-int rgb_dirty;
 struct SwsContext* scaler_ctx;
+int picture_new;
+
+#define RENDER_PIX_FMT AV_PIX_FMT_RGBA
+#define BYTES_PER_PIXEL 4
+
+#define VERY_LOW_PERF 0
+#define LOW_PERF 1
+#define MED_PERF 2
+#define HIGH_PERF 3

 // This function must be called before
 // any other decoding functions
-int nv_avc_init(int width, int height) {
+int nv_avc_init(int width, int height, int perf_lvl) {
     int err;

     pthread_mutex_init(&mutex, NULL);
@@ -44,12 +55,23 @@ int nv_avc_init(int width, int height) {
     // Show frames even before a reference frame
     decoder_ctx->flags2 |= CODEC_FLAG2_SHOW_ALL;

-    // Skip the loop filter for performance reasons
-    decoder_ctx->skip_loop_filter = AVDISCARD_ALL;
+    if (perf_lvl <= LOW_PERF) {
+        // Skip the loop filter for performance reasons
+        decoder_ctx->skip_loop_filter = AVDISCARD_ALL;
+    }

-    // Run 2 threads for decoding
-    decoder_ctx->thread_count = 2;
-    decoder_ctx->thread_type = FF_THREAD_FRAME;
+    if (perf_lvl <= MED_PERF) {
+        // Run 2 threads for decoding
+        decoder_ctx->thread_count = 2;
+        decoder_ctx->thread_type = FF_THREAD_FRAME;
+
+        // Use some tricks to make things faster
+        decoder_ctx->flags2 |= CODEC_FLAG2_FAST;
+    }
+    else {
+        // Use low delay single threaded decoding
+        decoder_ctx->flags |= CODEC_FLAG_LOW_DELAY;
+    }

     decoder_ctx->width = width;
     decoder_ctx->height = height;
@@ -62,13 +84,13 @@ int nv_avc_init(int width, int height) {
         return err;
     }

-    tmp_frame = av_frame_alloc();
-    if (tmp_frame == NULL) {
+    dec_frame = av_frame_alloc();
+    if (dec_frame == NULL) {
         __android_log_write(ANDROID_LOG_ERROR, "NVAVCDEC",
             "Couldn't allocate frame");
         return -1;
     }

     rgb_frame = av_frame_alloc();
     if (rgb_frame == NULL) {
         __android_log_write(ANDROID_LOG_ERROR, "NVAVCDEC",
@@ -76,16 +98,16 @@ int nv_avc_init(int width, int height) {
         return -1;
     }

-    rgb_frame_buf = (char*)av_malloc(nv_avc_get_rgb_frame_size());
+    rgb_frame_buf = (char*)av_malloc(width * height * BYTES_PER_PIXEL);
     if (rgb_frame_buf == NULL) {
         __android_log_write(ANDROID_LOG_ERROR, "NVAVCDEC",
             "Couldn't allocate picture");
         return -1;
     }

     err = avpicture_fill((AVPicture*)rgb_frame,
         rgb_frame_buf,
-        AV_PIX_FMT_RGB32,
+        RENDER_PIX_FMT,
         decoder_ctx->width,
         decoder_ctx->height);
     if (err < 0) {
@@ -93,13 +115,13 @@ int nv_avc_init(int width, int height) {
             "Couldn't fill picture");
         return err;
     }

     scaler_ctx = sws_getContext(decoder_ctx->width,
         decoder_ctx->height,
         decoder_ctx->pix_fmt,
         decoder_ctx->width,
         decoder_ctx->height,
-        AV_PIX_FMT_RGB32,
+        RENDER_PIX_FMT,
         SWS_BICUBIC,
         NULL, NULL, NULL);
     if (scaler_ctx == NULL) {
@@ -123,9 +145,13 @@ void nv_avc_destroy(void) {
         sws_freeContext(scaler_ctx);
         scaler_ctx = NULL;
     }
-    if (tmp_frame) {
+    if (dec_frame) {
+        av_frame_free(&dec_frame);
+        dec_frame = NULL;
+    }
+    if (yuv_frame) {
         av_frame_free(&yuv_frame);
-        tmp_frame = NULL;
+        yuv_frame = NULL;
     }
     if (rgb_frame) {
         av_frame_free(&rgb_frame);
@@ -135,32 +161,43 @@ void nv_avc_destroy(void) {
         av_free(rgb_frame_buf);
         rgb_frame_buf = NULL;
     }
+    if (rnd_frame) {
+        av_frame_free(&rnd_frame);
+        rnd_frame = NULL;
+    }

     pthread_mutex_destroy(&mutex);
 }

-// The decoded frame is ARGB
-// Returns 1 on success, 0 on failure
-int nv_avc_get_current_frame(char* rgbframe, int size) {
+void nv_avc_redraw(JNIEnv *env, jobject surface) {
+    ANativeWindow* window;
+    ANativeWindow_Buffer buffer;
     int err;

-    if (size != nv_avc_get_rgb_frame_size()) {
-        return 0;
+    // Free the old decoded frame
+    if (rnd_frame) {
+        av_frame_free(&rnd_frame);
     }

     pthread_mutex_lock(&mutex);

-    // Check if the RGB frame needs updating
-    if (rgb_dirty) {
-        // If the decoder doesn't have a new picture, we fail
-        if (!picture_valid) {
-            pthread_mutex_unlock(&mutex);
-            return 0;
+    // Check if there's a new frame
+    if (picture_new) {
+        // Clone the decoder's last frame
+        rnd_frame = av_frame_clone(yuv_frame);
+
+        // The remaining processing can be done without the mutex
+        pthread_mutex_unlock(&mutex);
+
+        if (rnd_frame == NULL) {
+            __android_log_write(ANDROID_LOG_ERROR, "NVAVCDEC",
+                "Cloning failed");
+            return;
         }

         // Convert the YUV image to RGB
         err = sws_scale(scaler_ctx,
-            yuv_frame->data,
-            yuv_frame->linesize,
+            rnd_frame->data,
+            rnd_frame->linesize,
             0,
             decoder_ctx->height,
             rgb_frame->data,
@@ -168,36 +205,42 @@ int nv_avc_get_current_frame(char* rgbframe, int size) {
         if (err != decoder_ctx->height) {
             __android_log_write(ANDROID_LOG_ERROR, "NVAVCDEC",
                 "Scaling failed");
-            pthread_mutex_unlock(&mutex);
-            return 0;
+            return;
         }

-        // RGB frame is now clean
-        rgb_dirty = 0;
-    }
-
-    // The remaining processing can be done without the mutex
-    pthread_mutex_unlock(&mutex);
-
-    err = avpicture_layout((AVPicture*)rgb_frame,
-        AV_PIX_FMT_RGB32,
-        decoder_ctx->width,
-        decoder_ctx->height,
-        rgbframe,
-        size);
-    if (err < 0) {
-        __android_log_write(ANDROID_LOG_ERROR, "NVAVCDEC",
-            "Picture fill failed");
-        return 0;
+        window = ANativeWindow_fromSurface(env, surface);
+        if (window == NULL) {
+            __android_log_write(ANDROID_LOG_ERROR, "NVAVCDEC",
+                "Failed to get window from surface");
+            return;
+        }
+
+        // Lock down a render buffer
+        if (ANativeWindow_lock(window, &buffer, NULL) >= 0) {
+            // Draw the frame to the buffer
+            err = avpicture_layout((AVPicture*)rgb_frame,
+                RENDER_PIX_FMT,
+                decoder_ctx->width,
+                decoder_ctx->height,
+                buffer.bits,
+                decoder_ctx->width *
+                decoder_ctx->height *
+                BYTES_PER_PIXEL);
+            if (err < 0) {
+                __android_log_write(ANDROID_LOG_ERROR, "NVAVCDEC",
+                    "Picture fill failed");
+            }
+
+            // Draw the frame to the surface
+            ANativeWindow_unlockAndPost(window);
+        }
+
+        ANativeWindow_release(window);
     }
-
-    return 1;
-}
-
-int nv_avc_get_rgb_frame_size(void) {
-    return avpicture_get_size(AV_PIX_FMT_RGB32,
-        decoder_ctx->width,
-        decoder_ctx->height);
+    else {
+        pthread_mutex_unlock(&mutex);
+        rnd_frame = NULL;
+    }
 }

 // packets must be decoded in order
@@ -218,7 +261,7 @@ int nv_avc_decode(unsigned char* indata, int inlen) {
     while (pkt.size > 0) {
         err = avcodec_decode_video2(
             decoder_ctx,
-            tmp_frame,
+            dec_frame,
             &got_pic,
             &pkt);
         if (err < 0) {
@@ -237,14 +280,12 @@ int nv_avc_decode(unsigned char* indata, int inlen) {
         }

         // Clone a new frame
-        yuv_frame = av_frame_clone(tmp_frame);
+        yuv_frame = av_frame_clone(dec_frame);
         if (yuv_frame) {
-            // If we got a new picture, the RGB frame needs refreshing
-            picture_valid = 1;
-            rgb_dirty = 1;
+            picture_new = 1;
         }
         else {
-            picture_valid = 0;
+            picture_new = 0;
         }

         pthread_mutex_unlock(&mutex);
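Taken together, these hunks split the old single-path getCurrentFrame() into a producer/consumer handoff: the decode thread publishes its newest frame under the mutex, and the render thread takes a private reference and releases the lock before doing any heavy work. A distilled sketch of that locking scheme (names are illustrative; the real code uses yuv_frame, rnd_frame, and picture_new):

    #include <pthread.h>
    #include <libavutil/frame.h>

    static pthread_mutex_t frame_lock = PTHREAD_MUTEX_INITIALIZER;
    static AVFrame *latest;   // newest decoded frame (yuv_frame above)
    static int has_new;       // picture_new above

    // Decoder side: publish the newest frame. av_frame_clone() only adds
    // a reference to the refcounted buffers, so the critical section is tiny.
    void publish_frame(AVFrame *decoded) {
        pthread_mutex_lock(&frame_lock);
        av_frame_free(&latest);            // drop the previous reference
        latest = av_frame_clone(decoded);
        has_new = (latest != NULL);
        pthread_mutex_unlock(&frame_lock);
    }

    // Renderer side: take a private reference, then convert and draw with
    // the lock already released, overlapping with the next decode.
    AVFrame *take_frame(void) {
        AVFrame *frame = NULL;
        pthread_mutex_lock(&frame_lock);
        if (has_new) {
            frame = av_frame_clone(latest);
        }
        pthread_mutex_unlock(&frame_lock);
        return frame;   // caller scales/draws, then av_frame_free()s it
    }

Because neither thread holds the mutex across sws_scale() or the surface draw, rendering a frame can overlap the next avcodec_decode_video2() call, which is the "decoding and rendering in parallel" improvement the commit message refers to.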

View File: nv_avc_dec.h

@@ -1,5 +1,6 @@
-int nv_avc_init(int width, int height);
+#include <jni.h>
+
+int nv_avc_init(int width, int height, int perf_lvl);
 void nv_avc_destroy(void);
-int nv_avc_get_current_frame(char* yuvframe, int size);
-int nv_avc_get_frame_size(void);
+void nv_avc_redraw(JNIEnv *env, jobject surface);
 int nv_avc_decode(unsigned char* indata, int inlen);

View File: nv_avc_dec_jni.c

@@ -6,8 +6,10 @@
 // This function must be called before
 // any other decoding functions
 JNIEXPORT jint JNICALL
-Java_com_limelight_nvstream_av_video_AvcDecoder_init(JNIEnv *env, jobject this, jint width, jint height) {
-    return nv_avc_init(width, height);
+Java_com_limelight_nvstream_av_video_AvcDecoder_init(JNIEnv *env, jobject this, jint width,
+    jint height, jint perflvl)
+{
+    return nv_avc_init(width, height, perflvl);
 }

 // This function must be called after
@@ -17,27 +19,10 @@ Java_com_limelight_nvstream_av_video_AvcDecoder_destroy(JNIEnv *env, jobject thi
     nv_avc_destroy();
 }

-// The decoded frame is ARGB
-// Returns 1 on success, 0 on failure
-JNIEXPORT jboolean JNICALL
-Java_com_limelight_nvstream_av_video_AvcDecoder_getCurrentFrame(JNIEnv *env, jobject this,
-    jintArray rgbframe, jint sizeints)
-{
-    jint* jni_rgbframe;
-    jboolean ret;
-
-    jni_rgbframe = (*env)->GetIntArrayElements(env, rgbframe, 0);
-
-    ret = (nv_avc_get_current_frame((char*)jni_rgbframe, sizeints*4) != 0) ? JNI_TRUE : JNI_FALSE;
-
-    (*env)->ReleaseIntArrayElements(env, rgbframe, jni_rgbframe, 0);
-
-    return ret;
-}
-
-JNIEXPORT jint JNICALL
-Java_com_limelight_nvstream_av_video_AvcDecoder_getFrameSize(JNIEnv *env, jobject this) {
-    return nv_avc_get_rgb_frame_size() / 4;
+// This function redraws the surface
+JNIEXPORT void JNICALL
+Java_com_limelight_nvstream_av_video_AvcDecoder_redraw(JNIEnv *env, jobject this, jobject surface) {
+    nv_avc_redraw(env, surface);
 }

 // packets must be decoded in order

Binary file not shown.

Binary file not shown.

View File: Game.java

@@ -5,10 +5,13 @@ import com.limelight.nvstream.input.NvControllerPacket;
 import android.app.Activity;
 import android.content.ComponentCallbacks2;
+import android.graphics.ImageFormat;
+import android.graphics.PixelFormat;
 import android.os.Bundle;
 import android.view.InputDevice;
 import android.view.KeyEvent;
 import android.view.MotionEvent;
+import android.view.SurfaceHolder;
 import android.view.SurfaceView;
 import android.view.View;
 import android.view.View.OnGenericMotionListener;
@@ -54,7 +57,9 @@ public class Game extends Activity implements OnGenericMotionListener, OnTouchLi
         SurfaceView sv = (SurfaceView) findViewById(R.id.surfaceView);
         sv.setOnGenericMotionListener(this);
         sv.setOnTouchListener(this);
-        sv.getHolder().setFixedSize(1280, 720);
+        SurfaceHolder sh = sv.getHolder();
+        sh.setFixedSize(1280, 720);
+        sh.setFormat(PixelFormat.RGBA_8888);

         // Start the connection
         conn = new NvConnection(Game.this.getIntent().getStringExtra("host"), Game.this, sv.getHolder().getSurface());

View File: AvcDecoder.java

@@ -1,5 +1,7 @@
 package com.limelight.nvstream.av.video;

+import android.view.Surface;
+
 public class AvcDecoder {
     static {
         // FFMPEG dependencies
@@ -13,9 +15,8 @@ public class AvcDecoder {
         System.loadLibrary("nv_avc_dec");
     }

-    public static native int init(int width, int height);
+    public static native int init(int width, int height, int perflvl);
     public static native void destroy();
-    public static native boolean getCurrentFrame(int[] rgbframe, int sizeints);
-    public static native int getFrameSize();
+    public static native void redraw(Surface surface);
     public static native int decode(byte[] indata, int inoff, int inlen);
 }

View File: CpuDecoderRenderer.java

@@ -1,8 +1,10 @@
 package com.limelight.nvstream.av.video;

+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
 import java.nio.ByteBuffer;

-import android.graphics.Canvas;
 import android.view.Surface;

 import com.limelight.nvstream.av.AvByteBufferDescriptor;
@@ -13,26 +15,72 @@ public class CpuDecoderRenderer implements DecoderRenderer {
     private Surface renderTarget;
     private ByteBuffer decoderBuffer;
     private Thread rendererThread;
-    private int width, height;
+    private int perfLevel;
+
+    private static final int LOW_PERF = 1;
+    private static final int MED_PERF = 2;
+    private static final int HIGH_PERF = 3;
+
+    private int findOptimalPerformanceLevel() {
+        StringBuilder cpuInfo = new StringBuilder();
+        BufferedReader br = null;
+        try {
+            br = new BufferedReader(new FileReader(new File("/proc/cpuinfo")));
+            for (;;) {
+                int ch = br.read();
+                if (ch == -1)
+                    break;
+                cpuInfo.append((char)ch);
+            }
+
+            // Here we're doing very simple heuristics based on CPU model
+            String cpuInfoStr = cpuInfo.toString();
+
+            // We order them from greatest to least for proper detection
+            // of devices with multiple sets of cores (like Exynos 5 Octa)
+            // TODO Make this better
+            if (cpuInfoStr.contains("0xc0f")) {
+                // Cortex-A15
+                return MED_PERF;
+            }
+            else if (cpuInfoStr.contains("0xc09")) {
+                // Cortex-A9
+                return LOW_PERF;
+            }
+            else if (cpuInfoStr.contains("0xc07")) {
+                // Cortex-A7
+                return LOW_PERF;
+            }
+            else {
+                // Didn't have anything we're looking for
+                return MED_PERF;
+            }
+        } catch (IOException e) {
+        } finally {
+            if (br != null) {
+                try {
+                    br.close();
+                } catch (IOException e) {}
+            }
+        }
+
+        // Couldn't read cpuinfo, so assume medium
+        return MED_PERF;
+    }

     @Override
     public void setup(int width, int height, Surface renderTarget) {
         this.renderTarget = renderTarget;
-        this.width = width;
-        this.height = height;
+        this.perfLevel = findOptimalPerformanceLevel();

-        int err = AvcDecoder.init(width, height);
+        int err = AvcDecoder.init(width, height, perfLevel);
         if (err != 0) {
             throw new IllegalStateException("AVC decoder initialization failure: "+err);
         }

-        decoderBuffer = ByteBuffer.allocate(128*1024);
+        decoderBuffer = ByteBuffer.allocate(92*1024);

-        System.out.println("Using software decoding");
-    }
-
-    private int getPerFrameDelayMs(int frameRate) {
-        return 1000 / frameRate;
+        System.out.println("Using software decoding (performance level: "+perfLevel+")");
     }

     @Override
@@ -40,29 +88,46 @@ public class CpuDecoderRenderer implements DecoderRenderer {
         rendererThread = new Thread() {
             @Override
             public void run() {
-                int[] frameBuffer = new int[AvcDecoder.getFrameSize()];
+                int frameRateTarget;
+                long nextFrameTime = System.currentTimeMillis();
+
+                switch (perfLevel) {
+                case HIGH_PERF:
+                    frameRateTarget = 45;
+                    break;
+                case MED_PERF:
+                    frameRateTarget = 30;
+                    break;
+                case LOW_PERF:
+                default:
+                    frameRateTarget = 15;
+                    break;
+                }
+
                 while (!isInterrupted())
                 {
-                    try {
-                        // CPU decoding frame rate target is 30 fps
-                        Thread.sleep(getPerFrameDelayMs(30));
-                    } catch (InterruptedException e) {
-                        return;
+                    long diff = nextFrameTime - System.currentTimeMillis();
+
+                    if (diff > 0) {
+                        // Sleep until the frame should be rendered
+                        try {
+                            Thread.sleep(diff);
+                        } catch (InterruptedException e) {
+                            return;
+                        }
                     }

-                    if (!AvcDecoder.getCurrentFrame(frameBuffer, frameBuffer.length))
-                        continue;
-
-                    // Draw the new bitmap to the canvas
-                    Canvas c = renderTarget.lockCanvas(null);
-                    c.drawBitmap(frameBuffer, 0, width, 0, 0, width, height, false, null);
-                    renderTarget.unlockCanvasAndPost(c);
+                    nextFrameTime = computePresentationTimeMs(frameRateTarget);
+                    AvcDecoder.redraw(renderTarget);
                 }
             }
         };
         rendererThread.start();
     }

+    private long computePresentationTimeMs(int frameRate) {
+        return System.currentTimeMillis() + (1000 / frameRate);
+    }
+
     @Override
     public void stop() {