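From: "Ronald S. Bultje" <[email protected]>

snow: replace the variable-length on-stack scratch array in mc_block()
with a heap buffer that is (re)allocated in ff_snow_frame_start() and
passed in by the caller.

Because the snow half-pel helpers now take this extra scratch-buffer
argument, they no longer fit the dsputil put_pixels tables; they are
stored in a new snow_hpel_put table in MotionEstContext instead, and
cmp_inline() uses that table whenever an entry is set, falling back to
hpel_put otherwise.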
---
libavcodec/motion_est.c | 26 +++++++++++++++++++++++---
libavcodec/mpegvideo.h | 6 ++++++
libavcodec/snow.c | 37 +++++++++++++++++++++----------------
libavcodec/snow.h | 2 ++
4 files changed, 52 insertions(+), 19 deletions(-)
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index ce802e5..0f283fd 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -202,7 +202,14 @@ static av_always_inline int cmp_inline(MpegEncContext *s,
const int x, const int
//FIXME x/y wrong, but mpeg4 qpel is sick anyway, we
should drop as much of it as possible in favor for h264
}
}else{
- c->hpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride,
h);
+ if (c->snow_hpel_put[size][dxy]) {
+ c->snow_hpel_put[size][dxy](c->temp,
+ ref[0] + x + y*stride,
+ c->tmp2_mc_buf, stride, h);
+ } else {
+ c->hpel_put[size][dxy](c->temp,
+ ref[0] + x + y*stride, stride, h);
+ }
if(chroma)
uvdxy= dxy | (x&1) | (2*(y&1));
}
@@ -214,8 +221,21 @@ static av_always_inline int cmp_inline(MpegEncContext *s,
const int x, const int
}
if(chroma){
uint8_t * const uvtemp= c->temp + 16*stride;
- c->hpel_put[size+1][uvdxy](uvtemp , ref[1] + (x>>1) +
(y>>1)*uvstride, uvstride, h>>1);
- c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + (x>>1) +
(y>>1)*uvstride, uvstride, h>>1);
+ if (c->snow_hpel_put[size+1][uvdxy]) {
+ c->snow_hpel_put[size+1][uvdxy](uvtemp,
+ ref[1] + (x>>1) + (y>>1)*uvstride,
+ c->tmp2_mc_buf, uvstride, h>>1);
+ c->snow_hpel_put[size+1][uvdxy](uvtemp+8,
+ ref[2] + (x>>1) + (y>>1)*uvstride,
+ c->tmp2_mc_buf, uvstride, h>>1);
+ } else {
+ c->hpel_put[size+1][uvdxy](uvtemp,
+ ref[1] + (x>>1) + (y>>1)*uvstride,
+ uvstride, h>>1);
+ c->hpel_put[size+1][uvdxy](uvtemp+8,
+ ref[2] + (x>>1) + (y>>1)*uvstride,
+ uvstride, h>>1);
+ }
d += chroma_cmp_func(s, uvtemp , src[1], uvstride, h>>1);
d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1);
}
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index b73da41..6270713 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -144,6 +144,10 @@ typedef struct Picture{
+typedef void (* snow_hpel_put_fn) (uint8_t *dst,
+ const uint8_t *src,
+ uint8_t *tmp2t,
+ int stride, int h);
/**
* Motion estimation context.
*/
typedef struct MotionEstContext{
AVCodecContext *avctx;
int skip; ///< set if ME is skipped for the
current MB
@@ -187,6 +191,8 @@ typedef struct MotionEstContext{
/* cmp, chroma_cmp;*/
op_pixels_func (*hpel_put)[4];
op_pixels_func (*hpel_avg)[4];
+ snow_hpel_put_fn snow_hpel_put[4][4]; ///< snow half-pel put functions; cmp_inline() falls back to hpel_put for NULL entries
+ uint8_t *tmp2_mc_buf; ///< scratch buffer passed as tmp2t to snow_hpel_put
qpel_mc_func (*qpel_put)[16];
qpel_mc_func (*qpel_avg)[16];
uint8_t (*mv_penalty)[MAX_MV*2+1]; ///< amount of bits needed to encode a
MV
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index 01f8c47..78153c8 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -103,7 +103,7 @@ static void init_qexp(void){
v *= pow(2, 1.0 / QROOT);
}
}
-static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride,
int b_w, int b_h, int dx, int dy){
+static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t
*tmp2t, int stride, int b_w, int b_h, int dx, int dy){
static const uint8_t weight[64]={
8,7,6,5,4,3,2,1,
7,7,0,0,0,0,0,1,
@@ -143,9 +143,9 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t
*src, int stride, int
int x, y, b, r, l;
int16_t tmpIt [64*(32+HTAPS_MAX)];
- uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
int16_t *tmpI= tmpIt;
- uint8_t *tmp2= tmp2t[0];
+ int tmp2_mc_buf_size = stride * (32 + HTAPS_MAX);
+ uint8_t *tmp2= tmp2t;
const uint8_t *hpel[11];
assert(dx<16 && dy<16);
r= brane[dx + 16*dy]&15;
@@ -187,7 +187,7 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t
*src, int stride, int
src -= stride*y;
}
src += HTAPS_MAX/2 - 1;
- tmp2= tmp2t[1];
+ tmp2= tmp2t + tmp2_mc_buf_size;
if(b&2){
for(y=0; y < b_h; y++){
@@ -215,7 +215,7 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t
*src, int stride, int
src -= stride*y;
}
src += stride*(HTAPS_MAX/2 - 1);
- tmp2= tmp2t[2];
+ tmp2= tmp2t + 2 * tmp2_mc_buf_size;
tmpI= tmpIt;
if(b&4){
for(y=0; y < b_h; y++){
@@ -242,12 +242,12 @@ static void mc_block(Plane *p, uint8_t *dst, const
uint8_t *src, int stride, int
}
hpel[ 0]= src;
- hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
+ hpel[ 1]= tmp2t + stride*(HTAPS_MAX/2-1);
hpel[ 2]= src + 1;
- hpel[ 4]= tmp2t[1];
- hpel[ 5]= tmp2t[2];
- hpel[ 6]= tmp2t[1] + 1;
+ hpel[ 4]= tmp2t + tmp2_mc_buf_size;
+ hpel[ 5]= hpel[4] + tmp2_mc_buf_size;
+ hpel[ 6]= hpel[4] + 1;
hpel[ 8]= src + stride;
hpel[ 9]= hpel[1] + stride;
@@ -347,7 +347,8 @@ void ff_snow_pred_block(SnowContext *s, uint8_t *dst,
uint8_t *tmp, int stride,
assert(b_w>1 && b_h>1);
assert((tab_index>=0 && tab_index<4) || b_w==32);
if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
|| (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
- mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx,
dy);
+ mc_block(&s->plane[plane_index], dst, src, s->tmp2_mc_buf,
+ stride, b_w, b_h, dx, dy);
else if(b_w==32){
int y;
for(y=0; y<b_h; y+=16){
@@ -368,9 +369,9 @@ void ff_snow_pred_block(SnowContext *s, uint8_t *dst,
uint8_t *tmp, int stride,
}
#define mca(dx,dy,b_w)\
-static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src,
int stride, int h){\
+static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src,
uint8_t *tmp2t, int stride, int h){\
assert(h==b_w);\
- mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride,
b_w, b_w, dx, dy);\
+ mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp2t,
stride, b_w, b_w, dx, dy);\
}
mca( 0, 0,16)
@@ -420,11 +421,9 @@ av_cold int ff_snow_common_init(AVCodecContext *avctx){
mcf(12,12)
#define mcfh(dx,dy)\
- s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
- s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
+ s->m.me.snow_hpel_put[0][dy/4+dx/8] = \
mc_block_hpel ## dx ## dy ## 16;\
- s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
- s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
+ s->m.me.snow_hpel_put[1][dy/4+dx/8] = \
mc_block_hpel ## dx ## dy ## 8;
mcfh(0, 0)
@@ -571,6 +570,7 @@ int ff_snow_frame_start(SnowContext *s){
AVFrame tmp;
int w= s->avctx->width; //FIXME round up to x16 ?
int h= s->avctx->height;
+ int tmp2_mc_buf_size;
if (s->current_picture.data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
s->dsp.draw_edges(s->current_picture.data[0],
@@ -616,6 +616,13 @@ int ff_snow_frame_start(SnowContext *s){
s->current_picture.key_frame= s->keyframe;
+ tmp2_mc_buf_size = s->current_picture.linesize[0] * (32 + HTAPS_MAX);
+ av_fast_malloc(&s->tmp2_mc_buf, &s->tmp2_mc_buf_size, 3 *
tmp2_mc_buf_size);
+ s->m.me.tmp2_mc_buf = s->tmp2_mc_buf;
+ /* av_fast_malloc() leaves the pointer NULL when allocation fails */
+ if (!s->tmp2_mc_buf)
+ return AVERROR(ENOMEM);
+
return 0;
}
@@ -638,6 +642,7 @@ av_cold void ff_snow_common_end(SnowContext *s)
av_freep(&s->block);
av_freep(&s->scratchbuf);
av_freep(&s->emu_edge_buffer);
+ av_freep(&s->tmp2_mc_buf);
for(i=0; i<MAX_REF_FRAMES; i++){
av_freep(&s->ref_mvs[i]);
diff --git a/libavcodec/snow.h b/libavcodec/snow.h
index b94331c..e679df0 100644
--- a/libavcodec/snow.h
+++ b/libavcodec/snow.h
@@ -166,6 +166,8 @@ typedef struct SnowContext{
uint8_t *scratchbuf;
uint8_t *emu_edge_buffer;
+ uint8_t *tmp2_mc_buf; ///< scratch for mc_block(), (re)allocated in ff_snow_frame_start()
+ unsigned int tmp2_mc_buf_size; ///< allocated size of tmp2_mc_buf, maintained by av_fast_malloc()
}SnowContext;
/* Tables */
--
1.7.9.5
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel