From: "Ronald S. Bultje" <[email protected]>

---
 libavcodec/motion_est.c |   26 +++++++++++++++++++++++---
 libavcodec/mpegvideo.h  |    6 ++++++
 libavcodec/snow.c       |   37 +++++++++++++++++++++----------------
 libavcodec/snow.h       |    2 ++
 4 files changed, 52 insertions(+), 19 deletions(-)

diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index ce802e5..0f283fd 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -202,7 +202,14 @@ static av_always_inline int cmp_inline(MpegEncContext *s, const int x, const int
                     //FIXME x/y wrong, but mpeg4 qpel is sick anyway, we should drop as much of it as possible in favor for h264
                 }
             }else{
-                c->hpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride, h);
+                if (c->snow_hpel_put[size][dxy]) {
+                    c->snow_hpel_put[size][dxy](c->temp,
+                        ref[0] + x + y*stride,
+                        c->tmp2_mc_buf, stride, h);
+                } else {
+                    c->hpel_put[size][dxy](c->temp,
+                        ref[0] + x + y*stride, stride, h);
+                }
                 if(chroma)
                     uvdxy= dxy | (x&1) | (2*(y&1));
             }
@@ -214,8 +221,21 @@ static av_always_inline int cmp_inline(MpegEncContext *s, const int x, const int
         }
         if(chroma){
             uint8_t * const uvtemp= c->temp + 16*stride;
-            c->hpel_put[size+1][uvdxy](uvtemp  , ref[1] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
-            c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
+            if (c->snow_hpel_put[size+1][uvdxy]) {
+                c->snow_hpel_put[size+1][uvdxy](uvtemp,
+                    ref[1] + (x>>1) + (y>>1)*uvstride,
+                    c->tmp2_mc_buf, uvstride, h>>1);
+                c->snow_hpel_put[size+1][uvdxy](uvtemp+8,
+                    ref[2] + (x>>1) + (y>>1)*uvstride,
+                    c->tmp2_mc_buf, uvstride, h>>1);
+            } else {
+                c->hpel_put[size+1][uvdxy](uvtemp,
+                    ref[1] + (x>>1) + (y>>1)*uvstride,
+                    uvstride, h>>1);
+                c->hpel_put[size+1][uvdxy](uvtemp+8,
+                    ref[2] + (x>>1) + (y>>1)*uvstride,
+                    uvstride, h>>1);
+            }
             d += chroma_cmp_func(s, uvtemp  , src[1], uvstride, h>>1);
             d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1);
         }
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index b73da41..6270713 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -144,6 +144,10 @@ typedef struct Picture{
 /**
  * Motion estimation context.
  */
+typedef void (* snow_hpel_put_fn) (uint8_t *dst,
+                                   const uint8_t *src,
+                                   uint8_t *tmp2t,
+                                   int stride, int h);
 typedef struct MotionEstContext{
     AVCodecContext *avctx;
     int skip;                          ///< set if ME is skipped for the current MB
@@ -187,6 +191,8 @@ typedef struct MotionEstContext{
 /*    cmp, chroma_cmp;*/
     op_pixels_func (*hpel_put)[4];
     op_pixels_func (*hpel_avg)[4];
+    snow_hpel_put_fn snow_hpel_put[4][4];
+    uint8_t *tmp2_mc_buf;
     qpel_mc_func (*qpel_put)[16];
     qpel_mc_func (*qpel_avg)[16];
     uint8_t (*mv_penalty)[MAX_MV*2+1];  ///< amount of bits needed to encode a MV
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index 01f8c47..78153c8 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -103,7 +103,7 @@ static void init_qexp(void){
         v *= pow(2, 1.0 / QROOT);
     }
 }
-static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
+static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp2t, int stride, int b_w, int b_h, int dx, int dy){
     static const uint8_t weight[64]={
     8,7,6,5,4,3,2,1,
     7,7,0,0,0,0,0,1,
@@ -143,9 +143,9 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int
 
     int x, y, b, r, l;
     int16_t tmpIt   [64*(32+HTAPS_MAX)];
-    uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
     int16_t *tmpI= tmpIt;
-    uint8_t *tmp2= tmp2t[0];
+    int tmp2_mc_buf_size = stride * (32 + HTAPS_MAX);
+    uint8_t *tmp2= tmp2t;
     const uint8_t *hpel[11];
     assert(dx<16 && dy<16);
     r= brane[dx + 16*dy]&15;
@@ -187,7 +187,7 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int
         src -= stride*y;
     }
     src += HTAPS_MAX/2 - 1;
-    tmp2= tmp2t[1];
+    tmp2= tmp2t + tmp2_mc_buf_size;
 
     if(b&2){
         for(y=0; y < b_h; y++){
@@ -215,7 +215,7 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int
         src -= stride*y;
     }
     src += stride*(HTAPS_MAX/2 - 1);
-    tmp2= tmp2t[2];
+    tmp2= tmp2t + 2 * tmp2_mc_buf_size;
     tmpI= tmpIt;
     if(b&4){
         for(y=0; y < b_h; y++){
@@ -242,12 +242,12 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int
     }
 
     hpel[ 0]= src;
-    hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
+    hpel[ 1]= tmp2t + stride*(HTAPS_MAX/2-1);
     hpel[ 2]= src + 1;
 
-    hpel[ 4]= tmp2t[1];
-    hpel[ 5]= tmp2t[2];
-    hpel[ 6]= tmp2t[1] + 1;
+    hpel[ 4]= tmp2t + tmp2_mc_buf_size;
+    hpel[ 5]= hpel[4] + tmp2_mc_buf_size;
+    hpel[ 6]= hpel[4] + 1;
 
     hpel[ 8]= src + stride;
     hpel[ 9]= hpel[1] + stride;
@@ -347,7 +347,8 @@ void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride,
         assert(b_w>1 && b_h>1);
         assert((tab_index>=0 && tab_index<4) || b_w==32);
         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
-            mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
+            mc_block(&s->plane[plane_index], dst, src, s->tmp2_mc_buf,
+                     stride, b_w, b_h, dx, dy);
         else if(b_w==32){
             int y;
             for(y=0; y<b_h; y+=16){
@@ -368,9 +369,9 @@ void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride,
 }
 
 #define mca(dx,dy,b_w)\
-static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
+static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, uint8_t *tmp2t, int stride, int h){\
     assert(h==b_w);\
-    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
+    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp2t, stride, b_w, b_w, dx, dy);\
 }
 
 mca( 0, 0,16)
@@ -420,11 +421,9 @@ av_cold int ff_snow_common_init(AVCodecContext *avctx){
     mcf(12,12)
 
 #define mcfh(dx,dy)\
-    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
-    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
+    s->m.me.snow_hpel_put[0][dy/4+dx/8] = \
         mc_block_hpel ## dx ## dy ## 16;\
-    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
-    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
+    s->m.me.snow_hpel_put[1][dy/4+dx/8] = \
         mc_block_hpel ## dx ## dy ## 8;
 
     mcfh(0, 0)
@@ -571,6 +570,7 @@ int ff_snow_frame_start(SnowContext *s){
    AVFrame tmp;
    int w= s->avctx->width; //FIXME round up to x16 ?
    int h= s->avctx->height;
+    int tmp2_mc_buf_size;
 
     if (s->current_picture.data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
         s->dsp.draw_edges(s->current_picture.data[0],
@@ -616,6 +616,10 @@ int ff_snow_frame_start(SnowContext *s){
 
     s->current_picture.key_frame= s->keyframe;
 
+    tmp2_mc_buf_size = s->current_picture.linesize[0] * (32 + HTAPS_MAX);
+    av_fast_malloc(&s->tmp2_mc_buf, &s->tmp2_mc_buf_size, 3 * tmp2_mc_buf_size);
+    s->m.me.tmp2_mc_buf = s->tmp2_mc_buf;
+
     return 0;
 }
 
@@ -638,6 +642,7 @@ av_cold void ff_snow_common_end(SnowContext *s)
     av_freep(&s->block);
     av_freep(&s->scratchbuf);
     av_freep(&s->emu_edge_buffer);
+    av_freep(&s->tmp2_mc_buf);
 
     for(i=0; i<MAX_REF_FRAMES; i++){
         av_freep(&s->ref_mvs[i]);
diff --git a/libavcodec/snow.h b/libavcodec/snow.h
index b94331c..e679df0 100644
--- a/libavcodec/snow.h
+++ b/libavcodec/snow.h
@@ -166,6 +166,8 @@ typedef struct SnowContext{
 
     uint8_t *scratchbuf;
     uint8_t *emu_edge_buffer;
+    uint8_t *tmp2_mc_buf;
+    int tmp2_mc_buf_size;
 }SnowContext;
 
 /* Tables */
-- 
1.7.9.5

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to