[libav-commits] h264dec: fix Lossless Decoding (Profile 244) for 8x8 Intra Prediction

2017-08-14 Thread Yogender Kumar Gupta
Module: libav
Branch: release/12
Commit: 55ff67c45a6c778b7002f224d2018d98870b2510

Author:    Yogender Kumar Gupta
Committer: Sean McGovern 
Date:  Thu Jun 15 18:56:13 2017 -0400

h264dec: fix Lossless Decoding (Profile 244) for 8x8 Intra Prediction

CC: libav-sta...@libav.org

Signed-off-by: Anton Khirnov 
(cherry picked from commit 79c6477c2abd8cfa41eef0c4ac39779dd8a9ec8e)

Signed-off-by: Sean McGovern 

---

 libavcodec/h264_mb.c   |  7 +++-
 libavcodec/h264pred.c  |  2 ++
 libavcodec/h264pred.h  |  3 ++
 libavcodec/h264pred_template.c | 73 ++
 4 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c
index f037bd5..51d73ce 100644
--- a/libavcodec/h264_mb.c
+++ b/libavcodec/h264_mb.c
@@ -636,7 +636,12 @@ static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h,
 uint8_t *const ptr = dest_y + block_offset[i];
 const int dir  = sl->intra4x4_pred_mode_cache[scan8[i]];
 if (transform_bypass && h->ps.sps->profile_idc == 244 && dir 
<= 1) {
-h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 
<< pixel_shift), linesize);
+if (h->x264_build < 151U) {
+h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 
256 << pixel_shift), linesize);
+} else
+h->hpc.pred8x8l_filter_add[dir](ptr, sl->mb + (i * 16 
+ p * 256 << pixel_shift),
+(sl-> 
topleft_samples_available << i) & 0x8000,
+
(sl->topright_samples_available << i) & 0x4000, linesize);
 } else {
 const int nnz = sl->non_zero_count_cache[scan8[i + p * 
16]];
 h->hpc.pred8x8l[dir](ptr, (sl->topleft_samples_available 
<< i) & 0x8000,
diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c
index 7627eb0..135babc 100644
--- a/libavcodec/h264pred.c
+++ b/libavcodec/h264pred.c
@@ -552,6 +552,8 @@ av_cold void ff_h264_pred_init(H264PredContext *h, int codec_id,
 h->pred4x4_add  [ HOR_PRED   ]= FUNCC(pred4x4_horizontal_add  , 
depth);\
 h->pred8x8l_add [VERT_PRED   ]= FUNCC(pred8x8l_vertical_add   , 
depth);\
 h->pred8x8l_add [ HOR_PRED   ]= FUNCC(pred8x8l_horizontal_add , 
depth);\
+h->pred8x8l_filter_add [VERT_PRED   ]= FUNCC(pred8x8l_vertical_filter_add  
 , depth);\
+h->pred8x8l_filter_add [ HOR_PRED   ]= 
FUNCC(pred8x8l_horizontal_filter_add , depth);\
 if (chroma_format_idc <= 1) {\
 h->pred8x8_add  [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add, 
depth);\
 h->pred8x8_add  [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add  , 
depth);\
diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h
index 60e7434..795d8f3 100644
--- a/libavcodec/h264pred.h
+++ b/libavcodec/h264pred.h
@@ -101,6 +101,9 @@ typedef struct H264PredContext {
   int16_t *block /*align 16*/, ptrdiff_t stride);
 void(*pred8x8l_add[2])(uint8_t *pix /*align  8*/,
int16_t *block /*align 16*/, ptrdiff_t stride);
+void(*pred8x8l_filter_add[2])(uint8_t *pix /*align  8*/,
+  int16_t *block /*align 16*/,
+  int topleft, int topright, ptrdiff_t stride);
 void(*pred8x8_add[3])(uint8_t *pix /*align  8*/,
   const int *block_offset,
   int16_t *block /*align 16*/, ptrdiff_t stride);
diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c
index 8492b2b..02494aa 100644
--- a/libavcodec/h264pred_template.c
+++ b/libavcodec/h264pred_template.c
@@ -1123,6 +1123,79 @@ static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
 }
+
+static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t 
*_block, int has_topleft,
+int has_topright, ptrdiff_t 
_stride)
+{
+int i;
+pixel *src = (pixel*)_src;
+const dctcoef *block = (const dctcoef*)_block;
+pixel pix[8];
+int stride = _stride/sizeof(pixel);
+PREDICT_8x8_LOAD_TOP;
+
+pix[0] = t0;
+pix[1] = t1;
+pix[2] = t2;
+pix[3] = t3;
+pix[4] = t4;
+pix[5] = t5;
+pix[6] = t6;
+pix[7] = t7;
+
+for (i = 0; i < 8; i++) {
+pixel v = pix[i];
+src[0 * stride] = v += block[0];
+src[1 * stride] = v += block[8];
+src[2 * stride] = v += block[16];
+src[3 * stride] = v += block[24];
+src[4 * stride] = v += block[32];
+src[5 * stride] = v += block[40];
+src[6 * 

[libav-commits] h264dec: fix Lossless Decoding (Profile 244) for 8x8 Intra Prediction

2017-07-26 Thread Yogender Kumar Gupta
Module: libav
Branch: master
Commit: 79c6477c2abd8cfa41eef0c4ac39779dd8a9ec8e

Author:    Yogender Kumar Gupta
Committer: Anton Khirnov 
Date:  Thu Jun 15 18:56:13 2017 -0400

h264dec: fix Lossless Decoding (Profile 244) for 8x8 Intra Prediction

CC: libav-sta...@libav.org

Signed-off-by: Anton Khirnov 

---

 libavcodec/h264_mb.c   |  7 +++-
 libavcodec/h264pred.c  |  2 ++
 libavcodec/h264pred.h  |  3 ++
 libavcodec/h264pred_template.c | 73 ++
 4 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c
index f037bd5..51d73ce 100644
--- a/libavcodec/h264_mb.c
+++ b/libavcodec/h264_mb.c
@@ -636,7 +636,12 @@ static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h,
 uint8_t *const ptr = dest_y + block_offset[i];
 const int dir  = sl->intra4x4_pred_mode_cache[scan8[i]];
 if (transform_bypass && h->ps.sps->profile_idc == 244 && dir 
<= 1) {
-h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 
<< pixel_shift), linesize);
+if (h->x264_build < 151U) {
+h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 
256 << pixel_shift), linesize);
+} else
+h->hpc.pred8x8l_filter_add[dir](ptr, sl->mb + (i * 16 
+ p * 256 << pixel_shift),
+(sl-> 
topleft_samples_available << i) & 0x8000,
+
(sl->topright_samples_available << i) & 0x4000, linesize);
 } else {
 const int nnz = sl->non_zero_count_cache[scan8[i + p * 
16]];
 h->hpc.pred8x8l[dir](ptr, (sl->topleft_samples_available 
<< i) & 0x8000,
diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c
index 7627eb0..135babc 100644
--- a/libavcodec/h264pred.c
+++ b/libavcodec/h264pred.c
@@ -552,6 +552,8 @@ av_cold void ff_h264_pred_init(H264PredContext *h, int codec_id,
 h->pred4x4_add  [ HOR_PRED   ]= FUNCC(pred4x4_horizontal_add  , 
depth);\
 h->pred8x8l_add [VERT_PRED   ]= FUNCC(pred8x8l_vertical_add   , 
depth);\
 h->pred8x8l_add [ HOR_PRED   ]= FUNCC(pred8x8l_horizontal_add , 
depth);\
+h->pred8x8l_filter_add [VERT_PRED   ]= FUNCC(pred8x8l_vertical_filter_add  
 , depth);\
+h->pred8x8l_filter_add [ HOR_PRED   ]= 
FUNCC(pred8x8l_horizontal_filter_add , depth);\
 if (chroma_format_idc <= 1) {\
 h->pred8x8_add  [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add, 
depth);\
 h->pred8x8_add  [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add  , 
depth);\
diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h
index 60e7434..795d8f3 100644
--- a/libavcodec/h264pred.h
+++ b/libavcodec/h264pred.h
@@ -101,6 +101,9 @@ typedef struct H264PredContext {
   int16_t *block /*align 16*/, ptrdiff_t stride);
 void(*pred8x8l_add[2])(uint8_t *pix /*align  8*/,
int16_t *block /*align 16*/, ptrdiff_t stride);
+void(*pred8x8l_filter_add[2])(uint8_t *pix /*align  8*/,
+  int16_t *block /*align 16*/,
+  int topleft, int topright, ptrdiff_t stride);
 void(*pred8x8_add[3])(uint8_t *pix /*align  8*/,
   const int *block_offset,
   int16_t *block /*align 16*/, ptrdiff_t stride);
diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c
index 8492b2b..02494aa 100644
--- a/libavcodec/h264pred_template.c
+++ b/libavcodec/h264pred_template.c
@@ -1123,6 +1123,79 @@ static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
 }
+
+static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t 
*_block, int has_topleft,
+int has_topright, ptrdiff_t 
_stride)
+{
+int i;
+pixel *src = (pixel*)_src;
+const dctcoef *block = (const dctcoef*)_block;
+pixel pix[8];
+int stride = _stride/sizeof(pixel);
+PREDICT_8x8_LOAD_TOP;
+
+pix[0] = t0;
+pix[1] = t1;
+pix[2] = t2;
+pix[3] = t3;
+pix[4] = t4;
+pix[5] = t5;
+pix[6] = t6;
+pix[7] = t7;
+
+for (i = 0; i < 8; i++) {
+pixel v = pix[i];
+src[0 * stride] = v += block[0];
+src[1 * stride] = v += block[8];
+src[2 * stride] = v += block[16];
+src[3 * stride] = v += block[24];
+src[4 * stride] = v += block[32];
+src[5 * stride] = v += block[40];
+src[6 * stride] = v += block[48];
+src[7 * stride] = v +  block[56];
+src++;
+block++;
+}
+
+