Re: [x265] [PATCH] asm: code for scale2D_64to32 routine

2013-11-18 Thread chen
I think this function has a problem; I am not sure whether it was verified
with the testbench before being uploaded.
The problem is that j and l are each missing a pixel, so in some cases the result is wrong.

At 2013-11-18 15:06:07,muru...@multicorewareinc.com wrote:
# HG changeset patch
# User Murugan Vairavel muru...@multicorewareinc.com
# Date 1384757077 -19800
#  Mon Nov 18 12:14:37 2013 +0530
# Node ID d756003f63691b7677b4cf4c98fbb2a1d67dbb02
# Parent  e2895ce7bbeb2c3d845fee2578758d0012fa2cb4
asm: code for scale2D_64to32 routine

diff -r e2895ce7bbeb -r d756003f6369 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Sun Nov 17 11:24:13 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp Mon Nov 18 12:14:37 2013 +0530
@@ -529,6 +529,7 @@
 PIXEL_AVG_W4(ssse3);
 
 p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
+p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
 
 p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3;
 p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_ssse3;
diff -r e2895ce7bbeb -r d756003f6369 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Sun Nov 17 11:24:13 2013 -0600
+++ b/source/common/x86/pixel-a.asm Mon Nov 18 12:14:37 2013 +0530
@@ -8230,3 +8230,113 @@
 movu  [r0 + 48],m4
 
 RET
+
+;-
+; void scale2D_64to32(pixel *dst, pixel *src, intptr_t stride)
+;-
+INIT_XMM ssse3
+cglobal scale2D_64to32, 3, 4, 8, dest, src, stride
+
+movam7,  [deinterleave_shuf]
+mov r3d, 32
+.loop
+
+movum0,  [r1]  ;i
+palignr m1,  m0,1  ;j
+movum2,  [r1 + r2] ;k
+palignr m3,  m2,1  ;l
+movum4,  m0
+movum5,  m2
+
+pxorm4,  m1;i^j
+pxorm5,  m3;k^l
+por m4,  m5;ij|kl
+
+pavgb   m0,  m1;s
+pavgb   m2,  m3;t
+movum5,  m0
+pavgb   m0,  m2;(s+t+1)/2
+pxorm5,  m2;s^t
+pandm4,  m5;(ij|kl)st
+pandm4,  [hmul_16p]
+psubb   m0,  m4;Result
+
+movum1,  [r1 + 16] ;i
+palignr m2,  m1,1  ;j
+movum3,  [r1 + r2 + 16];k
+palignr m4,  m3,1  ;l
+movum5,  m1
+movum6,  m3
+
+pxorm5,  m2;i^j
+pxorm6,  m4;k^l
+por m5,  m6;ij|kl
+
+pavgb   m1,  m2;s
+pavgb   m3,  m4;t
+movum6,  m1
+pavgb   m1,  m3;(s+t+1)/2
+pxorm6,  m3;s^t
+pandm5,  m6;(ij|kl)st
+pandm5,  [hmul_16p]
+psubb   m1,  m5;Result
+
+pshufb  m0,  m0,m7
+pshufb  m1,  m1,m7
+
+punpcklqdqm0,   m1
+movu  [r0], m0
+
+movum0,  [r1 + 32] ;i
+palignr m1,  m0,1  ;j
+movum2,  [r1 + r2 + 32];k
+palignr m3,  m2,1  ;l
+movum4,  m0
+movum5,  m2
+
+pxorm4,  m1;i^j
+pxorm5,  m3;k^l
+por m4,  m5;ij|kl
+
+pavgb   m0,  m1;s
+pavgb   m2,  m3;t
+movum5,  m0
+pavgb   m0,  m2;(s+t+1)/2
+pxorm5,  m2;s^t
+pandm4,  m5;(ij|kl)st
+pandm4,  [hmul_16p]
+psubb   m0,  m4;Result
+
+movum1,  [r1 + 48] ;i
+palignr m2,  m1,1  ;j
+movum3,  [r1 + r2 + 48];k
+palignr m4,  m3,1  ;l
+movum5,  m1
+movum6,  m3
+
+pxorm5,  m2;i^j
+pxorm6,  m4;k^l
+por m5,  m6;ij|kl
+
+pavgb   m1,  m2;s
+pavgb   m3,  m4;t
+movum6,  m1
+pavgb   m1,  m3;(s+t+1)/2
+pxorm6,  m3;s^t
+pandm5,  m6;(ij|kl)st
+pandm5,  [hmul_16p]
+psubb   m1,  m5;Result
+
+pshufb  m0,  m0,m7
+pshufb  m1,  m1,m7
+
+punpcklqdqm0,   m1
+

Re: [x265] [PATCH] TComYuv::addAvg, primitive function for luma and chroma loops

2013-11-18 Thread chen
@@ -640,26 +621,9 @@
 width  = m_hChromaShift;
 height = m_vChromaShift;
 
-for (y = height - 1; y = 0; y--)
-{
-for (x = width - 1; x = 0; )
-{
-// note: chroma min width is 2
-dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset)  shiftNum);
-dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset)  shiftNum);
-x--;
-dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset)  shiftNum);
-dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset)  shiftNum);
-x--;
-}
-
-srcU0 += src0Stride;
-srcU1 += src1Stride;
-srcV0 += src0Stride;
-srcV1 += src1Stride;
-dstU  += dststride;
-dstV  += dststride;
-}
+int part = partitionFromSizes(width, height);
You use the chroma size to get the partition index here; I think that is an error.
 
+primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, 
srcU1, src1Stride);
+primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, 
srcV1, src1Stride);
 }
 }
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH] TComYuv::addAvg, primitive function for luma and chroma loops

2013-11-18 Thread dnyaneshwar
# HG changeset patch
# User Dnyaneshwar G dnyanesh...@multicorewareinc.com
# Date 1384768323 -19800
#  Mon Nov 18 15:22:03 2013 +0530
# Node ID cdd54aa200bd635395c01bbb07c156be4edbf7b1
# Parent  ac9e64d8a80bffe33fdaa0a9b83fdbe84f39d0b0
TComYuv::addAvg, primitive function for luma and chroma loops

diff -r ac9e64d8a80b -r cdd54aa200bd source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 12:26:44 2013 +0530
+++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 15:22:03 2013 +0530
@@ -589,9 +589,7 @@
 
 void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t 
partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
 {
-int x, y;
 uint32_t src0Stride, src1Stride, dststride;
-int shiftNum, offset;
 
 int16_t* srcY0 = srcYuv0-getLumaAddr(partUnitIdx);
 int16_t* srcU0 = srcYuv0-getCbAddr(partUnitIdx);
@@ -605,61 +603,24 @@
 Pel* dstU = getCbAddr(partUnitIdx);
 Pel* dstV = getCrAddr(partUnitIdx);
 
+int part = partitionFromSizes(width, height);
+
 if (bLuma)
 {
 src0Stride = srcYuv0-m_width;
 src1Stride = srcYuv1-m_width;
 dststride  = getStride();
-shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
-offset = (1  (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
 
-for (y = 0; y  height; y++)
-{
-for (x = 0; x  width; x += 4)
-{
-dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] + offset)  
shiftNum);
-dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] + offset)  
shiftNum);
-dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] + offset)  
shiftNum);
-dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] + offset)  
shiftNum);
-}
-
-srcY0 += src0Stride;
-srcY1 += src1Stride;
-dstY  += dststride;
-}
+primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride, 
srcY1, src1Stride);
 }
 if (bChroma)
 {
-shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
-offset = (1  (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
-
 src0Stride = srcYuv0-m_cwidth;
 src1Stride = srcYuv1-m_cwidth;
 dststride  = getCStride();
 
-width  = m_hChromaShift;
-height = m_vChromaShift;
-
-for (y = height - 1; y = 0; y--)
-{
-for (x = width - 1; x = 0; )
-{
-// note: chroma min width is 2
-dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset)  shiftNum);
-dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset)  shiftNum);
-x--;
-dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset)  shiftNum);
-dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset)  shiftNum);
-x--;
-}
-
-srcU0 += src0Stride;
-srcU1 += src1Stride;
-srcV0 += src0Stride;
-srcV1 += src1Stride;
-dstU  += dststride;
-dstV  += dststride;
-}
+primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, 
srcU1, src1Stride);
+primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, 
srcV1, src1Stride);
 }
 }
 
diff -r ac9e64d8a80b -r cdd54aa200bd source/common/pixel.cpp
--- a/source/common/pixel.cpp   Mon Nov 18 12:26:44 2013 +0530
+++ b/source/common/pixel.cpp   Mon Nov 18 15:22:03 2013 +0530
@@ -794,6 +794,27 @@
 a += dstride;
 }
 }
+
+templateint bx, int by
+void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t 
src0Stride, int16_t* src1, intptr_t src1Stride)
+{
+int shiftNum, offset;
+shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
+offset = (1  (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
+
+for (int y = 0; y  by; y++)
+{
+for (int x = 0; x  bx; x += 2)
+{
+dst[x + 0] = ClipY((src0[x + 0] + src1[x + 0] + offset)  
shiftNum);
+dst[x + 1] = ClipY((src0[x + 1] + src1[x + 1] + offset)  
shiftNum);
+}
+
+src0 += src0Stride;
+src1 += src1Stride;
+dst  += dstStride;
+}
+}
 }  // end anonymous namespace
 
 namespace x265 {
@@ -835,12 +856,14 @@
 p.satd[LUMA_16x64] = satd816, 64;
 
 #define CHROMA(W, H) \
+p.chroma_addAvg[CHROMA_ ## W ## x ## H]  = addAvgW, H; \
 p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_cW, H; 
\
 p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_cW, H; \
 p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\
 p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_cW, H;
 
 #define LUMA(W, H) \
+p.luma_addAvg[LUMA_ ## W ## x ## H]  = addAvgW, H; \
 p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_cW, H; \
 p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_cW, H; \
 p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\
diff -r ac9e64d8a80b -r cdd54aa200bd source/common/primitives.h
--- 

[x265] [PATCH] asm: fix the bug caused on 32-bit linux due to satd routines

2013-11-18 Thread yuvaraj
# HG changeset patch
# User Yuvaraj Venkatesh yuva...@multicorewareinc.com
# Date 1384769347 -19800
#  Mon Nov 18 15:39:07 2013 +0530
# Node ID f076c5ca413a905d6d4e8c1bbea2638992cb21d7
# Parent  e2895ce7bbeb2c3d845fee2578758d0012fa2cb4
asm: fix the bug caused on 32-bit linux due to satd routines.

diff -r e2895ce7bbeb -r f076c5ca413a source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Sun Nov 17 11:24:13 2013 -0600
+++ b/source/common/x86/pixel-a.asm Mon Nov 18 15:39:07 2013 +0530
@@ -2239,27 +2239,42 @@
 
 %else
 
-cglobal pixel_satd_32x8, 4,6,8;if !WIN64
+%if WIN64
+cglobal pixel_satd_32x8, 4,8,8;if WIN64  cpuflag(avx)
 SATD_START_SSE2 m6, m7
-BACKUP_POINTERS
-call pixel_satd_8x8_internal
-RESTORE_AND_INC_POINTERS
-BACKUP_POINTERS
-call pixel_satd_8x8_internal
-RESTORE_AND_INC_POINTERS
-%if WIN64 == 0
-add r0, 8*SIZEOF_PIXEL
-add r2, 8*SIZEOF_PIXEL
-%endif
-BACKUP_POINTERS
-call pixel_satd_8x8_internal
-RESTORE_AND_INC_POINTERS
-%if WIN64 == 0
-add r0, 16*SIZEOF_PIXEL
-add r2, 16*SIZEOF_PIXEL
-%endif
+mov r6, r0
+mov r7, r2
+call pixel_satd_8x8_internal
+lea r0, [r6 + 8]
+lea r2, [r7 + 8]
+call pixel_satd_8x8_internal
+lea r0, [r6 + 16]
+lea r2, [r7 + 16]
+call pixel_satd_8x8_internal
+lea r0, [r6 + 24]
+lea r2, [r7 + 24]
 call pixel_satd_8x8_internal
 SATD_END_SSE2 m6
+%else
+cglobal pixel_satd_32x8, 4,7,8,0-4;if !WIN64
+SATD_START_SSE2 m6, m7
+mov r6, r0
+mov [rsp], r2
+call pixel_satd_8x8_internal
+lea r0, [r6 + 8]
+mov r2, [rsp]
+add r2, 8
+call pixel_satd_8x8_internal
+lea r0, [r6 + 16]
+mov r2, [rsp]
+add r2, 16
+call pixel_satd_8x8_internal
+lea r0, [r6 + 24]
+mov r2, [rsp]
+add r2, 24
+call pixel_satd_8x8_internal
+SATD_END_SSE2 m6
+%endif
 
 %if WIN64
 cglobal pixel_satd_32x16, 4,8,8;if WIN64  cpuflag(avx)
@@ -2282,25 +2297,24 @@
 call pixel_satd_8x8_internal
 SATD_END_SSE2 m6
 %else
-cglobal pixel_satd_32x16, 4,6,8;if !WIN64
+cglobal pixel_satd_32x16, 4,7,8,0-4;if !WIN64
 SATD_START_SSE2 m6, m7
-call pixel_satd_8x8_internal
-call pixel_satd_8x8_internal
-mov r0, r0mp
-mov r2, r2mp
-add r0, 8
+mov r6, r0
+mov [rsp], r2
+call pixel_satd_8x8_internal
+call pixel_satd_8x8_internal
+lea r0, [r6 + 8]
+mov r2, [rsp]
 add r2, 8
 call pixel_satd_8x8_internal
 call pixel_satd_8x8_internal
-mov r0, r0mp
-mov r2, r2mp
-add r0, 16
+lea r0, [r6 + 16]
+mov r2, [rsp]
 add r2, 16
 call pixel_satd_8x8_internal
 call pixel_satd_8x8_internal
-mov r0, r0mp
-mov r2, r2mp
-add r0, 24
+lea r0, [r6 + 24]
+mov r2, [rsp]
 add r2, 24
 call pixel_satd_8x8_internal
 call pixel_satd_8x8_internal
@@ -2332,28 +2346,27 @@
 call pixel_satd_8x8_internal
 SATD_END_SSE2 m6
 %else
-cglobal pixel_satd_32x24, 4,6,8;if !WIN64
+cglobal pixel_satd_32x24, 4,7,8,0-4;if !WIN64
 SATD_START_SSE2 m6, m7
-call pixel_satd_8x8_internal
-call pixel_satd_8x8_internal
-call pixel_satd_8x8_internal
-mov r0, r0mp
-mov r2, r2mp
-add r0, 8
+mov r6, r0
+mov [rsp], r2
+call pixel_satd_8x8_internal
+call pixel_satd_8x8_internal
+call pixel_satd_8x8_internal
+lea r0, [r6 + 8]
+mov r2, [rsp]
 add r2, 8
 call pixel_satd_8x8_internal
 call pixel_satd_8x8_internal
 call pixel_satd_8x8_internal
-mov r0, r0mp
-mov r2, r2mp
-add r0, 16
+lea r0, [r6 + 16]
+mov r2, [rsp]
 add r2, 16
 call pixel_satd_8x8_internal
 call pixel_satd_8x8_internal
 call pixel_satd_8x8_internal
-mov r0, r0mp
-mov r2, r2mp
-add r0, 24
+lea r0, [r6 + 24]
+mov r2, [rsp]
 add r2, 24
 call pixel_satd_8x8_internal
 call pixel_satd_8x8_internal
@@ -2389,38 +2402,41 @@
 call pixel_satd_8x8_internal
 call pixel_satd_8x8_internal
 SATD_END_SSE2 m6
-%else
-cglobal pixel_satd_32x32, 4,6,8;if !WIN64
+
+
+%else   
+cglobal pixel_satd_32x32, 4,7,8,0-4;if !WIN64
+
 SATD_START_SSE2 m6, m7
-call pixel_satd_8x8_internal
-call pixel_satd_8x8_internal
-call pixel_satd_8x8_internal
-call pixel_satd_8x8_internal
-mov r0, r0mp
-mov r2, r2mp
-add r0, 8
+mov r6, r0
+mov [rsp], r2
+call pixel_satd_8x8_internal
+call pixel_satd_8x8_internal
+call pixel_satd_8x8_internal
+call pixel_satd_8x8_internal
+lea r0, [r6 + 8]
+mov r2, [rsp]
 add r2, 8
 call pixel_satd_8x8_internal
 call pixel_satd_8x8_internal
 call pixel_satd_8x8_internal
 call pixel_satd_8x8_internal
-mov r0, r0mp
-mov r2, r2mp
-add r0, 16
+lea r0, [r6 + 16]
+mov r2, [rsp]
 add r2, 16
 call pixel_satd_8x8_internal
 call pixel_satd_8x8_internal
 call pixel_satd_8x8_internal
 call 

[x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames

2013-11-18 Thread Gopu Govindaswamy
# HG changeset patch
# User Gopu Govindaswamy g...@multicorewareinc.com
# Date 1384769433 -19800
# Node ID 1e22b93638072ed805478d7af17f90e285fb4969
# Parent  2321ebe0bf64e5f3c0034076c7edb3ecbcd48039
b-pyramid implementation: Allow the use of B-frames as references for non B and 
B frames

When b-pyramid is enabled, the bitrate is reduced effectively and there is not
much difference in performance; the PSNR changes by less than 0.1, increasing
for some of the clips and decreasing for others.

Test results for reference when enable and disable the b-pyramid:
cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --ref=1 --b-adapt=2
Enable B-reference  : --b-pyramid=1
Disable B-reference : --b-pyramid=0

Results:
Enable / Disable

clip - FourPeople_1280x720_60.yuv
Total time taken - 9.70s (10.31 fps) / 9.93s (10.07 fps)
Bitrates - 516.30 kb/s / 544.68 kb/s
PSNR - 39.725 / 39.701

clip - BasketballDrive_1920x1080_50.y4m
Total time taken - 39.06s (2.51 fps) / 38.98s (2.57 fps)
Bitrates -  4166.92 kb/s / 4370.43 kb/s
PSNR -  37.261 / 37.268

clip - Johnny_1280x720_60.y4m
Total time taken - 8.88s (11.27 fps) / 11.08s (9.03 fps)
Bitrates - 304.29 kb/s / 328.84 kb/s
PSNR - 40.605 / 40.551

Total time taken - 30.97s (3.23 fps) / 33.65s (2.97 fps)
Bitrates - 3496.84 kb/s / 3683.93 kb/s
PSNR - 35.645 / 35.660

diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.cpp
--- a/source/common/common.cpp  Mon Nov 18 11:32:06 2013 +0530
+++ b/source/common/common.cpp  Mon Nov 18 15:40:33 2013 +0530
@@ -54,6 +54,7 @@
 
 static int parseCspName(const char *arg, int error);
 static int parseName(const char *arg, const char * const * names, int error);
+static int parse_enum(const char *, const char * const * names, int *dst);
 
 using namespace x265;
 
@@ -165,6 +166,7 @@
 param-bframes = 3;
 param-lookaheadDepth = 40;
 param-bFrameAdaptive = X265_B_ADAPT_FAST;
+param-bpyramid = 0;
 param-scenecutThreshold = 40; /* Magic number pulled in from x264*/
 
 /* Intra Coding Tools */
@@ -532,7 +534,7 @@
 }
 
 CHECK(param-bEnableWavefront  0, WaveFrontSynchro cannot be negative);
-
+CHECK(param-bpyramid = 2, b-pyramid is 0 or 1);
 return check_failed;
 }
 
@@ -620,6 +622,7 @@
 x265_log(param, X265_LOG_INFO, RDpenalty: %d\n, 
param-rdPenalty);
 }
 x265_log(param, X265_LOG_INFO, Lookahead / bframes / badapt : %d / %d / 
%d\n, param-lookaheadDepth, param-bframes, param-bFrameAdaptive);
+x265_log(param, X265_LOG_INFO, b-pyramid / weightp / ref: %d / %d / 
%d\n, param-bpyramid, param-bEnableWeightedPred, param-maxNumReferences);
 x265_log(param, X265_LOG_INFO, tools: );
 #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, %s , STR)
 TOOLOPT(param-bEnableRectInter, rect);
@@ -628,7 +631,6 @@
 TOOLOPT(param-bEnableConstrainedIntra, cip);
 TOOLOPT(param-bEnableEarlySkip, esd);
 fprintf(stderr, rd=%d , param-rdLevel);
-fprintf(stderr, ref=%d , param-maxNumReferences);
 
 TOOLOPT(param-bEnableLoopFilter, lft);
 if (param-bEnableSAO)
@@ -650,7 +652,6 @@
 else
 fprintf(stderr, tskip );
 }
-TOOLOPT(param-bEnableWeightedPred, weightp);
 TOOLOPT(param-bEnableWeightedBiPred, weightbp);
 TOOLOPT(param-rc.aqMode, aq);
 fprintf(stderr, \n);
@@ -747,6 +748,15 @@
 }
 OPT(input-csp) p-sourceCsp = ::parseCspName(value, berror);
 OPT(me)p-searchMethod = ::parseName(value, 
x265_motion_est_names, berror);
+OPT(b-pyramid)
+{
+berror |= parse_enum(value, x265_b_pyramid_names, p-bpyramid);
+if (berror)
+{
+berror = 0;
+p-bpyramid = atoi(value);
+}
+}
 else
 return X265_PARAM_BAD_NAME;
 #undef OPT
@@ -802,6 +812,7 @@
 BOOL(p-bEnableSAO, sao);
 s += sprintf(s,  sao-lcu-bounds=%d, p-saoLcuBoundary);
 s += sprintf(s,  sao-lcu-opt=%d, p-saoLcuBasedOptimization);
+s += sprintf(s,  b-pyramid=%d, p-bpyramid);
 #undef BOOL
 
 return buf;
@@ -843,3 +854,13 @@
 error = 1;
 return a;
 }
+static int parse_enum(const char *arg, const char * const * names, int *dst)
+{
+for (int i = 0; names[i]; i++)
+if (!strcmp(arg, names[i]))
+{
+*dst = i;
+return 0;
+}
+return -1;
+}
diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.h
--- a/source/common/common.hMon Nov 18 11:32:06 2013 +0530
+++ b/source/common/common.hMon Nov 18 15:40:33 2013 +0530
@@ -107,6 +107,7 @@
 #define X265_LOG2(x)  log2(x)
 #endif
 
+static const char * const x265_b_pyramid_names[] = {none, normal, 0};
 /* defined in common.cpp */
 int64_t x265_mdate(void);
 void x265_log(x265_param *param, int level, const char *fmt, ...);
diff -r 2321ebe0bf64 -r 1e22b9363807 source/encoder/dpb.cpp
--- a/source/encoder/dpb.cppMon Nov 18 11:32:06 2013 +0530
+++ b/source/encoder/dpb.cppMon Nov 18 15:40:33 2013 +0530
@@ -78,7 +78,17 @@
 m_lastIDR = pocCurr;
 }
 

Re: [x265] [PATCH] TComYuv::addAvg, primitive function for luma and chroma loops

2013-11-18 Thread Deepthi Nandakumar
Pushed. But next time, please organize your patches more clearly.

1. Add C primitive, if it does not exist.
2. Add the function pointer declarations and new primitive declarations to
EncoderPrimitives struct.
3. Add testbench code for primitives.
4. Add asm code.

Once all above patches have been reviewed, pushed and tested on all
platforms, then you can integrate it with the actual encoder.




On Mon, Nov 18, 2013 at 3:23 PM, dnyanesh...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Dnyaneshwar G dnyanesh...@multicorewareinc.com
 # Date 1384768323 -19800
 #  Mon Nov 18 15:22:03 2013 +0530
 # Node ID cdd54aa200bd635395c01bbb07c156be4edbf7b1
 # Parent  ac9e64d8a80bffe33fdaa0a9b83fdbe84f39d0b0
 TComYuv::addAvg, primitive function for luma and chroma loops

 diff -r ac9e64d8a80b -r cdd54aa200bd source/Lib/TLibCommon/TComYuv.cpp
 --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 12:26:44 2013 +0530
 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 15:22:03 2013 +0530
 @@ -589,9 +589,7 @@

  void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t
 partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
  {
 -int x, y;
  uint32_t src0Stride, src1Stride, dststride;
 -int shiftNum, offset;

  int16_t* srcY0 = srcYuv0-getLumaAddr(partUnitIdx);
  int16_t* srcU0 = srcYuv0-getCbAddr(partUnitIdx);
 @@ -605,61 +603,24 @@
  Pel* dstU = getCbAddr(partUnitIdx);
  Pel* dstV = getCrAddr(partUnitIdx);

 +int part = partitionFromSizes(width, height);
 +
  if (bLuma)
  {
  src0Stride = srcYuv0-m_width;
  src1Stride = srcYuv1-m_width;
  dststride  = getStride();
 -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
 -offset = (1  (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;

 -for (y = 0; y  height; y++)
 -{
 -for (x = 0; x  width; x += 4)
 -{
 -dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] +
 offset)  shiftNum);
 -dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] +
 offset)  shiftNum);
 -dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] +
 offset)  shiftNum);
 -dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] +
 offset)  shiftNum);
 -}
 -
 -srcY0 += src0Stride;
 -srcY1 += src1Stride;
 -dstY  += dststride;
 -}
 +primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride,
 srcY1, src1Stride);
  }
  if (bChroma)
  {
 -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
 -offset = (1  (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
 -
  src0Stride = srcYuv0-m_cwidth;
  src1Stride = srcYuv1-m_cwidth;
  dststride  = getCStride();

 -width  = m_hChromaShift;
 -height = m_vChromaShift;
 -
 -for (y = height - 1; y = 0; y--)
 -{
 -for (x = width - 1; x = 0; )
 -{
 -// note: chroma min width is 2
 -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) 
 shiftNum);
 -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) 
 shiftNum);
 -x--;
 -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) 
 shiftNum);
 -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) 
 shiftNum);
 -x--;
 -}
 -
 -srcU0 += src0Stride;
 -srcU1 += src1Stride;
 -srcV0 += src0Stride;
 -srcV1 += src1Stride;
 -dstU  += dststride;
 -dstV  += dststride;
 -}
 +primitives.chroma_addAvg[part](dstU, dststride, srcU0,
 src0Stride, srcU1, src1Stride);
 +primitives.chroma_addAvg[part](dstV, dststride, srcV0,
 src0Stride, srcV1, src1Stride);
  }
  }

 diff -r ac9e64d8a80b -r cdd54aa200bd source/common/pixel.cpp
 --- a/source/common/pixel.cpp   Mon Nov 18 12:26:44 2013 +0530
 +++ b/source/common/pixel.cpp   Mon Nov 18 15:22:03 2013 +0530
 @@ -794,6 +794,27 @@
  a += dstride;
  }
  }
 +
 +templateint bx, int by
 +void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t
 src0Stride, int16_t* src1, intptr_t src1Stride)
 +{
 +int shiftNum, offset;
 +shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
 +offset = (1  (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
 +
 +for (int y = 0; y  by; y++)
 +{
 +for (int x = 0; x  bx; x += 2)
 +{
 +dst[x + 0] = ClipY((src0[x + 0] + src1[x + 0] + offset) 
 shiftNum);
 +dst[x + 1] = ClipY((src0[x + 1] + src1[x + 1] + offset) 
 shiftNum);
 +}
 +
 +src0 += src0Stride;
 +src1 += src1Stride;
 +dst  += dstStride;
 +}
 +}
  }  // end anonymous namespace

  namespace x265 {
 @@ -835,12 +856,14 @@
  p.satd[LUMA_16x64] = satd816, 64;

  #define CHROMA(W, H) \
 +p.chroma_addAvg[CHROMA_ ## W ## x ## H]  = addAvgW, H; \
  p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## 

Re: [x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames

2013-11-18 Thread Deepthi Nandakumar
In encoder::configure, there should be some check for --b-adapt 0 --bframes
0, in which case print warning and disable b-pyramid.


On Mon, Nov 18, 2013 at 3:40 PM, Gopu Govindaswamy 
g...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Gopu Govindaswamy g...@multicorewareinc.com
 # Date 1384769433 -19800
 # Node ID 1e22b93638072ed805478d7af17f90e285fb4969
 # Parent  2321ebe0bf64e5f3c0034076c7edb3ecbcd48039
 b-pyramid implementation: Allow the use of B-frames as references for non
 B and B frames

 when we enable the b-pyramid the bitrates efficienctly reduced and there
 is not much diff in the performance
 and the PSNR 00. increased some of the clips and decreased some of clips

 Test results for reference when enable and disable the b-pyramid:
 cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --ref=1 --b-adapt=2
 Enable B-reference  : --b-pyramid=1
 Disable B-reference : --b-pyramid=0

 Results:
 Enable / Disable

 clip - FourPeople_1280x720_60.yuv
 Total time taken - 9.70s (10.31 fps) / 9.93s (10.07 fps)
 Bitrates - 516.30 kb/s / 544.68 kb/s
 PSNR - 39.725 / 39.701

 clip - BasketballDrive_1920x1080_50.y4m
 Total time taken - 39.06s (2.51 fps) / 38.98s (2.57 fps)
 Bitrates -  4166.92 kb/s / 4370.43 kb/s
 PSNR -  37.261 / 37.268

 clip - Johnny_1280x720_60.y4m
 Total time taken - 8.88s (11.27 fps) / 11.08s (9.03 fps)
 Bitrates - 304.29 kb/s / 328.84 kb/s
 PSNR - 40.605 / 40.551

 Total time taken - 30.97s (3.23 fps) / 33.65s (2.97 fps)
 Bitrates - 3496.84 kb/s / 3683.93 kb/s
 PSNR - 35.645 / 35.660

 diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.cpp
 --- a/source/common/common.cpp  Mon Nov 18 11:32:06 2013 +0530
 +++ b/source/common/common.cpp  Mon Nov 18 15:40:33 2013 +0530
 @@ -54,6 +54,7 @@

  static int parseCspName(const char *arg, int error);
  static int parseName(const char *arg, const char * const * names, int
 error);
 +static int parse_enum(const char *, const char * const * names, int *dst);

  using namespace x265;

 @@ -165,6 +166,7 @@
  param-bframes = 3;
  param-lookaheadDepth = 40;
  param-bFrameAdaptive = X265_B_ADAPT_FAST;
 +param-bpyramid = 0;
  param-scenecutThreshold = 40; /* Magic number pulled in from x264*/

  /* Intra Coding Tools */
 @@ -532,7 +534,7 @@
  }

  CHECK(param-bEnableWavefront  0, WaveFrontSynchro cannot be
 negative);
 -
 +CHECK(param-bpyramid = 2, b-pyramid is 0 or 1);
  return check_failed;
  }

 @@ -620,6 +622,7 @@
  x265_log(param, X265_LOG_INFO, RDpenalty:
 %d\n, param-rdPenalty);
  }
  x265_log(param, X265_LOG_INFO, Lookahead / bframes / badapt : %d /
 %d / %d\n, param-lookaheadDepth, param-bframes, param-bFrameAdaptive);
 +x265_log(param, X265_LOG_INFO, b-pyramid / weightp / ref: %d /
 %d / %d\n, param-bpyramid, param-bEnableWeightedPred,
 param-maxNumReferences);
  x265_log(param, X265_LOG_INFO, tools: );
  #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, %s , STR)
  TOOLOPT(param-bEnableRectInter, rect);
 @@ -628,7 +631,6 @@
  TOOLOPT(param-bEnableConstrainedIntra, cip);
  TOOLOPT(param-bEnableEarlySkip, esd);
  fprintf(stderr, rd=%d , param-rdLevel);
 -fprintf(stderr, ref=%d , param-maxNumReferences);

  TOOLOPT(param-bEnableLoopFilter, lft);
  if (param-bEnableSAO)
 @@ -650,7 +652,6 @@
  else
  fprintf(stderr, tskip );
  }
 -TOOLOPT(param-bEnableWeightedPred, weightp);
  TOOLOPT(param-bEnableWeightedBiPred, weightbp);
  TOOLOPT(param-rc.aqMode, aq);
  fprintf(stderr, \n);
 @@ -747,6 +748,15 @@
  }
  OPT(input-csp) p-sourceCsp = ::parseCspName(value, berror);
  OPT(me)p-searchMethod = ::parseName(value,
 x265_motion_est_names, berror);
 +OPT(b-pyramid)
 +{
 +berror |= parse_enum(value, x265_b_pyramid_names, p-bpyramid);
 +if (berror)
 +{
 +berror = 0;
 +p-bpyramid = atoi(value);
 +}
 +}
  else
  return X265_PARAM_BAD_NAME;
  #undef OPT
 @@ -802,6 +812,7 @@
  BOOL(p-bEnableSAO, sao);
  s += sprintf(s,  sao-lcu-bounds=%d, p-saoLcuBoundary);
  s += sprintf(s,  sao-lcu-opt=%d, p-saoLcuBasedOptimization);
 +s += sprintf(s,  b-pyramid=%d, p-bpyramid);
  #undef BOOL

  return buf;
 @@ -843,3 +854,13 @@
  error = 1;
  return a;
  }
 +static int parse_enum(const char *arg, const char * const * names, int
 *dst)
 +{
 +for (int i = 0; names[i]; i++)
 +if (!strcmp(arg, names[i]))
 +{
 +*dst = i;
 +return 0;
 +}
 +return -1;
 +}
 diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.h
 --- a/source/common/common.hMon Nov 18 11:32:06 2013 +0530
 +++ b/source/common/common.hMon Nov 18 15:40:33 2013 +0530
 @@ -107,6 +107,7 @@
  #define X265_LOG2(x)  log2(x)
  #endif

 +static const char * const x265_b_pyramid_names[] = {none, normal, 0};
  /* defined in common.cpp */
  int64_t 

[x265] [PATCH] asm: code for scale2D_64to32 routine

2013-11-18 Thread murugan
# HG changeset patch
# User Murugan Vairavel muru...@multicorewareinc.com
# Date 1384773570 -19800
#  Mon Nov 18 16:49:30 2013 +0530
# Node ID c355ba4b6711bfad87ff37d650a8f1946f878eec
# Parent  2321ebe0bf64e5f3c0034076c7edb3ecbcd48039
asm: code for scale2D_64to32 routine

diff -r 2321ebe0bf64 -r c355ba4b6711 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp  Mon Nov 18 11:32:06 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp  Mon Nov 18 16:49:30 2013 +0530
@@ -530,6 +530,7 @@
 PIXEL_AVG_W4(ssse3);
 
 p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
+p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
 
 p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3;
 p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_ssse3;
diff -r 2321ebe0bf64 -r c355ba4b6711 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Mon Nov 18 11:32:06 2013 +0530
+++ b/source/common/x86/pixel-a.asm Mon Nov 18 16:49:30 2013 +0530
@@ -8230,3 +8230,113 @@
 movu  [r0 + 48],m4
 
 RET
+
+;-
+; void scale2D_64to32(pixel *dst, pixel *src, intptr_t stride)
+;-
+INIT_XMM ssse3
+cglobal scale2D_64to32, 3, 4, 8, dest, src, stride
+
+movam7,  [deinterleave_shuf]
+mov r3d, 32
+.loop
+
+movum0,  [r1]  ;i
+movum1,  [r1 + 1]  ;j
+movum2,  [r1 + r2] ;k
+movum3,  [r1 + r2 + 1] ;l
+movum4,  m0
+movum5,  m2
+
+pxorm4,  m1;i^j
+pxorm5,  m3;k^l
+por m4,  m5;ij|kl
+
+pavgb   m0,  m1;s
+pavgb   m2,  m3;t
+movum5,  m0
+pavgb   m0,  m2;(s+t+1)/2
+pxorm5,  m2;s^t
+pandm4,  m5;(ij|kl)st
+pandm4,  [hmul_16p]
+psubb   m0,  m4;Result
+
+movum1,  [r1 + 16] ;i
+movum2,  [r1 + 16 + 1] ;j
+movum3,  [r1 + r2 + 16];k
+movum4,  [r1 + r2 + 16 + 1];l
+movum5,  m1
+movum6,  m3
+
+pxorm5,  m2;i^j
+pxorm6,  m4;k^l
+por m5,  m6;ij|kl
+
+pavgb   m1,  m2;s
+pavgb   m3,  m4;t
+movum6,  m1
+pavgb   m1,  m3;(s+t+1)/2
+pxorm6,  m3;s^t
+pandm5,  m6;(ij|kl)st
+pandm5,  [hmul_16p]
+psubb   m1,  m5;Result
+
+pshufb  m0,  m0,m7
+pshufb  m1,  m1,m7
+
+punpcklqdqm0,   m1
+movu  [r0], m0
+
+movum0,  [r1 + 32] ;i
+movum1,  [r1 + 32 + 1] ;j
+movum2,  [r1 + r2 + 32];k
+movum3,  [r1 + r2 + 32 + 1];l
+movum4,  m0
+movum5,  m2
+
+pxorm4,  m1;i^j
+pxorm5,  m3;k^l
+por m4,  m5;ij|kl
+
+pavgb   m0,  m1;s
+pavgb   m2,  m3;t
+movum5,  m0
+pavgb   m0,  m2;(s+t+1)/2
+pxorm5,  m2;s^t
+pandm4,  m5;(ij|kl)st
+pandm4,  [hmul_16p]
+psubb   m0,  m4;Result
+
+movum1,  [r1 + 48] ;i
+movum2,  [r1 + 48 + 1] ;j
+movum3,  [r1 + r2 + 48];k
+movum4,  [r1 + r2 + 48 + 1];l
+movum5,  m1
+movum6,  m3
+
+pxorm5,  m2;i^j
+pxorm6,  m4;k^l
+por m5,  m6;ij|kl
+
+pavgb   m1,  m2;s
+pavgb   m3,  m4;t
+movum6,  m1
+pavgb   m1,  m3;(s+t+1)/2
+pxorm6,  m3;s^t
+pandm5,  m6;(ij|kl)st
+pandm5,  [hmul_16p]
+psubb   m1,  m5;Result
+
+pshufb  m0,  m0,m7
+pshufb  m1,  m1,m7
+
+punpcklqdqm0,   m1
+movu  

Re: [x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames

2013-11-18 Thread Gopu Govindaswamy
On Mon, Nov 18, 2013 at 4:47 PM, Deepthi Nandakumar
deep...@multicorewareinc.com wrote:



 On Mon, Nov 18, 2013 at 3:40 PM, Gopu Govindaswamy
 g...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Gopu Govindaswamy g...@multicorewareinc.com
 # Date 1384769433 -19800
 # Node ID 1e22b93638072ed805478d7af17f90e285fb4969
 # Parent  2321ebe0bf64e5f3c0034076c7edb3ecbcd48039
 b-pyramid implementation: Allow the use of B-frames as references for non
 B and B frames

 when we enable the b-pyramid the bitrates efficienctly reduced and there
 is not much diff in the performance
 and the PSNR 00. increased some of the clips and decreased some of clips

 Test results for reference when enable and disable the b-pyramid:
 cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --ref=1 --b-adapt=2
 Enable B-reference  : --b-pyramid=1
 Disable B-reference : --b-pyramid=0

 Results:
 Enable / Disable

 clip - FourPeople_1280x720_60.yuv
 Total time taken - 9.70s (10.31 fps) / 9.93s (10.07 fps)
 Bitrates - 516.30 kb/s / 544.68 kb/s
 PSNR - 39.725 / 39.701

 clip - BasketballDrive_1920x1080_50.y4m
 Total time taken - 39.06s (2.51 fps) / 38.98s (2.57 fps)
 Bitrates -  4166.92 kb/s / 4370.43 kb/s
 PSNR -  37.261 / 37.268

 clip - Johnny_1280x720_60.y4m
 Total time taken - 8.88s (11.27 fps) / 11.08s (9.03 fps)
 Bitrates - 304.29 kb/s / 328.84 kb/s
 PSNR - 40.605 / 40.551

 Total time taken - 30.97s (3.23 fps) / 33.65s (2.97 fps)
 Bitrates - 3496.84 kb/s / 3683.93 kb/s
 PSNR - 35.645 / 35.660

 diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.cpp
 --- a/source/common/common.cpp  Mon Nov 18 11:32:06 2013 +0530
 +++ b/source/common/common.cpp  Mon Nov 18 15:40:33 2013 +0530
 @@ -54,6 +54,7 @@

  static int parseCspName(const char *arg, int error);
  static int parseName(const char *arg, const char * const * names, int
 error);
 +static int parse_enum(const char *, const char * const * names, int
 *dst);

  using namespace x265;

 @@ -165,6 +166,7 @@
  param-bframes = 3;
  param-lookaheadDepth = 40;
  param-bFrameAdaptive = X265_B_ADAPT_FAST;
 +param-bpyramid = 0;
  param-scenecutThreshold = 40; /* Magic number pulled in from x264*/

  /* Intra Coding Tools */
 @@ -532,7 +534,7 @@
  }

  CHECK(param-bEnableWavefront  0, WaveFrontSynchro cannot be
 negative);
 -
 +CHECK(param-bpyramid = 2, b-pyramid is 0 or 1);
  return check_failed;
  }

 @@ -620,6 +622,7 @@
  x265_log(param, X265_LOG_INFO, RDpenalty:
 %d\n, param-rdPenalty);
  }
  x265_log(param, X265_LOG_INFO, Lookahead / bframes / badapt : %d /
 %d / %d\n, param-lookaheadDepth, param-bframes, param-bFrameAdaptive);
 +x265_log(param, X265_LOG_INFO, b-pyramid / weightp / ref: %d /
 %d / %d\n, param-bpyramid, param-bEnableWeightedPred,
 param-maxNumReferences);
  x265_log(param, X265_LOG_INFO, tools: );
  #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, %s , STR)
  TOOLOPT(param-bEnableRectInter, rect);
 @@ -628,7 +631,6 @@
  TOOLOPT(param-bEnableConstrainedIntra, cip);
  TOOLOPT(param-bEnableEarlySkip, esd);
  fprintf(stderr, rd=%d , param-rdLevel);
 -fprintf(stderr, ref=%d , param-maxNumReferences);

  TOOLOPT(param-bEnableLoopFilter, lft);
  if (param-bEnableSAO)
 @@ -650,7 +652,6 @@
  else
  fprintf(stderr, tskip );
  }
 -TOOLOPT(param-bEnableWeightedPred, weightp);
  TOOLOPT(param-bEnableWeightedBiPred, weightbp);
  TOOLOPT(param-rc.aqMode, aq);
  fprintf(stderr, \n);
 @@ -747,6 +748,15 @@
  }
  OPT(input-csp) p-sourceCsp = ::parseCspName(value, berror);
  OPT(me)p-searchMethod = ::parseName(value,
 x265_motion_est_names, berror);
 +OPT(b-pyramid)
 +{
 +berror |= parse_enum(value, x265_b_pyramid_names, p-bpyramid);
 +if (berror)
 +{
 +berror = 0;
 +p-bpyramid = atoi(value);
 +}
 +}


 Not clear why parse_enum is required here? For now, this is a boolean flag
 which can be assigned directly to the param structure.

  Because both forms can be used: --b-pyramid=none or --b-pyramid=0,
 and --b-pyramid=normal or --b-pyramid=1.

 


  else
  return X265_PARAM_BAD_NAME;
  #undef OPT
 @@ -802,6 +812,7 @@
  BOOL(p-bEnableSAO, sao);
  s += sprintf(s,  sao-lcu-bounds=%d, p-saoLcuBoundary);
  s += sprintf(s,  sao-lcu-opt=%d, p-saoLcuBasedOptimization);
 +s += sprintf(s,  b-pyramid=%d, p-bpyramid);
  #undef BOOL

  return buf;
 @@ -843,3 +854,13 @@
  error = 1;
  return a;
  }
 +static int parse_enum(const char *arg, const char * const * names, int
 *dst)
 +{
 +for (int i = 0; names[i]; i++)
 +if (!strcmp(arg, names[i]))
 +{
 +*dst = i;
 +return 0;
 +}
 +return -1;
 +}
 diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.h
 --- a/source/common/common.hMon Nov 18 11:32:06 2013 +0530
 +++ 

[x265] [PATCH] cli: add aq-strength to cli input options, add validations for aq mode

2013-11-18 Thread Aarthi Thirumalai
# HG changeset patch
# User Aarthi Thirumalai
# Date 1384773969 -19800
#  Mon Nov 18 16:56:09 2013 +0530
# Node ID 78225cfaa696fad7f2870c4064c8f0f387e5ba8d
# Parent  2321ebe0bf64e5f3c0034076c7edb3ecbcd48039
cli: add aq-strength to cli input options, add validations for aq mode

diff -r 2321ebe0bf64 -r 78225cfaa696 source/common/common.cpp
--- a/source/common/common.cpp  Mon Nov 18 11:32:06 2013 +0530
+++ b/source/common/common.cpp  Mon Nov 18 16:56:09 2013 +0530
@@ -519,8 +519,10 @@
   max consecutive bframe count must be 16 or smaller);
 CHECK(param-lookaheadDepth  X265_LOOKAHEAD_MAX,
   Lookahead depth must be less than 256);
-CHECK(param-rc.aqModeX265_AQ_NONE || param-rc.aqMode X265_AQ_VARIANCE,
+CHECK(param-rc.aqMode  X265_AQ_NONE || param-rc.aqMode  
X265_AQ_VARIANCE,
   Aq-Mode is out of range);
+CHECK(param-rc.aqStrength  0 || param-rc.aqStrength  3,
+  Aq-Strength is out of range);
 
 // max CU size should be power of 2
 uint32_t i = param-maxCUSize;
@@ -532,6 +534,16 @@
 }
 
 CHECK(param-bEnableWavefront  0, WaveFrontSynchro cannot be negative);
+if(param-rc.rateControlMode == X265_RC_CQP )
+{
+param-rc.aqMode = X265_AQ_NONE;
+param-rc.bitrate = 0;
+}
+if(param-rc.aqStrength == 0)
+{
+x265_log(param, X265_LOG_WARNING, Aq mode specified, but Aq strength 
is  0, ignored\n );
+param-rc.aqMode = 0;
+}
 
 return check_failed;
 }
@@ -652,7 +664,8 @@
 }
 TOOLOPT(param-bEnableWeightedPred, weightp);
 TOOLOPT(param-bEnableWeightedBiPred, weightbp);
-TOOLOPT(param-rc.aqMode, aq);
+TOOLOPT(param-rc.aqMode, aq-mode);
+fprintf(stderr, aq-strength=%.2f , param-rc.aqStrength);
 fprintf(stderr, \n);
 fflush(stderr);
 }
@@ -729,6 +742,7 @@
 OPT(psnr) p-bEnablePsnr = bvalue;
 OPT(hash) p-decodedPictureHashSEI = atoi(value);
 OPT(aq-mode) p-rc.aqMode = atoi(value);
+OPT(aq-strength) p-rc.aqStrength = atof(value);
 OPT(crf)
 {
 p-rc.rfConstant = atof(value);
@@ -794,6 +808,8 @@
 BOOL(p-bEnableWeightedPred, weightp);
 s += sprintf(s,  bitrate=%d, p-rc.bitrate);
 s += sprintf(s,  qp=%d, p-rc.qp);
+s += sprintf(s,  aq-mode=%d, p-rc.aqMode);
+s += sprintf(s,  aq-strength=%.2f, p-rc.aqStrength);
 s += sprintf(s,  cbqpoffs=%d, p-cbQpOffset);
 s += sprintf(s,  crqpoffs=%d, p-crQpOffset);
 s += sprintf(s,  rd=%d, p-rdLevel);
diff -r 2321ebe0bf64 -r 78225cfaa696 source/x265.cpp
--- a/source/x265.cpp   Mon Nov 18 11:32:06 2013 +0530
+++ b/source/x265.cpp   Mon Nov 18 16:56:09 2013 +0530
@@ -123,6 +123,7 @@
 { bitrate,required_argument, NULL, 0 },
 { qp, required_argument, NULL, 'q' },
 { aq-mode,required_argument, NULL, 0 },
+{ aq-strength,required_argument, NULL, 0 },
 { cbqpoffs,   required_argument, NULL, 0 },
 { crqpoffs,   required_argument, NULL, 0 },
 { rd, required_argument, NULL, 0 },
@@ -310,6 +311,7 @@
 H0(   --crf Quality-based VBR (0-51). Default 
%f\n, param-rc.rfConstant);
 H0(-q/--qp  Base QP for CQP mode. Default %d\n, 
param-rc.qp);
 H0(   --aq-mode Mode for Adaptive Quantization - 
0:none 1:aqVariance Default %d\n, param-rc.aqMode);
+H0(   --aq-strength Reduces blocking and blurring in flat 
and textured areas.(0 to 3.0)double . Default %f\n, param-rc.aqStrength);
 H0(   --cbqpoffsChroma Cb QP Offset. Default %d\n, 
param-cbQpOffset);
 H0(   --crqpoffsChroma Cr QP Offset. Default %d\n, 
param-crQpOffset);
 H0(   --rd  Level of RD in mode decision 
0:least2:full RDO. Default %d\n, param-rdLevel);
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames

2013-11-18 Thread Gopu Govindaswamy
Yes, numReorderPics is 2. But once we increase numReorderPics, the
max DPB size should also increase by 1 when b-pyramid is enabled.
Instead of doing that, I had directly increased it by 3. I will
change this to

numReorderPics = 2 and
m_maxDecPicBuffering[i] = X265_MIN(MAX_NUM_REF,
X265_MAX(m_numReorderPics[i] + 1, _param-maxNumReferences) + 2 );

when b-pyramid is enabled; otherwise the RPS computation will not
produce the correct L0 reference.

On Mon, Nov 18, 2013 at 4:53 PM, Deepthi Devaki Akkoorath
deepthidev...@multicorewareinc.com wrote:



 On Mon, Nov 18, 2013 at 3:40 PM, Gopu Govindaswamy
 g...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Gopu Govindaswamy g...@multicorewareinc.com
 # Date 1384769433 -19800
 # Node ID 1e22b93638072ed805478d7af17f90e285fb4969
 # Parent  2321ebe0bf64e5f3c0034076c7edb3ecbcd48039
 b-pyramid implementation: Allow the use of B-frames as references for non
 B and B frames

 when we enable the b-pyramid the bitrates efficienctly reduced and there
 is not much diff in the performance
 and the PSNR 00. increased some of the clips and decreased some of clips

 Test results for reference when enable and disable the b-pyramid:
 cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --ref=1 --b-adapt=2
 Enable B-reference  : --b-pyramid=1
 Disable B-reference : --b-pyramid=0

 Results:
 Enable / Disable

 clip - FourPeople_1280x720_60.yuv
 Total time taken - 9.70s (10.31 fps) / 9.93s (10.07 fps)
 Bitrates - 516.30 kb/s / 544.68 kb/s
 PSNR - 39.725 / 39.701

 clip - BasketballDrive_1920x1080_50.y4m
 Total time taken - 39.06s (2.51 fps) / 38.98s (2.57 fps)
 Bitrates -  4166.92 kb/s / 4370.43 kb/s
 PSNR -  37.261 / 37.268

 clip - Johnny_1280x720_60.y4m
 Total time taken - 8.88s (11.27 fps) / 11.08s (9.03 fps)
 Bitrates - 304.29 kb/s / 328.84 kb/s
 PSNR - 40.605 / 40.551

 Total time taken - 30.97s (3.23 fps) / 33.65s (2.97 fps)
 Bitrates - 3496.84 kb/s / 3683.93 kb/s
 PSNR - 35.645 / 35.660

 diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.cpp
 --- a/source/common/common.cpp  Mon Nov 18 11:32:06 2013 +0530
 +++ b/source/common/common.cpp  Mon Nov 18 15:40:33 2013 +0530
 @@ -54,6 +54,7 @@

  static int parseCspName(const char *arg, int error);
  static int parseName(const char *arg, const char * const * names, int
 error);
 +static int parse_enum(const char *, const char * const * names, int
 *dst);

  using namespace x265;

 @@ -165,6 +166,7 @@
  param-bframes = 3;
  param-lookaheadDepth = 40;
  param-bFrameAdaptive = X265_B_ADAPT_FAST;
 +param-bpyramid = 0;
  param-scenecutThreshold = 40; /* Magic number pulled in from x264*/

  /* Intra Coding Tools */
 @@ -532,7 +534,7 @@
  }

  CHECK(param-bEnableWavefront  0, WaveFrontSynchro cannot be
 negative);
 -
 +CHECK(param-bpyramid = 2, b-pyramid is 0 or 1);
  return check_failed;
  }

 @@ -620,6 +622,7 @@
  x265_log(param, X265_LOG_INFO, RDpenalty:
 %d\n, param-rdPenalty);
  }
  x265_log(param, X265_LOG_INFO, Lookahead / bframes / badapt : %d /
 %d / %d\n, param-lookaheadDepth, param-bframes, param-bFrameAdaptive);
 +x265_log(param, X265_LOG_INFO, b-pyramid / weightp / ref: %d /
 %d / %d\n, param-bpyramid, param-bEnableWeightedPred,
 param-maxNumReferences);
  x265_log(param, X265_LOG_INFO, tools: );
  #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, %s , STR)
  TOOLOPT(param-bEnableRectInter, rect);
 @@ -628,7 +631,6 @@
  TOOLOPT(param-bEnableConstrainedIntra, cip);
  TOOLOPT(param-bEnableEarlySkip, esd);
  fprintf(stderr, rd=%d , param-rdLevel);
 -fprintf(stderr, ref=%d , param-maxNumReferences);

  TOOLOPT(param-bEnableLoopFilter, lft);
  if (param-bEnableSAO)
 @@ -650,7 +652,6 @@
  else
  fprintf(stderr, tskip );
  }
 -TOOLOPT(param-bEnableWeightedPred, weightp);
  TOOLOPT(param-bEnableWeightedBiPred, weightbp);
  TOOLOPT(param-rc.aqMode, aq);
  fprintf(stderr, \n);
 @@ -747,6 +748,15 @@
  }
  OPT(input-csp) p-sourceCsp = ::parseCspName(value, berror);
  OPT(me)p-searchMethod = ::parseName(value,
 x265_motion_est_names, berror);
 +OPT(b-pyramid)
 +{
 +berror |= parse_enum(value, x265_b_pyramid_names, p-bpyramid);
 +if (berror)
 +{
 +berror = 0;
 +p-bpyramid = atoi(value);
 +}
 +}
  else
  return X265_PARAM_BAD_NAME;
  #undef OPT
 @@ -802,6 +812,7 @@
  BOOL(p-bEnableSAO, sao);
  s += sprintf(s,  sao-lcu-bounds=%d, p-saoLcuBoundary);
  s += sprintf(s,  sao-lcu-opt=%d, p-saoLcuBasedOptimization);
 +s += sprintf(s,  b-pyramid=%d, p-bpyramid);
  #undef BOOL

  return buf;
 @@ -843,3 +854,13 @@
  error = 1;
  return a;
  }
 +static int parse_enum(const char *arg, const char * const * names, int
 *dst)
 +{
 +for (int i = 0; names[i]; i++)
 +if (!strcmp(arg, names[i]))
 +{
 +*dst 

[x265] [PATCH] TShortYUV: asm code integration for pixelsub_ps

2013-11-18 Thread murugan
# HG changeset patch
# User Murugan Vairavel muru...@multicorewareinc.com
# Date 1384777276 -19800
#  Mon Nov 18 17:51:16 2013 +0530
# Node ID be8373f115dd7f152588ba8c575ad10dc6f5afb1
# Parent  c355ba4b6711bfad87ff37d650a8f1946f878eec
TShortYUV: asm code integration for pixelsub_ps

diff -r c355ba4b6711 -r be8373f115dd source/common/TShortYUV.cpp
--- a/source/common/TShortYUV.cpp   Mon Nov 18 16:49:30 2013 +0530
+++ b/source/common/TShortYUV.cpp   Mon Nov 18 17:51:16 2013 +0530
@@ -58,6 +58,7 @@
 
 m_cwidth  = width   m_hChromaShift;
 m_cheight = height  m_vChromaShift;
+m_csp = csp;
 }
 
 void TShortYUV::destroy()
@@ -78,15 +79,14 @@
 }
 
 void TShortYUV::subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int 
trUnitIdx, unsigned int partSize)
-{
-subtractLuma(srcYuv0, srcYuv1, trUnitIdx, partSize);
-subtractChroma(srcYuv0, srcYuv1, trUnitIdx, partSize  m_hChromaShift);
+{
+int part = partitionFromSizes(partSize, partSize);
+subtractLuma(srcYuv0, srcYuv1, trUnitIdx, partSize, part);
+subtractChroma(srcYuv0, srcYuv1, trUnitIdx, partSize  m_hChromaShift, 
part);
 }
 
-void TShortYUV::subtractLuma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int 
trUnitIdx, unsigned int partSize)
+void TShortYUV::subtractLuma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int 
trUnitIdx, unsigned int partSize, uint32_t part)
 {
-int x = partSize, y = partSize;
-
 Pel* src0 = srcYuv0-getLumaAddr(trUnitIdx, partSize);
 Pel* src1 = srcYuv1-getLumaAddr(trUnitIdx, partSize);
 int16_t* dst = getLumaAddr(trUnitIdx, partSize);
@@ -95,13 +95,11 @@
 int src1Stride = srcYuv1-getStride();
 int dstStride  = m_width;
 
-primitives.pixelsub_ps(x, y, dst, dstStride, src0, src1, src0Stride, 
src1Stride);
+primitives.luma_sub_ps[part](dst, dstStride, src0, src1, src0Stride, 
src1Stride);
 }
 
-void TShortYUV::subtractChroma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned 
int trUnitIdx, unsigned int partSize)
+void TShortYUV::subtractChroma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned 
int trUnitIdx, unsigned int partSize, uint32_t part)
 {
-int x = partSize, y = partSize;
-
 Pel* srcU0 = srcYuv0-getCbAddr(trUnitIdx, partSize);
 Pel* srcU1 = srcYuv1-getCbAddr(trUnitIdx, partSize);
 Pel* srcV0 = srcYuv0-getCrAddr(trUnitIdx, partSize);
@@ -113,8 +111,8 @@
 int src1Stride = srcYuv1-getCStride();
 int dstStride  = m_cwidth;
 
-primitives.pixelsub_ps(x, y, dstU, dstStride, srcU0, srcU1, src0Stride, 
src1Stride);
-primitives.pixelsub_ps(x, y, dstV, dstStride, srcV0, srcV1, src0Stride, 
src1Stride);
+primitives.chroma_sub_ps[m_csp][part](dstU, dstStride, srcU0, srcU1, 
src0Stride, src1Stride);
+primitives.chroma_sub_ps[m_csp][part](dstV, dstStride, srcV0, srcV1, 
src0Stride, src1Stride);
 }
 
 void TShortYUV::addClip(TShortYUV* srcYuv0, TShortYUV* srcYuv1, unsigned int 
trUnitIdx, unsigned int partSize)
diff -r c355ba4b6711 -r be8373f115dd source/common/TShortYUV.h
--- a/source/common/TShortYUV.h Mon Nov 18 16:49:30 2013 +0530
+++ b/source/common/TShortYUV.h Mon Nov 18 17:51:16 2013 +0530
@@ -53,6 +53,8 @@
 return blkX + blkY * size;
 }
 
+int m_csp;
+
 public:
 
 int16_t* m_bufY;
@@ -95,8 +97,8 @@
 
 int16_t* getCrAddr(unsigned int partIdx, unsigned int size) { return 
m_bufCr + getAddrOffset(partIdx, size, m_cwidth); }
 
-void subtractLuma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int 
trUnitIdx, unsigned int partSize);
-void subtractChroma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int 
trUnitIdx, unsigned int partSize);
+void subtractLuma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int 
trUnitIdx, unsigned int partSize, uint32_t part);
+void subtractChroma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int 
trUnitIdx, unsigned int partSize, uint32_t part);
 void subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int trUnitIdx, 
unsigned int partSize);
 
 void addClip(TShortYUV* srcYuv0, TShortYUV* srcYuv1, unsigned int 
trUnitIdx, unsigned int partSize);
diff -r c355ba4b6711 -r be8373f115dd source/common/pixel.cpp
--- a/source/common/pixel.cpp   Mon Nov 18 16:49:30 2013 +0530
+++ b/source/common/pixel.cpp   Mon Nov 18 17:51:16 2013 +0530
@@ -838,7 +838,7 @@
 p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_cW, H; 
\
 p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_cW, H; \
 p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\
-p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_cW, H;
+p.chroma_sub_ps[CSP_I420][CHROMA_ ## W ## x ## H] = pixel_sub_ps_cW, H;
 
 #define LUMA(W, H) \
 p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_cW, H; \
diff -r c355ba4b6711 -r be8373f115dd source/common/primitives.h
--- a/source/common/primitives.hMon Nov 18 16:49:30 2013 +0530
+++ b/source/common/primitives.hMon Nov 18 17:51:16 2013 +0530
@@ -250,7 +250,7 @@
 copy_ps_t   

[x265] [PATCH] TComYuv::copyToPicChroma, blockcopy_pp asm integration

2013-11-18 Thread praveen
# HG changeset patch
# User Praveen Tiwari
# Date 1384780472 -19800
# Node ID 024d6ddf57596b6f77100b3bdcac555ddbec7c0a
# Parent  2321ebe0bf64e5f3c0034076c7edb3ecbcd48039
TComYuv::copyToPicChroma, blockcopy_pp asm integration

diff -r 2321ebe0bf64 -r 024d6ddf5759 source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 11:32:06 2013 +0530
+++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 18:44:32 2013 +0530
@@ -140,8 +140,10 @@
 uint32_t srcstride = getCStride();
 uint32_t dststride = destPicYuv-getCStride();
 
-primitives.blockcpy_pp(width, height, dstU, dststride, srcU, srcstride);
-primitives.blockcpy_pp(width, height, dstV, dststride, srcV, srcstride);
+int part = partitionFromSizes(width, height);
+
+primitives.luma_copy_pp[part](dstU, dststride, srcU, srcstride);
+primitives.luma_copy_pp[part](dstV, dststride, srcV, srcstride);
 }
 
 void TComYuv::copyFromPicYuv(TComPicYuv* srcPicYuv, uint32_t cuAddr, uint32_t 
absZOrderIdx)
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] TComYuv::copyToPicChroma, blockcopy_pp asm integration

2013-11-18 Thread chen
At 2013-11-18 21:14:52,prav...@multicorewareinc.com wrote:
# HG changeset patch
# User Praveen Tiwari
# Date 1384780472 -19800
# Node ID 024d6ddf57596b6f77100b3bdcac555ddbec7c0a
# Parent  2321ebe0bf64e5f3c0034076c7edb3ecbcd48039
TComYuv::copyToPicChroma, blockcopy_pp asm integration

diff -r 2321ebe0bf64 -r 024d6ddf5759 source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 11:32:06 2013 +0530
+++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 18:44:32 2013 +0530
@@ -140,8 +140,10 @@
 uint32_t srcstride = getCStride();
 uint32_t dststride = destPicYuv-getCStride();
 
-primitives.blockcpy_pp(width, height, dstU, dststride, srcU, srcstride);
-primitives.blockcpy_pp(width, height, dstV, dststride, srcV, srcstride);
+int part = partitionFromSizes(width, height);
Note: width/height here are chroma sizes, not luma sizes.
 
+
+primitives.luma_copy_pp[part](dstU, dststride, srcU, srcstride);
+primitives.luma_copy_pp[part](dstV, dststride, srcV, srcstride);
 }
 
 void TComYuv::copyFromPicYuv(TComPicYuv* srcPicYuv, uint32_t cuAddr, uint32_t 
 absZOrderIdx)
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH] blockcopy_pp asm integration, TComYuv::copyToPicChroma

2013-11-18 Thread praveen
# HG changeset patch
# User Praveen Tiwari
# Date 1384783447 -19800
# Node ID b353d170c54f0e33a8869c413be226a48deb1f5c
# Parent  68d8ca28ac05b93accc6931abd576a56b621a492
blockcopy_pp asm integration, TComYuv::copyToPicChroma

diff -r 68d8ca28ac05 -r b353d170c54f source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 19:15:32 2013 +0530
+++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 19:34:07 2013 +0530
@@ -140,10 +140,10 @@
 uint32_t srcstride = getCStride();
 uint32_t dststride = destPicYuv-getCStride();
 
-int part = partitionFromSizes(width, height);
+int part = partitionFromSizes(width  1, height  1);
 
-primitives.luma_copy_pp[part](dstU, dststride, srcU, srcstride);
-primitives.luma_copy_pp[part](dstV, dststride, srcV, srcstride);
+primitives.chroma_copy_pp[m_csp][part](dstU, dststride, srcU, srcstride);
+primitives.chroma_copy_pp[m_csp][part](dstV, dststride, srcV, srcstride);
 }
 
 void TComYuv::copyFromPicYuv(TComPicYuv* srcPicYuv, uint32_t cuAddr, uint32_t 
absZOrderIdx)
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH Review only] asm: code for transpose4x4 routine

2013-11-18 Thread murugan
# HG changeset patch
# User Murugan Vairavel muru...@multicorewareinc.com
# Date 1384784621 -19800
#  Mon Nov 18 19:53:41 2013 +0530
# Node ID d24c22e915afd33a122326516b41eecf7e055934
# Parent  a4735d0fe4759c72a3af408a43723f219688eeb4
asm: code for transpose4x4 routine

diff -r a4735d0fe475 -r d24c22e915af source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp  Mon Nov 18 18:59:20 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp  Mon Nov 18 19:53:41 2013 +0530
@@ -545,6 +545,7 @@
 p.calcrecon[BLOCK_8x8] = x265_calcRecons8_sse2;
 p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
 p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
+p.transpose[BLOCK_4x4] = x265_transpose4_sse2;
 }
 if (cpuMask  X265_CPU_SSSE3)
 {
diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Mon Nov 18 18:59:20 2013 +0530
+++ b/source/common/x86/pixel-a.asm Mon Nov 18 19:53:41 2013 +0530
@@ -8340,3 +8340,25 @@
 jnz.loop
 
 RET
+
+;-
+; void transpose_4x4(pixel *dst, pixel *src, intptr_t stride)
+;-
+INIT_XMM sse2
+cglobal transpose4, 3, 3, 4, dest, src, stride
+
+movd m0,[r1]
+movd m1,[r1 + r2]
+movd m2,[r1 + 2 * r2]
+
+lea  r1,[r1 + 2 * r2]
+
+movd m3,[r1 + r2]
+
+punpcklbwm0,m1
+punpcklbwm2,m3
+punpcklwdm0,m2
+
+movu [r0],m0
+
+RET
diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Mon Nov 18 18:59:20 2013 +0530
+++ b/source/common/x86/pixel.h Mon Nov 18 19:53:41 2013 +0530
@@ -365,5 +365,6 @@
 void x265_getResidual8_sse2(pixel *fenc, pixel *pred, int16_t *residual, 
intptr_t stride);
 void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, 
intptr_t stride);
 void x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, 
intptr_t stride);
+void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride);
 
 #endif // ifndef X265_I386_PIXEL_H
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH Review only] asm: code for transpose4x4 routine

2013-11-18 Thread chen
good!


At 2013-11-18 22:24:12,muru...@multicorewareinc.com wrote:
# HG changeset patch
# User Murugan Vairavel muru...@multicorewareinc.com
# Date 1384784621 -19800
#  Mon Nov 18 19:53:41 2013 +0530
# Node ID d24c22e915afd33a122326516b41eecf7e055934
# Parent  a4735d0fe4759c72a3af408a43723f219688eeb4
asm: code for transpose4x4 routine

diff -r a4735d0fe475 -r d24c22e915af source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Nov 18 18:59:20 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Mon Nov 18 19:53:41 2013 +0530
@@ -545,6 +545,7 @@
 p.calcrecon[BLOCK_8x8] = x265_calcRecons8_sse2;
 p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
 p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
+p.transpose[BLOCK_4x4] = x265_transpose4_sse2;
 }
 if (cpuMask  X265_CPU_SSSE3)
 {
diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asmMon Nov 18 18:59:20 2013 +0530
+++ b/source/common/x86/pixel-a.asmMon Nov 18 19:53:41 2013 +0530
@@ -8340,3 +8340,25 @@
 jnz.loop
 
 RET
+
+;-
+; void transpose_4x4(pixel *dst, pixel *src, intptr_t stride)
+;-
+INIT_XMM sse2
+cglobal transpose4, 3, 3, 4, dest, src, stride
+
+movd m0,[r1]
+movd m1,[r1 + r2]
+movd m2,[r1 + 2 * r2]
+
+lea  r1,[r1 + 2 * r2]
+
+movd m3,[r1 + r2]
+
+punpcklbwm0,m1
+punpcklbwm2,m3
+punpcklwdm0,m2
+
+movu [r0],m0
+
+RET
diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel.h
--- a/source/common/x86/pixel.hMon Nov 18 18:59:20 2013 +0530
+++ b/source/common/x86/pixel.hMon Nov 18 19:53:41 2013 +0530
@@ -365,5 +365,6 @@
 void x265_getResidual8_sse2(pixel *fenc, pixel *pred, int16_t *residual, 
 intptr_t stride);
 void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, 
 intptr_t stride);
 void x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, 
 intptr_t stride);
+void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride);
 
 #endif // ifndef X265_I386_PIXEL_H
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH] added csp support for blpckcopy_ps

2013-11-18 Thread praveen
# HG changeset patch
# User Praveen Tiwari
# Date 1384788209 -19800
# Node ID 59646d515e79b4d0f9a3a72c77c7af17a83bf3d9
# Parent  b353d170c54f0e33a8869c413be226a48deb1f5c
added csp support for blpckcopy_ps

diff -r b353d170c54f -r 59646d515e79 source/common/pixel.cpp
--- a/source/common/pixel.cpp   Mon Nov 18 19:34:07 2013 +0530
+++ b/source/common/pixel.cpp   Mon Nov 18 20:53:29 2013 +0530
@@ -837,7 +837,7 @@
 #define CHROMA(W, H) \
 p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_cW, H; 
\
 p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_cW, H; \
-p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\
+p.chroma_copy_ps[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\
 p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_cW, H;
 
 #define LUMA(W, H) \
diff -r b353d170c54f -r 59646d515e79 source/common/primitives.h
--- a/source/common/primitives.hMon Nov 18 19:34:07 2013 +0530
+++ b/source/common/primitives.hMon Nov 18 20:53:29 2013 +0530
@@ -247,7 +247,7 @@
 copy_sp_t   luma_copy_sp[NUM_LUMA_PARTITIONS];
 copy_sp_t   chroma_copy_sp[NUM_CHROMA_PARTITIONS];
 copy_ps_t   luma_copy_ps[NUM_LUMA_PARTITIONS];
-copy_ps_t   chroma_copy_ps[NUM_CHROMA_PARTITIONS];
+copy_ps_t   chroma_copy_ps[NUM_CSP][NUM_CHROMA_PARTITIONS];
 
 pixel_sub_ps_t  luma_sub_ps[NUM_LUMA_PARTITIONS];
 pixel_sub_ps_t  chroma_sub_ps[NUM_CHROMA_PARTITIONS];
diff -r b353d170c54f -r 59646d515e79 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp  Mon Nov 18 19:34:07 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp  Mon Nov 18 20:53:29 2013 +0530
@@ -141,7 +141,6 @@
 p.chroma_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## 
x ## H ## cpu; \
 p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x 
## H ## cpu; \
 p.chroma_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x 
## H ## cpu; \
-p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = x265_blockcopy_ps_ ## W ## x ## 
H ## cpu; \
 p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## 
H ## cpu;
 
 #define SETUP_CHROMA_SP_FUNC_DEF(W, H, cpu) \
@@ -380,6 +379,36 @@
 SETUP_LUMA_BLOCKCOPY_FUNC_DEF(64, 16, cpu); \
 SETUP_LUMA_BLOCKCOPY_FUNC_DEF(16, 64, cpu);
 
+#define SETUP_CHROMA_FROM_LUMA_SSE4(W1, H1, W2, H2, cpu) \
+p.chroma_copy_ps[X265_CSP_I420][LUMA_ ## W1 ## x ## H1] = 
x265_blockcopy_ps_ ## W2 ## x ## H2 ## cpu;
+
+// For X265_CSP_I420 chroma width and height will be half of luma width and 
height
+#define CHROMA_BLOCKCOPY_SSE4(cpu) \
+SETUP_CHROMA_FROM_LUMA_SSE4(8,   8, 4,  4,  cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(8,   4, 4,  2,  cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(4,   8, 2,  4,  cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(16, 16, 8,  8,  cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(16,  8, 8,  4,  cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(8,  16, 4,  8,  cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(16, 12, 8,  6,  cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(12, 16, 6,  8,  cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(16,  4, 8,  2,  cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(4,  16, 2,  8,  cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(32, 32, 16, 16, cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(32, 16, 16, 8,  cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(16, 32, 8,  16, cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(32, 24, 16, 12, cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(24, 32, 12, 16, cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(32,  8, 16, 4,  cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(8,  32, 4,  16, cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(64, 64, 32, 32, cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(64, 32, 32, 16, cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(32, 64, 16, 32, cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(64, 48, 32, 24, cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(48, 64, 24, 32, cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(64, 16, 32, 8,  cpu); \
+SETUP_CHROMA_FROM_LUMA_SSE4(16, 64, 8,  32, cpu);
+
 using namespace x265;
 
 namespace {
@@ -591,6 +620,7 @@
 CHROMA_FILTERS(_sse4);
 LUMA_FILTERS(_sse4);
 HEVC_SATD(sse4);
+CHROMA_BLOCKCOPY_SSE4(_sse4);
 p.chroma_copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4;
 p.chroma_copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4;
 p.chroma_copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4;
diff -r b353d170c54f -r 59646d515e79 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp  Mon Nov 18 19:34:07 2013 +0530
+++ b/source/test/pixelharness.cpp  Mon Nov 18 20:53:29 2013 +0530
@@ -763,12 +763,15 @@
 }
 }
 
-if (opt.chroma_copy_ps[part])
+for(int i = 0; i  NUM_CSP; i++)
 {
-if (!check_block_copy_ps(ref.chroma_copy_ps[part], 
opt.chroma_copy_ps[part]))
+if (opt.chroma_copy_ps[i][part])
 {
-printf(chroma_copy_ps[%s] failed\n, chromaPartStr[part]);
-return false;
+

Re: [x265] [PATCH] cli: add aq-strength to cli input options, add validations for aq mode

2013-11-18 Thread Steve Borho

On Nov 18, 2013, at 5:26 AM, Aarthi Thirumalai aar...@multicorewareinc.com 
wrote:

 # HG changeset patch
 # User Aarthi Thirumalai
 # Date 1384773969 -19800
 #  Mon Nov 18 16:56:09 2013 +0530
 # Node ID 78225cfaa696fad7f2870c4064c8f0f387e5ba8d
 # Parent  2321ebe0bf64e5f3c0034076c7edb3ecbcd48039
 cli: add aq-strength to cli input options, add validations for aq mode
 
 diff -r 2321ebe0bf64 -r 78225cfaa696 source/common/common.cpp
 --- a/source/common/common.cppMon Nov 18 11:32:06 2013 +0530
 +++ b/source/common/common.cppMon Nov 18 16:56:09 2013 +0530
 @@ -519,8 +519,10 @@
   max consecutive bframe count must be 16 or smaller);
 CHECK(param-lookaheadDepth  X265_LOOKAHEAD_MAX,
   Lookahead depth must be less than 256);
 -CHECK(param-rc.aqModeX265_AQ_NONE || param-rc.aqMode 
 X265_AQ_VARIANCE,
 +CHECK(param-rc.aqMode  X265_AQ_NONE || param-rc.aqMode  
 X265_AQ_VARIANCE,
   Aq-Mode is out of range);
 +CHECK(param-rc.aqStrength  0 || param-rc.aqStrength  3,
 +  Aq-Strength is out of range);
 
 // max CU size should be power of 2
 uint32_t i = param-maxCUSize;
 @@ -532,6 +534,16 @@
 }
 
 CHECK(param-bEnableWavefront  0, WaveFrontSynchro cannot be negative);
 +if(param-rc.rateControlMode == X265_RC_CQP )

white-space

 +{
 +param-rc.aqMode = X265_AQ_NONE;
 +param-rc.bitrate = 0;
 +}
 +if(param-rc.aqStrength == 0)
 +{
 +x265_log(param, X265_LOG_WARNING, Aq mode specified, but Aq 
 strength is  0, ignored\n );
 +param-rc.aqMode = 0;
 +}
 
 return check_failed;
 }
 @@ -652,7 +664,8 @@
 }
 TOOLOPT(param-bEnableWeightedPred, weightp);
 TOOLOPT(param-bEnableWeightedBiPred, weightbp);
 -TOOLOPT(param-rc.aqMode, aq);
 +TOOLOPT(param-rc.aqMode, aq-mode);
 +fprintf(stderr, aq-strength=%.2f , param-rc.aqStrength);
 fprintf(stderr, \n);
 fflush(stderr);
 }
 @@ -729,6 +742,7 @@
 OPT(psnr) p-bEnablePsnr = bvalue;
 OPT(hash) p-decodedPictureHashSEI = atoi(value);
 OPT(aq-mode) p-rc.aqMode = atoi(value);
 +OPT(aq-strength) p-rc.aqStrength = atof(value);
 OPT(crf)
 {
 p-rc.rfConstant = atof(value);
 @@ -794,6 +808,8 @@
 BOOL(p-bEnableWeightedPred, weightp);
 s += sprintf(s,  bitrate=%d, p-rc.bitrate);
 s += sprintf(s,  qp=%d, p-rc.qp);
 +s += sprintf(s,  aq-mode=%d, p-rc.aqMode);
 +s += sprintf(s,  aq-strength=%.2f, p-rc.aqStrength);
 s += sprintf(s,  cbqpoffs=%d, p-cbQpOffset);
 s += sprintf(s,  crqpoffs=%d, p-crQpOffset);
 s += sprintf(s,  rd=%d, p-rdLevel);
 diff -r 2321ebe0bf64 -r 78225cfaa696 source/x265.cpp
 --- a/source/x265.cpp Mon Nov 18 11:32:06 2013 +0530
 +++ b/source/x265.cpp Mon Nov 18 16:56:09 2013 +0530
 @@ -123,6 +123,7 @@
 { bitrate,required_argument, NULL, 0 },
 { qp, required_argument, NULL, 'q' },
 { aq-mode,required_argument, NULL, 0 },
 +{ aq-strength,required_argument, NULL, 0 },
 { cbqpoffs,   required_argument, NULL, 0 },
 { crqpoffs,   required_argument, NULL, 0 },
 { rd, required_argument, NULL, 0 },
 @@ -310,6 +311,7 @@
 H0(   --crf Quality-based VBR (0-51). Default 
 %f\n, param-rc.rfConstant);
 H0(-q/--qp  Base QP for CQP mode. Default %d\n, 
 param-rc.qp);
 H0(   --aq-mode Mode for Adaptive Quantization - 
 0:none 1:aqVariance Default %d\n, param-rc.aqMode);
 +H0(   --aq-strength Reduces blocking and blurring in 
 flat and textured areas.(0 to 3.0)double . Default %f\n, 
 param-rc.aqStrength);
 H0(   --cbqpoffsChroma Cb QP Offset. Default %d\n, 
 param-cbQpOffset);
 H0(   --crqpoffsChroma Cr QP Offset. Default %d\n, 
 param-crQpOffset);
 H0(   --rd  Level of RD in mode decision 
 0:least2:full RDO. Default %d\n, param-rdLevel);
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel



signature.asc
Description: Message signed with OpenPGP using GPGMail
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH Review only] asm: code for transpose4x4 routine

2013-11-18 Thread chen
Excuse me, I pressed the send button too early.
Good code, but please insert some spaces before RET and remove the unused
blank lines.

At 2013-11-18 22:24:12,muru...@multicorewareinc.com wrote:
# HG changeset patch
# User Murugan Vairavel muru...@multicorewareinc.com
# Date 1384784621 -19800
#  Mon Nov 18 19:53:41 2013 +0530
# Node ID d24c22e915afd33a122326516b41eecf7e055934
# Parent  a4735d0fe4759c72a3af408a43723f219688eeb4
asm: code for transpose4x4 routine

diff -r a4735d0fe475 -r d24c22e915af source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Nov 18 18:59:20 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Mon Nov 18 19:53:41 2013 +0530
@@ -545,6 +545,7 @@
 p.calcrecon[BLOCK_8x8] = x265_calcRecons8_sse2;
 p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
 p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
+p.transpose[BLOCK_4x4] = x265_transpose4_sse2;
 }
 if (cpuMask  X265_CPU_SSSE3)
 {
diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asmMon Nov 18 18:59:20 2013 +0530
+++ b/source/common/x86/pixel-a.asmMon Nov 18 19:53:41 2013 +0530
@@ -8340,3 +8340,25 @@
 jnz.loop
 
 RET
+
+;-
+; void transpose_4x4(pixel *dst, pixel *src, intptr_t stride)
+;-
+INIT_XMM sse2
+cglobal transpose4, 3, 3, 4, dest, src, stride
+
+movd m0,[r1]
+movd m1,[r1 + r2]
+movd m2,[r1 + 2 * r2]
+
+lea  r1,[r1 + 2 * r2]
+
+movd m3,[r1 + r2]
+
+punpcklbwm0,m1
+punpcklbwm2,m3
+punpcklwdm0,m2
+
+movu [r0],m0
+
+RET
diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel.h
--- a/source/common/x86/pixel.hMon Nov 18 18:59:20 2013 +0530
+++ b/source/common/x86/pixel.hMon Nov 18 19:53:41 2013 +0530
@@ -365,5 +365,6 @@
 void x265_getResidual8_sse2(pixel *fenc, pixel *pred, int16_t *residual, 
 intptr_t stride);
 void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, 
 intptr_t stride);
 void x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, 
 intptr_t stride);
+void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride);
 
 #endif // ifndef X265_I386_PIXEL_H
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH] TComYuv::copyPartToPartYuv, asm code intergration for blockcopy_ps

2013-11-18 Thread praveen
# HG changeset patch
# User Praveen Tiwari
# Date 1384788645 -19800
# Node ID 49a556cf22721d846a94e07c1933fcd092b898dd
# Parent  59646d515e79b4d0f9a3a72c77c7af17a83bf3d9
TComYuv::copyPartToPartYuv, asm code intergration for blockcopy_ps

diff -r 59646d515e79 -r 49a556cf2272 source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 20:53:29 2013 +0530
+++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 21:00:45 2013 +0530
@@ -256,10 +256,12 @@
 
 void TComYuv::copyPartToPartYuv(TShortYUV* dstPicYuv, uint32_t partIdx, 
uint32_t width, uint32_t height, bool bLuma, bool bChroma)
 {
+int part = partitionFromSizes(width, height);
+
 if (bLuma)
-copyPartToPartLuma(dstPicYuv, partIdx, width, height);
+copyPartToPartLuma(dstPicYuv, partIdx, part);
 if (bChroma)
-copyPartToPartChroma(dstPicYuv, partIdx, width  m_hChromaShift, 
height  m_vChromaShift);
+copyPartToPartChroma(dstPicYuv, partIdx, part);
 }
 
 void TComYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, 
uint32_t part)
@@ -275,7 +277,7 @@
 primitives.luma_copy_pp[part](dst, dststride, src, srcstride);
 }
 
-void TComYuv::copyPartToPartLuma(TShortYUV* dstPicYuv, uint32_t partIdx, 
uint32_t width, uint32_t height)
+void TComYuv::copyPartToPartLuma(TShortYUV* dstPicYuv, uint32_t partIdx, 
uint32_t part)
 {
 Pel* src = getLumaAddr(partIdx);
 int16_t* dst = dstPicYuv-getLumaAddr(partIdx);
@@ -283,7 +285,6 @@
 uint32_t  srcstride = getStride();
 uint32_t  dststride = dstPicYuv-m_width;
 
-int part = partitionFromSizes(width, height);
 primitives.luma_copy_ps[part](dst, dststride, src, srcstride);
 }
 
@@ -303,7 +304,7 @@
 primitives.chroma_copy_pp[m_csp][part](dstV, dststride, srcV, srcstride);
 }
 
-void TComYuv::copyPartToPartChroma(TShortYUV* dstPicYuv, uint32_t partIdx, 
uint32_t width, uint32_t height)
+void TComYuv::copyPartToPartChroma(TShortYUV* dstPicYuv, uint32_t partIdx, 
uint32_t part)
 {
 Pel*   srcU = getCbAddr(partIdx);
 Pel*   srcV = getCrAddr(partIdx);
@@ -313,8 +314,8 @@
 uint32_t srcstride = getCStride();
 uint32_t dststride = dstPicYuv-m_cwidth;
 
-primitives.blockcpy_sp(width, height, dstU, dststride, srcU, srcstride);
-primitives.blockcpy_sp(width, height, dstV, dststride, srcV, srcstride);
+primitives.chroma_copy_ps[m_csp][part](dstU, dststride, srcU, srcstride);
+primitives.chroma_copy_ps[m_csp][part](dstV, dststride, srcV, srcstride);
 }
 
 void TComYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, 
uint32_t width, uint32_t height, uint32_t chromaId)
diff -r 59646d515e79 -r 49a556cf2272 source/Lib/TLibCommon/TComYuv.h
--- a/source/Lib/TLibCommon/TComYuv.h   Mon Nov 18 20:53:29 2013 +0530
+++ b/source/Lib/TLibCommon/TComYuv.h   Mon Nov 18 21:00:45 2013 +0530
@@ -140,9 +140,9 @@
 voidcopyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t 
width, uint32_t height, bool bLuma = true, bool bChroma = true);
 voidcopyPartToPartYuv(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t 
width, uint32_t height, bool bLuma = true, bool bChroma = true);
 voidcopyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t 
part);
-voidcopyPartToPartLuma(TShortYUV* dstPicYuv, uint32_t partIdx, 
uint32_t width, uint32_t height);
+voidcopyPartToPartLuma(TShortYUV* dstPicYuv, uint32_t partIdx, 
uint32_t part);
 voidcopyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, 
uint32_t part);
-voidcopyPartToPartChroma(TShortYUV* dstPicYuv, uint32_t partIdx, 
uint32_t width, uint32_t height);
+voidcopyPartToPartChroma(TShortYUV* dstPicYuv, uint32_t partIdx, 
uint32_t part);
 
 voidcopyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, 
uint32_t width, uint32_t height, uint32_t chromaId);
 voidcopyPartToPartChroma(TShortYUV* dstPicYuv, uint32_t partIdx, 
uint32_t width, uint32_t height, uint32_t chromaId);
diff -r 59646d515e79 -r 49a556cf2272 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Nov 18 20:53:29 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Nov 18 21:00:45 2013 +0530
@@ -1126,12 +1126,12 @@
 }
 
 //= copy reconstruction =
-m_qtTempTransformSkipTComYuv.copyPartToPartLuma(m_qtTempTComYuv[qtlayer], 
absPartIdx, 1  trSizeLog2, 1  trSizeLog2);
+int part = partitionFromSizes(1  trSizeLog2, 1  trSizeLog2);
+m_qtTempTransformSkipTComYuv.copyPartToPartLuma(m_qtTempTComYuv[qtlayer], 
absPartIdx, part);
 
 if (!bLumaOnly  !bSkipChroma)
 {
-uint32_t trSizeCLog2 = (bChromaSame ? trSizeLog2 : trSizeLog2 - 1);
-
m_qtTempTransformSkipTComYuv.copyPartToPartChroma(m_qtTempTComYuv[qtlayer], 
absPartIdx, 1  trSizeCLog2, 1  trSizeCLog2);
+
m_qtTempTransformSkipTComYuv.copyPartToPartChroma(m_qtTempTComYuv[qtlayer], 
absPartIdx, part);
 }
 
 uint32_t   zOrder   = cu-getZorderIdxInCU() + 

Re: [x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames

2013-11-18 Thread Steve Borho

On Nov 18, 2013, at 4:10 AM, Gopu Govindaswamy g...@multicorewareinc.com 
wrote:

 # HG changeset patch
 # User Gopu Govindaswamy g...@multicorewareinc.com
 # Date 1384769433 -19800
 # Node ID 1e22b93638072ed805478d7af17f90e285fb4969
 # Parent  2321ebe0bf64e5f3c0034076c7edb3ecbcd48039
 b-pyramid implementation: Allow the use of B-frames as references for non B 
 and B frames
 
 When b-pyramid is enabled the bitrate is reduced noticeably (about 5-7% on 
 the clips below) and there is not much difference in performance.
 PSNR changes by only a few hundredths of a dB: it increased for some of the 
 clips and decreased for others.
 
 Test results for reference when enable and disable the b-pyramid:
 cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --ref=1 --b-adapt=2
 Enable B-reference  : --b-pyramid=1
 Disable B-reference : --b-pyramid=0
 
 Results:
 Enable / Disable
 
 clip - FourPeople_1280x720_60.yuv
 Total time taken - 9.70s (10.31 fps) / 9.93s (10.07 fps)
 Bitrates - 516.30 kb/s / 544.68 kb/s
 PSNR - 39.725 / 39.701
 
 clip - BasketballDrive_1920x1080_50.y4m
 Total time taken - 39.06s (2.51 fps) / 38.98s (2.57 fps)
 Bitrates -  4166.92 kb/s / 4370.43 kb/s
 PSNR -  37.261 / 37.268
 
 clip - Johnny_1280x720_60.y4m
 Total time taken - 8.88s (11.27 fps) / 11.08s (9.03 fps)
 Bitrates - 304.29 kb/s / 328.84 kb/s
 PSNR - 40.605 / 40.551
 
 Total time taken - 30.97s (3.23 fps) / 33.65s (2.97 fps)
 Bitrates - 3496.84 kb/s / 3683.93 kb/s
 PSNR - 35.645 / 35.660
 
 diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.cpp
 --- a/source/common/common.cppMon Nov 18 11:32:06 2013 +0530
 +++ b/source/common/common.cppMon Nov 18 15:40:33 2013 +0530
 @@ -54,6 +54,7 @@
 
 static int parseCspName(const char *arg, int error);
 static int parseName(const char *arg, const char * const * names, int error);
 +static int parse_enum(const char *, const char * const * names, int *dst);
 
 using namespace x265;
 
 @@ -165,6 +166,7 @@
 param-bframes = 3;
 param-lookaheadDepth = 40;
 param-bFrameAdaptive = X265_B_ADAPT_FAST;
 +param-bpyramid = 0;
 param-scenecutThreshold = 40; /* Magic number pulled in from x264*/
 
 /* Intra Coding Tools */
 @@ -532,7 +534,7 @@
 }
 
 CHECK(param-bEnableWavefront  0, WaveFrontSynchro cannot be negative);
 -
 +CHECK(param-bpyramid = 2, b-pyramid is 0 or 1);
 return check_failed;
 }
 
 @@ -620,6 +622,7 @@
 x265_log(param, X265_LOG_INFO, RDpenalty: %d\n, 
 param-rdPenalty);
 }
 x265_log(param, X265_LOG_INFO, Lookahead / bframes / badapt : %d / %d / 
 %d\n, param-lookaheadDepth, param-bframes, param-bFrameAdaptive);
 +x265_log(param, X265_LOG_INFO, b-pyramid / weightp / ref: %d / %d / 
 %d\n, param-bpyramid, param-bEnableWeightedPred, param-maxNumReferences);
 x265_log(param, X265_LOG_INFO, tools: );
 #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, %s , STR)
 TOOLOPT(param-bEnableRectInter, rect);
 @@ -628,7 +631,6 @@
 TOOLOPT(param-bEnableConstrainedIntra, cip);
 TOOLOPT(param-bEnableEarlySkip, esd);
 fprintf(stderr, rd=%d , param-rdLevel);
 -fprintf(stderr, ref=%d , param-maxNumReferences);
 
 TOOLOPT(param-bEnableLoopFilter, lft);
 if (param-bEnableSAO)
 @@ -650,7 +652,6 @@
 else
 fprintf(stderr, tskip );
 }
 -TOOLOPT(param-bEnableWeightedPred, weightp);
 TOOLOPT(param-bEnableWeightedBiPred, weightbp);
 TOOLOPT(param-rc.aqMode, aq);
 fprintf(stderr, \n);
 @@ -747,6 +748,15 @@
 }
 OPT(input-csp) p-sourceCsp = ::parseCspName(value, berror);
 OPT(me)p-searchMethod = ::parseName(value, 
 x265_motion_est_names, berror);
 +OPT(b-pyramid)
 +{
 +berror |= parse_enum(value, x265_b_pyramid_names, p-bpyramid);

Don't add a new function for this; use the existing helper the same way the 
"me" option does:
p->bpyramid = ::parseName(value, x265_b_pyramid_names, berror);

That helper function already does the atoi() fallback check.

 +if (berror)
 +{
 +berror = 0;
 +p-bpyramid = atoi(value);
 +}
 +}
 else
 return X265_PARAM_BAD_NAME;
 #undef OPT
 @@ -802,6 +812,7 @@
 BOOL(p-bEnableSAO, sao);
 s += sprintf(s,  sao-lcu-bounds=%d, p-saoLcuBoundary);
 s += sprintf(s,  sao-lcu-opt=%d, p-saoLcuBasedOptimization);
 +s += sprintf(s,  b-pyramid=%d, p-bpyramid);
 #undef BOOL
 
 return buf;
 @@ -843,3 +854,13 @@
 error = 1;
 return a;
 }
 +static int parse_enum(const char *arg, const char * const * names, int *dst)
 +{
 +for (int i = 0; names[i]; i++)
 +if (!strcmp(arg, names[i]))
 +{
 +*dst = i;
 +return 0;
 +}
 +return -1;
 +}
 diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.h
 --- a/source/common/common.h  Mon Nov 18 11:32:06 2013 +0530
 +++ b/source/common/common.h  Mon Nov 18 15:40:33 2013 +0530
 @@ -107,6 +107,7 @@
 #define X265_LOG2(x)  log2(x)
 #endif
 
 +static const char * const x265_b_pyramid_names[] = {none, normal, 0};
 /* defined in common.cpp */
 int64_t 

[x265] [PATCH] TComYuv::copyPartToPartChroma, blockcopy_pp asm integration

2013-11-18 Thread praveen
# HG changeset patch
# User Praveen Tiwari
# Date 1384790206 -19800
# Node ID a5f618af8d963efafaa8581f4484066b13f4f614
# Parent  49a556cf22721d846a94e07c1933fcd092b898dd
TComYuv::copyPartToPartChroma, blockcopy_pp asm integration

diff -r 49a556cf2272 -r a5f618af8d96 source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 21:00:45 2013 +0530
+++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 21:26:46 2013 +0530
@@ -327,7 +327,8 @@
 if (srcU == dstU) return;
 uint32_t srcstride = getCStride();
 uint32_t dststride = dstPicYuv-getCStride();
-primitives.blockcpy_pp(width, height, dstU, dststride, srcU, 
srcstride);
+int part = partitionFromSizes(width  1, height  1);
+primitives.chroma_copy_pp[m_csp][part](dstU, dststride, srcU, 
srcstride);
 }
 else if (chromaId == 1)
 {
@@ -336,7 +337,8 @@
 if (srcV == dstV) return;
 uint32_t srcstride = getCStride();
 uint32_t dststride = dstPicYuv-getCStride();
-primitives.blockcpy_pp(width, height, dstV, dststride, srcV, 
srcstride);
+int part = partitionFromSizes(width  1, height  1);
+primitives.chroma_copy_pp[m_csp][part](dstV, dststride, srcV, 
srcstride);
 }
 else
 {
@@ -347,8 +349,9 @@
 if (srcU == dstU  srcV == dstV) return;
 uint32_t srcstride = getCStride();
 uint32_t dststride = dstPicYuv-getCStride();
-primitives.blockcpy_pp(width, height, dstU, dststride, srcU, 
srcstride);
-primitives.blockcpy_pp(width, height, dstV, dststride, srcV, 
srcstride);
+int part = partitionFromSizes(width  1, height  1);
+primitives.chroma_copy_pp[m_csp][part](dstU, dststride, srcU, 
srcstride);
+primitives.chroma_copy_pp[m_csp][part](dstV, dststride, srcV, 
srcstride);
 }
 }
 
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH] asm integration for blockcopy_ps

2013-11-18 Thread praveen
# HG changeset patch
# User Praveen Tiwari
# Date 1384791507 -19800
# Node ID 4c5daf21c1583cae93dbdf404a1b68aeced6b690
# Parent  a5f618af8d963efafaa8581f4484066b13f4f614
asm integration for blockcopy_ps

diff -r a5f618af8d96 -r 4c5daf21c158 source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 21:26:46 2013 +0530
+++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 21:48:27 2013 +0530
@@ -365,7 +365,8 @@
 uint32_t srcstride = getCStride();
 uint32_t dststride = dstPicYuv-m_cwidth;
 
-primitives.blockcpy_sp(width, height, dstU, dststride, srcU, 
srcstride);
+int part = partitionFromSizes(width  1, height  1);
+primitives.chroma_copy_ps[m_csp][part](dstU, dststride, srcU, 
srcstride);
 }
 else if (chromaId == 1)
 {
@@ -375,7 +376,8 @@
 uint32_t srcstride = getCStride();
 uint32_t dststride = dstPicYuv-m_cwidth;
 
-primitives.blockcpy_sp(width, height, dstV, dststride, srcV, 
srcstride);
+int part = partitionFromSizes(width  1, height  1);
+primitives.chroma_copy_ps[m_csp][part](dstV, dststride, srcV, 
srcstride);
 }
 else
 {
@@ -387,8 +389,9 @@
 uint32_t srcstride = getCStride();
 uint32_t dststride = dstPicYuv-m_cwidth;
 
-primitives.blockcpy_sp(width, height, dstU, dststride, srcU, 
srcstride);
-primitives.blockcpy_sp(width, height, dstV, dststride, srcV, 
srcstride);
+int part = partitionFromSizes(width  1, height  1);
+primitives.chroma_copy_ps[m_csp][part](dstU, dststride, srcU, 
srcstride);
+primitives.chroma_copy_ps[m_csp][part](dstV, dststride, srcV, 
srcstride);
 }
 }
 
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH] cli: add aq-strength to cli input options, add validations for aq mode

2013-11-18 Thread aarthi
# HG changeset patch
# User Aarthi Thirumalaiaar...@multicorewareinc.com
# Date 1384792447 -19800
#  Mon Nov 18 22:04:07 2013 +0530
# Node ID 8b9afa5556b315391df143e5fb6e8f3eedd17bc5
# Parent  2321ebe0bf64e5f3c0034076c7edb3ecbcd48039
cli: add aq-strength to cli input options, add validations for aq mode

diff -r 2321ebe0bf64 -r 8b9afa5556b3 source/common/common.cpp
--- a/source/common/common.cpp  Mon Nov 18 11:32:06 2013 +0530
+++ b/source/common/common.cpp  Mon Nov 18 22:04:07 2013 +0530
@@ -519,8 +519,10 @@
   max consecutive bframe count must be 16 or smaller);
 CHECK(param-lookaheadDepth  X265_LOOKAHEAD_MAX,
   Lookahead depth must be less than 256);
-CHECK(param-rc.aqModeX265_AQ_NONE || param-rc.aqMode X265_AQ_VARIANCE,
+CHECK(param-rc.aqMode  X265_AQ_NONE || param-rc.aqMode  
X265_AQ_VARIANCE,
   Aq-Mode is out of range);
+CHECK(param-rc.aqStrength  0 || param-rc.aqStrength  3,
+  Aq-Strength is out of range);
 
 // max CU size should be power of 2
 uint32_t i = param-maxCUSize;
@@ -532,6 +534,16 @@
 }
 
 CHECK(param-bEnableWavefront  0, WaveFrontSynchro cannot be negative);
+if (param-rc.rateControlMode == X265_RC_CQP)
+{
+param-rc.aqMode = X265_AQ_NONE;
+param-rc.bitrate = 0;
+}
+if (param-rc.aqStrength == 0)
+{
+x265_log(param, X265_LOG_WARNING, Aq mode specified, but Aq strength 
is  0, ignored\n );
+param-rc.aqMode = 0;
+}
 
 return check_failed;
 }
@@ -652,7 +664,8 @@
 }
 TOOLOPT(param-bEnableWeightedPred, weightp);
 TOOLOPT(param-bEnableWeightedBiPred, weightbp);
-TOOLOPT(param-rc.aqMode, aq);
+TOOLOPT(param-rc.aqMode, aq-mode);
+fprintf(stderr, aq-strength=%.2f , param-rc.aqStrength);
 fprintf(stderr, \n);
 fflush(stderr);
 }
@@ -729,6 +742,7 @@
 OPT(psnr) p-bEnablePsnr = bvalue;
 OPT(hash) p-decodedPictureHashSEI = atoi(value);
 OPT(aq-mode) p-rc.aqMode = atoi(value);
+OPT(aq-strength) p-rc.aqStrength = atof(value);
 OPT(crf)
 {
 p-rc.rfConstant = atof(value);
@@ -794,6 +808,8 @@
 BOOL(p-bEnableWeightedPred, weightp);
 s += sprintf(s,  bitrate=%d, p-rc.bitrate);
 s += sprintf(s,  qp=%d, p-rc.qp);
+s += sprintf(s,  aq-mode=%d, p-rc.aqMode);
+s += sprintf(s,  aq-strength=%.2f, p-rc.aqStrength);
 s += sprintf(s,  cbqpoffs=%d, p-cbQpOffset);
 s += sprintf(s,  crqpoffs=%d, p-crQpOffset);
 s += sprintf(s,  rd=%d, p-rdLevel);
diff -r 2321ebe0bf64 -r 8b9afa5556b3 source/x265.cpp
--- a/source/x265.cpp   Mon Nov 18 11:32:06 2013 +0530
+++ b/source/x265.cpp   Mon Nov 18 22:04:07 2013 +0530
@@ -123,6 +123,7 @@
 { bitrate,required_argument, NULL, 0 },
 { qp, required_argument, NULL, 'q' },
 { aq-mode,required_argument, NULL, 0 },
+{ aq-strength,required_argument, NULL, 0 },
 { cbqpoffs,   required_argument, NULL, 0 },
 { crqpoffs,   required_argument, NULL, 0 },
 { rd, required_argument, NULL, 0 },
@@ -310,6 +311,7 @@
 H0(   --crf Quality-based VBR (0-51). Default 
%f\n, param-rc.rfConstant);
 H0(-q/--qp  Base QP for CQP mode. Default %d\n, 
param-rc.qp);
 H0(   --aq-mode Mode for Adaptive Quantization - 
0:none 1:aqVariance Default %d\n, param-rc.aqMode);
+H0(   --aq-strength Reduces blocking and blurring in flat 
and textured areas.(0 to 3.0)double . Default %f\n, param-rc.aqStrength);
 H0(   --cbqpoffsChroma Cb QP Offset. Default %d\n, 
param-cbQpOffset);
 H0(   --crqpoffsChroma Cr QP Offset. Default %d\n, 
param-crQpOffset);
 H0(   --rd  Level of RD in mode decision 
0:least2:full RDO. Default %d\n, param-rdLevel);
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] added csp support for blpckcopy_ps

2013-11-18 Thread Steve Borho

On Nov 18, 2013, at 9:23 AM, prav...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Praveen Tiwari
 # Date 1384788209 -19800
 # Node ID 59646d515e79b4d0f9a3a72c77c7af17a83bf3d9
 # Parent  b353d170c54f0e33a8869c413be226a48deb1f5c
 added csp support for blpckcopy_ps
 
 diff -r b353d170c54f -r 59646d515e79 source/common/pixel.cpp
 --- a/source/common/pixel.cpp Mon Nov 18 19:34:07 2013 +0530
 +++ b/source/common/pixel.cpp Mon Nov 18 20:53:29 2013 +0530
 @@ -837,7 +837,7 @@
 #define CHROMA(W, H) \
 p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_cW, H; \
 p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_cW, H; \
 -p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\
 +p.chroma_copy_ps[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, 
 H;\
 p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_cW, H;
 
 #define LUMA(W, H) \
 diff -r b353d170c54f -r 59646d515e79 source/common/primitives.h
 --- a/source/common/primitives.h  Mon Nov 18 19:34:07 2013 +0530
 +++ b/source/common/primitives.h  Mon Nov 18 20:53:29 2013 +0530
 @@ -247,7 +247,7 @@
 copy_sp_t   luma_copy_sp[NUM_LUMA_PARTITIONS];
 copy_sp_t   chroma_copy_sp[NUM_CHROMA_PARTITIONS];
 copy_ps_t   luma_copy_ps[NUM_LUMA_PARTITIONS];
 -copy_ps_t   chroma_copy_ps[NUM_CHROMA_PARTITIONS];
 +copy_ps_t   chroma_copy_ps[NUM_CSP][NUM_CHROMA_PARTITIONS];
 
 pixel_sub_ps_t  luma_sub_ps[NUM_LUMA_PARTITIONS];
 pixel_sub_ps_t  chroma_sub_ps[NUM_CHROMA_PARTITIONS];
 diff -r b353d170c54f -r 59646d515e79 source/common/x86/asm-primitives.cpp
 --- a/source/common/x86/asm-primitives.cppMon Nov 18 19:34:07 2013 +0530
 +++ b/source/common/x86/asm-primitives.cppMon Nov 18 20:53:29 2013 +0530
 @@ -141,7 +141,6 @@
 p.chroma_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x 
 ## H ## cpu; \
 p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## 
 H ## cpu; \
 p.chroma_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## 
 H ## cpu; \
 -p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = x265_blockcopy_ps_ ## W ## x 
 ## H ## cpu; \
 p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H 
 ## cpu;
 
 #define SETUP_CHROMA_SP_FUNC_DEF(W, H, cpu) \
 @@ -380,6 +379,36 @@
 SETUP_LUMA_BLOCKCOPY_FUNC_DEF(64, 16, cpu); \
 SETUP_LUMA_BLOCKCOPY_FUNC_DEF(16, 64, cpu);
 
 +#define SETUP_CHROMA_FROM_LUMA_SSE4(W1, H1, W2, H2, cpu) \
 +p.chroma_copy_ps[X265_CSP_I420][LUMA_ ## W1 ## x ## H1] = 
 x265_blockcopy_ps_ ## W2 ## x ## H2 ## cpu;
 +
 +// For X265_CSP_I420 chroma width and height will be half of luma width and 
 height
 +#define CHROMA_BLOCKCOPY_SSE4(cpu) \

When the macro accepts a cpu type argument, adding SSE4 to the name is 
redundant (and confusing).
There should probably be a generic I420 macro that maps luma blocks to I420 
blocks, so that adding more color spaces does not multiply the amount of code 
in this file.

 +SETUP_CHROMA_FROM_LUMA_SSE4(8,   8, 4,  4,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(8,   4, 4,  2,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(4,   8, 2,  4,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(16, 16, 8,  8,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(16,  8, 8,  4,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(8,  16, 4,  8,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(16, 12, 8,  6,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(12, 16, 6,  8,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(16,  4, 8,  2,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(4,  16, 2,  8,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(32, 32, 16, 16, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(32, 16, 16, 8,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(16, 32, 8,  16, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(32, 24, 16, 12, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(24, 32, 12, 16, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(32,  8, 16, 4,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(8,  32, 4,  16, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(64, 64, 32, 32, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(64, 32, 32, 16, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(32, 64, 16, 32, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(64, 48, 32, 24, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(48, 64, 24, 32, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(64, 16, 32, 8,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(16, 64, 8,  32, cpu);
 +
 using namespace x265;
 
 namespace {
 @@ -591,6 +620,7 @@
 CHROMA_FILTERS(_sse4);
 LUMA_FILTERS(_sse4);
 HEVC_SATD(sse4);
 +CHROMA_BLOCKCOPY_SSE4(_sse4);
 p.chroma_copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4;
 p.chroma_copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4;
 p.chroma_copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4;
 diff -r b353d170c54f -r 59646d515e79 source/test/pixelharness.cpp
 --- a/source/test/pixelharness.cppMon Nov 18 19:34:07 2013 +0530
 +++ b/source/test/pixelharness.cppMon Nov 18 20:53:29 2013 +0530
 @@ -763,12 +763,15 @@
 }
 }
 
 -if (opt.chroma_copy_ps[part])
 +

Re: [x265] [PATCH] added csp support for blpckcopy_ps

2013-11-18 Thread Steve Borho

On Nov 18, 2013, at 9:23 AM, prav...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Praveen Tiwari
 # Date 1384788209 -19800
 # Node ID 59646d515e79b4d0f9a3a72c77c7af17a83bf3d9
 # Parent  b353d170c54f0e33a8869c413be226a48deb1f5c
 added csp support for blpckcopy_ps
 
 diff -r b353d170c54f -r 59646d515e79 source/common/pixel.cpp
 --- a/source/common/pixel.cpp Mon Nov 18 19:34:07 2013 +0530
 +++ b/source/common/pixel.cpp Mon Nov 18 20:53:29 2013 +0530
 @@ -837,7 +837,7 @@
 #define CHROMA(W, H) \
  p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_cW, H; \
  p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_cW, H; \
 -p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\
 +p.chroma_copy_ps[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, 
 H;\
  p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_cW, H;
 
 #define LUMA(W, H) \
 diff -r b353d170c54f -r 59646d515e79 source/common/primitives.h
 --- a/source/common/primitives.h  Mon Nov 18 19:34:07 2013 +0530
 +++ b/source/common/primitives.h  Mon Nov 18 20:53:29 2013 +0530
 @@ -247,7 +247,7 @@
  copy_sp_t   luma_copy_sp[NUM_LUMA_PARTITIONS];
  copy_sp_t   chroma_copy_sp[NUM_CHROMA_PARTITIONS];
  copy_ps_t   luma_copy_ps[NUM_LUMA_PARTITIONS];
 -copy_ps_t   chroma_copy_ps[NUM_CHROMA_PARTITIONS];
 +copy_ps_t   chroma_copy_ps[NUM_CSP][NUM_CHROMA_PARTITIONS];
 
  pixel_sub_ps_t  luma_sub_ps[NUM_LUMA_PARTITIONS];
  pixel_sub_ps_t  chroma_sub_ps[NUM_CHROMA_PARTITIONS];
 diff -r b353d170c54f -r 59646d515e79 source/common/x86/asm-primitives.cpp
 --- a/source/common/x86/asm-primitives.cppMon Nov 18 19:34:07 2013 +0530
 +++ b/source/common/x86/asm-primitives.cppMon Nov 18 20:53:29 2013 +0530
 @@ -141,7 +141,6 @@
  p.chroma_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x 
 ## H ## cpu; \
  p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x 
 ## H ## cpu; \
  p.chroma_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x 
 ## H ## cpu; \
 -p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = x265_blockcopy_ps_ ## W ## x 
 ## H ## cpu; \
  p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H 
 ## cpu;
 
 #define SETUP_CHROMA_SP_FUNC_DEF(W, H, cpu) \
 @@ -380,6 +379,36 @@
  SETUP_LUMA_BLOCKCOPY_FUNC_DEF(64, 16, cpu); \
  SETUP_LUMA_BLOCKCOPY_FUNC_DEF(16, 64, cpu);
 
 +#define SETUP_CHROMA_FROM_LUMA_SSE4(W1, H1, W2, H2, cpu) \
 +p.chroma_copy_ps[X265_CSP_I420][LUMA_ ## W1 ## x ## H1] = 
 x265_blockcopy_ps_ ## W2 ## x ## H2 ## cpu;
 +
 +// For X265_CSP_I420 chroma width and height will be half of luma width and 
 height
 +#define CHROMA_BLOCKCOPY_SSE4(cpu) \

When the macro accepts a cpu type argument, adding SSE4 to the name is 
redundant (and confusing).
There should probably be a generic I420 macro that maps luma blocks to I420 
blocks, so that adding more color spaces does not multiply the amount of code 
in this file.

 +SETUP_CHROMA_FROM_LUMA_SSE4(8,   8, 4,  4,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(8,   4, 4,  2,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(4,   8, 2,  4,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(16, 16, 8,  8,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(16,  8, 8,  4,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(8,  16, 4,  8,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(16, 12, 8,  6,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(12, 16, 6,  8,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(16,  4, 8,  2,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(4,  16, 2,  8,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(32, 32, 16, 16, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(32, 16, 16, 8,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(16, 32, 8,  16, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(32, 24, 16, 12, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(24, 32, 12, 16, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(32,  8, 16, 4,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(8,  32, 4,  16, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(64, 64, 32, 32, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(64, 32, 32, 16, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(32, 64, 16, 32, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(64, 48, 32, 24, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(48, 64, 24, 32, cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(64, 16, 32, 8,  cpu); \
 +SETUP_CHROMA_FROM_LUMA_SSE4(16, 64, 8,  32, cpu);
 +
 using namespace x265;
 
 namespace {
 @@ -591,6 +620,7 @@
  CHROMA_FILTERS(_sse4);
  LUMA_FILTERS(_sse4);
  HEVC_SATD(sse4);
 +CHROMA_BLOCKCOPY_SSE4(_sse4);
  p.chroma_copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4;
  p.chroma_copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4;
  p.chroma_copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4;
 diff -r b353d170c54f -r 59646d515e79 source/test/pixelharness.cpp
 --- a/source/test/pixelharness.cppMon Nov 18 19:34:07 2013 +0530
 +++ b/source/test/pixelharness.cppMon Nov 18 20:53:29 2013 +0530
 @@ -763,12 +763,15 @@
  }
  }
 
 -if 

Re: [x265] [PATCH] TComYuv::copyPartToPartChroma, blockcopy_pp asm integration

2013-11-18 Thread Steve Borho

On Nov 18, 2013, at 9:56 AM, prav...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Praveen Tiwari
 # Date 1384790206 -19800
 # Node ID a5f618af8d963efafaa8581f4484066b13f4f614
 # Parent  49a556cf22721d846a94e07c1933fcd092b898dd
 TComYuv::copyPartToPartChroma, blockcopy_pp asm integration
 
 diff -r 49a556cf2272 -r a5f618af8d96 source/Lib/TLibCommon/TComYuv.cpp
 --- a/source/Lib/TLibCommon/TComYuv.cpp   Mon Nov 18 21:00:45 2013 +0530
 +++ b/source/Lib/TLibCommon/TComYuv.cpp   Mon Nov 18 21:26:46 2013 +0530
 @@ -327,7 +327,8 @@
 if (srcU == dstU) return;
 uint32_t srcstride = getCStride();
 uint32_t dststride = dstPicYuv-getCStride();
 -primitives.blockcpy_pp(width, height, dstU, dststride, srcU, 
 srcstride);
 +int part = partitionFromSizes(width  1, height  1);

You can't make that kind of assumption about the relative chroma/luma size 
(the 2:1 ratio in both dimensions only holds for 4:2:0); can you not just use 
m_part?

 +primitives.chroma_copy_pp[m_csp][part](dstU, dststride, srcU, 
 srcstride);
 }
 else if (chromaId == 1)
 {
 @@ -336,7 +337,8 @@
 if (srcV == dstV) return;
 uint32_t srcstride = getCStride();
 uint32_t dststride = dstPicYuv-getCStride();
 -primitives.blockcpy_pp(width, height, dstV, dststride, srcV, 
 srcstride);
 +int part = partitionFromSizes(width  1, height  1);
 +primitives.chroma_copy_pp[m_csp][part](dstV, dststride, srcV, 
 srcstride);
 }
 else
 {
 @@ -347,8 +349,9 @@
 if (srcU == dstU  srcV == dstV) return;
 uint32_t srcstride = getCStride();
 uint32_t dststride = dstPicYuv-getCStride();
 -primitives.blockcpy_pp(width, height, dstU, dststride, srcU, 
 srcstride);
 -primitives.blockcpy_pp(width, height, dstV, dststride, srcV, 
 srcstride);
 +int part = partitionFromSizes(width  1, height  1);
 +primitives.chroma_copy_pp[m_csp][part](dstU, dststride, srcU, 
 srcstride);
 +primitives.chroma_copy_pp[m_csp][part](dstV, dststride, srcV, 
 srcstride);
 }
 }
 
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel



signature.asc
Description: Message signed with OpenPGP using GPGMail
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] asm integration for blockcopy_ps

2013-11-18 Thread Steve Borho

On Nov 18, 2013, at 10:18 AM, prav...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Praveen Tiwari
 # Date 1384791507 -19800
 # Node ID 4c5daf21c1583cae93dbdf404a1b68aeced6b690
 # Parent  a5f618af8d963efafaa8581f4484066b13f4f614
 asm integration for blockcopy_ps
 
 diff -r a5f618af8d96 -r 4c5daf21c158 source/Lib/TLibCommon/TComYuv.cpp
 --- a/source/Lib/TLibCommon/TComYuv.cpp   Mon Nov 18 21:26:46 2013 +0530
 +++ b/source/Lib/TLibCommon/TComYuv.cpp   Mon Nov 18 21:48:27 2013 +0530
 @@ -365,7 +365,8 @@
 uint32_t srcstride = getCStride();
 uint32_t dststride = dstPicYuv-m_cwidth;
 
 -primitives.blockcpy_sp(width, height, dstU, dststride, srcU, 
 srcstride);
 +int part = partitionFromSizes(width  1, height  1);

Same here: you can't assume luma partitions are twice the width/height of chroma.

 +primitives.chroma_copy_ps[m_csp][part](dstU, dststride, srcU, 
 srcstride);
 }
 else if (chromaId == 1)
 {
 @@ -375,7 +376,8 @@
 uint32_t srcstride = getCStride();
 uint32_t dststride = dstPicYuv-m_cwidth;
 
 -primitives.blockcpy_sp(width, height, dstV, dststride, srcV, 
 srcstride);
 +int part = partitionFromSizes(width  1, height  1);
 +primitives.chroma_copy_ps[m_csp][part](dstV, dststride, srcV, 
 srcstride);
 }
 else
 {
 @@ -387,8 +389,9 @@
 uint32_t srcstride = getCStride();
 uint32_t dststride = dstPicYuv-m_cwidth;
 
 -primitives.blockcpy_sp(width, height, dstU, dststride, srcU, 
 srcstride);
 -primitives.blockcpy_sp(width, height, dstV, dststride, srcV, 
 srcstride);
 +int part = partitionFromSizes(width  1, height  1);
 +primitives.chroma_copy_ps[m_csp][part](dstU, dststride, srcU, 
 srcstride);
 +primitives.chroma_copy_ps[m_csp][part](dstV, dststride, srcV, 
 srcstride);
 }
 }
 
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel



signature.asc
Description: Message signed with OpenPGP using GPGMail
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] added csp support for blpckcopy_ps

2013-11-18 Thread chen
At 2013-11-19 04:42:21,Steve Borho st...@borho.org wrote:

On Nov 18, 2013, at 9:23 AM, prav...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Praveen Tiwari
 # Date 1384788209 -19800
 # Node ID 59646d515e79b4d0f9a3a72c77c7af17a83bf3d9
 # Parent  b353d170c54f0e33a8869c413be226a48deb1f5c
 added csp support for blpckcopy_ps
 
 diff -r b353d170c54f -r 59646d515e79 source/common/pixel.cpp
 --- a/source/common/pixel.cpp Mon Nov 18 19:34:07 2013 +0530
 +++ b/source/common/pixel.cpp Mon Nov 18 20:53:29 2013 +0530
 @@ -837,7 +837,7 @@
 #define CHROMA(W, H) \
  p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_cW, H; \
  p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_cW, H; \
 -p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\
 +p.chroma_copy_ps[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, 
 H;\
  p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_cW, H;
 
 #define LUMA(W, H) \
 diff -r b353d170c54f -r 59646d515e79 source/common/primitives.h
 --- a/source/common/primitives.h Mon Nov 18 19:34:07 2013 +0530
 +++ b/source/common/primitives.h Mon Nov 18 20:53:29 2013 +0530
 @@ -247,7 +247,7 @@
  copy_sp_t   luma_copy_sp[NUM_LUMA_PARTITIONS];
  copy_sp_t   chroma_copy_sp[NUM_CHROMA_PARTITIONS];
  copy_ps_t   luma_copy_ps[NUM_LUMA_PARTITIONS];
 -copy_ps_t   chroma_copy_ps[NUM_CHROMA_PARTITIONS];
 +copy_ps_t   chroma_copy_ps[NUM_CSP][NUM_CHROMA_PARTITIONS];
 
  pixel_sub_ps_t  luma_sub_ps[NUM_LUMA_PARTITIONS];
  pixel_sub_ps_t  chroma_sub_ps[NUM_CHROMA_PARTITIONS];
 diff -r b353d170c54f -r 59646d515e79 source/common/x86/asm-primitives.cpp
 --- a/source/common/x86/asm-primitives.cpp Mon Nov 18 19:34:07 2013 +0530
 +++ b/source/common/x86/asm-primitives.cpp Mon Nov 18 20:53:29 2013 +0530
 @@ -141,7 +141,6 @@
  p.chroma_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x 
 ## H ## cpu; \
  p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x 
 ## H ## cpu; \
  p.chroma_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x 
 ## H ## cpu; \
 -p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = x265_blockcopy_ps_ ## W ## x 
 ## H ## cpu; \
  p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H 
 ## cpu;
 
 #define SETUP_CHROMA_SP_FUNC_DEF(W, H, cpu) \
 @@ -380,6 +379,36 @@
  SETUP_LUMA_BLOCKCOPY_FUNC_DEF(64, 16, cpu); \
  SETUP_LUMA_BLOCKCOPY_FUNC_DEF(16, 64, cpu);
 
 +#define SETUP_CHROMA_FROM_LUMA_SSE4(W1, H1, W2, H2, cpu) \
 +p.chroma_copy_ps[X265_CSP_I420][LUMA_ ## W1 ## x ## H1] = 
 x265_blockcopy_ps_ ## W2 ## x ## H2 ## cpu;
 +
 +// For X265_CSP_I420 chroma width and height will be half of luma width and 
 height
 +#define CHROMA_BLOCKCOPY_SSE4(cpu) \

When the macro accepts a cpu type argument, adding SSE4 to the name is 
redundant (and confusing).
There should probably be a generic I420 macro that maps luma blocks to I420 
blocks, so that adding more color spaces does not multiply the amount of code 
in this file.

Steve gives us a good idea; we could change the macro to 
SETUP_CHROMA_FROM_LUMA(W1, H1, W2, H2, cpu, csp).
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH Review only] asm: code for transpose_8x8 routine

2013-11-18 Thread murugan
# HG changeset patch
# User Murugan Vairavel muru...@multicorewareinc.com
# Date 1384842189 -19800
#  Tue Nov 19 11:53:09 2013 +0530
# Node ID 3a94cc365533bf7def255dc5b28e6a6a1d1bfa50
# Parent  f6a050b79cfa400aa432f49ee8a4c2b9f20cf930
asm: code for transpose_8x8 routine

diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp  Tue Nov 19 11:25:00 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp  Tue Nov 19 11:53:09 2013 +0530
@@ -546,6 +546,7 @@
 p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
 p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
 p.transpose[BLOCK_4x4] = x265_transpose4_sse2;
+p.transpose[BLOCK_8x8] = x265_transpose8_sse2;
 }
 if (cpuMask  X265_CPU_SSSE3)
 {
diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Tue Nov 19 11:25:00 2013 +0530
+++ b/source/common/x86/pixel-a.asm Tue Nov 19 11:53:09 2013 +0530
@@ -8359,3 +8359,45 @@
 movu [r0],m0
 
 RET
+
+;-
+; void transpose_8x8(pixel *dst, pixel *src, intptr_t stride)
+;-
+INIT_XMM sse2
+cglobal transpose8, 3, 3, 8, dest, src, stride
+
+movh m0,[r1]
+movh m1,[r1 + r2]
+movh m2,[r1 + 2 * r2]
+lea  r1,[r1 + 2 * r2]
+movh m3,[r1 + r2]
+movh m4,[r1 + 2 * r2]
+lea  r1,[r1 + 2 * r2]
+movh m5,[r1 + r2]
+movh m6,[r1 + 2 * r2]
+lea  r1,[r1 + 2 * r2]
+movh m7,[r1 + r2]
+
+punpcklbwm0,m1
+punpcklbwm2,m3
+punpcklbwm4,m5
+punpcklbwm6,m7
+movu m1,m0
+punpcklwdm0,m2
+punpckhwdm1,m2
+movu m5,m4
+punpcklwdm4,m6
+punpckhwdm5,m6
+movu m2,m0
+punpckldqm0,m4
+punpckhdqm2,m4
+movu m3,m1
+punpckldqm1,m5
+punpckhdqm3,m5
+
+movu [r0], m0
+movu [r0 + 16],m2
+movu [r0 + 32],m1
+movu [r0 + 48],m3
+
+RET
diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Tue Nov 19 11:25:00 2013 +0530
+++ b/source/common/x86/pixel.h Tue Nov 19 11:53:09 2013 +0530
@@ -366,5 +366,6 @@
 void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, 
intptr_t stride);
 void x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, 
intptr_t stride);
 void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride);
+void x265_transpose8_sse2(pixel *dest, pixel *src, intptr_t stride);
 
 #endif // ifndef X265_I386_PIXEL_H
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames

2013-11-18 Thread Gopu Govindaswamy
# HG changeset patch
# User Gopu Govindaswamy g...@multicorewareinc.com
# Date 1384842731 -19800
# Node ID c386acea7ba1ca48f32060f265586618ee744a9f
# Parent  2f5f538d2cbca3b46e8d27d860e9787cc19f406f
b-pyramid implementation: Allow the use of B-frames as references for non B and 
B frames

When b-pyramid is enabled, the bitrate is reduced and there is not much
difference in performance. The PSNR changes only by a few hundredths: it
increases for some of the clips and decreases for others.

Test results, for reference, with b-pyramid enabled and disabled:
cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --b-adapt=2
Enable B-reference  : --b-pyramid=1
Disable B-reference : --b-pyramid=0

Results:
Enable / Disable

Clips - Johnny_1280x720_60.y4m
Total time taken - 11.19s (8.94 fps) / 13.44s (7.44 fps)
Bitrates  - 303.52 kb/s / 326.79 kb/s
PSNR  - 40.679 / 40.612

Clips - Cactus_1920x1080_50.y4m
Total Time taken - 44.61s (2.24 fps) / 48.23s (2.07 fps)
Bitrates   - 3420.80 kb/s / 3575.20 kb/s
PSNR   - 35.709 / 35.726

Clips  - BasketballDrive_1920x1080_50.y4m
Total time taken -  54.15s (1.85 fps) / 53.72s (1.86 fps)
Bitrates  - 4114.07 kb/s / 4310.45 kb/s
PSNR  - 37.283 / 37.290

Clips  - FourPeople_1280x720_60
Total time taken -  11.79s (8.48 fps) / 12.16s (8.23 fps)
Bitrates  - 514.90 kb/s / 539.08 kb/s
PSNR  - 39.782 / 39.757

diff -r 2f5f538d2cbc -r c386acea7ba1 source/common/common.cpp
--- a/source/common/common.cpp  Mon Nov 18 16:44:31 2013 -0600
+++ b/source/common/common.cpp  Tue Nov 19 12:02:11 2013 +0530
@@ -165,6 +165,7 @@
 param-bframes = 3;
 param-lookaheadDepth = 40;
 param-bFrameAdaptive = X265_B_ADAPT_FAST;
+param-bpyramid = 0;
 param-scenecutThreshold = 40; /* Magic number pulled in from x264*/
 
 /* Intra Coding Tools */
@@ -634,6 +635,7 @@
 x265_log(param, X265_LOG_INFO, RDpenalty: %d\n, 
param-rdPenalty);
 }
 x265_log(param, X265_LOG_INFO, Lookahead / bframes / badapt : %d / %d / 
%d\n, param-lookaheadDepth, param-bframes, param-bFrameAdaptive);
+x265_log(param, X265_LOG_INFO, b-pyramid / weightp / ref: %d / %d / 
%d\n, param-bpyramid, param-bEnableWeightedPred, param-maxNumReferences);
 x265_log(param, X265_LOG_INFO, tools: );
 #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, %s , STR)
 TOOLOPT(param-bEnableRectInter, rect);
@@ -642,7 +644,6 @@
 TOOLOPT(param-bEnableConstrainedIntra, cip);
 TOOLOPT(param-bEnableEarlySkip, esd);
 fprintf(stderr, rd=%d , param-rdLevel);
-fprintf(stderr, ref=%d , param-maxNumReferences);
 
 TOOLOPT(param-bEnableLoopFilter, lft);
 if (param-bEnableSAO)
@@ -664,7 +665,6 @@
 else
 fprintf(stderr, tskip );
 }
-TOOLOPT(param-bEnableWeightedPred, weightp);
 TOOLOPT(param-bEnableWeightedBiPred, weightbp);
 TOOLOPT(param-rc.aqMode, aq-mode);
 if (param-rc.aqMode)
@@ -764,6 +764,7 @@
 }
 OPT(input-csp) p-sourceCsp = ::parseCspName(value, berror);
 OPT(me)p-searchMethod = ::parseName(value, 
x265_motion_est_names, berror);
+OPT(b-pyramid) p-bpyramid = ::parseName(value, x265_b_pyramid_names, 
berror);
 else
 return X265_PARAM_BAD_NAME;
 #undef OPT
@@ -821,6 +822,7 @@
 BOOL(p-bEnableSAO, sao);
 s += sprintf(s,  sao-lcu-bounds=%d, p-saoLcuBoundary);
 s += sprintf(s,  sao-lcu-opt=%d, p-saoLcuBasedOptimization);
+s += sprintf(s,  b-pyramid=%d, p-bpyramid);
 #undef BOOL
 
 return buf;
diff -r 2f5f538d2cbc -r c386acea7ba1 source/common/common.h
--- a/source/common/common.hMon Nov 18 16:44:31 2013 -0600
+++ b/source/common/common.hTue Nov 19 12:02:11 2013 +0530
@@ -107,6 +107,7 @@
 #define X265_LOG2(x)  log2(x)
 #endif
 
+static const char * const x265_b_pyramid_names[] = {none, normal, 0};
 /* defined in common.cpp */
 int64_t x265_mdate(void);
 void x265_log(x265_param *param, int level, const char *fmt, ...);
diff -r 2f5f538d2cbc -r c386acea7ba1 source/encoder/dpb.cpp
--- a/source/encoder/dpb.cppMon Nov 18 16:44:31 2013 -0600
+++ b/source/encoder/dpb.cppTue Nov 19 12:02:11 2013 +0530
@@ -78,7 +78,17 @@
 m_lastIDR = pocCurr;
 }
 slice-setLastIDR(m_lastIDR);
-slice-setReferenced(slice-getSliceType() != B_SLICE);
+
+if (slice-getSliceType() != B_SLICE)
+slice-setReferenced(true);
+else
+{
+if (pic-m_lowres.sliceType == X265_TYPE_BREF)
+slice-setReferenced(true);
+else
+slice-setReferenced(false);
+}
+
 slice-setTemporalLayerNonReferenceFlag(!slice-isReferenced());
 // Set the nal unit type
 slice-setNalUnitType(getNalUnitType(pocCurr, m_lastIDR, pic));
diff -r 2f5f538d2cbc -r c386acea7ba1 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cppMon Nov 18 16:44:31 2013 -0600
+++ b/source/encoder/encoder.cppTue Nov 19 12:02:11 2013 +0530
@@ -1180,6 +1180,11 @@
 {
 _param-bEnableRDOQTS = 0;
 }
+if (_param-bpyramid  !_param-bframes)
+{

[x265] [PATCH 1 of 9] api: remove reserved NAL enums and C++ style comments from public header

2013-11-18 Thread Steve Borho
# HG changeset patch
# User Steve Borho st...@borho.org
# Date 1384824695 21600
#  Mon Nov 18 19:31:35 2013 -0600
# Node ID 85f3089367c11655b4e23c8a2dc6232a6d4934ed
# Parent  2f5f538d2cbca3b46e8d27d860e9787cc19f406f
api: remove reserved NAL enums and C++ style comments from public header

diff -r 2f5f538d2cbc -r 85f3089367c1 source/x265.h
--- a/source/x265.h Mon Nov 18 16:44:31 2013 -0600
+++ b/source/x265.h Mon Nov 18 19:31:35 2013 -0600
@@ -35,84 +35,34 @@
  *  opaque handler for encoder */
 typedef struct x265_encoder x265_encoder;
 
-// TODO: Existing names used for the different NAL unit types can be altered 
to better reflect the names in the spec.
-//   However, the names in the spec are not yet stable at this point. Once 
the names are stable, a cleanup
-//   effort can be done without use of macros to alter the names used to 
indicate the different NAL unit types.
 typedef enum
 {
-NAL_UNIT_CODED_SLICE_TRAIL_N = 0, // 0
-NAL_UNIT_CODED_SLICE_TRAIL_R,   // 1
-
-NAL_UNIT_CODED_SLICE_TSA_N, // 2
-NAL_UNIT_CODED_SLICE_TLA_R, // 3
-
-NAL_UNIT_CODED_SLICE_STSA_N,// 4
-NAL_UNIT_CODED_SLICE_STSA_R,// 5
-
-NAL_UNIT_CODED_SLICE_RADL_N,// 6
-NAL_UNIT_CODED_SLICE_RADL_R,// 7
-
-NAL_UNIT_CODED_SLICE_RASL_N,// 8
-NAL_UNIT_CODED_SLICE_RASL_R,// 9
-
-NAL_UNIT_RESERVED_VCL_N10,
-NAL_UNIT_RESERVED_VCL_R11,
-NAL_UNIT_RESERVED_VCL_N12,
-NAL_UNIT_RESERVED_VCL_R13,
-NAL_UNIT_RESERVED_VCL_N14,
-NAL_UNIT_RESERVED_VCL_R15,
-
-NAL_UNIT_CODED_SLICE_BLA_W_LP,  // 16
-NAL_UNIT_CODED_SLICE_BLA_W_RADL, // 17
-NAL_UNIT_CODED_SLICE_BLA_N_LP,  // 18
-NAL_UNIT_CODED_SLICE_IDR_W_RADL, // 19
-NAL_UNIT_CODED_SLICE_IDR_N_LP,  // 20
-NAL_UNIT_CODED_SLICE_CRA,   // 21
-NAL_UNIT_RESERVED_IRAP_VCL22,
-NAL_UNIT_RESERVED_IRAP_VCL23,
-
-NAL_UNIT_RESERVED_VCL24,
-NAL_UNIT_RESERVED_VCL25,
-NAL_UNIT_RESERVED_VCL26,
-NAL_UNIT_RESERVED_VCL27,
-NAL_UNIT_RESERVED_VCL28,
-NAL_UNIT_RESERVED_VCL29,
-NAL_UNIT_RESERVED_VCL30,
-NAL_UNIT_RESERVED_VCL31,
-
-NAL_UNIT_VPS,   // 32
-NAL_UNIT_SPS,   // 33
-NAL_UNIT_PPS,   // 34
-NAL_UNIT_ACCESS_UNIT_DELIMITER, // 35
-NAL_UNIT_EOS,   // 36
-NAL_UNIT_EOB,   // 37
-NAL_UNIT_FILLER_DATA,   // 38
-NAL_UNIT_PREFIX_SEI,// 39
-NAL_UNIT_SUFFIX_SEI,// 40
-NAL_UNIT_RESERVED_NVCL41,
-NAL_UNIT_RESERVED_NVCL42,
-NAL_UNIT_RESERVED_NVCL43,
-NAL_UNIT_RESERVED_NVCL44,
-NAL_UNIT_RESERVED_NVCL45,
-NAL_UNIT_RESERVED_NVCL46,
-NAL_UNIT_RESERVED_NVCL47,
-NAL_UNIT_UNSPECIFIED_48,
-NAL_UNIT_UNSPECIFIED_49,
-NAL_UNIT_UNSPECIFIED_50,
-NAL_UNIT_UNSPECIFIED_51,
-NAL_UNIT_UNSPECIFIED_52,
-NAL_UNIT_UNSPECIFIED_53,
-NAL_UNIT_UNSPECIFIED_54,
-NAL_UNIT_UNSPECIFIED_55,
-NAL_UNIT_UNSPECIFIED_56,
-NAL_UNIT_UNSPECIFIED_57,
-NAL_UNIT_UNSPECIFIED_58,
-NAL_UNIT_UNSPECIFIED_59,
-NAL_UNIT_UNSPECIFIED_60,
-NAL_UNIT_UNSPECIFIED_61,
-NAL_UNIT_UNSPECIFIED_62,
-NAL_UNIT_UNSPECIFIED_63,
-NAL_UNIT_INVALID,
+NAL_UNIT_CODED_SLICE_TRAIL_N = 0,
+NAL_UNIT_CODED_SLICE_TRAIL_R,
+NAL_UNIT_CODED_SLICE_TSA_N,
+NAL_UNIT_CODED_SLICE_TLA_R,
+NAL_UNIT_CODED_SLICE_STSA_N,
+NAL_UNIT_CODED_SLICE_STSA_R,
+NAL_UNIT_CODED_SLICE_RADL_N,
+NAL_UNIT_CODED_SLICE_RADL_R,
+NAL_UNIT_CODED_SLICE_RASL_N,
+NAL_UNIT_CODED_SLICE_RASL_R,
+NAL_UNIT_CODED_SLICE_BLA_W_LP = 16,
+NAL_UNIT_CODED_SLICE_BLA_W_RADL,
+NAL_UNIT_CODED_SLICE_BLA_N_LP,
+NAL_UNIT_CODED_SLICE_IDR_W_RADL,
+NAL_UNIT_CODED_SLICE_IDR_N_LP,
+NAL_UNIT_CODED_SLICE_CRA,
+NAL_UNIT_VPS = 32,
+NAL_UNIT_SPS,
+NAL_UNIT_PPS,
+NAL_UNIT_ACCESS_UNIT_DELIMITER,
+NAL_UNIT_EOS,
+NAL_UNIT_EOB,
+NAL_UNIT_FILLER_DATA,
+NAL_UNIT_PREFIX_SEI,
+NAL_UNIT_SUFFIX_SEI,
+NAL_UNIT_INVALID = 64,
 } NalUnitType;
 
 /* The data within the payload is already NAL-encapsulated; the type
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH 4 of 9] api: make x265_encoder_get_stats() somewhat future proof

2013-11-18 Thread Steve Borho
# HG changeset patch
# User Steve Borho st...@borho.org
# Date 1384825870 21600
#  Mon Nov 18 19:51:10 2013 -0600
# Node ID c56f65c702978bf47b256528b503dd62602696dd
# Parent  ba9cb99c569329d13c66fd519f3f5ce8931c535c
api: make x265_encoder_get_stats() somewhat future proof

By passing in the size of x265_stats as the user application knows it, the
encoder can know not to try to set new fields that were added to the end of
x265_stats.  This requires some discipline on our part to only append to the
structure and to always check the size for any new fields we might add.

diff -r ba9cb99c5693 -r c56f65c70297 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cppMon Nov 18 19:37:45 2013 -0600
+++ b/source/encoder/encoder.cppMon Nov 18 19:51:10 2013 -0600
@@ -478,29 +478,35 @@
 }
 }
 
-void Encoder::fetchStats(x265_stats *stats)
+void Encoder::fetchStats(x265_stats *stats, size_t statsSizeBytes)
 {
-stats-globalPsnrY = m_analyzeAll.m_psnrSumY;
-stats-globalPsnrU = m_analyzeAll.m_psnrSumU;
-stats-globalPsnrV = m_analyzeAll.m_psnrSumV;
-stats-encodedPictureCount = m_analyzeAll.m_numPics;
-stats-totalWPFrames = m_numWPFrames;
-stats-accBits = m_analyzeAll.m_accBits;
-stats-elapsedEncodeTime = (double)(x265_mdate() - m_encodeStartTime) / 
100;
-if (stats-encodedPictureCount  0)
+if (statsSizeBytes = sizeof(stats))
 {
-stats-globalSsim = m_analyzeAll.m_globalSsim / 
stats-encodedPictureCount;
-stats-globalPsnr = (stats-globalPsnrY * 6 + stats-globalPsnrU + 
stats-globalPsnrV) / (8 * stats-encodedPictureCount);
-stats-elapsedVideoTime = (double)stats-encodedPictureCount / 
param.frameRate;
-stats-bitrate = (0.001f * stats-accBits) / stats-elapsedVideoTime;
+stats-globalPsnrY = m_analyzeAll.m_psnrSumY;
+stats-globalPsnrU = m_analyzeAll.m_psnrSumU;
+stats-globalPsnrV = m_analyzeAll.m_psnrSumV;
+stats-encodedPictureCount = m_analyzeAll.m_numPics;
+stats-totalWPFrames = m_numWPFrames;
+stats-accBits = m_analyzeAll.m_accBits;
+stats-elapsedEncodeTime = (double)(x265_mdate() - m_encodeStartTime) 
/ 100;
+if (stats-encodedPictureCount  0)
+{
+stats-globalSsim = m_analyzeAll.m_globalSsim / 
stats-encodedPictureCount;
+stats-globalPsnr = (stats-globalPsnrY * 6 + stats-globalPsnrU + 
stats-globalPsnrV) / (8 * stats-encodedPictureCount);
+stats-elapsedVideoTime = (double)stats-encodedPictureCount / 
param.frameRate;
+stats-bitrate = (0.001f * stats-accBits) / 
stats-elapsedVideoTime;
+}
+else
+{
+stats-globalSsim = 0;
+stats-globalPsnr = 0;
+stats-bitrate = 0;
+stats-elapsedVideoTime = 0;
+}
 }
-else
-{
-stats-globalSsim = 0;
-stats-globalPsnr = 0;
-stats-bitrate = 0;
-stats-elapsedVideoTime = 0;
-}
+/* If new statistics are added to x265_stats, we must check here whether 
the
+ * structure provided by the user is the new structure or an older one (for
+ * future safety) */
 }
 
 void Encoder::writeLog(int argc, char **argv)
@@ -524,7 +530,7 @@
 fprintf(m_csvfpt, , %s, , buffer);
 
 x265_stats stats;
-fetchStats(stats);
+fetchStats(stats, sizeof(stats));
 
 // elapsed time, fps, bitrate
 fprintf(m_csvfpt, %.2f, %.2f, %.2f,,
@@ -1484,11 +1490,11 @@
 EXTERN_CYCLE_COUNTER(ME);
 
 extern C
-void x265_encoder_get_stats(x265_encoder *enc, x265_stats *outputStats)
+void x265_encoder_get_stats(x265_encoder *enc, x265_stats *outputStats, 
uint32_t statsSizeBytes)
 {
 Encoder *encoder = static_castEncoder*(enc);
 
-encoder-fetchStats(outputStats);
+encoder-fetchStats(outputStats, statsSizeBytes);
 }
 
 extern C
diff -r ba9cb99c5693 -r c56f65c70297 source/encoder/encoder.h
--- a/source/encoder/encoder.h  Mon Nov 18 19:37:45 2013 -0600
+++ b/source/encoder/encoder.h  Mon Nov 18 19:51:10 2013 -0600
@@ -116,7 +116,7 @@
 
 int getStreamHeaders(NALUnitEBSP **nalunits);
 
-void fetchStats(x265_stats* stats);
+void fetchStats(x265_stats* stats, size_t statsSizeBytes);
 
 void writeLog(int argc, char **argv);
 
diff -r ba9cb99c5693 -r c56f65c70297 source/x265.cpp
--- a/source/x265.cpp   Mon Nov 18 19:37:45 2013 -0600
+++ b/source/x265.cpp   Mon Nov 18 19:51:10 2013 -0600
@@ -653,7 +653,7 @@
 if (cliopt.bProgress)
 fprintf(stderr,   
 \r);
 
-x265_encoder_get_stats(encoder, stats);
+x265_encoder_get_stats(encoder, stats, sizeof(stats));
 if (param.csvfn  !b_ctrl_c)
 x265_encoder_log(encoder, argc, argv);
 x265_encoder_close(encoder);
diff -r ba9cb99c5693 -r c56f65c70297 source/x265.h
--- a/source/x265.h Mon Nov 18 19:37:45 2013 -0600
+++ b/source/x265.h Mon Nov 18 19:51:10 2013 -0600
@@ 

[x265] [PATCH 3 of 9] api: remove old suffix and prefix from C symbols in comment, reflow

2013-11-18 Thread Steve Borho
# HG changeset patch
# User Steve Borho st...@borho.org
# Date 1384825065 21600
#  Mon Nov 18 19:37:45 2013 -0600
# Node ID ba9cb99c569329d13c66fd519f3f5ce8931c535c
# Parent  08130599663661b31deeb7bf6771c55c1d7a3027
api: remove old suffix and prefix from C symbols in comment, reflow

diff -r 081305996636 -r ba9cb99c5693 source/x265.h
--- a/source/x265.h Mon Nov 18 19:35:53 2013 -0600
+++ b/source/x265.h Mon Nov 18 19:37:45 2013 -0600
@@ -65,11 +65,11 @@
 NAL_UNIT_INVALID = 64,
 } NalUnitType;
 
-/* The data within the payload is already NAL-encapsulated; the type
- * is merely in the struct for easy access by the calling application.
- * All data returned in an x265_nal_t, including the data in p_payload, is no 
longer
- * valid after the next call to x265_encoder_encode.  Thus it must be used or 
copied
- * before calling x265_encoder_encode again. */
+/* The data within the payload is already NAL-encapsulated; the type is merely
+ * in the struct for easy access by the calling application.  All data returned
+ * in an x265_nal, including the data in payload, is no longer valid after the
+ * next call to x265_encoder_encode.  Thus it must be used or copied before
+ * calling x265_encoder_encode again. */
 typedef struct x265_nal
 {
 uint32_t type;/* NalUnitType */
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH 0 of 9] API improvements and cleanups

2013-11-18 Thread Steve Borho
Go through and enhance comments and fix some broken ones.  Simplify the
API in a few places, remove Hungarian remnants, and future-proof one of
the public methods.
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH 9 of 9] common: nit

2013-11-18 Thread Steve Borho
# HG changeset patch
# User Steve Borho st...@borho.org
# Date 1384846824 21600
#  Tue Nov 19 01:40:24 2013 -0600
# Node ID f1f0a389a58185e98476bb9e8496735002bfe2a7
# Parent  baee128fdb029ff3379a3dc7b2574b3d52a6e264
common: nit

diff -r baee128fdb02 -r f1f0a389a581 source/common/common.cpp
--- a/source/common/common.cpp  Tue Nov 19 01:40:15 2013 -0600
+++ b/source/common/common.cpp  Tue Nov 19 01:40:24 2013 -0600
@@ -165,7 +165,7 @@
 param-bframes = 3;
 param-lookaheadDepth = 40;
 param-bFrameAdaptive = X265_B_ADAPT_FAST;
-param-scenecutThreshold = 40; /* Magic number pulled in from x264*/
+param-scenecutThreshold = 40; /* Magic number pulled in from x264 */
 
 /* Intra Coding Tools */
 param-bEnableConstrainedIntra = 0;
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH 2 of 9] api: remove hungarian prefixes from x265_nal members

2013-11-18 Thread Steve Borho
# HG changeset patch
# User Steve Borho st...@borho.org
# Date 1384824953 21600
#  Mon Nov 18 19:35:53 2013 -0600
# Node ID 08130599663661b31deeb7bf6771c55c1d7a3027
# Parent  85f3089367c11655b4e23c8a2dc6232a6d4934ed
api: remove hungarian prefixes from x265_nal members

These particular prefixes came from x264 originally

diff -r 85f3089367c1 -r 081305996636 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cppMon Nov 18 19:31:35 2013 -0600
+++ b/source/encoder/encoder.cppMon Nov 18 19:35:53 2013 -0600
@@ -1375,15 +1375,15 @@
 size += nalSize;
 memsize += nalSize;
 
-m_nals[nalcount].i_type = nalu.m_nalUnitType;
-m_nals[nalcount].i_payload = size;
+m_nals[nalcount].type = nalu.m_nalUnitType;
+m_nals[nalcount].sizeBytes = size;
 }
 
 /* Setup payload pointers, now that we're done adding content to 
m_packetData */
 for (int i = 0; i  nalcount; i++)
 {
-m_nals[i].p_payload = (uint8_t*)m_packetData + offset;
-offset += m_nals[i].i_payload;
+m_nals[i].payload = (uint8_t*)m_packetData + offset;
+offset += m_nals[i].sizeBytes;
 }
 
 fail:
diff -r 85f3089367c1 -r 081305996636 source/x265.cpp
--- a/source/x265.cpp   Mon Nov 18 19:31:35 2013 -0600
+++ b/source/x265.cpp   Mon Nov 18 19:35:53 2013 -0600
@@ -205,8 +205,8 @@
 PPAScopeEvent(bitstream_write);
 for (uint32_t i = 0; i  nalcount; i++)
 {
-bitstreamFile.write((const char*)nal-p_payload, nal-i_payload);
-totalbytes += nal-i_payload;
+bitstreamFile.write((const char*)nal-payload, nal-sizeBytes);
+totalbytes += nal-sizeBytes;
 nal++;
 }
 }
diff -r 85f3089367c1 -r 081305996636 source/x265.h
--- a/source/x265.h Mon Nov 18 19:31:35 2013 -0600
+++ b/source/x265.h Mon Nov 18 19:35:53 2013 -0600
@@ -72,9 +72,9 @@
  * before calling x265_encoder_encode again. */
 typedef struct x265_nal
 {
-uint32_t i_type;  /* NalUnitType */
-uint32_t i_payload;   /* size in bytes */
-uint8_t* p_payload;
+uint32_t type;/* NalUnitType */
+uint32_t sizeBytes;   /* size in bytes */
+uint8_t* payload;
 } x265_nal;
 
 typedef struct x265_picture
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH 5 of 9] api: nit

2013-11-18 Thread Steve Borho
# HG changeset patch
# User Steve Borho st...@borho.org
# Date 1384825878 21600
#  Mon Nov 18 19:51:18 2013 -0600
# Node ID d8766641126dc98b6005076c7b489c4cc5906591
# Parent  c56f65c702978bf47b256528b503dd62602696dd
api: nit

diff -r c56f65c70297 -r d8766641126d source/x265.h
--- a/source/x265.h Mon Nov 18 19:51:10 2013 -0600
+++ b/source/x265.h Mon Nov 18 19:51:18 2013 -0600
@@ -215,7 +215,7 @@
 /*Level of Rate Distortion Optimization Allowed */
 typedef enum
 {
-X265_NO_RDO_NO_RDOQ, /* Partial RDO during mode decision (only at each 
depth/mode), no RDO in quantization*/
+X265_NO_RDO_NO_RDOQ, /* Partial RDO during mode decision (only at each 
depth/mode), no RDO in quantization */
 X265_NO_RDO, /* Partial RDO during mode decision (only at each 
depth/mode), quantization RDO enabled */
 X265_FULL_RDO/* Full RD-based mode decision */
 } X265_RDO_LEVEL;
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel