Re: [x265] [PATCH] asm: code for scale2D_64to32 routine
I think this function has a problem, and I am not sure whether it was verified with the testbench before it was uploaded. The problem is that j and l miss a pixel, so in some cases the result is wrong. At 2013-11-18 15:06:07,muru...@multicorewareinc.com wrote: # HG changeset patch # User Murugan Vairavel muru...@multicorewareinc.com # Date 1384757077 -19800 # Mon Nov 18 12:14:37 2013 +0530 # Node ID d756003f63691b7677b4cf4c98fbb2a1d67dbb02 # Parent e2895ce7bbeb2c3d845fee2578758d0012fa2cb4 asm: code for scale2D_64to32 routine diff -r e2895ce7bbeb -r d756003f6369 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Sun Nov 17 11:24:13 2013 -0600 +++ b/source/common/x86/asm-primitives.cpp Mon Nov 18 12:14:37 2013 +0530 @@ -529,6 +529,7 @@ PIXEL_AVG_W4(ssse3); p.scale1D_128to64 = x265_scale1D_128to64_ssse3; +p.scale2D_64to32 = x265_scale2D_64to32_ssse3; p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3; p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_ssse3; diff -r e2895ce7bbeb -r d756003f6369 source/common/x86/pixel-a.asm --- a/source/common/x86/pixel-a.asm Sun Nov 17 11:24:13 2013 -0600 +++ b/source/common/x86/pixel-a.asm Mon Nov 18 12:14:37 2013 +0530 @@ -8230,3 +8230,113 @@ movu [r0 + 48],m4 RET + +;- +; void scale2D_64to32(pixel *dst, pixel *src, intptr_t stride) +;- +INIT_XMM ssse3 +cglobal scale2D_64to32, 3, 4, 8, dest, src, stride + +movam7, [deinterleave_shuf] +mov r3d, 32 +.loop + +movum0, [r1] ;i +palignr m1, m0,1 ;j +movum2, [r1 + r2] ;k +palignr m3, m2,1 ;l +movum4, m0 +movum5, m2 + +pxorm4, m1;i^j +pxorm5, m3;k^l +por m4, m5;ij|kl + +pavgb m0, m1;s +pavgb m2, m3;t +movum5, m0 +pavgb m0, m2;(s+t+1)/2 +pxorm5, m2;s^t +pandm4, m5;(ij|kl)st +pandm4, [hmul_16p] +psubb m0, m4;Result + +movum1, [r1 + 16] ;i +palignr m2, m1,1 ;j +movum3, [r1 + r2 + 16];k +palignr m4, m3,1 ;l +movum5, m1 +movum6, m3 + +pxorm5, m2;i^j +pxorm6, m4;k^l +por m5, m6;ij|kl + +pavgb m1, m2;s +pavgb m3, m4;t +movum6, m1 +pavgb m1, m3;(s+t+1)/2 +pxorm6, m3;s^t +pandm5, m6;(ij|kl)st +pandm5, 
[hmul_16p] +psubb m1, m5;Result + +pshufb m0, m0,m7 +pshufb m1, m1,m7 + +punpcklqdqm0, m1 +movu [r0], m0 + +movum0, [r1 + 32] ;i +palignr m1, m0,1 ;j +movum2, [r1 + r2 + 32];k +palignr m3, m2,1 ;l +movum4, m0 +movum5, m2 + +pxorm4, m1;i^j +pxorm5, m3;k^l +por m4, m5;ij|kl + +pavgb m0, m1;s +pavgb m2, m3;t +movum5, m0 +pavgb m0, m2;(s+t+1)/2 +pxorm5, m2;s^t +pandm4, m5;(ij|kl)st +pandm4, [hmul_16p] +psubb m0, m4;Result + +movum1, [r1 + 48] ;i +palignr m2, m1,1 ;j +movum3, [r1 + r2 + 48];k +palignr m4, m3,1 ;l +movum5, m1 +movum6, m3 + +pxorm5, m2;i^j +pxorm6, m4;k^l +por m5, m6;ij|kl + +pavgb m1, m2;s +pavgb m3, m4;t +movum6, m1 +pavgb m1, m3;(s+t+1)/2 +pxorm6, m3;s^t +pandm5, m6;(ij|kl)st +pandm5, [hmul_16p] +psubb m1, m5;Result + +pshufb m0, m0,m7 +pshufb m1, m1,m7 + +punpcklqdqm0, m1 +
Re: [x265] [PATCH] TComYuv::addAvg, primitive function for luma and chroma loops
@@ -640,26 +621,9 @@ width = m_hChromaShift; height = m_vChromaShift; -for (y = height - 1; y = 0; y--) -{ -for (x = width - 1; x = 0; ) -{ -// note: chroma min width is 2 -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) shiftNum); -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) shiftNum); -x--; -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) shiftNum); -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) shiftNum); -x--; -} - -srcU0 += src0Stride; -srcU1 += src1Stride; -srcV0 += src0Stride; -srcV1 += src1Stride; -dstU += dststride; -dstV += dststride; -} +int part = partitionFromSizes(width, height); [Review comment: you use the chroma size to get the index here; I think this is an error.] +primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, srcU1, src1Stride); +primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, srcV1, src1Stride); } } ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] TComYuv::addAvg, primitive function for luma and chroma loops
# HG changeset patch # User Dnyaneshwar G dnyanesh...@multicorewareinc.com # Date 1384768323 -19800 # Mon Nov 18 15:22:03 2013 +0530 # Node ID cdd54aa200bd635395c01bbb07c156be4edbf7b1 # Parent ac9e64d8a80bffe33fdaa0a9b83fdbe84f39d0b0 TComYuv::addAvg, primitive function for luma and chroma loops diff -r ac9e64d8a80b -r cdd54aa200bd source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 12:26:44 2013 +0530 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 15:22:03 2013 +0530 @@ -589,9 +589,7 @@ void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) { -int x, y; uint32_t src0Stride, src1Stride, dststride; -int shiftNum, offset; int16_t* srcY0 = srcYuv0-getLumaAddr(partUnitIdx); int16_t* srcU0 = srcYuv0-getCbAddr(partUnitIdx); @@ -605,61 +603,24 @@ Pel* dstU = getCbAddr(partUnitIdx); Pel* dstV = getCrAddr(partUnitIdx); +int part = partitionFromSizes(width, height); + if (bLuma) { src0Stride = srcYuv0-m_width; src1Stride = srcYuv1-m_width; dststride = getStride(); -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; -offset = (1 (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; -for (y = 0; y height; y++) -{ -for (x = 0; x width; x += 4) -{ -dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] + offset) shiftNum); -dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] + offset) shiftNum); -dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] + offset) shiftNum); -dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] + offset) shiftNum); -} - -srcY0 += src0Stride; -srcY1 += src1Stride; -dstY += dststride; -} +primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride, srcY1, src1Stride); } if (bChroma) { -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; -offset = (1 (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; - src0Stride = srcYuv0-m_cwidth; src1Stride = srcYuv1-m_cwidth; dststride = getCStride(); -width = m_hChromaShift; -height = m_vChromaShift; - -for (y = height - 1; y = 0; y--) -{ -for 
(x = width - 1; x = 0; ) -{ -// note: chroma min width is 2 -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) shiftNum); -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) shiftNum); -x--; -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) shiftNum); -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) shiftNum); -x--; -} - -srcU0 += src0Stride; -srcU1 += src1Stride; -srcV0 += src0Stride; -srcV1 += src1Stride; -dstU += dststride; -dstV += dststride; -} +primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, srcU1, src1Stride); +primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, srcV1, src1Stride); } } diff -r ac9e64d8a80b -r cdd54aa200bd source/common/pixel.cpp --- a/source/common/pixel.cpp Mon Nov 18 12:26:44 2013 +0530 +++ b/source/common/pixel.cpp Mon Nov 18 15:22:03 2013 +0530 @@ -794,6 +794,27 @@ a += dstride; } } + +templateint bx, int by +void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride) +{ +int shiftNum, offset; +shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; +offset = (1 (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; + +for (int y = 0; y by; y++) +{ +for (int x = 0; x bx; x += 2) +{ +dst[x + 0] = ClipY((src0[x + 0] + src1[x + 0] + offset) shiftNum); +dst[x + 1] = ClipY((src0[x + 1] + src1[x + 1] + offset) shiftNum); +} + +src0 += src0Stride; +src1 += src1Stride; +dst += dstStride; +} +} } // end anonymous namespace namespace x265 { @@ -835,12 +856,14 @@ p.satd[LUMA_16x64] = satd816, 64; #define CHROMA(W, H) \ +p.chroma_addAvg[CHROMA_ ## W ## x ## H] = addAvgW, H; \ p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_cW, H; \ p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_cW, H; \ p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\ p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_cW, H; #define LUMA(W, H) \ +p.luma_addAvg[LUMA_ ## W ## x ## H] = addAvgW, H; \ p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_cW, H; \ p.luma_copy_sp[LUMA_ ## W 
## x ## H] = blockcopy_sp_cW, H; \ p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\ diff -r ac9e64d8a80b -r cdd54aa200bd source/common/primitives.h ---
[x265] [PATCH] asm: fix the bug caused on 32-bit linux due to satd routines
# HG changeset patch # User Yuvaraj Venkatesh yuva...@multicorewareinc.com # Date 1384769347 -19800 # Mon Nov 18 15:39:07 2013 +0530 # Node ID f076c5ca413a905d6d4e8c1bbea2638992cb21d7 # Parent e2895ce7bbeb2c3d845fee2578758d0012fa2cb4 asm: fix the bug caused on 32-bit linux due to satd routines. diff -r e2895ce7bbeb -r f076c5ca413a source/common/x86/pixel-a.asm --- a/source/common/x86/pixel-a.asm Sun Nov 17 11:24:13 2013 -0600 +++ b/source/common/x86/pixel-a.asm Mon Nov 18 15:39:07 2013 +0530 @@ -2239,27 +2239,42 @@ %else -cglobal pixel_satd_32x8, 4,6,8;if !WIN64 +%if WIN64 +cglobal pixel_satd_32x8, 4,8,8;if WIN64 cpuflag(avx) SATD_START_SSE2 m6, m7 -BACKUP_POINTERS -call pixel_satd_8x8_internal -RESTORE_AND_INC_POINTERS -BACKUP_POINTERS -call pixel_satd_8x8_internal -RESTORE_AND_INC_POINTERS -%if WIN64 == 0 -add r0, 8*SIZEOF_PIXEL -add r2, 8*SIZEOF_PIXEL -%endif -BACKUP_POINTERS -call pixel_satd_8x8_internal -RESTORE_AND_INC_POINTERS -%if WIN64 == 0 -add r0, 16*SIZEOF_PIXEL -add r2, 16*SIZEOF_PIXEL -%endif +mov r6, r0 +mov r7, r2 +call pixel_satd_8x8_internal +lea r0, [r6 + 8] +lea r2, [r7 + 8] +call pixel_satd_8x8_internal +lea r0, [r6 + 16] +lea r2, [r7 + 16] +call pixel_satd_8x8_internal +lea r0, [r6 + 24] +lea r2, [r7 + 24] call pixel_satd_8x8_internal SATD_END_SSE2 m6 +%else +cglobal pixel_satd_32x8, 4,7,8,0-4;if !WIN64 +SATD_START_SSE2 m6, m7 +mov r6, r0 +mov [rsp], r2 +call pixel_satd_8x8_internal +lea r0, [r6 + 8] +mov r2, [rsp] +add r2, 8 +call pixel_satd_8x8_internal +lea r0, [r6 + 16] +mov r2, [rsp] +add r2, 16 +call pixel_satd_8x8_internal +lea r0, [r6 + 24] +mov r2, [rsp] +add r2, 24 +call pixel_satd_8x8_internal +SATD_END_SSE2 m6 +%endif %if WIN64 cglobal pixel_satd_32x16, 4,8,8;if WIN64 cpuflag(avx) @@ -2282,25 +2297,24 @@ call pixel_satd_8x8_internal SATD_END_SSE2 m6 %else -cglobal pixel_satd_32x16, 4,6,8;if !WIN64 +cglobal pixel_satd_32x16, 4,7,8,0-4;if !WIN64 SATD_START_SSE2 m6, m7 -call pixel_satd_8x8_internal -call pixel_satd_8x8_internal -mov 
r0, r0mp -mov r2, r2mp -add r0, 8 +mov r6, r0 +mov [rsp], r2 +call pixel_satd_8x8_internal +call pixel_satd_8x8_internal +lea r0, [r6 + 8] +mov r2, [rsp] add r2, 8 call pixel_satd_8x8_internal call pixel_satd_8x8_internal -mov r0, r0mp -mov r2, r2mp -add r0, 16 +lea r0, [r6 + 16] +mov r2, [rsp] add r2, 16 call pixel_satd_8x8_internal call pixel_satd_8x8_internal -mov r0, r0mp -mov r2, r2mp -add r0, 24 +lea r0, [r6 + 24] +mov r2, [rsp] add r2, 24 call pixel_satd_8x8_internal call pixel_satd_8x8_internal @@ -2332,28 +2346,27 @@ call pixel_satd_8x8_internal SATD_END_SSE2 m6 %else -cglobal pixel_satd_32x24, 4,6,8;if !WIN64 +cglobal pixel_satd_32x24, 4,7,8,0-4;if !WIN64 SATD_START_SSE2 m6, m7 -call pixel_satd_8x8_internal -call pixel_satd_8x8_internal -call pixel_satd_8x8_internal -mov r0, r0mp -mov r2, r2mp -add r0, 8 +mov r6, r0 +mov [rsp], r2 +call pixel_satd_8x8_internal +call pixel_satd_8x8_internal +call pixel_satd_8x8_internal +lea r0, [r6 + 8] +mov r2, [rsp] add r2, 8 call pixel_satd_8x8_internal call pixel_satd_8x8_internal call pixel_satd_8x8_internal -mov r0, r0mp -mov r2, r2mp -add r0, 16 +lea r0, [r6 + 16] +mov r2, [rsp] add r2, 16 call pixel_satd_8x8_internal call pixel_satd_8x8_internal call pixel_satd_8x8_internal -mov r0, r0mp -mov r2, r2mp -add r0, 24 +lea r0, [r6 + 24] +mov r2, [rsp] add r2, 24 call pixel_satd_8x8_internal call pixel_satd_8x8_internal @@ -2389,38 +2402,41 @@ call pixel_satd_8x8_internal call pixel_satd_8x8_internal SATD_END_SSE2 m6 -%else -cglobal pixel_satd_32x32, 4,6,8;if !WIN64 + + +%else +cglobal pixel_satd_32x32, 4,7,8,0-4;if !WIN64 + SATD_START_SSE2 m6, m7 -call pixel_satd_8x8_internal -call pixel_satd_8x8_internal -call pixel_satd_8x8_internal -call pixel_satd_8x8_internal -mov r0, r0mp -mov r2, r2mp -add r0, 8 +mov r6, r0 +mov [rsp], r2 +call pixel_satd_8x8_internal +call pixel_satd_8x8_internal +call pixel_satd_8x8_internal +call pixel_satd_8x8_internal +lea r0, [r6 + 8] +mov r2, [rsp] add r2, 8 call pixel_satd_8x8_internal 
call pixel_satd_8x8_internal call pixel_satd_8x8_internal call pixel_satd_8x8_internal -mov r0, r0mp -mov r2, r2mp -add r0, 16 +lea r0, [r6 + 16] +mov r2, [rsp] add r2, 16 call pixel_satd_8x8_internal call pixel_satd_8x8_internal call pixel_satd_8x8_internal call
[x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames
# HG changeset patch # User Gopu Govindaswamy g...@multicorewareinc.com # Date 1384769433 -19800 # Node ID 1e22b93638072ed805478d7af17f90e285fb4969 # Parent 2321ebe0bf64e5f3c0034076c7edb3ecbcd48039 b-pyramid implementation: Allow the use of B-frames as references for non B and B frames when we enable the b-pyramid the bitrates efficienctly reduced and there is not much diff in the performance and the PSNR 00. increased some of the clips and decreased some of clips Test results for reference when enable and disable the b-pyramid: cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --ref=1 --b-adapt=2 Enable B-reference : --b-pyramid=1 Disable B-reference : --b-pyramid=0 Results: Enable / Disable clip - FourPeople_1280x720_60.yuv Total time taken - 9.70s (10.31 fps) / 9.93s (10.07 fps) Bitrates - 516.30 kb/s / 544.68 kb/s PSNR - 39.725 / 39.701 clip - BasketballDrive_1920x1080_50.y4m Total time taken - 39.06s (2.51 fps) / 38.98s (2.57 fps) Bitrates - 4166.92 kb/s / 4370.43 kb/s PSNR - 37.261 / 37.268 clip - Johnny_1280x720_60.y4m Total time taken - 8.88s (11.27 fps) / 11.08s (9.03 fps) Bitrates - 304.29 kb/s / 328.84 kb/s PSNR - 40.605 / 40.551 Total time taken - 30.97s (3.23 fps) / 33.65s (2.97 fps) Bitrates - 3496.84 kb/s / 3683.93 kb/s PSNR - 35.645 / 35.660 diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.cpp --- a/source/common/common.cpp Mon Nov 18 11:32:06 2013 +0530 +++ b/source/common/common.cpp Mon Nov 18 15:40:33 2013 +0530 @@ -54,6 +54,7 @@ static int parseCspName(const char *arg, int error); static int parseName(const char *arg, const char * const * names, int error); +static int parse_enum(const char *, const char * const * names, int *dst); using namespace x265; @@ -165,6 +166,7 @@ param-bframes = 3; param-lookaheadDepth = 40; param-bFrameAdaptive = X265_B_ADAPT_FAST; +param-bpyramid = 0; param-scenecutThreshold = 40; /* Magic number pulled in from x264*/ /* Intra Coding Tools */ @@ -532,7 +534,7 @@ } CHECK(param-bEnableWavefront 0, 
WaveFrontSynchro cannot be negative); - +CHECK(param-bpyramid = 2, b-pyramid is 0 or 1); return check_failed; } @@ -620,6 +622,7 @@ x265_log(param, X265_LOG_INFO, RDpenalty: %d\n, param-rdPenalty); } x265_log(param, X265_LOG_INFO, Lookahead / bframes / badapt : %d / %d / %d\n, param-lookaheadDepth, param-bframes, param-bFrameAdaptive); +x265_log(param, X265_LOG_INFO, b-pyramid / weightp / ref: %d / %d / %d\n, param-bpyramid, param-bEnableWeightedPred, param-maxNumReferences); x265_log(param, X265_LOG_INFO, tools: ); #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, %s , STR) TOOLOPT(param-bEnableRectInter, rect); @@ -628,7 +631,6 @@ TOOLOPT(param-bEnableConstrainedIntra, cip); TOOLOPT(param-bEnableEarlySkip, esd); fprintf(stderr, rd=%d , param-rdLevel); -fprintf(stderr, ref=%d , param-maxNumReferences); TOOLOPT(param-bEnableLoopFilter, lft); if (param-bEnableSAO) @@ -650,7 +652,6 @@ else fprintf(stderr, tskip ); } -TOOLOPT(param-bEnableWeightedPred, weightp); TOOLOPT(param-bEnableWeightedBiPred, weightbp); TOOLOPT(param-rc.aqMode, aq); fprintf(stderr, \n); @@ -747,6 +748,15 @@ } OPT(input-csp) p-sourceCsp = ::parseCspName(value, berror); OPT(me)p-searchMethod = ::parseName(value, x265_motion_est_names, berror); +OPT(b-pyramid) +{ +berror |= parse_enum(value, x265_b_pyramid_names, p-bpyramid); +if (berror) +{ +berror = 0; +p-bpyramid = atoi(value); +} +} else return X265_PARAM_BAD_NAME; #undef OPT @@ -802,6 +812,7 @@ BOOL(p-bEnableSAO, sao); s += sprintf(s, sao-lcu-bounds=%d, p-saoLcuBoundary); s += sprintf(s, sao-lcu-opt=%d, p-saoLcuBasedOptimization); +s += sprintf(s, b-pyramid=%d, p-bpyramid); #undef BOOL return buf; @@ -843,3 +854,13 @@ error = 1; return a; } +static int parse_enum(const char *arg, const char * const * names, int *dst) +{ +for (int i = 0; names[i]; i++) +if (!strcmp(arg, names[i])) +{ +*dst = i; +return 0; +} +return -1; +} diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.h --- a/source/common/common.hMon Nov 18 11:32:06 2013 +0530 
+++ b/source/common/common.hMon Nov 18 15:40:33 2013 +0530 @@ -107,6 +107,7 @@ #define X265_LOG2(x) log2(x) #endif +static const char * const x265_b_pyramid_names[] = {none, normal, 0}; /* defined in common.cpp */ int64_t x265_mdate(void); void x265_log(x265_param *param, int level, const char *fmt, ...); diff -r 2321ebe0bf64 -r 1e22b9363807 source/encoder/dpb.cpp --- a/source/encoder/dpb.cppMon Nov 18 11:32:06 2013 +0530 +++ b/source/encoder/dpb.cppMon Nov 18 15:40:33 2013 +0530 @@ -78,7 +78,17 @@ m_lastIDR = pocCurr; }
Re: [x265] [PATCH] TComYuv::addAvg, primitive function for luma and chroma loops
Pushed. But next time, please organize your patches more clearly. 1. Add C primitive, if it does not exist. 2. Add the function pointer declarations and new primitive declarations to EncoderPrimitives struct. 3. Add testbench code for primitives. 4. Add asm code. Once all above patches have been reviewed, pushed and tested on all platforms, then you can integrate it with the actual encoder. On Mon, Nov 18, 2013 at 3:23 PM, dnyanesh...@multicorewareinc.com wrote: # HG changeset patch # User Dnyaneshwar G dnyanesh...@multicorewareinc.com # Date 1384768323 -19800 # Mon Nov 18 15:22:03 2013 +0530 # Node ID cdd54aa200bd635395c01bbb07c156be4edbf7b1 # Parent ac9e64d8a80bffe33fdaa0a9b83fdbe84f39d0b0 TComYuv::addAvg, primitive function for luma and chroma loops diff -r ac9e64d8a80b -r cdd54aa200bd source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 12:26:44 2013 +0530 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 15:22:03 2013 +0530 @@ -589,9 +589,7 @@ void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) { -int x, y; uint32_t src0Stride, src1Stride, dststride; -int shiftNum, offset; int16_t* srcY0 = srcYuv0-getLumaAddr(partUnitIdx); int16_t* srcU0 = srcYuv0-getCbAddr(partUnitIdx); @@ -605,61 +603,24 @@ Pel* dstU = getCbAddr(partUnitIdx); Pel* dstV = getCrAddr(partUnitIdx); +int part = partitionFromSizes(width, height); + if (bLuma) { src0Stride = srcYuv0-m_width; src1Stride = srcYuv1-m_width; dststride = getStride(); -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; -offset = (1 (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; -for (y = 0; y height; y++) -{ -for (x = 0; x width; x += 4) -{ -dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] + offset) shiftNum); -dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] + offset) shiftNum); -dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] + offset) shiftNum); -dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] + offset) 
shiftNum); -} - -srcY0 += src0Stride; -srcY1 += src1Stride; -dstY += dststride; -} +primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride, srcY1, src1Stride); } if (bChroma) { -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; -offset = (1 (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; - src0Stride = srcYuv0-m_cwidth; src1Stride = srcYuv1-m_cwidth; dststride = getCStride(); -width = m_hChromaShift; -height = m_vChromaShift; - -for (y = height - 1; y = 0; y--) -{ -for (x = width - 1; x = 0; ) -{ -// note: chroma min width is 2 -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) shiftNum); -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) shiftNum); -x--; -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) shiftNum); -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) shiftNum); -x--; -} - -srcU0 += src0Stride; -srcU1 += src1Stride; -srcV0 += src0Stride; -srcV1 += src1Stride; -dstU += dststride; -dstV += dststride; -} +primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, srcU1, src1Stride); +primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, srcV1, src1Stride); } } diff -r ac9e64d8a80b -r cdd54aa200bd source/common/pixel.cpp --- a/source/common/pixel.cpp Mon Nov 18 12:26:44 2013 +0530 +++ b/source/common/pixel.cpp Mon Nov 18 15:22:03 2013 +0530 @@ -794,6 +794,27 @@ a += dstride; } } + +templateint bx, int by +void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride) +{ +int shiftNum, offset; +shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; +offset = (1 (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; + +for (int y = 0; y by; y++) +{ +for (int x = 0; x bx; x += 2) +{ +dst[x + 0] = ClipY((src0[x + 0] + src1[x + 0] + offset) shiftNum); +dst[x + 1] = ClipY((src0[x + 1] + src1[x + 1] + offset) shiftNum); +} + +src0 += src0Stride; +src1 += src1Stride; +dst += dstStride; +} +} } // end anonymous namespace namespace x265 { @@ -835,12 +856,14 @@ p.satd[LUMA_16x64] = satd816, 64; #define CHROMA(W, H) \ 
+p.chroma_addAvg[CHROMA_ ## W ## x ## H] = addAvgW, H; \ p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ##
Re: [x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames
In encoder::configure, there should be some check for --b-adapt 0 --bframes 0, in which case print warning and disable b-pyramid. On Mon, Nov 18, 2013 at 3:40 PM, Gopu Govindaswamy g...@multicorewareinc.com wrote: # HG changeset patch # User Gopu Govindaswamy g...@multicorewareinc.com # Date 1384769433 -19800 # Node ID 1e22b93638072ed805478d7af17f90e285fb4969 # Parent 2321ebe0bf64e5f3c0034076c7edb3ecbcd48039 b-pyramid implementation: Allow the use of B-frames as references for non B and B frames when we enable the b-pyramid the bitrates efficienctly reduced and there is not much diff in the performance and the PSNR 00. increased some of the clips and decreased some of clips Test results for reference when enable and disable the b-pyramid: cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --ref=1 --b-adapt=2 Enable B-reference : --b-pyramid=1 Disable B-reference : --b-pyramid=0 Results: Enable / Disable clip - FourPeople_1280x720_60.yuv Total time taken - 9.70s (10.31 fps) / 9.93s (10.07 fps) Bitrates - 516.30 kb/s / 544.68 kb/s PSNR - 39.725 / 39.701 clip - BasketballDrive_1920x1080_50.y4m Total time taken - 39.06s (2.51 fps) / 38.98s (2.57 fps) Bitrates - 4166.92 kb/s / 4370.43 kb/s PSNR - 37.261 / 37.268 clip - Johnny_1280x720_60.y4m Total time taken - 8.88s (11.27 fps) / 11.08s (9.03 fps) Bitrates - 304.29 kb/s / 328.84 kb/s PSNR - 40.605 / 40.551 Total time taken - 30.97s (3.23 fps) / 33.65s (2.97 fps) Bitrates - 3496.84 kb/s / 3683.93 kb/s PSNR - 35.645 / 35.660 diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.cpp --- a/source/common/common.cpp Mon Nov 18 11:32:06 2013 +0530 +++ b/source/common/common.cpp Mon Nov 18 15:40:33 2013 +0530 @@ -54,6 +54,7 @@ static int parseCspName(const char *arg, int error); static int parseName(const char *arg, const char * const * names, int error); +static int parse_enum(const char *, const char * const * names, int *dst); using namespace x265; @@ -165,6 +166,7 @@ param-bframes = 3; param-lookaheadDepth = 40; 
param-bFrameAdaptive = X265_B_ADAPT_FAST; +param-bpyramid = 0; param-scenecutThreshold = 40; /* Magic number pulled in from x264*/ /* Intra Coding Tools */ @@ -532,7 +534,7 @@ } CHECK(param-bEnableWavefront 0, WaveFrontSynchro cannot be negative); - +CHECK(param-bpyramid = 2, b-pyramid is 0 or 1); return check_failed; } @@ -620,6 +622,7 @@ x265_log(param, X265_LOG_INFO, RDpenalty: %d\n, param-rdPenalty); } x265_log(param, X265_LOG_INFO, Lookahead / bframes / badapt : %d / %d / %d\n, param-lookaheadDepth, param-bframes, param-bFrameAdaptive); +x265_log(param, X265_LOG_INFO, b-pyramid / weightp / ref: %d / %d / %d\n, param-bpyramid, param-bEnableWeightedPred, param-maxNumReferences); x265_log(param, X265_LOG_INFO, tools: ); #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, %s , STR) TOOLOPT(param-bEnableRectInter, rect); @@ -628,7 +631,6 @@ TOOLOPT(param-bEnableConstrainedIntra, cip); TOOLOPT(param-bEnableEarlySkip, esd); fprintf(stderr, rd=%d , param-rdLevel); -fprintf(stderr, ref=%d , param-maxNumReferences); TOOLOPT(param-bEnableLoopFilter, lft); if (param-bEnableSAO) @@ -650,7 +652,6 @@ else fprintf(stderr, tskip ); } -TOOLOPT(param-bEnableWeightedPred, weightp); TOOLOPT(param-bEnableWeightedBiPred, weightbp); TOOLOPT(param-rc.aqMode, aq); fprintf(stderr, \n); @@ -747,6 +748,15 @@ } OPT(input-csp) p-sourceCsp = ::parseCspName(value, berror); OPT(me)p-searchMethod = ::parseName(value, x265_motion_est_names, berror); +OPT(b-pyramid) +{ +berror |= parse_enum(value, x265_b_pyramid_names, p-bpyramid); +if (berror) +{ +berror = 0; +p-bpyramid = atoi(value); +} +} else return X265_PARAM_BAD_NAME; #undef OPT @@ -802,6 +812,7 @@ BOOL(p-bEnableSAO, sao); s += sprintf(s, sao-lcu-bounds=%d, p-saoLcuBoundary); s += sprintf(s, sao-lcu-opt=%d, p-saoLcuBasedOptimization); +s += sprintf(s, b-pyramid=%d, p-bpyramid); #undef BOOL return buf; @@ -843,3 +854,13 @@ error = 1; return a; } +static int parse_enum(const char *arg, const char * const * names, int *dst) +{ +for (int i = 
0; names[i]; i++) +if (!strcmp(arg, names[i])) +{ +*dst = i; +return 0; +} +return -1; +} diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.h --- a/source/common/common.hMon Nov 18 11:32:06 2013 +0530 +++ b/source/common/common.hMon Nov 18 15:40:33 2013 +0530 @@ -107,6 +107,7 @@ #define X265_LOG2(x) log2(x) #endif +static const char * const x265_b_pyramid_names[] = {none, normal, 0}; /* defined in common.cpp */ int64_t
[x265] [PATCH] asm: code for scale2D_64to32 routine
# HG changeset patch # User Murugan Vairavel muru...@multicorewareinc.com # Date 1384773570 -19800 # Mon Nov 18 16:49:30 2013 +0530 # Node ID c355ba4b6711bfad87ff37d650a8f1946f878eec # Parent 2321ebe0bf64e5f3c0034076c7edb3ecbcd48039 asm: code for scale2D_64to32 routine diff -r 2321ebe0bf64 -r c355ba4b6711 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Mon Nov 18 11:32:06 2013 +0530 +++ b/source/common/x86/asm-primitives.cpp Mon Nov 18 16:49:30 2013 +0530 @@ -530,6 +530,7 @@ PIXEL_AVG_W4(ssse3); p.scale1D_128to64 = x265_scale1D_128to64_ssse3; +p.scale2D_64to32 = x265_scale2D_64to32_ssse3; p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3; p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_ssse3; diff -r 2321ebe0bf64 -r c355ba4b6711 source/common/x86/pixel-a.asm --- a/source/common/x86/pixel-a.asm Mon Nov 18 11:32:06 2013 +0530 +++ b/source/common/x86/pixel-a.asm Mon Nov 18 16:49:30 2013 +0530 @@ -8230,3 +8230,113 @@ movu [r0 + 48],m4 RET + +;- +; void scale2D_64to32(pixel *dst, pixel *src, intptr_t stride) +;- +INIT_XMM ssse3 +cglobal scale2D_64to32, 3, 4, 8, dest, src, stride + +movam7, [deinterleave_shuf] +mov r3d, 32 +.loop + +movum0, [r1] ;i +movum1, [r1 + 1] ;j +movum2, [r1 + r2] ;k +movum3, [r1 + r2 + 1] ;l +movum4, m0 +movum5, m2 + +pxorm4, m1;i^j +pxorm5, m3;k^l +por m4, m5;ij|kl + +pavgb m0, m1;s +pavgb m2, m3;t +movum5, m0 +pavgb m0, m2;(s+t+1)/2 +pxorm5, m2;s^t +pandm4, m5;(ij|kl)st +pandm4, [hmul_16p] +psubb m0, m4;Result + +movum1, [r1 + 16] ;i +movum2, [r1 + 16 + 1] ;j +movum3, [r1 + r2 + 16];k +movum4, [r1 + r2 + 16 + 1];l +movum5, m1 +movum6, m3 + +pxorm5, m2;i^j +pxorm6, m4;k^l +por m5, m6;ij|kl + +pavgb m1, m2;s +pavgb m3, m4;t +movum6, m1 +pavgb m1, m3;(s+t+1)/2 +pxorm6, m3;s^t +pandm5, m6;(ij|kl)st +pandm5, [hmul_16p] +psubb m1, m5;Result + +pshufb m0, m0,m7 +pshufb m1, m1,m7 + +punpcklqdqm0, m1 +movu [r0], m0 + +movum0, [r1 + 32] ;i +movum1, [r1 + 32 + 1] ;j +movum2, [r1 + r2 + 32];k +movum3, [r1 + r2 + 32 + 1];l +movum4, 
m0 +movum5, m2 + +pxorm4, m1;i^j +pxorm5, m3;k^l +por m4, m5;ij|kl + +pavgb m0, m1;s +pavgb m2, m3;t +movum5, m0 +pavgb m0, m2;(s+t+1)/2 +pxorm5, m2;s^t +pandm4, m5;(ij|kl)st +pandm4, [hmul_16p] +psubb m0, m4;Result + +movum1, [r1 + 48] ;i +movum2, [r1 + 48 + 1] ;j +movum3, [r1 + r2 + 48];k +movum4, [r1 + r2 + 48 + 1];l +movum5, m1 +movum6, m3 + +pxorm5, m2;i^j +pxorm6, m4;k^l +por m5, m6;ij|kl + +pavgb m1, m2;s +pavgb m3, m4;t +movum6, m1 +pavgb m1, m3;(s+t+1)/2 +pxorm6, m3;s^t +pandm5, m6;(ij|kl)st +pandm5, [hmul_16p] +psubb m1, m5;Result + +pshufb m0, m0,m7 +pshufb m1, m1,m7 + +punpcklqdqm0, m1 +movu
Re: [x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames
On Mon, Nov 18, 2013 at 4:47 PM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: On Mon, Nov 18, 2013 at 3:40 PM, Gopu Govindaswamy g...@multicorewareinc.com wrote: # HG changeset patch # User Gopu Govindaswamy g...@multicorewareinc.com # Date 1384769433 -19800 # Node ID 1e22b93638072ed805478d7af17f90e285fb4969 # Parent 2321ebe0bf64e5f3c0034076c7edb3ecbcd48039 b-pyramid implementation: Allow the use of B-frames as references for non B and B frames when we enable the b-pyramid the bitrates efficienctly reduced and there is not much diff in the performance and the PSNR 00. increased some of the clips and decreased some of clips Test results for reference when enable and disable the b-pyramid: cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --ref=1 --b-adapt=2 Enable B-reference : --b-pyramid=1 Disable B-reference : --b-pyramid=0 Results: Enable / Disable clip - FourPeople_1280x720_60.yuv Total time taken - 9.70s (10.31 fps) / 9.93s (10.07 fps) Bitrates - 516.30 kb/s / 544.68 kb/s PSNR - 39.725 / 39.701 clip - BasketballDrive_1920x1080_50.y4m Total time taken - 39.06s (2.51 fps) / 38.98s (2.57 fps) Bitrates - 4166.92 kb/s / 4370.43 kb/s PSNR - 37.261 / 37.268 clip - Johnny_1280x720_60.y4m Total time taken - 8.88s (11.27 fps) / 11.08s (9.03 fps) Bitrates - 304.29 kb/s / 328.84 kb/s PSNR - 40.605 / 40.551 Total time taken - 30.97s (3.23 fps) / 33.65s (2.97 fps) Bitrates - 3496.84 kb/s / 3683.93 kb/s PSNR - 35.645 / 35.660 diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.cpp --- a/source/common/common.cpp Mon Nov 18 11:32:06 2013 +0530 +++ b/source/common/common.cpp Mon Nov 18 15:40:33 2013 +0530 @@ -54,6 +54,7 @@ static int parseCspName(const char *arg, int error); static int parseName(const char *arg, const char * const * names, int error); +static int parse_enum(const char *, const char * const * names, int *dst); using namespace x265; @@ -165,6 +166,7 @@ param-bframes = 3; param-lookaheadDepth = 40; param-bFrameAdaptive = X265_B_ADAPT_FAST; 
+param-bpyramid = 0; param-scenecutThreshold = 40; /* Magic number pulled in from x264*/ /* Intra Coding Tools */ @@ -532,7 +534,7 @@ } CHECK(param-bEnableWavefront 0, WaveFrontSynchro cannot be negative); - +CHECK(param-bpyramid = 2, b-pyramid is 0 or 1); return check_failed; } @@ -620,6 +622,7 @@ x265_log(param, X265_LOG_INFO, RDpenalty: %d\n, param-rdPenalty); } x265_log(param, X265_LOG_INFO, Lookahead / bframes / badapt : %d / %d / %d\n, param-lookaheadDepth, param-bframes, param-bFrameAdaptive); +x265_log(param, X265_LOG_INFO, b-pyramid / weightp / ref: %d / %d / %d\n, param-bpyramid, param-bEnableWeightedPred, param-maxNumReferences); x265_log(param, X265_LOG_INFO, tools: ); #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, %s , STR) TOOLOPT(param-bEnableRectInter, rect); @@ -628,7 +631,6 @@ TOOLOPT(param-bEnableConstrainedIntra, cip); TOOLOPT(param-bEnableEarlySkip, esd); fprintf(stderr, rd=%d , param-rdLevel); -fprintf(stderr, ref=%d , param-maxNumReferences); TOOLOPT(param-bEnableLoopFilter, lft); if (param-bEnableSAO) @@ -650,7 +652,6 @@ else fprintf(stderr, tskip ); } -TOOLOPT(param-bEnableWeightedPred, weightp); TOOLOPT(param-bEnableWeightedBiPred, weightbp); TOOLOPT(param-rc.aqMode, aq); fprintf(stderr, \n); @@ -747,6 +748,15 @@ } OPT(input-csp) p-sourceCsp = ::parseCspName(value, berror); OPT(me)p-searchMethod = ::parseName(value, x265_motion_est_names, berror); +OPT(b-pyramid) +{ +berror |= parse_enum(value, x265_b_pyramid_names, p-bpyramid); +if (berror) +{ +berror = 0; +p-bpyramid = atoi(value); +} +} Not clear why parse_enum is required here? For now, this is a boolean flag which can be assigned directly to the param structure. 
Because parse_enum lets the option be given either by name or by number: --b-pyramid=none or --b-pyramid=0, and --b-pyramid=normal or --b-pyramid=1. else return X265_PARAM_BAD_NAME; #undef OPT @@ -802,6 +812,7 @@ BOOL(p-bEnableSAO, sao); s += sprintf(s, sao-lcu-bounds=%d, p-saoLcuBoundary); s += sprintf(s, sao-lcu-opt=%d, p-saoLcuBasedOptimization); +s += sprintf(s, b-pyramid=%d, p-bpyramid); #undef BOOL return buf; @@ -843,3 +854,13 @@ error = 1; return a; } +static int parse_enum(const char *arg, const char * const * names, int *dst) +{ +for (int i = 0; names[i]; i++) +if (!strcmp(arg, names[i])) +{ +*dst = i; +return 0; +} +return -1; +} diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.h --- a/source/common/common.hMon Nov 18 11:32:06 2013 +0530 +++
[x265] [PATCH] cli: add aq-strength to cli input options, add validations for aq mode
# HG changeset patch # User Aarthi Thirumalai # Date 1384773969 -19800 # Mon Nov 18 16:56:09 2013 +0530 # Node ID 78225cfaa696fad7f2870c4064c8f0f387e5ba8d # Parent 2321ebe0bf64e5f3c0034076c7edb3ecbcd48039 cli: add aq-strength to cli input options, add validations for aq mode diff -r 2321ebe0bf64 -r 78225cfaa696 source/common/common.cpp --- a/source/common/common.cpp Mon Nov 18 11:32:06 2013 +0530 +++ b/source/common/common.cpp Mon Nov 18 16:56:09 2013 +0530 @@ -519,8 +519,10 @@ max consecutive bframe count must be 16 or smaller); CHECK(param-lookaheadDepth X265_LOOKAHEAD_MAX, Lookahead depth must be less than 256); -CHECK(param-rc.aqModeX265_AQ_NONE || param-rc.aqMode X265_AQ_VARIANCE, +CHECK(param-rc.aqMode X265_AQ_NONE || param-rc.aqMode X265_AQ_VARIANCE, Aq-Mode is out of range); +CHECK(param-rc.aqStrength 0 || param-rc.aqStrength 3, + Aq-Strength is out of range); // max CU size should be power of 2 uint32_t i = param-maxCUSize; @@ -532,6 +534,16 @@ } CHECK(param-bEnableWavefront 0, WaveFrontSynchro cannot be negative); +if(param-rc.rateControlMode == X265_RC_CQP ) +{ +param-rc.aqMode = X265_AQ_NONE; +param-rc.bitrate = 0; +} +if(param-rc.aqStrength == 0) +{ +x265_log(param, X265_LOG_WARNING, Aq mode specified, but Aq strength is 0, ignored\n ); +param-rc.aqMode = 0; +} return check_failed; } @@ -652,7 +664,8 @@ } TOOLOPT(param-bEnableWeightedPred, weightp); TOOLOPT(param-bEnableWeightedBiPred, weightbp); -TOOLOPT(param-rc.aqMode, aq); +TOOLOPT(param-rc.aqMode, aq-mode); +fprintf(stderr, aq-strength=%.2f , param-rc.aqStrength); fprintf(stderr, \n); fflush(stderr); } @@ -729,6 +742,7 @@ OPT(psnr) p-bEnablePsnr = bvalue; OPT(hash) p-decodedPictureHashSEI = atoi(value); OPT(aq-mode) p-rc.aqMode = atoi(value); +OPT(aq-strength) p-rc.aqStrength = atof(value); OPT(crf) { p-rc.rfConstant = atof(value); @@ -794,6 +808,8 @@ BOOL(p-bEnableWeightedPred, weightp); s += sprintf(s, bitrate=%d, p-rc.bitrate); s += sprintf(s, qp=%d, p-rc.qp); +s += sprintf(s, aq-mode=%d, 
p-rc.aqMode); +s += sprintf(s, aq-strength=%.2f, p-rc.aqStrength); s += sprintf(s, cbqpoffs=%d, p-cbQpOffset); s += sprintf(s, crqpoffs=%d, p-crQpOffset); s += sprintf(s, rd=%d, p-rdLevel); diff -r 2321ebe0bf64 -r 78225cfaa696 source/x265.cpp --- a/source/x265.cpp Mon Nov 18 11:32:06 2013 +0530 +++ b/source/x265.cpp Mon Nov 18 16:56:09 2013 +0530 @@ -123,6 +123,7 @@ { bitrate,required_argument, NULL, 0 }, { qp, required_argument, NULL, 'q' }, { aq-mode,required_argument, NULL, 0 }, +{ aq-strength,required_argument, NULL, 0 }, { cbqpoffs, required_argument, NULL, 0 }, { crqpoffs, required_argument, NULL, 0 }, { rd, required_argument, NULL, 0 }, @@ -310,6 +311,7 @@ H0( --crf Quality-based VBR (0-51). Default %f\n, param-rc.rfConstant); H0(-q/--qp Base QP for CQP mode. Default %d\n, param-rc.qp); H0( --aq-mode Mode for Adaptive Quantization - 0:none 1:aqVariance Default %d\n, param-rc.aqMode); +H0( --aq-strength Reduces blocking and blurring in flat and textured areas.(0 to 3.0)double . Default %f\n, param-rc.aqStrength); H0( --cbqpoffsChroma Cb QP Offset. Default %d\n, param-cbQpOffset); H0( --crqpoffsChroma Cr QP Offset. Default %d\n, param-crQpOffset); H0( --rd Level of RD in mode decision 0:least2:full RDO. Default %d\n, param-rdLevel); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames
Yes, numReorderPics is 2. But once numReorderPics is increased, the max DPB size should also increase by 1 when b-pyramid is enabled; instead of doing that, I had directly increased it by 3. I will change this to numReorderPics = 2 and m_maxDecPicBuffering[i] = X265_MIN(MAX_NUM_REF, X265_MAX(m_numReorderPics[i] + 1, _param->maxNumReferences) + 2); when b-pyramid is enabled, because otherwise the RPS computation will not produce the correct L0 reference. On Mon, Nov 18, 2013 at 4:53 PM, Deepthi Devaki Akkoorath deepthidev...@multicorewareinc.com wrote: On Mon, Nov 18, 2013 at 3:40 PM, Gopu Govindaswamy g...@multicorewareinc.com wrote: # HG changeset patch # User Gopu Govindaswamy g...@multicorewareinc.com # Date 1384769433 -19800 # Node ID 1e22b93638072ed805478d7af17f90e285fb4969 # Parent 2321ebe0bf64e5f3c0034076c7edb3ecbcd48039 b-pyramid implementation: Allow the use of B-frames as references for non B and B frames when we enable the b-pyramid the bitrates efficienctly reduced and there is not much diff in the performance and the PSNR 00. 
increased some of the clips and decreased some of clips Test results for reference when enable and disable the b-pyramid: cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --ref=1 --b-adapt=2 Enable B-reference : --b-pyramid=1 Disable B-reference : --b-pyramid=0 Results: Enable / Disable clip - FourPeople_1280x720_60.yuv Total time taken - 9.70s (10.31 fps) / 9.93s (10.07 fps) Bitrates - 516.30 kb/s / 544.68 kb/s PSNR - 39.725 / 39.701 clip - BasketballDrive_1920x1080_50.y4m Total time taken - 39.06s (2.51 fps) / 38.98s (2.57 fps) Bitrates - 4166.92 kb/s / 4370.43 kb/s PSNR - 37.261 / 37.268 clip - Johnny_1280x720_60.y4m Total time taken - 8.88s (11.27 fps) / 11.08s (9.03 fps) Bitrates - 304.29 kb/s / 328.84 kb/s PSNR - 40.605 / 40.551 Total time taken - 30.97s (3.23 fps) / 33.65s (2.97 fps) Bitrates - 3496.84 kb/s / 3683.93 kb/s PSNR - 35.645 / 35.660 diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.cpp --- a/source/common/common.cpp Mon Nov 18 11:32:06 2013 +0530 +++ b/source/common/common.cpp Mon Nov 18 15:40:33 2013 +0530 @@ -54,6 +54,7 @@ static int parseCspName(const char *arg, int error); static int parseName(const char *arg, const char * const * names, int error); +static int parse_enum(const char *, const char * const * names, int *dst); using namespace x265; @@ -165,6 +166,7 @@ param-bframes = 3; param-lookaheadDepth = 40; param-bFrameAdaptive = X265_B_ADAPT_FAST; +param-bpyramid = 0; param-scenecutThreshold = 40; /* Magic number pulled in from x264*/ /* Intra Coding Tools */ @@ -532,7 +534,7 @@ } CHECK(param-bEnableWavefront 0, WaveFrontSynchro cannot be negative); - +CHECK(param-bpyramid = 2, b-pyramid is 0 or 1); return check_failed; } @@ -620,6 +622,7 @@ x265_log(param, X265_LOG_INFO, RDpenalty: %d\n, param-rdPenalty); } x265_log(param, X265_LOG_INFO, Lookahead / bframes / badapt : %d / %d / %d\n, param-lookaheadDepth, param-bframes, param-bFrameAdaptive); +x265_log(param, X265_LOG_INFO, b-pyramid / weightp / ref: %d / %d / %d\n, 
param-bpyramid, param-bEnableWeightedPred, param-maxNumReferences); x265_log(param, X265_LOG_INFO, tools: ); #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, %s , STR) TOOLOPT(param-bEnableRectInter, rect); @@ -628,7 +631,6 @@ TOOLOPT(param-bEnableConstrainedIntra, cip); TOOLOPT(param-bEnableEarlySkip, esd); fprintf(stderr, rd=%d , param-rdLevel); -fprintf(stderr, ref=%d , param-maxNumReferences); TOOLOPT(param-bEnableLoopFilter, lft); if (param-bEnableSAO) @@ -650,7 +652,6 @@ else fprintf(stderr, tskip ); } -TOOLOPT(param-bEnableWeightedPred, weightp); TOOLOPT(param-bEnableWeightedBiPred, weightbp); TOOLOPT(param-rc.aqMode, aq); fprintf(stderr, \n); @@ -747,6 +748,15 @@ } OPT(input-csp) p-sourceCsp = ::parseCspName(value, berror); OPT(me)p-searchMethod = ::parseName(value, x265_motion_est_names, berror); +OPT(b-pyramid) +{ +berror |= parse_enum(value, x265_b_pyramid_names, p-bpyramid); +if (berror) +{ +berror = 0; +p-bpyramid = atoi(value); +} +} else return X265_PARAM_BAD_NAME; #undef OPT @@ -802,6 +812,7 @@ BOOL(p-bEnableSAO, sao); s += sprintf(s, sao-lcu-bounds=%d, p-saoLcuBoundary); s += sprintf(s, sao-lcu-opt=%d, p-saoLcuBasedOptimization); +s += sprintf(s, b-pyramid=%d, p-bpyramid); #undef BOOL return buf; @@ -843,3 +854,13 @@ error = 1; return a; } +static int parse_enum(const char *arg, const char * const * names, int *dst) +{ +for (int i = 0; names[i]; i++) +if (!strcmp(arg, names[i])) +{ +*dst
[x265] [PATCH] TShortYUV: asm code integration for pixelsub_ps
# HG changeset patch # User Murugan Vairavel muru...@multicorewareinc.com # Date 1384777276 -19800 # Mon Nov 18 17:51:16 2013 +0530 # Node ID be8373f115dd7f152588ba8c575ad10dc6f5afb1 # Parent c355ba4b6711bfad87ff37d650a8f1946f878eec TShortYUV: asm code integration for pixelsub_ps diff -r c355ba4b6711 -r be8373f115dd source/common/TShortYUV.cpp --- a/source/common/TShortYUV.cpp Mon Nov 18 16:49:30 2013 +0530 +++ b/source/common/TShortYUV.cpp Mon Nov 18 17:51:16 2013 +0530 @@ -58,6 +58,7 @@ m_cwidth = width m_hChromaShift; m_cheight = height m_vChromaShift; +m_csp = csp; } void TShortYUV::destroy() @@ -78,15 +79,14 @@ } void TShortYUV::subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int trUnitIdx, unsigned int partSize) -{ -subtractLuma(srcYuv0, srcYuv1, trUnitIdx, partSize); -subtractChroma(srcYuv0, srcYuv1, trUnitIdx, partSize m_hChromaShift); +{ +int part = partitionFromSizes(partSize, partSize); +subtractLuma(srcYuv0, srcYuv1, trUnitIdx, partSize, part); +subtractChroma(srcYuv0, srcYuv1, trUnitIdx, partSize m_hChromaShift, part); } -void TShortYUV::subtractLuma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int trUnitIdx, unsigned int partSize) +void TShortYUV::subtractLuma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int trUnitIdx, unsigned int partSize, uint32_t part) { -int x = partSize, y = partSize; - Pel* src0 = srcYuv0-getLumaAddr(trUnitIdx, partSize); Pel* src1 = srcYuv1-getLumaAddr(trUnitIdx, partSize); int16_t* dst = getLumaAddr(trUnitIdx, partSize); @@ -95,13 +95,11 @@ int src1Stride = srcYuv1-getStride(); int dstStride = m_width; -primitives.pixelsub_ps(x, y, dst, dstStride, src0, src1, src0Stride, src1Stride); +primitives.luma_sub_ps[part](dst, dstStride, src0, src1, src0Stride, src1Stride); } -void TShortYUV::subtractChroma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int trUnitIdx, unsigned int partSize) +void TShortYUV::subtractChroma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int trUnitIdx, unsigned int partSize, uint32_t part) { -int x = 
partSize, y = partSize; - Pel* srcU0 = srcYuv0-getCbAddr(trUnitIdx, partSize); Pel* srcU1 = srcYuv1-getCbAddr(trUnitIdx, partSize); Pel* srcV0 = srcYuv0-getCrAddr(trUnitIdx, partSize); @@ -113,8 +111,8 @@ int src1Stride = srcYuv1-getCStride(); int dstStride = m_cwidth; -primitives.pixelsub_ps(x, y, dstU, dstStride, srcU0, srcU1, src0Stride, src1Stride); -primitives.pixelsub_ps(x, y, dstV, dstStride, srcV0, srcV1, src0Stride, src1Stride); +primitives.chroma_sub_ps[m_csp][part](dstU, dstStride, srcU0, srcU1, src0Stride, src1Stride); +primitives.chroma_sub_ps[m_csp][part](dstV, dstStride, srcV0, srcV1, src0Stride, src1Stride); } void TShortYUV::addClip(TShortYUV* srcYuv0, TShortYUV* srcYuv1, unsigned int trUnitIdx, unsigned int partSize) diff -r c355ba4b6711 -r be8373f115dd source/common/TShortYUV.h --- a/source/common/TShortYUV.h Mon Nov 18 16:49:30 2013 +0530 +++ b/source/common/TShortYUV.h Mon Nov 18 17:51:16 2013 +0530 @@ -53,6 +53,8 @@ return blkX + blkY * size; } +int m_csp; + public: int16_t* m_bufY; @@ -95,8 +97,8 @@ int16_t* getCrAddr(unsigned int partIdx, unsigned int size) { return m_bufCr + getAddrOffset(partIdx, size, m_cwidth); } -void subtractLuma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int trUnitIdx, unsigned int partSize); -void subtractChroma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int trUnitIdx, unsigned int partSize); +void subtractLuma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int trUnitIdx, unsigned int partSize, uint32_t part); +void subtractChroma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int trUnitIdx, unsigned int partSize, uint32_t part); void subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int trUnitIdx, unsigned int partSize); void addClip(TShortYUV* srcYuv0, TShortYUV* srcYuv1, unsigned int trUnitIdx, unsigned int partSize); diff -r c355ba4b6711 -r be8373f115dd source/common/pixel.cpp --- a/source/common/pixel.cpp Mon Nov 18 16:49:30 2013 +0530 +++ b/source/common/pixel.cpp Mon Nov 18 17:51:16 2013 +0530 @@ -838,7 
+838,7 @@ p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_cW, H; \ p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_cW, H; \ p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\ -p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_cW, H; +p.chroma_sub_ps[CSP_I420][CHROMA_ ## W ## x ## H] = pixel_sub_ps_cW, H; #define LUMA(W, H) \ p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_cW, H; \ diff -r c355ba4b6711 -r be8373f115dd source/common/primitives.h --- a/source/common/primitives.hMon Nov 18 16:49:30 2013 +0530 +++ b/source/common/primitives.hMon Nov 18 17:51:16 2013 +0530 @@ -250,7 +250,7 @@ copy_ps_t
[x265] [PATCH] TComYuv::copyToPicChroma, blockcopy_pp asm integration
# HG changeset patch # User Praveen Tiwari # Date 1384780472 -19800 # Node ID 024d6ddf57596b6f77100b3bdcac555ddbec7c0a # Parent 2321ebe0bf64e5f3c0034076c7edb3ecbcd48039 TComYuv::copyToPicChroma, blockcopy_pp asm integration diff -r 2321ebe0bf64 -r 024d6ddf5759 source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 11:32:06 2013 +0530 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 18:44:32 2013 +0530 @@ -140,8 +140,10 @@ uint32_t srcstride = getCStride(); uint32_t dststride = destPicYuv-getCStride(); -primitives.blockcpy_pp(width, height, dstU, dststride, srcU, srcstride); -primitives.blockcpy_pp(width, height, dstV, dststride, srcV, srcstride); +int part = partitionFromSizes(width, height); + +primitives.luma_copy_pp[part](dstU, dststride, srcU, srcstride); +primitives.luma_copy_pp[part](dstV, dststride, srcV, srcstride); } void TComYuv::copyFromPicYuv(TComPicYuv* srcPicYuv, uint32_t cuAddr, uint32_t absZOrderIdx) ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] TComYuv::copyToPicChroma, blockcopy_pp asm integration
At 2013-11-18 21:14:52,prav...@multicorewareinc.com wrote: # HG changeset patch # User Praveen Tiwari # Date 1384780472 -19800 # Node ID 024d6ddf57596b6f77100b3bdcac555ddbec7c0a # Parent 2321ebe0bf64e5f3c0034076c7edb3ecbcd48039 TComYuv::copyToPicChroma, blockcopy_pp asm integration diff -r 2321ebe0bf64 -r 024d6ddf5759 source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 11:32:06 2013 +0530 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 18:44:32 2013 +0530 @@ -140,8 +140,10 @@ uint32_t srcstride = getCStride(); uint32_t dststride = destPicYuv-getCStride(); -primitives.blockcpy_pp(width, height, dstU, dststride, srcU, srcstride); -primitives.blockcpy_pp(width, height, dstV, dststride, srcV, srcstride); +int part = partitionFromSizes(width, height); width/height is Chroma size + +primitives.luma_copy_pp[part](dstU, dststride, srcU, srcstride); +primitives.luma_copy_pp[part](dstV, dststride, srcV, srcstride); } void TComYuv::copyFromPicYuv(TComPicYuv* srcPicYuv, uint32_t cuAddr, uint32_t absZOrderIdx) ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] blockcopy_pp asm integration, TComYuv::copyToPicChroma
# HG changeset patch # User Praveen Tiwari # Date 1384783447 -19800 # Node ID b353d170c54f0e33a8869c413be226a48deb1f5c # Parent 68d8ca28ac05b93accc6931abd576a56b621a492 blockcopy_pp asm integration, TComYuv::copyToPicChroma diff -r 68d8ca28ac05 -r b353d170c54f source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 19:15:32 2013 +0530 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 19:34:07 2013 +0530 @@ -140,10 +140,10 @@ uint32_t srcstride = getCStride(); uint32_t dststride = destPicYuv-getCStride(); -int part = partitionFromSizes(width, height); +int part = partitionFromSizes(width 1, height 1); -primitives.luma_copy_pp[part](dstU, dststride, srcU, srcstride); -primitives.luma_copy_pp[part](dstV, dststride, srcV, srcstride); +primitives.chroma_copy_pp[m_csp][part](dstU, dststride, srcU, srcstride); +primitives.chroma_copy_pp[m_csp][part](dstV, dststride, srcV, srcstride); } void TComYuv::copyFromPicYuv(TComPicYuv* srcPicYuv, uint32_t cuAddr, uint32_t absZOrderIdx) ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH Review only] asm: code for transpose4x4 routine
# HG changeset patch # User Murugan Vairavel muru...@multicorewareinc.com # Date 1384784621 -19800 # Mon Nov 18 19:53:41 2013 +0530 # Node ID d24c22e915afd33a122326516b41eecf7e055934 # Parent a4735d0fe4759c72a3af408a43723f219688eeb4 asm: code for transpose4x4 routine diff -r a4735d0fe475 -r d24c22e915af source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Mon Nov 18 18:59:20 2013 +0530 +++ b/source/common/x86/asm-primitives.cpp Mon Nov 18 19:53:41 2013 +0530 @@ -545,6 +545,7 @@ p.calcrecon[BLOCK_8x8] = x265_calcRecons8_sse2; p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2; p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2; +p.transpose[BLOCK_4x4] = x265_transpose4_sse2; } if (cpuMask X265_CPU_SSSE3) { diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel-a.asm --- a/source/common/x86/pixel-a.asm Mon Nov 18 18:59:20 2013 +0530 +++ b/source/common/x86/pixel-a.asm Mon Nov 18 19:53:41 2013 +0530 @@ -8340,3 +8340,25 @@ jnz.loop RET + +;- +; void transpose_4x4(pixel *dst, pixel *src, intptr_t stride) +;- +INIT_XMM sse2 +cglobal transpose4, 3, 3, 4, dest, src, stride + +movd m0,[r1] +movd m1,[r1 + r2] +movd m2,[r1 + 2 * r2] + +lea r1,[r1 + 2 * r2] + +movd m3,[r1 + r2] + +punpcklbwm0,m1 +punpcklbwm2,m3 +punpcklwdm0,m2 + +movu [r0],m0 + +RET diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel.h --- a/source/common/x86/pixel.h Mon Nov 18 18:59:20 2013 +0530 +++ b/source/common/x86/pixel.h Mon Nov 18 19:53:41 2013 +0530 @@ -365,5 +365,6 @@ void x265_getResidual8_sse2(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride); void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride); void x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride); +void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride); #endif // ifndef X265_I386_PIXEL_H ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH Review only] asm: code for transpose4x4 routine
good! At 2013-11-18 22:24:12,muru...@multicorewareinc.com wrote: # HG changeset patch # User Murugan Vairavel muru...@multicorewareinc.com # Date 1384784621 -19800 # Mon Nov 18 19:53:41 2013 +0530 # Node ID d24c22e915afd33a122326516b41eecf7e055934 # Parent a4735d0fe4759c72a3af408a43723f219688eeb4 asm: code for transpose4x4 routine diff -r a4735d0fe475 -r d24c22e915af source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Mon Nov 18 18:59:20 2013 +0530 +++ b/source/common/x86/asm-primitives.cpp Mon Nov 18 19:53:41 2013 +0530 @@ -545,6 +545,7 @@ p.calcrecon[BLOCK_8x8] = x265_calcRecons8_sse2; p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2; p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2; +p.transpose[BLOCK_4x4] = x265_transpose4_sse2; } if (cpuMask X265_CPU_SSSE3) { diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel-a.asm --- a/source/common/x86/pixel-a.asmMon Nov 18 18:59:20 2013 +0530 +++ b/source/common/x86/pixel-a.asmMon Nov 18 19:53:41 2013 +0530 @@ -8340,3 +8340,25 @@ jnz.loop RET + +;- +; void transpose_4x4(pixel *dst, pixel *src, intptr_t stride) +;- +INIT_XMM sse2 +cglobal transpose4, 3, 3, 4, dest, src, stride + +movd m0,[r1] +movd m1,[r1 + r2] +movd m2,[r1 + 2 * r2] + +lea r1,[r1 + 2 * r2] + +movd m3,[r1 + r2] + +punpcklbwm0,m1 +punpcklbwm2,m3 +punpcklwdm0,m2 + +movu [r0],m0 + +RET diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel.h --- a/source/common/x86/pixel.hMon Nov 18 18:59:20 2013 +0530 +++ b/source/common/x86/pixel.hMon Nov 18 19:53:41 2013 +0530 @@ -365,5 +365,6 @@ void x265_getResidual8_sse2(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride); void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride); void x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride); +void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride); #endif // ifndef X265_I386_PIXEL_H ___ x265-devel mailing list x265-devel@videolan.org 
https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] added csp support for blpckcopy_ps
# HG changeset patch # User Praveen Tiwari # Date 1384788209 -19800 # Node ID 59646d515e79b4d0f9a3a72c77c7af17a83bf3d9 # Parent b353d170c54f0e33a8869c413be226a48deb1f5c added csp support for blpckcopy_ps diff -r b353d170c54f -r 59646d515e79 source/common/pixel.cpp --- a/source/common/pixel.cpp Mon Nov 18 19:34:07 2013 +0530 +++ b/source/common/pixel.cpp Mon Nov 18 20:53:29 2013 +0530 @@ -837,7 +837,7 @@ #define CHROMA(W, H) \ p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_cW, H; \ p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_cW, H; \ -p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\ +p.chroma_copy_ps[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\ p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_cW, H; #define LUMA(W, H) \ diff -r b353d170c54f -r 59646d515e79 source/common/primitives.h --- a/source/common/primitives.hMon Nov 18 19:34:07 2013 +0530 +++ b/source/common/primitives.hMon Nov 18 20:53:29 2013 +0530 @@ -247,7 +247,7 @@ copy_sp_t luma_copy_sp[NUM_LUMA_PARTITIONS]; copy_sp_t chroma_copy_sp[NUM_CHROMA_PARTITIONS]; copy_ps_t luma_copy_ps[NUM_LUMA_PARTITIONS]; -copy_ps_t chroma_copy_ps[NUM_CHROMA_PARTITIONS]; +copy_ps_t chroma_copy_ps[NUM_CSP][NUM_CHROMA_PARTITIONS]; pixel_sub_ps_t luma_sub_ps[NUM_LUMA_PARTITIONS]; pixel_sub_ps_t chroma_sub_ps[NUM_CHROMA_PARTITIONS]; diff -r b353d170c54f -r 59646d515e79 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Mon Nov 18 19:34:07 2013 +0530 +++ b/source/common/x86/asm-primitives.cpp Mon Nov 18 20:53:29 2013 +0530 @@ -141,7 +141,6 @@ p.chroma_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \ p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \ p.chroma_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \ -p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = x265_blockcopy_ps_ ## W ## x ## H ## cpu; \ p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = 
x265_pixel_sub_ps_ ## W ## x ## H ## cpu; #define SETUP_CHROMA_SP_FUNC_DEF(W, H, cpu) \ @@ -380,6 +379,36 @@ SETUP_LUMA_BLOCKCOPY_FUNC_DEF(64, 16, cpu); \ SETUP_LUMA_BLOCKCOPY_FUNC_DEF(16, 64, cpu); +#define SETUP_CHROMA_FROM_LUMA_SSE4(W1, H1, W2, H2, cpu) \ +p.chroma_copy_ps[X265_CSP_I420][LUMA_ ## W1 ## x ## H1] = x265_blockcopy_ps_ ## W2 ## x ## H2 ## cpu; + +// For X265_CSP_I420 chroma width and height will be half of luma width and height +#define CHROMA_BLOCKCOPY_SSE4(cpu) \ +SETUP_CHROMA_FROM_LUMA_SSE4(8, 8, 4, 4, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(8, 4, 4, 2, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(4, 8, 2, 4, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(16, 16, 8, 8, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(16, 8, 8, 4, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(8, 16, 4, 8, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(16, 12, 8, 6, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(12, 16, 6, 8, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(16, 4, 8, 2, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(4, 16, 2, 8, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(32, 32, 16, 16, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(32, 16, 16, 8, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(16, 32, 8, 16, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(32, 24, 16, 12, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(24, 32, 12, 16, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(32, 8, 16, 4, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(8, 32, 4, 16, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(64, 64, 32, 32, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(64, 32, 32, 16, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(32, 64, 16, 32, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(64, 48, 32, 24, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(48, 64, 24, 32, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(64, 16, 32, 8, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(16, 64, 8, 32, cpu); + using namespace x265; namespace { @@ -591,6 +620,7 @@ CHROMA_FILTERS(_sse4); LUMA_FILTERS(_sse4); HEVC_SATD(sse4); +CHROMA_BLOCKCOPY_SSE4(_sse4); p.chroma_copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4; p.chroma_copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4; 
p.chroma_copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4; diff -r b353d170c54f -r 59646d515e79 source/test/pixelharness.cpp --- a/source/test/pixelharness.cpp Mon Nov 18 19:34:07 2013 +0530 +++ b/source/test/pixelharness.cpp Mon Nov 18 20:53:29 2013 +0530 @@ -763,12 +763,15 @@ } } -if (opt.chroma_copy_ps[part]) +for(int i = 0; i NUM_CSP; i++) { -if (!check_block_copy_ps(ref.chroma_copy_ps[part], opt.chroma_copy_ps[part])) +if (opt.chroma_copy_ps[i][part]) { -printf(chroma_copy_ps[%s] failed\n, chromaPartStr[part]); -return false; +
Re: [x265] [PATCH] cli: add aq-strength to cli input options, add validations for aq mode
On Nov 18, 2013, at 5:26 AM, Aarthi Thirumalai aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalai # Date 1384773969 -19800 # Mon Nov 18 16:56:09 2013 +0530 # Node ID 78225cfaa696fad7f2870c4064c8f0f387e5ba8d # Parent 2321ebe0bf64e5f3c0034076c7edb3ecbcd48039 cli: add aq-strength to cli input options, add validations for aq mode diff -r 2321ebe0bf64 -r 78225cfaa696 source/common/common.cpp --- a/source/common/common.cppMon Nov 18 11:32:06 2013 +0530 +++ b/source/common/common.cppMon Nov 18 16:56:09 2013 +0530 @@ -519,8 +519,10 @@ max consecutive bframe count must be 16 or smaller); CHECK(param-lookaheadDepth X265_LOOKAHEAD_MAX, Lookahead depth must be less than 256); -CHECK(param-rc.aqModeX265_AQ_NONE || param-rc.aqMode X265_AQ_VARIANCE, +CHECK(param-rc.aqMode X265_AQ_NONE || param-rc.aqMode X265_AQ_VARIANCE, Aq-Mode is out of range); +CHECK(param-rc.aqStrength 0 || param-rc.aqStrength 3, + Aq-Strength is out of range); // max CU size should be power of 2 uint32_t i = param-maxCUSize; @@ -532,6 +534,16 @@ } CHECK(param-bEnableWavefront 0, WaveFrontSynchro cannot be negative); +if(param-rc.rateControlMode == X265_RC_CQP ) white-space +{ +param-rc.aqMode = X265_AQ_NONE; +param-rc.bitrate = 0; +} +if(param-rc.aqStrength == 0) +{ +x265_log(param, X265_LOG_WARNING, Aq mode specified, but Aq strength is 0, ignored\n ); +param-rc.aqMode = 0; +} return check_failed; } @@ -652,7 +664,8 @@ } TOOLOPT(param-bEnableWeightedPred, weightp); TOOLOPT(param-bEnableWeightedBiPred, weightbp); -TOOLOPT(param-rc.aqMode, aq); +TOOLOPT(param-rc.aqMode, aq-mode); +fprintf(stderr, aq-strength=%.2f , param-rc.aqStrength); fprintf(stderr, \n); fflush(stderr); } @@ -729,6 +742,7 @@ OPT(psnr) p-bEnablePsnr = bvalue; OPT(hash) p-decodedPictureHashSEI = atoi(value); OPT(aq-mode) p-rc.aqMode = atoi(value); +OPT(aq-strength) p-rc.aqStrength = atof(value); OPT(crf) { p-rc.rfConstant = atof(value); @@ -794,6 +808,8 @@ BOOL(p-bEnableWeightedPred, weightp); s += sprintf(s, 
bitrate=%d, p-rc.bitrate); s += sprintf(s, qp=%d, p-rc.qp); +s += sprintf(s, aq-mode=%d, p-rc.aqMode); +s += sprintf(s, aq-strength=%.2f, p-rc.aqStrength); s += sprintf(s, cbqpoffs=%d, p-cbQpOffset); s += sprintf(s, crqpoffs=%d, p-crQpOffset); s += sprintf(s, rd=%d, p-rdLevel); diff -r 2321ebe0bf64 -r 78225cfaa696 source/x265.cpp --- a/source/x265.cpp Mon Nov 18 11:32:06 2013 +0530 +++ b/source/x265.cpp Mon Nov 18 16:56:09 2013 +0530 @@ -123,6 +123,7 @@ { bitrate,required_argument, NULL, 0 }, { qp, required_argument, NULL, 'q' }, { aq-mode,required_argument, NULL, 0 }, +{ aq-strength,required_argument, NULL, 0 }, { cbqpoffs, required_argument, NULL, 0 }, { crqpoffs, required_argument, NULL, 0 }, { rd, required_argument, NULL, 0 }, @@ -310,6 +311,7 @@ H0( --crf Quality-based VBR (0-51). Default %f\n, param-rc.rfConstant); H0(-q/--qp Base QP for CQP mode. Default %d\n, param-rc.qp); H0( --aq-mode Mode for Adaptive Quantization - 0:none 1:aqVariance Default %d\n, param-rc.aqMode); +H0( --aq-strength Reduces blocking and blurring in flat and textured areas.(0 to 3.0)double . Default %f\n, param-rc.aqStrength); H0( --cbqpoffsChroma Cb QP Offset. Default %d\n, param-cbQpOffset); H0( --crqpoffsChroma Cr QP Offset. Default %d\n, param-crQpOffset); H0( --rd Level of RD in mode decision 0:least2:full RDO. Default %d\n, param-rdLevel); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel signature.asc Description: Message signed with OpenPGP using GPGMail ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH Review only] asm: code for transpose4x4 routine
Excuse me, I pressed the send button too early. Good code, but please insert some spaces before RET and remove the unused blank line. At 2013-11-18 22:24:12,muru...@multicorewareinc.com wrote: # HG changeset patch # User Murugan Vairavel muru...@multicorewareinc.com # Date 1384784621 -19800 # Mon Nov 18 19:53:41 2013 +0530 # Node ID d24c22e915afd33a122326516b41eecf7e055934 # Parent a4735d0fe4759c72a3af408a43723f219688eeb4 asm: code for transpose4x4 routine diff -r a4735d0fe475 -r d24c22e915af source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Mon Nov 18 18:59:20 2013 +0530 +++ b/source/common/x86/asm-primitives.cpp Mon Nov 18 19:53:41 2013 +0530 @@ -545,6 +545,7 @@ p.calcrecon[BLOCK_8x8] = x265_calcRecons8_sse2; p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2; p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2; +p.transpose[BLOCK_4x4] = x265_transpose4_sse2; } if (cpuMask X265_CPU_SSSE3) { diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel-a.asm --- a/source/common/x86/pixel-a.asmMon Nov 18 18:59:20 2013 +0530 +++ b/source/common/x86/pixel-a.asmMon Nov 18 19:53:41 2013 +0530 @@ -8340,3 +8340,25 @@ jnz.loop RET + +;- +; void transpose_4x4(pixel *dst, pixel *src, intptr_t stride) +;- +INIT_XMM sse2 +cglobal transpose4, 3, 3, 4, dest, src, stride + +movd m0,[r1] +movd m1,[r1 + r2] +movd m2,[r1 + 2 * r2] + +lea r1,[r1 + 2 * r2] + +movd m3,[r1 + r2] + +punpcklbwm0,m1 +punpcklbwm2,m3 +punpcklwdm0,m2 + +movu [r0],m0 + +RET diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel.h --- a/source/common/x86/pixel.hMon Nov 18 18:59:20 2013 +0530 +++ b/source/common/x86/pixel.hMon Nov 18 19:53:41 2013 +0530 @@ -365,5 +365,6 @@ void x265_getResidual8_sse2(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride); void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride); void x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride); +void x265_transpose4_sse2(pixel *dest, pixel 
*src, intptr_t stride); #endif // ifndef X265_I386_PIXEL_H ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] TComYuv::copyPartToPartYuv, asm code intergration for blockcopy_ps
# HG changeset patch # User Praveen Tiwari # Date 1384788645 -19800 # Node ID 49a556cf22721d846a94e07c1933fcd092b898dd # Parent 59646d515e79b4d0f9a3a72c77c7af17a83bf3d9 TComYuv::copyPartToPartYuv, asm code intergration for blockcopy_ps diff -r 59646d515e79 -r 49a556cf2272 source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 20:53:29 2013 +0530 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 21:00:45 2013 +0530 @@ -256,10 +256,12 @@ void TComYuv::copyPartToPartYuv(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) { +int part = partitionFromSizes(width, height); + if (bLuma) -copyPartToPartLuma(dstPicYuv, partIdx, width, height); +copyPartToPartLuma(dstPicYuv, partIdx, part); if (bChroma) -copyPartToPartChroma(dstPicYuv, partIdx, width m_hChromaShift, height m_vChromaShift); +copyPartToPartChroma(dstPicYuv, partIdx, part); } void TComYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t part) @@ -275,7 +277,7 @@ primitives.luma_copy_pp[part](dst, dststride, src, srcstride); } -void TComYuv::copyPartToPartLuma(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height) +void TComYuv::copyPartToPartLuma(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t part) { Pel* src = getLumaAddr(partIdx); int16_t* dst = dstPicYuv-getLumaAddr(partIdx); @@ -283,7 +285,6 @@ uint32_t srcstride = getStride(); uint32_t dststride = dstPicYuv-m_width; -int part = partitionFromSizes(width, height); primitives.luma_copy_ps[part](dst, dststride, src, srcstride); } @@ -303,7 +304,7 @@ primitives.chroma_copy_pp[m_csp][part](dstV, dststride, srcV, srcstride); } -void TComYuv::copyPartToPartChroma(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height) +void TComYuv::copyPartToPartChroma(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t part) { Pel* srcU = getCbAddr(partIdx); Pel* srcV = getCrAddr(partIdx); @@ -313,8 +314,8 @@ uint32_t srcstride = getCStride(); uint32_t 
dststride = dstPicYuv-m_cwidth; -primitives.blockcpy_sp(width, height, dstU, dststride, srcU, srcstride); -primitives.blockcpy_sp(width, height, dstV, dststride, srcV, srcstride); +primitives.chroma_copy_ps[m_csp][part](dstU, dststride, srcU, srcstride); +primitives.chroma_copy_ps[m_csp][part](dstV, dststride, srcV, srcstride); } void TComYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, uint32_t chromaId) diff -r 59646d515e79 -r 49a556cf2272 source/Lib/TLibCommon/TComYuv.h --- a/source/Lib/TLibCommon/TComYuv.h Mon Nov 18 20:53:29 2013 +0530 +++ b/source/Lib/TLibCommon/TComYuv.h Mon Nov 18 21:00:45 2013 +0530 @@ -140,9 +140,9 @@ voidcopyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, bool bLuma = true, bool bChroma = true); voidcopyPartToPartYuv(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, bool bLuma = true, bool bChroma = true); voidcopyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t part); -voidcopyPartToPartLuma(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height); +voidcopyPartToPartLuma(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t part); voidcopyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t part); -voidcopyPartToPartChroma(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height); +voidcopyPartToPartChroma(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t part); voidcopyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, uint32_t chromaId); voidcopyPartToPartChroma(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, uint32_t chromaId); diff -r 59646d515e79 -r 49a556cf2272 source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Nov 18 20:53:29 2013 +0530 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Nov 18 21:00:45 2013 +0530 @@ -1126,12 +1126,12 @@ } //= copy reconstruction = 
-m_qtTempTransformSkipTComYuv.copyPartToPartLuma(m_qtTempTComYuv[qtlayer], absPartIdx, 1 trSizeLog2, 1 trSizeLog2); +int part = partitionFromSizes(1 trSizeLog2, 1 trSizeLog2); +m_qtTempTransformSkipTComYuv.copyPartToPartLuma(m_qtTempTComYuv[qtlayer], absPartIdx, part); if (!bLumaOnly !bSkipChroma) { -uint32_t trSizeCLog2 = (bChromaSame ? trSizeLog2 : trSizeLog2 - 1); - m_qtTempTransformSkipTComYuv.copyPartToPartChroma(m_qtTempTComYuv[qtlayer], absPartIdx, 1 trSizeCLog2, 1 trSizeCLog2); + m_qtTempTransformSkipTComYuv.copyPartToPartChroma(m_qtTempTComYuv[qtlayer], absPartIdx, part); } uint32_t zOrder = cu-getZorderIdxInCU() +
Re: [x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames
On Nov 18, 2013, at 4:10 AM, Gopu Govindaswamy g...@multicorewareinc.com wrote: # HG changeset patch # User Gopu Govindaswamy g...@multicorewareinc.com # Date 1384769433 -19800 # Node ID 1e22b93638072ed805478d7af17f90e285fb4969 # Parent 2321ebe0bf64e5f3c0034076c7edb3ecbcd48039 b-pyramid implementation: Allow the use of B-frames as references for non B and B frames when we enable the b-pyramid the bitrates efficienctly reduced and there is not much diff in the performance and the PSNR 00. increased some of the clips and decreased some of clips Test results for reference when enable and disable the b-pyramid: cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --ref=1 --b-adapt=2 Enable B-reference : --b-pyramid=1 Disable B-reference : --b-pyramid=0 Results: Enable / Disable clip - FourPeople_1280x720_60.yuv Total time taken - 9.70s (10.31 fps) / 9.93s (10.07 fps) Bitrates - 516.30 kb/s / 544.68 kb/s PSNR - 39.725 / 39.701 clip - BasketballDrive_1920x1080_50.y4m Total time taken - 39.06s (2.51 fps) / 38.98s (2.57 fps) Bitrates - 4166.92 kb/s / 4370.43 kb/s PSNR - 37.261 / 37.268 clip - Johnny_1280x720_60.y4m Total time taken - 8.88s (11.27 fps) / 11.08s (9.03 fps) Bitrates - 304.29 kb/s / 328.84 kb/s PSNR - 40.605 / 40.551 Total time taken - 30.97s (3.23 fps) / 33.65s (2.97 fps) Bitrates - 3496.84 kb/s / 3683.93 kb/s PSNR - 35.645 / 35.660 diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.cpp --- a/source/common/common.cppMon Nov 18 11:32:06 2013 +0530 +++ b/source/common/common.cppMon Nov 18 15:40:33 2013 +0530 @@ -54,6 +54,7 @@ static int parseCspName(const char *arg, int error); static int parseName(const char *arg, const char * const * names, int error); +static int parse_enum(const char *, const char * const * names, int *dst); using namespace x265; @@ -165,6 +166,7 @@ param-bframes = 3; param-lookaheadDepth = 40; param-bFrameAdaptive = X265_B_ADAPT_FAST; +param-bpyramid = 0; param-scenecutThreshold = 40; /* Magic number pulled in from x264*/ /* Intra 
Coding Tools */ @@ -532,7 +534,7 @@ } CHECK(param-bEnableWavefront 0, WaveFrontSynchro cannot be negative); - +CHECK(param-bpyramid = 2, b-pyramid is 0 or 1); return check_failed; } @@ -620,6 +622,7 @@ x265_log(param, X265_LOG_INFO, RDpenalty: %d\n, param-rdPenalty); } x265_log(param, X265_LOG_INFO, Lookahead / bframes / badapt : %d / %d / %d\n, param-lookaheadDepth, param-bframes, param-bFrameAdaptive); +x265_log(param, X265_LOG_INFO, b-pyramid / weightp / ref: %d / %d / %d\n, param-bpyramid, param-bEnableWeightedPred, param-maxNumReferences); x265_log(param, X265_LOG_INFO, tools: ); #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, %s , STR) TOOLOPT(param-bEnableRectInter, rect); @@ -628,7 +631,6 @@ TOOLOPT(param-bEnableConstrainedIntra, cip); TOOLOPT(param-bEnableEarlySkip, esd); fprintf(stderr, rd=%d , param-rdLevel); -fprintf(stderr, ref=%d , param-maxNumReferences); TOOLOPT(param-bEnableLoopFilter, lft); if (param-bEnableSAO) @@ -650,7 +652,6 @@ else fprintf(stderr, tskip ); } -TOOLOPT(param-bEnableWeightedPred, weightp); TOOLOPT(param-bEnableWeightedBiPred, weightbp); TOOLOPT(param-rc.aqMode, aq); fprintf(stderr, \n); @@ -747,6 +748,15 @@ } OPT(input-csp) p-sourceCsp = ::parseCspName(value, berror); OPT(me)p-searchMethod = ::parseName(value, x265_motion_est_names, berror); +OPT(b-pyramid) +{ +berror |= parse_enum(value, x265_b_pyramid_names, p-bpyramid); don't add a new function for this, use b-pyramid = ::parseName(value, x265_b_pyramid_names); that helper function already does the atoi() fallback check +if (berror) +{ +berror = 0; +p-bpyramid = atoi(value); +} +} else return X265_PARAM_BAD_NAME; #undef OPT @@ -802,6 +812,7 @@ BOOL(p-bEnableSAO, sao); s += sprintf(s, sao-lcu-bounds=%d, p-saoLcuBoundary); s += sprintf(s, sao-lcu-opt=%d, p-saoLcuBasedOptimization); +s += sprintf(s, b-pyramid=%d, p-bpyramid); #undef BOOL return buf; @@ -843,3 +854,13 @@ error = 1; return a; } +static int parse_enum(const char *arg, const char * const * names, int *dst) +{ 
+for (int i = 0; names[i]; i++) +if (!strcmp(arg, names[i])) +{ +*dst = i; +return 0; +} +return -1; +} diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.h --- a/source/common/common.h Mon Nov 18 11:32:06 2013 +0530 +++ b/source/common/common.h Mon Nov 18 15:40:33 2013 +0530 @@ -107,6 +107,7 @@ #define X265_LOG2(x) log2(x) #endif +static const char * const x265_b_pyramid_names[] = {none, normal, 0}; /* defined in common.cpp */ int64_t
[x265] [PATCH] TComYuv::copyPartToPartChroma, blockcopy_pp asm integration
# HG changeset patch # User Praveen Tiwari # Date 1384790206 -19800 # Node ID a5f618af8d963efafaa8581f4484066b13f4f614 # Parent 49a556cf22721d846a94e07c1933fcd092b898dd TComYuv::copyPartToPartChroma, blockcopy_pp asm integration diff -r 49a556cf2272 -r a5f618af8d96 source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 21:00:45 2013 +0530 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 21:26:46 2013 +0530 @@ -327,7 +327,8 @@ if (srcU == dstU) return; uint32_t srcstride = getCStride(); uint32_t dststride = dstPicYuv-getCStride(); -primitives.blockcpy_pp(width, height, dstU, dststride, srcU, srcstride); +int part = partitionFromSizes(width 1, height 1); +primitives.chroma_copy_pp[m_csp][part](dstU, dststride, srcU, srcstride); } else if (chromaId == 1) { @@ -336,7 +337,8 @@ if (srcV == dstV) return; uint32_t srcstride = getCStride(); uint32_t dststride = dstPicYuv-getCStride(); -primitives.blockcpy_pp(width, height, dstV, dststride, srcV, srcstride); +int part = partitionFromSizes(width 1, height 1); +primitives.chroma_copy_pp[m_csp][part](dstV, dststride, srcV, srcstride); } else { @@ -347,8 +349,9 @@ if (srcU == dstU srcV == dstV) return; uint32_t srcstride = getCStride(); uint32_t dststride = dstPicYuv-getCStride(); -primitives.blockcpy_pp(width, height, dstU, dststride, srcU, srcstride); -primitives.blockcpy_pp(width, height, dstV, dststride, srcV, srcstride); +int part = partitionFromSizes(width 1, height 1); +primitives.chroma_copy_pp[m_csp][part](dstU, dststride, srcU, srcstride); +primitives.chroma_copy_pp[m_csp][part](dstV, dststride, srcV, srcstride); } } ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] asm integration for blockcopy_ps
# HG changeset patch # User Praveen Tiwari # Date 1384791507 -19800 # Node ID 4c5daf21c1583cae93dbdf404a1b68aeced6b690 # Parent a5f618af8d963efafaa8581f4484066b13f4f614 asm integration for blockcopy_ps diff -r a5f618af8d96 -r 4c5daf21c158 source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 21:26:46 2013 +0530 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 21:48:27 2013 +0530 @@ -365,7 +365,8 @@ uint32_t srcstride = getCStride(); uint32_t dststride = dstPicYuv-m_cwidth; -primitives.blockcpy_sp(width, height, dstU, dststride, srcU, srcstride); +int part = partitionFromSizes(width 1, height 1); +primitives.chroma_copy_ps[m_csp][part](dstU, dststride, srcU, srcstride); } else if (chromaId == 1) { @@ -375,7 +376,8 @@ uint32_t srcstride = getCStride(); uint32_t dststride = dstPicYuv-m_cwidth; -primitives.blockcpy_sp(width, height, dstV, dststride, srcV, srcstride); +int part = partitionFromSizes(width 1, height 1); +primitives.chroma_copy_ps[m_csp][part](dstV, dststride, srcV, srcstride); } else { @@ -387,8 +389,9 @@ uint32_t srcstride = getCStride(); uint32_t dststride = dstPicYuv-m_cwidth; -primitives.blockcpy_sp(width, height, dstU, dststride, srcU, srcstride); -primitives.blockcpy_sp(width, height, dstV, dststride, srcV, srcstride); +int part = partitionFromSizes(width 1, height 1); +primitives.chroma_copy_ps[m_csp][part](dstU, dststride, srcU, srcstride); +primitives.chroma_copy_ps[m_csp][part](dstV, dststride, srcV, srcstride); } } ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] cli: add aq-strength to cli input options, add validations for aq mode
# HG changeset patch # User Aarthi Thirumalaiaar...@multicorewareinc.com # Date 1384792447 -19800 # Mon Nov 18 22:04:07 2013 +0530 # Node ID 8b9afa5556b315391df143e5fb6e8f3eedd17bc5 # Parent 2321ebe0bf64e5f3c0034076c7edb3ecbcd48039 cli: add aq-strength to cli input options, add validations for aq mode diff -r 2321ebe0bf64 -r 8b9afa5556b3 source/common/common.cpp --- a/source/common/common.cpp Mon Nov 18 11:32:06 2013 +0530 +++ b/source/common/common.cpp Mon Nov 18 22:04:07 2013 +0530 @@ -519,8 +519,10 @@ max consecutive bframe count must be 16 or smaller); CHECK(param-lookaheadDepth X265_LOOKAHEAD_MAX, Lookahead depth must be less than 256); -CHECK(param-rc.aqModeX265_AQ_NONE || param-rc.aqMode X265_AQ_VARIANCE, +CHECK(param-rc.aqMode X265_AQ_NONE || param-rc.aqMode X265_AQ_VARIANCE, Aq-Mode is out of range); +CHECK(param-rc.aqStrength 0 || param-rc.aqStrength 3, + Aq-Strength is out of range); // max CU size should be power of 2 uint32_t i = param-maxCUSize; @@ -532,6 +534,16 @@ } CHECK(param-bEnableWavefront 0, WaveFrontSynchro cannot be negative); +if (param-rc.rateControlMode == X265_RC_CQP) +{ +param-rc.aqMode = X265_AQ_NONE; +param-rc.bitrate = 0; +} +if (param-rc.aqStrength == 0) +{ +x265_log(param, X265_LOG_WARNING, Aq mode specified, but Aq strength is 0, ignored\n ); +param-rc.aqMode = 0; +} return check_failed; } @@ -652,7 +664,8 @@ } TOOLOPT(param-bEnableWeightedPred, weightp); TOOLOPT(param-bEnableWeightedBiPred, weightbp); -TOOLOPT(param-rc.aqMode, aq); +TOOLOPT(param-rc.aqMode, aq-mode); +fprintf(stderr, aq-strength=%.2f , param-rc.aqStrength); fprintf(stderr, \n); fflush(stderr); } @@ -729,6 +742,7 @@ OPT(psnr) p-bEnablePsnr = bvalue; OPT(hash) p-decodedPictureHashSEI = atoi(value); OPT(aq-mode) p-rc.aqMode = atoi(value); +OPT(aq-strength) p-rc.aqStrength = atof(value); OPT(crf) { p-rc.rfConstant = atof(value); @@ -794,6 +808,8 @@ BOOL(p-bEnableWeightedPred, weightp); s += sprintf(s, bitrate=%d, p-rc.bitrate); s += sprintf(s, qp=%d, p-rc.qp); +s += 
sprintf(s, aq-mode=%d, p-rc.aqMode); +s += sprintf(s, aq-strength=%.2f, p-rc.aqStrength); s += sprintf(s, cbqpoffs=%d, p-cbQpOffset); s += sprintf(s, crqpoffs=%d, p-crQpOffset); s += sprintf(s, rd=%d, p-rdLevel); diff -r 2321ebe0bf64 -r 8b9afa5556b3 source/x265.cpp --- a/source/x265.cpp Mon Nov 18 11:32:06 2013 +0530 +++ b/source/x265.cpp Mon Nov 18 22:04:07 2013 +0530 @@ -123,6 +123,7 @@ { bitrate,required_argument, NULL, 0 }, { qp, required_argument, NULL, 'q' }, { aq-mode,required_argument, NULL, 0 }, +{ aq-strength,required_argument, NULL, 0 }, { cbqpoffs, required_argument, NULL, 0 }, { crqpoffs, required_argument, NULL, 0 }, { rd, required_argument, NULL, 0 }, @@ -310,6 +311,7 @@ H0( --crf Quality-based VBR (0-51). Default %f\n, param-rc.rfConstant); H0(-q/--qp Base QP for CQP mode. Default %d\n, param-rc.qp); H0( --aq-mode Mode for Adaptive Quantization - 0:none 1:aqVariance Default %d\n, param-rc.aqMode); +H0( --aq-strength Reduces blocking and blurring in flat and textured areas.(0 to 3.0)double . Default %f\n, param-rc.aqStrength); H0( --cbqpoffsChroma Cb QP Offset. Default %d\n, param-cbQpOffset); H0( --crqpoffsChroma Cr QP Offset. Default %d\n, param-crQpOffset); H0( --rd Level of RD in mode decision 0:least2:full RDO. Default %d\n, param-rdLevel); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] added csp support for blockcopy_ps
On Nov 18, 2013, at 9:23 AM, prav...@multicorewareinc.com wrote: # HG changeset patch # User Praveen Tiwari # Date 1384788209 -19800 # Node ID 59646d515e79b4d0f9a3a72c77c7af17a83bf3d9 # Parent b353d170c54f0e33a8869c413be226a48deb1f5c added csp support for blpckcopy_ps diff -r b353d170c54f -r 59646d515e79 source/common/pixel.cpp --- a/source/common/pixel.cpp Mon Nov 18 19:34:07 2013 +0530 +++ b/source/common/pixel.cpp Mon Nov 18 20:53:29 2013 +0530 @@ -837,7 +837,7 @@ #define CHROMA(W, H) \ p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_cW, H; \ p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_cW, H; \ -p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\ +p.chroma_copy_ps[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\ p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_cW, H; #define LUMA(W, H) \ diff -r b353d170c54f -r 59646d515e79 source/common/primitives.h --- a/source/common/primitives.h Mon Nov 18 19:34:07 2013 +0530 +++ b/source/common/primitives.h Mon Nov 18 20:53:29 2013 +0530 @@ -247,7 +247,7 @@ copy_sp_t luma_copy_sp[NUM_LUMA_PARTITIONS]; copy_sp_t chroma_copy_sp[NUM_CHROMA_PARTITIONS]; copy_ps_t luma_copy_ps[NUM_LUMA_PARTITIONS]; -copy_ps_t chroma_copy_ps[NUM_CHROMA_PARTITIONS]; +copy_ps_t chroma_copy_ps[NUM_CSP][NUM_CHROMA_PARTITIONS]; pixel_sub_ps_t luma_sub_ps[NUM_LUMA_PARTITIONS]; pixel_sub_ps_t chroma_sub_ps[NUM_CHROMA_PARTITIONS]; diff -r b353d170c54f -r 59646d515e79 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cppMon Nov 18 19:34:07 2013 +0530 +++ b/source/common/x86/asm-primitives.cppMon Nov 18 20:53:29 2013 +0530 @@ -141,7 +141,6 @@ p.chroma_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \ p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \ p.chroma_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \ -p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = x265_blockcopy_ps_ ## W 
## x ## H ## cpu; \ p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; #define SETUP_CHROMA_SP_FUNC_DEF(W, H, cpu) \ @@ -380,6 +379,36 @@ SETUP_LUMA_BLOCKCOPY_FUNC_DEF(64, 16, cpu); \ SETUP_LUMA_BLOCKCOPY_FUNC_DEF(16, 64, cpu); +#define SETUP_CHROMA_FROM_LUMA_SSE4(W1, H1, W2, H2, cpu) \ +p.chroma_copy_ps[X265_CSP_I420][LUMA_ ## W1 ## x ## H1] = x265_blockcopy_ps_ ## W2 ## x ## H2 ## cpu; + +// For X265_CSP_I420 chroma width and height will be half of luma width and height +#define CHROMA_BLOCKCOPY_SSE4(cpu) \ When the macro accepts a cpu type argument, adding SSE4 to the name is redundant (and confusing) there should probably be a generic I420 macro that maps luma blocks to I420 blocks so adding more color spaces does not multiply amount of code in this file +SETUP_CHROMA_FROM_LUMA_SSE4(8, 8, 4, 4, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(8, 4, 4, 2, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(4, 8, 2, 4, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(16, 16, 8, 8, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(16, 8, 8, 4, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(8, 16, 4, 8, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(16, 12, 8, 6, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(12, 16, 6, 8, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(16, 4, 8, 2, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(4, 16, 2, 8, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(32, 32, 16, 16, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(32, 16, 16, 8, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(16, 32, 8, 16, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(32, 24, 16, 12, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(24, 32, 12, 16, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(32, 8, 16, 4, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(8, 32, 4, 16, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(64, 64, 32, 32, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(64, 32, 32, 16, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(32, 64, 16, 32, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(64, 48, 32, 24, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(48, 64, 24, 32, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(64, 16, 32, 8, cpu); \ 
+SETUP_CHROMA_FROM_LUMA_SSE4(16, 64, 8, 32, cpu); + using namespace x265; namespace { @@ -591,6 +620,7 @@ CHROMA_FILTERS(_sse4); LUMA_FILTERS(_sse4); HEVC_SATD(sse4); +CHROMA_BLOCKCOPY_SSE4(_sse4); p.chroma_copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4; p.chroma_copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4; p.chroma_copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4; diff -r b353d170c54f -r 59646d515e79 source/test/pixelharness.cpp --- a/source/test/pixelharness.cppMon Nov 18 19:34:07 2013 +0530 +++ b/source/test/pixelharness.cppMon Nov 18 20:53:29 2013 +0530 @@ -763,12 +763,15 @@ } } -if (opt.chroma_copy_ps[part]) +
Re: [x265] [PATCH] added csp support for blockcopy_ps
On Nov 18, 2013, at 9:23 AM, prav...@multicorewareinc.com wrote: # HG changeset patch # User Praveen Tiwari # Date 1384788209 -19800 # Node ID 59646d515e79b4d0f9a3a72c77c7af17a83bf3d9 # Parent b353d170c54f0e33a8869c413be226a48deb1f5c added csp support for blpckcopy_ps diff -r b353d170c54f -r 59646d515e79 source/common/pixel.cpp --- a/source/common/pixel.cpp Mon Nov 18 19:34:07 2013 +0530 +++ b/source/common/pixel.cpp Mon Nov 18 20:53:29 2013 +0530 @@ -837,7 +837,7 @@ #define CHROMA(W, H) \ p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_cW, H; \ p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_cW, H; \ -p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\ +p.chroma_copy_ps[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\ p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_cW, H; #define LUMA(W, H) \ diff -r b353d170c54f -r 59646d515e79 source/common/primitives.h --- a/source/common/primitives.h Mon Nov 18 19:34:07 2013 +0530 +++ b/source/common/primitives.h Mon Nov 18 20:53:29 2013 +0530 @@ -247,7 +247,7 @@ copy_sp_t luma_copy_sp[NUM_LUMA_PARTITIONS]; copy_sp_t chroma_copy_sp[NUM_CHROMA_PARTITIONS]; copy_ps_t luma_copy_ps[NUM_LUMA_PARTITIONS]; -copy_ps_t chroma_copy_ps[NUM_CHROMA_PARTITIONS]; +copy_ps_t chroma_copy_ps[NUM_CSP][NUM_CHROMA_PARTITIONS]; pixel_sub_ps_t luma_sub_ps[NUM_LUMA_PARTITIONS]; pixel_sub_ps_t chroma_sub_ps[NUM_CHROMA_PARTITIONS]; diff -r b353d170c54f -r 59646d515e79 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cppMon Nov 18 19:34:07 2013 +0530 +++ b/source/common/x86/asm-primitives.cppMon Nov 18 20:53:29 2013 +0530 @@ -141,7 +141,6 @@ p.chroma_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \ p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \ p.chroma_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \ -p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = x265_blockcopy_ps_ ## W 
## x ## H ## cpu; \ p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; #define SETUP_CHROMA_SP_FUNC_DEF(W, H, cpu) \ @@ -380,6 +379,36 @@ SETUP_LUMA_BLOCKCOPY_FUNC_DEF(64, 16, cpu); \ SETUP_LUMA_BLOCKCOPY_FUNC_DEF(16, 64, cpu); +#define SETUP_CHROMA_FROM_LUMA_SSE4(W1, H1, W2, H2, cpu) \ +p.chroma_copy_ps[X265_CSP_I420][LUMA_ ## W1 ## x ## H1] = x265_blockcopy_ps_ ## W2 ## x ## H2 ## cpu; + +// For X265_CSP_I420 chroma width and height will be half of luma width and height +#define CHROMA_BLOCKCOPY_SSE4(cpu) \ When the macro accepts a cpu type argument, adding SSE4 to the name is redundant (and confusing) there should probably be a generic I420 macro that maps luma blocks to I420 blocks so adding more color spaces does not multiply amount of code in this file +SETUP_CHROMA_FROM_LUMA_SSE4(8, 8, 4, 4, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(8, 4, 4, 2, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(4, 8, 2, 4, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(16, 16, 8, 8, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(16, 8, 8, 4, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(8, 16, 4, 8, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(16, 12, 8, 6, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(12, 16, 6, 8, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(16, 4, 8, 2, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(4, 16, 2, 8, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(32, 32, 16, 16, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(32, 16, 16, 8, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(16, 32, 8, 16, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(32, 24, 16, 12, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(24, 32, 12, 16, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(32, 8, 16, 4, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(8, 32, 4, 16, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(64, 64, 32, 32, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(64, 32, 32, 16, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(32, 64, 16, 32, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(64, 48, 32, 24, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(48, 64, 24, 32, cpu); \ +SETUP_CHROMA_FROM_LUMA_SSE4(64, 16, 32, 8, cpu); \ 
+SETUP_CHROMA_FROM_LUMA_SSE4(16, 64, 8, 32, cpu); + using namespace x265; namespace { @@ -591,6 +620,7 @@ CHROMA_FILTERS(_sse4); LUMA_FILTERS(_sse4); HEVC_SATD(sse4); +CHROMA_BLOCKCOPY_SSE4(_sse4); p.chroma_copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4; p.chroma_copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4; p.chroma_copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4; diff -r b353d170c54f -r 59646d515e79 source/test/pixelharness.cpp --- a/source/test/pixelharness.cppMon Nov 18 19:34:07 2013 +0530 +++ b/source/test/pixelharness.cppMon Nov 18 20:53:29 2013 +0530 @@ -763,12 +763,15 @@ } } -if
Re: [x265] [PATCH] TComYuv::copyPartToPartChroma, blockcopy_pp asm integration
On Nov 18, 2013, at 9:56 AM, prav...@multicorewareinc.com wrote: # HG changeset patch # User Praveen Tiwari # Date 1384790206 -19800 # Node ID a5f618af8d963efafaa8581f4484066b13f4f614 # Parent 49a556cf22721d846a94e07c1933fcd092b898dd TComYuv::copyPartToPartChroma, blockcopy_pp asm integration diff -r 49a556cf2272 -r a5f618af8d96 source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 21:00:45 2013 +0530 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 21:26:46 2013 +0530 @@ -327,7 +327,8 @@ if (srcU == dstU) return; uint32_t srcstride = getCStride(); uint32_t dststride = dstPicYuv-getCStride(); -primitives.blockcpy_pp(width, height, dstU, dststride, srcU, srcstride); +int part = partitionFromSizes(width 1, height 1); you can't make those kinds of assumptions about relative chroma/luma size; can you not just use m_part? +primitives.chroma_copy_pp[m_csp][part](dstU, dststride, srcU, srcstride); } else if (chromaId == 1) { @@ -336,7 +337,8 @@ if (srcV == dstV) return; uint32_t srcstride = getCStride(); uint32_t dststride = dstPicYuv-getCStride(); -primitives.blockcpy_pp(width, height, dstV, dststride, srcV, srcstride); +int part = partitionFromSizes(width 1, height 1); +primitives.chroma_copy_pp[m_csp][part](dstV, dststride, srcV, srcstride); } else { @@ -347,8 +349,9 @@ if (srcU == dstU srcV == dstV) return; uint32_t srcstride = getCStride(); uint32_t dststride = dstPicYuv-getCStride(); -primitives.blockcpy_pp(width, height, dstU, dststride, srcU, srcstride); -primitives.blockcpy_pp(width, height, dstV, dststride, srcV, srcstride); +int part = partitionFromSizes(width 1, height 1); +primitives.chroma_copy_pp[m_csp][part](dstU, dststride, srcU, srcstride); +primitives.chroma_copy_pp[m_csp][part](dstV, dststride, srcV, srcstride); } } ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel signature.asc Description: Message signed with OpenPGP using GPGMail ___ x265-devel mailing list 
x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] asm integration for blockcopy_ps
On Nov 18, 2013, at 10:18 AM, prav...@multicorewareinc.com wrote: # HG changeset patch # User Praveen Tiwari # Date 1384791507 -19800 # Node ID 4c5daf21c1583cae93dbdf404a1b68aeced6b690 # Parent a5f618af8d963efafaa8581f4484066b13f4f614 asm integration for blockcopy_ps diff -r a5f618af8d96 -r 4c5daf21c158 source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 21:26:46 2013 +0530 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 21:48:27 2013 +0530 @@ -365,7 +365,8 @@ uint32_t srcstride = getCStride(); uint32_t dststride = dstPicYuv-m_cwidth; -primitives.blockcpy_sp(width, height, dstU, dststride, srcU, srcstride); +int part = partitionFromSizes(width 1, height 1); same here, you can't assume luma partitions are twice the width/height of chroma +primitives.chroma_copy_ps[m_csp][part](dstU, dststride, srcU, srcstride); } else if (chromaId == 1) { @@ -375,7 +376,8 @@ uint32_t srcstride = getCStride(); uint32_t dststride = dstPicYuv-m_cwidth; -primitives.blockcpy_sp(width, height, dstV, dststride, srcV, srcstride); +int part = partitionFromSizes(width 1, height 1); +primitives.chroma_copy_ps[m_csp][part](dstV, dststride, srcV, srcstride); } else { @@ -387,8 +389,9 @@ uint32_t srcstride = getCStride(); uint32_t dststride = dstPicYuv-m_cwidth; -primitives.blockcpy_sp(width, height, dstU, dststride, srcU, srcstride); -primitives.blockcpy_sp(width, height, dstV, dststride, srcV, srcstride); +int part = partitionFromSizes(width 1, height 1); +primitives.chroma_copy_ps[m_csp][part](dstU, dststride, srcU, srcstride); +primitives.chroma_copy_ps[m_csp][part](dstV, dststride, srcV, srcstride); } } ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel signature.asc Description: Message signed with OpenPGP using GPGMail ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] added csp support for blockcopy_ps
At 2013-11-19 04:42:21,Steve Borho st...@borho.org wrote: On Nov 18, 2013, at 9:23 AM, prav...@multicorewareinc.com wrote: # HG changeset patch # User Praveen Tiwari # Date 1384788209 -19800 # Node ID 59646d515e79b4d0f9a3a72c77c7af17a83bf3d9 # Parent b353d170c54f0e33a8869c413be226a48deb1f5c added csp support for blpckcopy_ps diff -r b353d170c54f -r 59646d515e79 source/common/pixel.cpp --- a/source/common/pixel.cpp Mon Nov 18 19:34:07 2013 +0530 +++ b/source/common/pixel.cpp Mon Nov 18 20:53:29 2013 +0530 @@ -837,7 +837,7 @@ #define CHROMA(W, H) \ p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_cW, H; \ p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_cW, H; \ -p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\ +p.chroma_copy_ps[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_ps_cW, H;\ p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_cW, H; #define LUMA(W, H) \ diff -r b353d170c54f -r 59646d515e79 source/common/primitives.h --- a/source/common/primitives.h Mon Nov 18 19:34:07 2013 +0530 +++ b/source/common/primitives.h Mon Nov 18 20:53:29 2013 +0530 @@ -247,7 +247,7 @@ copy_sp_t luma_copy_sp[NUM_LUMA_PARTITIONS]; copy_sp_t chroma_copy_sp[NUM_CHROMA_PARTITIONS]; copy_ps_t luma_copy_ps[NUM_LUMA_PARTITIONS]; -copy_ps_t chroma_copy_ps[NUM_CHROMA_PARTITIONS]; +copy_ps_t chroma_copy_ps[NUM_CSP][NUM_CHROMA_PARTITIONS]; pixel_sub_ps_t luma_sub_ps[NUM_LUMA_PARTITIONS]; pixel_sub_ps_t chroma_sub_ps[NUM_CHROMA_PARTITIONS]; diff -r b353d170c54f -r 59646d515e79 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Mon Nov 18 19:34:07 2013 +0530 +++ b/source/common/x86/asm-primitives.cpp Mon Nov 18 20:53:29 2013 +0530 @@ -141,7 +141,6 @@ p.chroma_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \ p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \ p.chroma_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \ 
-p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = x265_blockcopy_ps_ ## W ## x ## H ## cpu; \ p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; #define SETUP_CHROMA_SP_FUNC_DEF(W, H, cpu) \ @@ -380,6 +379,36 @@ SETUP_LUMA_BLOCKCOPY_FUNC_DEF(64, 16, cpu); \ SETUP_LUMA_BLOCKCOPY_FUNC_DEF(16, 64, cpu); +#define SETUP_CHROMA_FROM_LUMA_SSE4(W1, H1, W2, H2, cpu) \ +p.chroma_copy_ps[X265_CSP_I420][LUMA_ ## W1 ## x ## H1] = x265_blockcopy_ps_ ## W2 ## x ## H2 ## cpu; + +// For X265_CSP_I420 chroma width and height will be half of luma width and height +#define CHROMA_BLOCKCOPY_SSE4(cpu) \ When the macro accepts a cpu type argument, adding SSE4 to the name is redundant (and confusing) there should probably be a generic I420 macro that maps luma blocks to I420 blocks so adding more color spaces does not multiply amount of code in this file Steve gave us a good idea; we may modify the macro to SETUP_CHROMA_FROM_LUMA(W1, H1, W2, H2, cpu, csp) ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH Review only] asm: code for transpose_8x8 routine
# HG changeset patch # User Murugan Vairavel muru...@multicorewareinc.com # Date 1384842189 -19800 # Tue Nov 19 11:53:09 2013 +0530 # Node ID 3a94cc365533bf7def255dc5b28e6a6a1d1bfa50 # Parent f6a050b79cfa400aa432f49ee8a4c2b9f20cf930 asm: code for transpose_8x8 routine diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Tue Nov 19 11:25:00 2013 +0530 +++ b/source/common/x86/asm-primitives.cpp Tue Nov 19 11:53:09 2013 +0530 @@ -546,6 +546,7 @@ p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2; p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2; p.transpose[BLOCK_4x4] = x265_transpose4_sse2; +p.transpose[BLOCK_8x8] = x265_transpose8_sse2; } if (cpuMask X265_CPU_SSSE3) { diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/pixel-a.asm --- a/source/common/x86/pixel-a.asm Tue Nov 19 11:25:00 2013 +0530 +++ b/source/common/x86/pixel-a.asm Tue Nov 19 11:53:09 2013 +0530 @@ -8359,3 +8359,45 @@ movu [r0],m0 RET + +;- +; void transpose_8x8(pixel *dst, pixel *src, intptr_t stride) +;- +INIT_XMM sse2 +cglobal transpose8, 3, 3, 8, dest, src, stride + +movh m0,[r1] +movh m1,[r1 + r2] +movh m2,[r1 + 2 * r2] +lea r1,[r1 + 2 * r2] +movh m3,[r1 + r2] +movh m4,[r1 + 2 * r2] +lea r1,[r1 + 2 * r2] +movh m5,[r1 + r2] +movh m6,[r1 + 2 * r2] +lea r1,[r1 + 2 * r2] +movh m7,[r1 + r2] + +punpcklbwm0,m1 +punpcklbwm2,m3 +punpcklbwm4,m5 +punpcklbwm6,m7 +movu m1,m0 +punpcklwdm0,m2 +punpckhwdm1,m2 +movu m5,m4 +punpcklwdm4,m6 +punpckhwdm5,m6 +movu m2,m0 +punpckldqm0,m4 +punpckhdqm2,m4 +movu m3,m1 +punpckldqm1,m5 +punpckhdqm3,m5 + +movu [r0], m0 +movu [r0 + 16],m2 +movu [r0 + 32],m1 +movu [r0 + 48],m3 + +RET diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/pixel.h --- a/source/common/x86/pixel.h Tue Nov 19 11:25:00 2013 +0530 +++ b/source/common/x86/pixel.h Tue Nov 19 11:53:09 2013 +0530 @@ -366,5 +366,6 @@ void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride); void 
x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride); void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride); +void x265_transpose8_sse2(pixel *dest, pixel *src, intptr_t stride); #endif // ifndef X265_I386_PIXEL_H ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames
# HG changeset patch # User Gopu Govindaswamy g...@multicorewareinc.com # Date 1384842731 -19800 # Node ID c386acea7ba1ca48f32060f265586618ee744a9f # Parent 2f5f538d2cbca3b46e8d27d860e9787cc19f406f b-pyramid implementation: Allow the use of B-frames as references for non B and B frames When b-pyramid is enabled, the bitrate is reduced and there is not much difference in performance; the PSNR changes only by hundredths of a dB, increasing for some of the clips and decreasing for others. Test results for reference with b-pyramid enabled and disabled: cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --b-adapt=2 Enable B-reference : --b-pyramid=1 Disable B-reference : --b-pyramid=0 Results: Enable / Disable Clips - Johnny_1280x720_60.y4m Total time taken - 11.19s (8.94 fps) / 13.44s (7.44 fps) Bitrates - 303.52 kb/s / 326.79 kb/s PSNR - 40.679 / 40.612 Clips - Cactus_1920x1080_50.y4m Total Time taken - 44.61s (2.24 fps) / 48.23s (2.07 fps) Bitrates - 3420.80 kb/s / 3575.20 kb/s PSNR - 35.709 / 35.726 Clips - BasketballDrive_1920x1080_50.y4m Total time taken - 54.15s (1.85 fps) / 53.72s (1.86 fps) Bitrates - 4114.07 kb/s / 4310.45 kb/s PSNR - 37.283 / 37.290 Clips - FourPeople_1280x720_60 Total time taken - 11.79s (8.48 fps) / 12.16s (8.23 fps) Bitrates - 514.90 kb/s / 539.08 kb/s PSNR - 39.782 / 39.757 diff -r 2f5f538d2cbc -r c386acea7ba1 source/common/common.cpp --- a/source/common/common.cpp Mon Nov 18 16:44:31 2013 -0600 +++ b/source/common/common.cpp Tue Nov 19 12:02:11 2013 +0530 @@ -165,6 +165,7 @@ param-bframes = 3; param-lookaheadDepth = 40; param-bFrameAdaptive = X265_B_ADAPT_FAST; +param-bpyramid = 0; param-scenecutThreshold = 40; /* Magic number pulled in from x264*/ /* Intra Coding Tools */ @@ -634,6 +635,7 @@ x265_log(param, X265_LOG_INFO, RDpenalty: %d\n, param-rdPenalty); } x265_log(param, X265_LOG_INFO, Lookahead / bframes / badapt : %d / %d / %d\n, param-lookaheadDepth, param-bframes, param-bFrameAdaptive); +x265_log(param, X265_LOG_INFO, b-pyramid / weightp / ref: %d / 
%d / %d\n, param-bpyramid, param-bEnableWeightedPred, param-maxNumReferences); x265_log(param, X265_LOG_INFO, tools: ); #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, %s , STR) TOOLOPT(param-bEnableRectInter, rect); @@ -642,7 +644,6 @@ TOOLOPT(param-bEnableConstrainedIntra, cip); TOOLOPT(param-bEnableEarlySkip, esd); fprintf(stderr, rd=%d , param-rdLevel); -fprintf(stderr, ref=%d , param-maxNumReferences); TOOLOPT(param-bEnableLoopFilter, lft); if (param-bEnableSAO) @@ -664,7 +665,6 @@ else fprintf(stderr, tskip ); } -TOOLOPT(param-bEnableWeightedPred, weightp); TOOLOPT(param-bEnableWeightedBiPred, weightbp); TOOLOPT(param-rc.aqMode, aq-mode); if (param-rc.aqMode) @@ -764,6 +764,7 @@ } OPT(input-csp) p-sourceCsp = ::parseCspName(value, berror); OPT(me)p-searchMethod = ::parseName(value, x265_motion_est_names, berror); +OPT(b-pyramid) p-bpyramid = ::parseName(value, x265_b_pyramid_names, berror); else return X265_PARAM_BAD_NAME; #undef OPT @@ -821,6 +822,7 @@ BOOL(p-bEnableSAO, sao); s += sprintf(s, sao-lcu-bounds=%d, p-saoLcuBoundary); s += sprintf(s, sao-lcu-opt=%d, p-saoLcuBasedOptimization); +s += sprintf(s, b-pyramid=%d, p-bpyramid); #undef BOOL return buf; diff -r 2f5f538d2cbc -r c386acea7ba1 source/common/common.h --- a/source/common/common.hMon Nov 18 16:44:31 2013 -0600 +++ b/source/common/common.hTue Nov 19 12:02:11 2013 +0530 @@ -107,6 +107,7 @@ #define X265_LOG2(x) log2(x) #endif +static const char * const x265_b_pyramid_names[] = {none, normal, 0}; /* defined in common.cpp */ int64_t x265_mdate(void); void x265_log(x265_param *param, int level, const char *fmt, ...); diff -r 2f5f538d2cbc -r c386acea7ba1 source/encoder/dpb.cpp --- a/source/encoder/dpb.cppMon Nov 18 16:44:31 2013 -0600 +++ b/source/encoder/dpb.cppTue Nov 19 12:02:11 2013 +0530 @@ -78,7 +78,17 @@ m_lastIDR = pocCurr; } slice-setLastIDR(m_lastIDR); -slice-setReferenced(slice-getSliceType() != B_SLICE); + +if (slice-getSliceType() != B_SLICE) +slice-setReferenced(true); +else +{ +if 
(pic-m_lowres.sliceType == X265_TYPE_BREF) +slice-setReferenced(true); +else +slice-setReferenced(false); +} + slice-setTemporalLayerNonReferenceFlag(!slice-isReferenced()); // Set the nal unit type slice-setNalUnitType(getNalUnitType(pocCurr, m_lastIDR, pic)); diff -r 2f5f538d2cbc -r c386acea7ba1 source/encoder/encoder.cpp --- a/source/encoder/encoder.cppMon Nov 18 16:44:31 2013 -0600 +++ b/source/encoder/encoder.cppTue Nov 19 12:02:11 2013 +0530 @@ -1180,6 +1180,11 @@ { _param-bEnableRDOQTS = 0; } +if (_param-bpyramid !_param-bframes) +{
[x265] [PATCH 1 of 9] api: remove reserved NAL enums and C++ style comments from public header
# HG changeset patch # User Steve Borho st...@borho.org # Date 1384824695 21600 # Mon Nov 18 19:31:35 2013 -0600 # Node ID 85f3089367c11655b4e23c8a2dc6232a6d4934ed # Parent 2f5f538d2cbca3b46e8d27d860e9787cc19f406f api: remove reserved NAL enums and C++ style comments from public header diff -r 2f5f538d2cbc -r 85f3089367c1 source/x265.h --- a/source/x265.h Mon Nov 18 16:44:31 2013 -0600 +++ b/source/x265.h Mon Nov 18 19:31:35 2013 -0600 @@ -35,84 +35,34 @@ * opaque handler for encoder */ typedef struct x265_encoder x265_encoder; -// TODO: Existing names used for the different NAL unit types can be altered to better reflect the names in the spec. -// However, the names in the spec are not yet stable at this point. Once the names are stable, a cleanup -// effort can be done without use of macros to alter the names used to indicate the different NAL unit types. typedef enum { -NAL_UNIT_CODED_SLICE_TRAIL_N = 0, // 0 -NAL_UNIT_CODED_SLICE_TRAIL_R, // 1 - -NAL_UNIT_CODED_SLICE_TSA_N, // 2 -NAL_UNIT_CODED_SLICE_TLA_R, // 3 - -NAL_UNIT_CODED_SLICE_STSA_N,// 4 -NAL_UNIT_CODED_SLICE_STSA_R,// 5 - -NAL_UNIT_CODED_SLICE_RADL_N,// 6 -NAL_UNIT_CODED_SLICE_RADL_R,// 7 - -NAL_UNIT_CODED_SLICE_RASL_N,// 8 -NAL_UNIT_CODED_SLICE_RASL_R,// 9 - -NAL_UNIT_RESERVED_VCL_N10, -NAL_UNIT_RESERVED_VCL_R11, -NAL_UNIT_RESERVED_VCL_N12, -NAL_UNIT_RESERVED_VCL_R13, -NAL_UNIT_RESERVED_VCL_N14, -NAL_UNIT_RESERVED_VCL_R15, - -NAL_UNIT_CODED_SLICE_BLA_W_LP, // 16 -NAL_UNIT_CODED_SLICE_BLA_W_RADL, // 17 -NAL_UNIT_CODED_SLICE_BLA_N_LP, // 18 -NAL_UNIT_CODED_SLICE_IDR_W_RADL, // 19 -NAL_UNIT_CODED_SLICE_IDR_N_LP, // 20 -NAL_UNIT_CODED_SLICE_CRA, // 21 -NAL_UNIT_RESERVED_IRAP_VCL22, -NAL_UNIT_RESERVED_IRAP_VCL23, - -NAL_UNIT_RESERVED_VCL24, -NAL_UNIT_RESERVED_VCL25, -NAL_UNIT_RESERVED_VCL26, -NAL_UNIT_RESERVED_VCL27, -NAL_UNIT_RESERVED_VCL28, -NAL_UNIT_RESERVED_VCL29, -NAL_UNIT_RESERVED_VCL30, -NAL_UNIT_RESERVED_VCL31, - -NAL_UNIT_VPS, // 32 -NAL_UNIT_SPS, // 33 -NAL_UNIT_PPS, // 34 
-NAL_UNIT_ACCESS_UNIT_DELIMITER, // 35 -NAL_UNIT_EOS, // 36 -NAL_UNIT_EOB, // 37 -NAL_UNIT_FILLER_DATA, // 38 -NAL_UNIT_PREFIX_SEI,// 39 -NAL_UNIT_SUFFIX_SEI,// 40 -NAL_UNIT_RESERVED_NVCL41, -NAL_UNIT_RESERVED_NVCL42, -NAL_UNIT_RESERVED_NVCL43, -NAL_UNIT_RESERVED_NVCL44, -NAL_UNIT_RESERVED_NVCL45, -NAL_UNIT_RESERVED_NVCL46, -NAL_UNIT_RESERVED_NVCL47, -NAL_UNIT_UNSPECIFIED_48, -NAL_UNIT_UNSPECIFIED_49, -NAL_UNIT_UNSPECIFIED_50, -NAL_UNIT_UNSPECIFIED_51, -NAL_UNIT_UNSPECIFIED_52, -NAL_UNIT_UNSPECIFIED_53, -NAL_UNIT_UNSPECIFIED_54, -NAL_UNIT_UNSPECIFIED_55, -NAL_UNIT_UNSPECIFIED_56, -NAL_UNIT_UNSPECIFIED_57, -NAL_UNIT_UNSPECIFIED_58, -NAL_UNIT_UNSPECIFIED_59, -NAL_UNIT_UNSPECIFIED_60, -NAL_UNIT_UNSPECIFIED_61, -NAL_UNIT_UNSPECIFIED_62, -NAL_UNIT_UNSPECIFIED_63, -NAL_UNIT_INVALID, +NAL_UNIT_CODED_SLICE_TRAIL_N = 0, +NAL_UNIT_CODED_SLICE_TRAIL_R, +NAL_UNIT_CODED_SLICE_TSA_N, +NAL_UNIT_CODED_SLICE_TLA_R, +NAL_UNIT_CODED_SLICE_STSA_N, +NAL_UNIT_CODED_SLICE_STSA_R, +NAL_UNIT_CODED_SLICE_RADL_N, +NAL_UNIT_CODED_SLICE_RADL_R, +NAL_UNIT_CODED_SLICE_RASL_N, +NAL_UNIT_CODED_SLICE_RASL_R, +NAL_UNIT_CODED_SLICE_BLA_W_LP = 16, +NAL_UNIT_CODED_SLICE_BLA_W_RADL, +NAL_UNIT_CODED_SLICE_BLA_N_LP, +NAL_UNIT_CODED_SLICE_IDR_W_RADL, +NAL_UNIT_CODED_SLICE_IDR_N_LP, +NAL_UNIT_CODED_SLICE_CRA, +NAL_UNIT_VPS = 32, +NAL_UNIT_SPS, +NAL_UNIT_PPS, +NAL_UNIT_ACCESS_UNIT_DELIMITER, +NAL_UNIT_EOS, +NAL_UNIT_EOB, +NAL_UNIT_FILLER_DATA, +NAL_UNIT_PREFIX_SEI, +NAL_UNIT_SUFFIX_SEI, +NAL_UNIT_INVALID = 64, } NalUnitType; /* The data within the payload is already NAL-encapsulated; the type ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH 4 of 9] api: make x265_encoder_get_stats() somewhat future proof
# HG changeset patch # User Steve Borho st...@borho.org # Date 1384825870 21600 # Mon Nov 18 19:51:10 2013 -0600 # Node ID c56f65c702978bf47b256528b503dd62602696dd # Parent ba9cb99c569329d13c66fd519f3f5ce8931c535c api: make x265_encoder_get_stats() somewhat future proof By passing in the size of x265_stats that the user application knows about, the encoder can know not to try to set new fields that were added to the end of x265_stats. This requires some discipline on our part to only append to the structure and to always check the size for any new fields we might add. diff -r ba9cb99c5693 -r c56f65c70297 source/encoder/encoder.cpp --- a/source/encoder/encoder.cppMon Nov 18 19:37:45 2013 -0600 +++ b/source/encoder/encoder.cppMon Nov 18 19:51:10 2013 -0600 @@ -478,29 +478,35 @@ } } -void Encoder::fetchStats(x265_stats *stats) +void Encoder::fetchStats(x265_stats *stats, size_t statsSizeBytes) { -stats-globalPsnrY = m_analyzeAll.m_psnrSumY; -stats-globalPsnrU = m_analyzeAll.m_psnrSumU; -stats-globalPsnrV = m_analyzeAll.m_psnrSumV; -stats-encodedPictureCount = m_analyzeAll.m_numPics; -stats-totalWPFrames = m_numWPFrames; -stats-accBits = m_analyzeAll.m_accBits; -stats-elapsedEncodeTime = (double)(x265_mdate() - m_encodeStartTime) / 100; -if (stats-encodedPictureCount 0) +if (statsSizeBytes = sizeof(stats)) { -stats-globalSsim = m_analyzeAll.m_globalSsim / stats-encodedPictureCount; -stats-globalPsnr = (stats-globalPsnrY * 6 + stats-globalPsnrU + stats-globalPsnrV) / (8 * stats-encodedPictureCount); -stats-elapsedVideoTime = (double)stats-encodedPictureCount / param.frameRate; -stats-bitrate = (0.001f * stats-accBits) / stats-elapsedVideoTime; +stats-globalPsnrY = m_analyzeAll.m_psnrSumY; +stats-globalPsnrU = m_analyzeAll.m_psnrSumU; +stats-globalPsnrV = m_analyzeAll.m_psnrSumV; +stats-encodedPictureCount = m_analyzeAll.m_numPics; +stats-totalWPFrames = m_numWPFrames; +stats-accBits = m_analyzeAll.m_accBits; +stats-elapsedEncodeTime = (double)(x265_mdate() - 
m_encodeStartTime) / 100; +if (stats-encodedPictureCount 0) +{ +stats-globalSsim = m_analyzeAll.m_globalSsim / stats-encodedPictureCount; +stats-globalPsnr = (stats-globalPsnrY * 6 + stats-globalPsnrU + stats-globalPsnrV) / (8 * stats-encodedPictureCount); +stats-elapsedVideoTime = (double)stats-encodedPictureCount / param.frameRate; +stats-bitrate = (0.001f * stats-accBits) / stats-elapsedVideoTime; +} +else +{ +stats-globalSsim = 0; +stats-globalPsnr = 0; +stats-bitrate = 0; +stats-elapsedVideoTime = 0; +} } -else -{ -stats-globalSsim = 0; -stats-globalPsnr = 0; -stats-bitrate = 0; -stats-elapsedVideoTime = 0; -} +/* If new statistics are added to x265_stats, we must check here whether the + * structure provided by the user is the new structure or an older one (for + * future safety) */ } void Encoder::writeLog(int argc, char **argv) @@ -524,7 +530,7 @@ fprintf(m_csvfpt, , %s, , buffer); x265_stats stats; -fetchStats(stats); +fetchStats(stats, sizeof(stats)); // elapsed time, fps, bitrate fprintf(m_csvfpt, %.2f, %.2f, %.2f,, @@ -1484,11 +1490,11 @@ EXTERN_CYCLE_COUNTER(ME); extern C -void x265_encoder_get_stats(x265_encoder *enc, x265_stats *outputStats) +void x265_encoder_get_stats(x265_encoder *enc, x265_stats *outputStats, uint32_t statsSizeBytes) { Encoder *encoder = static_castEncoder*(enc); -encoder-fetchStats(outputStats); +encoder-fetchStats(outputStats, statsSizeBytes); } extern C diff -r ba9cb99c5693 -r c56f65c70297 source/encoder/encoder.h --- a/source/encoder/encoder.h Mon Nov 18 19:37:45 2013 -0600 +++ b/source/encoder/encoder.h Mon Nov 18 19:51:10 2013 -0600 @@ -116,7 +116,7 @@ int getStreamHeaders(NALUnitEBSP **nalunits); -void fetchStats(x265_stats* stats); +void fetchStats(x265_stats* stats, size_t statsSizeBytes); void writeLog(int argc, char **argv); diff -r ba9cb99c5693 -r c56f65c70297 source/x265.cpp --- a/source/x265.cpp Mon Nov 18 19:37:45 2013 -0600 +++ b/source/x265.cpp Mon Nov 18 19:51:10 2013 -0600 @@ -653,7 +653,7 @@ if 
(cliopt.bProgress) fprintf(stderr, \r); -x265_encoder_get_stats(encoder, stats); +x265_encoder_get_stats(encoder, stats, sizeof(stats)); if (param.csvfn !b_ctrl_c) x265_encoder_log(encoder, argc, argv); x265_encoder_close(encoder); diff -r ba9cb99c5693 -r c56f65c70297 source/x265.h --- a/source/x265.h Mon Nov 18 19:37:45 2013 -0600 +++ b/source/x265.h Mon Nov 18 19:51:10 2013 -0600 @@
[x265] [PATCH 3 of 9] api: remove old suffix and prefix from C symbols in comment, reflow
# HG changeset patch # User Steve Borho st...@borho.org # Date 1384825065 21600 # Mon Nov 18 19:37:45 2013 -0600 # Node ID ba9cb99c569329d13c66fd519f3f5ce8931c535c # Parent 08130599663661b31deeb7bf6771c55c1d7a3027 api: remove old suffix and prefix from C symbols in comment, reflow diff -r 081305996636 -r ba9cb99c5693 source/x265.h --- a/source/x265.h Mon Nov 18 19:35:53 2013 -0600 +++ b/source/x265.h Mon Nov 18 19:37:45 2013 -0600 @@ -65,11 +65,11 @@ NAL_UNIT_INVALID = 64, } NalUnitType; -/* The data within the payload is already NAL-encapsulated; the type - * is merely in the struct for easy access by the calling application. - * All data returned in an x265_nal_t, including the data in p_payload, is no longer - * valid after the next call to x265_encoder_encode. Thus it must be used or copied - * before calling x265_encoder_encode again. */ +/* The data within the payload is already NAL-encapsulated; the type is merely + * in the struct for easy access by the calling application. All data returned + * in an x265_nal, including the data in payload, is no longer valid after the + * next call to x265_encoder_encode. Thus it must be used or copied before + * calling x265_encoder_encode again. */ typedef struct x265_nal { uint32_t type;/* NalUnitType */ ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH 0 of 9] API improvements and cleanups
Go through and enhance comments and fix some broken ones. Simplify the API in a few places, remove Hungarian-notation remnants, and future-proof one of the public methods. ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH 9 of 9] common: nit
# HG changeset patch # User Steve Borho st...@borho.org # Date 1384846824 21600 # Tue Nov 19 01:40:24 2013 -0600 # Node ID f1f0a389a58185e98476bb9e8496735002bfe2a7 # Parent baee128fdb029ff3379a3dc7b2574b3d52a6e264 common: nit diff -r baee128fdb02 -r f1f0a389a581 source/common/common.cpp --- a/source/common/common.cpp Tue Nov 19 01:40:15 2013 -0600 +++ b/source/common/common.cpp Tue Nov 19 01:40:24 2013 -0600 @@ -165,7 +165,7 @@ param-bframes = 3; param-lookaheadDepth = 40; param-bFrameAdaptive = X265_B_ADAPT_FAST; -param-scenecutThreshold = 40; /* Magic number pulled in from x264*/ +param-scenecutThreshold = 40; /* Magic number pulled in from x264 */ /* Intra Coding Tools */ param-bEnableConstrainedIntra = 0; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH 2 of 9] api: remove hungarian prefixes from x265_nal members
# HG changeset patch # User Steve Borho st...@borho.org # Date 1384824953 21600 # Mon Nov 18 19:35:53 2013 -0600 # Node ID 08130599663661b31deeb7bf6771c55c1d7a3027 # Parent 85f3089367c11655b4e23c8a2dc6232a6d4934ed api: remove hungarian prefixes from x265_nal members These particular prefixes came from x264 originally diff -r 85f3089367c1 -r 081305996636 source/encoder/encoder.cpp --- a/source/encoder/encoder.cppMon Nov 18 19:31:35 2013 -0600 +++ b/source/encoder/encoder.cppMon Nov 18 19:35:53 2013 -0600 @@ -1375,15 +1375,15 @@ size += nalSize; memsize += nalSize; -m_nals[nalcount].i_type = nalu.m_nalUnitType; -m_nals[nalcount].i_payload = size; +m_nals[nalcount].type = nalu.m_nalUnitType; +m_nals[nalcount].sizeBytes = size; } /* Setup payload pointers, now that we're done adding content to m_packetData */ for (int i = 0; i nalcount; i++) { -m_nals[i].p_payload = (uint8_t*)m_packetData + offset; -offset += m_nals[i].i_payload; +m_nals[i].payload = (uint8_t*)m_packetData + offset; +offset += m_nals[i].sizeBytes; } fail: diff -r 85f3089367c1 -r 081305996636 source/x265.cpp --- a/source/x265.cpp Mon Nov 18 19:31:35 2013 -0600 +++ b/source/x265.cpp Mon Nov 18 19:35:53 2013 -0600 @@ -205,8 +205,8 @@ PPAScopeEvent(bitstream_write); for (uint32_t i = 0; i nalcount; i++) { -bitstreamFile.write((const char*)nal-p_payload, nal-i_payload); -totalbytes += nal-i_payload; +bitstreamFile.write((const char*)nal-payload, nal-sizeBytes); +totalbytes += nal-sizeBytes; nal++; } } diff -r 85f3089367c1 -r 081305996636 source/x265.h --- a/source/x265.h Mon Nov 18 19:31:35 2013 -0600 +++ b/source/x265.h Mon Nov 18 19:35:53 2013 -0600 @@ -72,9 +72,9 @@ * before calling x265_encoder_encode again. 
*/ typedef struct x265_nal { -uint32_t i_type; /* NalUnitType */ -uint32_t i_payload; /* size in bytes */ -uint8_t* p_payload; +uint32_t type;/* NalUnitType */ +uint32_t sizeBytes; /* size in bytes */ +uint8_t* payload; } x265_nal; typedef struct x265_picture ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH 5 of 9] api: nit
# HG changeset patch # User Steve Borho st...@borho.org # Date 1384825878 21600 # Mon Nov 18 19:51:18 2013 -0600 # Node ID d8766641126dc98b6005076c7b489c4cc5906591 # Parent c56f65c702978bf47b256528b503dd62602696dd api: nit diff -r c56f65c70297 -r d8766641126d source/x265.h --- a/source/x265.h Mon Nov 18 19:51:10 2013 -0600 +++ b/source/x265.h Mon Nov 18 19:51:18 2013 -0600 @@ -215,7 +215,7 @@ /*Level of Rate Distortion Optimization Allowed */ typedef enum { -X265_NO_RDO_NO_RDOQ, /* Partial RDO during mode decision (only at each depth/mode), no RDO in quantization*/ +X265_NO_RDO_NO_RDOQ, /* Partial RDO during mode decision (only at each depth/mode), no RDO in quantization */ X265_NO_RDO, /* Partial RDO during mode decision (only at each depth/mode), quantization RDO enabled */ X265_FULL_RDO/* Full RD-based mode decision */ } X265_RDO_LEVEL; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel