# HG changeset patch # User Praveen Tiwari # Date 1409661936 -19800 # Node ID 40e242e316b962116d64fb43444029c5c6546484 # Parent e1b2ab942177bc9f67547a61c365c6167b5cee38 quant_c optimization, downscaling qCoef from int32_t* to int16_t*
diff -r e1b2ab942177 -r 40e242e316b9 source/common/dct.cpp --- a/source/common/dct.cpp Tue Sep 02 17:52:33 2014 +0530 +++ b/source/common/dct.cpp Tue Sep 02 18:15:36 2014 +0530 @@ -769,7 +769,7 @@ } } -uint32_t quant_c(int32_t* coef, int32_t* quantCoeff, int32_t* deltaU, int32_t* qCoef, int qBits, int add, int numCoeff) +uint32_t quant_c(int32_t* coef, int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff) { int qBits8 = qBits - 8; uint32_t numSig = 0; @@ -785,7 +785,7 @@ if (level) ++numSig; level *= sign; - qCoef[blockpos] = Clip3(-32768, 32767, level); + qCoef[blockpos] = (int16_t)Clip3(-32768, 32767, level); } return numSig; diff -r e1b2ab942177 -r 40e242e316b9 source/common/primitives.h --- a/source/common/primitives.h Tue Sep 02 17:52:33 2014 +0530 +++ b/source/common/primitives.h Tue Sep 02 18:15:36 2014 +0530 @@ -164,7 +164,7 @@ typedef void (*calcresidual_t)(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride); typedef void (*calcrecon_t)(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred); typedef void (*transpose_t)(pixel* dst, pixel* src, intptr_t stride); -typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff); +typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int16_t *qCoef, int qBits, int add, int numCoeff); typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int16_t *qCoef, int qBits, int add, int numCoeff); typedef void (*dequant_scaling_t)(const int16_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift); typedef void (*dequant_normal_t)(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift); diff -r e1b2ab942177 -r 40e242e316b9 source/common/quant.cpp --- a/source/common/quant.cpp Tue Sep 02 17:52:33 2014 +0530 +++ b/source/common/quant.cpp Tue Sep 02 18:15:36 2014 +0530 @@ 
-409,7 +409,21 @@ int add = (cu->m_slice->m_sliceType == I_SLICE ? 171 : 85) << (qbits - 9); int numCoeff = 1 << log2TrSize * 2; - uint32_t numSig = primitives.quant(m_resiDctCoeff, quantCoeff, deltaU, coeff, qbits, add, numCoeff); + /* This section of code safely converts int32_t coefficients to int16_t; once the caller function is + * optimized to take coefficients as int16_t*, it will be removed. */ + ALIGN_VAR_16(int16_t, qCoeff[32 * 32]); + for (int i = 0; i < numCoeff; i++) + { + qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]); + } + uint32_t numSig = primitives.quant(m_resiDctCoeff, quantCoeff, deltaU, qCoeff, qbits, add, numCoeff); + + /* This section of code copies the int16_t coefficients back into the int32_t buffer; once the caller function is + * optimized to take coefficients as int16_t*, it will be removed. */ + for (int i = 0; i < numCoeff; i++) + { + coeff[i] = qCoeff[i]; + } if (numSig >= 2 && cu->m_slice->m_pps->bSignHideEnabled) { diff -r e1b2ab942177 -r 40e242e316b9 source/common/x86/pixel-util.h --- a/source/common/x86/pixel-util.h Tue Sep 02 17:52:33 2014 +0530 +++ b/source/common/x86/pixel-util.h Tue Sep 02 18:15:36 2014 +0530 @@ -44,7 +44,7 @@ void x265_transpose32_sse2(pixel *dest, pixel *src, intptr_t stride); void x265_transpose64_sse2(pixel *dest, pixel *src, intptr_t stride); -uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff); +uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int16_t *qCoef, int qBits, int add, int numCoeff); uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int16_t *qCoef, int qBits, int add, int numCoeff); void x265_dequant_normal_sse4(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift); int x265_count_nonzero_ssse3(const int16_t *quantCoeff, int numCoeff); diff -r e1b2ab942177 -r 40e242e316b9 source/common/x86/pixel-util8.asm --- a/source/common/x86/pixel-util8.asm Tue Sep 02 
17:52:33 2014 +0530 +++ b/source/common/x86/pixel-util8.asm Tue Sep 02 18:15:36 2014 +0530 @@ -855,7 +855,7 @@ ;----------------------------------------------------------------------------- -; uint32_t quant(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff); +; uint32_t quant(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int16_t *qCoef, int qBits, int add, int numCoeff); ;----------------------------------------------------------------------------- INIT_XMM sse4 cglobal quant, 5,6,8 @@ -895,8 +895,7 @@ pxor m2, m1 psubd m2, m1 packssdw m2, m2 - pmovsxwd m2, m2 - movu [r3], m2 + movh [r3], m2 ; 4 coeff movu m0, [r0 + 16] ; m0 = level pxor m1, m1 @@ -917,13 +916,12 @@ pxor m2, m1 psubd m2, m1 packssdw m2, m2 - pmovsxwd m2, m2 - movu [r3 + 16], m2 + movh [r3 + 8], m2 add r0, 32 add r1, 32 add r2, 32 - add r3, 32 + add r3, 16 dec r4d jnz .loop diff -r e1b2ab942177 -r 40e242e316b9 source/test/mbdstharness.cpp --- a/source/test/mbdstharness.cpp Tue Sep 02 17:52:33 2014 +0530 +++ b/source/test/mbdstharness.cpp Tue Sep 02 18:15:36 2014 +0530 @@ -214,18 +214,19 @@ int bits = rand() % 32; int valueToAdd = rand() % (32 * 1024); int cmp_size = sizeof(int) * height * width; + int cmp_size1 = sizeof(short) * height * width; int numCoeff = height * width; int index1 = rand() % TEST_CASES; int index2 = rand() % TEST_CASES; - refReturnValue = ref(int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf1, mintbuf2, bits, valueToAdd, numCoeff); - optReturnValue = (uint32_t)checked(opt, int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf3, mintbuf4, bits, valueToAdd, numCoeff); + refReturnValue = ref(int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf1, mshortbuf2, bits, valueToAdd, numCoeff); + optReturnValue = (uint32_t)checked(opt, int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf3, mshortbuf3, bits, valueToAdd, numCoeff); - if (memcmp(mintbuf3, mintbuf1, cmp_size)) + if 
(memcmp(mintbuf1, mintbuf3, cmp_size)) return false; - if (memcmp(mintbuf4, mintbuf2, cmp_size)) + if (memcmp(mshortbuf2, mshortbuf3, cmp_size1)) return false; if (optReturnValue != refReturnValue) @@ -430,7 +431,7 @@ if (opt.quant) { printf("quant\t\t"); - REPORT_SPEEDUP(opt.quant, ref.quant, int_test_buff[0], int_test_buff[1], mintbuf3, mintbuf4, 23, 23785, 32 * 32); + REPORT_SPEEDUP(opt.quant, ref.quant, int_test_buff[0], int_test_buff[1], mintbuf3, mshortbuf2, 23, 23785, 32 * 32); } if (opt.nquant) _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel