# HG changeset patch # User Praveen Tiwari # Date 1407834530 -19800 # Node ID bb4d44663964237e4b66af6d92b2f13dbcf4f9b9 # Parent 8a7f4bb1d1be32fe668d410450c2e320ccae6098 count_nonzero primitive, downscaling quantCoeff from int32_t* to int16_t*
diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/dct.cpp --- a/source/common/dct.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/dct.cpp Tue Aug 12 14:38:50 2014 +0530 @@ -815,7 +815,7 @@ return numSig; } -int count_nonzero_c(const int32_t *quantCoeff, int numCoeff) +int count_nonzero_c(const int16_t *quantCoeff, int numCoeff) { X265_CHECK(((intptr_t)quantCoeff & 15) == 0, "quant buffer not aligned\n"); X265_CHECK(numCoeff > 0 && (numCoeff & 15) == 0, "numCoeff invalid %d\n", numCoeff); diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/primitives.h --- a/source/common/primitives.h Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/primitives.h Tue Aug 12 14:38:50 2014 +0530 @@ -163,7 +163,7 @@ typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff); typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift); typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift); -typedef int (*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff); +typedef int (*count_nonzero_t)(const int16_t *quantCoeff, int numCoeff); typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset); typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset); diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/quant.cpp --- a/source/common/quant.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/quant.cpp Tue Aug 12 14:38:50 2014 +0530 @@ -2,6 +2,7 @@ * Copyright (C) 2014 x265 project * * Authors: Steve Borho <st...@borho.org> + * Praveen Kumar Tiwari <prav...@multicorewareinc.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as 
published by @@ -463,7 +464,17 @@ const uint32_t sizeIdx = log2TrSize - 2; int useDST = !sizeIdx && ttype == TEXT_LUMA && bIntra; - X265_CHECK((int)numSig == primitives.count_nonzero(coeff, 1 << log2TrSize * 2), "numSig differ\n"); + /* This section of code safely converts int32_t coefficients to int16_t; once the caller function is + * optimized to take coefficients as int16_t*, it will be removed. */ + int numCoeff = (1 << (log2TrSize * 2)); + assert(numCoeff <= 1024); + ALIGN_VAR_16(int16_t, qCoeff[32 * 32]); + for (int i = 0; i < numCoeff; i++) + { + qCoeff[i] = (coeff[i] & 0xFFFF); + } + + X265_CHECK((int)numSig == primitives.count_nonzero(qCoeff, 1 << log2TrSize * 2), "numSig differ\n"); // DC only if (numSig == 1 && coeff[0] != 0 && !useDST) @@ -501,7 +512,16 @@ int numCoeff = 1 << log2TrSize * 2; uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, dstCoeff, qbits, add, numCoeff); - X265_CHECK((int)numSig == primitives.count_nonzero(dstCoeff, numCoeff), "numSig differ\n"); + /* This section of code safely converts int32_t coefficients to int16_t; once the caller function is + * optimized to take coefficients as int16_t*, it will be removed. */ + assert(numCoeff <= 1024); + ALIGN_VAR_16(int16_t, qCoeff[32 * 32]); + for (int i = 0; i < numCoeff; i++) + { + qCoeff[i] = (dstCoeff[i] & 0xFFFF); + } + + X265_CHECK((int)numSig == primitives.count_nonzero(qCoeff, numCoeff), "numSig differ\n"); if (!numSig) return 0; diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/x86/pixel-util.h --- a/source/common/x86/pixel-util.h Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/x86/pixel-util.h Tue Aug 12 14:38:50 2014 +0530 @@ -2,6 +2,7 @@ * Copyright (C) 2013 x265 project * * Authors: Steve Borho <st...@borho.org> + * Praveen Kumar Tiwari <prav...@multicorewareinc.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -47,7 +48,7 @@ uint32_t x265_quant_sse4(int32_t 
*coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff); uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff); void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift); -int x265_count_nonzero_ssse3(const int32_t *quantCoeff, int numCoeff); +int x265_count_nonzero_ssse3(const int16_t *quantCoeff, int numCoeff); void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset); void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset); diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/x86/pixel-util8.asm --- a/source/common/x86/pixel-util8.asm Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/x86/pixel-util8.asm Tue Aug 12 14:38:50 2014 +0530 @@ -3,6 +3,7 @@ ;* ;* Authors: Min Chen <chenm...@163.com> <min.c...@multicorewareinc.com> ;* Nabajit Deka <naba...@multicorewareinc.com> +;* Praveen Kumar Tiwari <prav...@multicorewareinc.com> ;* ;* This program is free software; you can redistribute it and/or modify ;* it under the terms of the GNU General Public License as published by @@ -1091,10 +1092,10 @@ ;----------------------------------------------------------------------------- -; int count_nonzero(const int32_t *quantCoeff, int numCoeff); +; int count_nonzero(const int16_t *quantCoeff, int numCoeff); ;----------------------------------------------------------------------------- INIT_XMM ssse3 -cglobal count_nonzero, 2,2,5 +cglobal count_nonzero, 2,2,4 pxor m0, m0 shr r1d, 4 movd m1, r1d @@ -1103,12 +1104,8 @@ .loop: mova m2, [r0 + 0] mova m3, [r0 + 16] - packssdw m2, m3 - mova m3, [r0 + 32] - mova m4, [r0 + 48] - add r0, 64 - packssdw m3, m4 packsswb m2, m3 + add r0, 32 pcmpeqb m2, m0 paddb m1, m2 dec r1d diff -r 8a7f4bb1d1be -r 
bb4d44663964 source/encoder/entropy.cpp --- a/source/encoder/entropy.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/encoder/entropy.cpp Tue Aug 12 14:38:50 2014 +0530 @@ -2,6 +2,7 @@ * Copyright (C) 2013 x265 project * * Authors: Steve Borho <st...@borho.org> +* Praveen Kumar Tiwari <prav...@multicorewareinc.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1488,8 +1489,18 @@ { uint32_t trSize = 1 << log2TrSize; + /* This section of code safely converts int32_t coefficients to int16_t; once the caller function is + * optimized to take coefficients as int16_t*, it will be removed. */ + int numCoeff = (1 << (log2TrSize << 1)); + assert(numCoeff <= 1024); + ALIGN_VAR_16(int16_t, qCoeff[32 * 32]); + for (int i = 0; i < numCoeff; i++) + { + qCoeff[i] = (coeff[i] & 0xFFFF); + } + // compute number of significant coefficients - uint32_t numSig = primitives.count_nonzero(coeff, (1 << (log2TrSize << 1))); + uint32_t numSig = primitives.count_nonzero(qCoeff, (1 << (log2TrSize << 1))); X265_CHECK(numSig > 0, "cbf check fail\n"); diff -r 8a7f4bb1d1be -r bb4d44663964 source/test/mbdstharness.cpp --- a/source/test/mbdstharness.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/test/mbdstharness.cpp Tue Aug 12 14:38:50 2014 +0530 @@ -366,7 +366,7 @@ bool MBDstHarness::check_count_nonzero_primitive(count_nonzero_t ref, count_nonzero_t opt) { - ALIGN_VAR_32(int32_t, qcoeff[32 * 32]); + ALIGN_VAR_32(int16_t, qcoeff[32 * 32]); for (int i = 0; i < 4; i++) { @@ -376,7 +376,7 @@ for (int n = 0; n <= num; n++) { - memset(qcoeff, 0, num * sizeof(int32_t)); + memset(qcoeff, 0, num * sizeof(int16_t)); for (int j = 0; j < n; j++) { @@ -386,7 +386,7 @@ k = (k + 11) & mask; } - qcoeff[k] = rand() - RAND_MAX / 2; + qcoeff[k] = (int16_t)rand() - RAND_MAX / 2; } int refval = ref(qcoeff, num); @@ -516,7 +516,7 @@ for (int i = 4; i <= 32; i <<= 1) { printf("count_nonzero[%dx%d]", i, i); - 
REPORT_SPEEDUP(opt.count_nonzero, ref.count_nonzero, mbufidct, i * i) + REPORT_SPEEDUP(opt.count_nonzero, ref.count_nonzero, mbuf1, i * i) } } } _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel