Min,
Praveen has sent a number of patches that change the entire quant
interface so that the coefficients are now 16-bit instead of 32-bit. Do
your patches still assume they are 32-bit?
Can you review all his patches (8-10 patches) and see if we're moving in
the right direction?
Thanks,
Deepthi
On Thu, Sep 4, 2014 at 5:07 AM, Min Chen <chenm...@163.com> wrote:
# HG changeset patch
# User Min Chen <chenm...@163.com>
# Date 1409787419 25200
# Node ID 4ca9e972f48cb4530ca7181ad7cec351568a99b3
# Parent 94bd00d1af5d8c5f6f26f97c50a727588a860714
asm: optimize nquant by PSIGND, improve 13k cycles - 11k cycles
diff -r 94bd00d1af5d -r 4ca9e972f48c source/common/dct.cpp
--- a/source/common/dct.cpp Wed Sep 03 16:36:44 2014 -0700
+++ b/source/common/dct.cpp Wed Sep 03 16:36:59 2014 -0700
@@ -801,6 +801,10 @@
{
uint32_t numSig = 0;
+X265_CHECK((numCoeff % 16) == 0, "number of quant coeff is not multiple of 4x4\n");
+X265_CHECK((uint32_t)add < ((uint32_t)1 << qBits), "2 ^ qBits less than add\n");
+X265_CHECK(((intptr_t)quantCoeff & 15) == 0, "quantCoeff buffer not aligned\n");
+
for (int blockpos = 0; blockpos < numCoeff; blockpos++)
{
int level = coef[blockpos];
diff -r 94bd00d1af5d -r 4ca9e972f48c source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Wed Sep 03 16:36:44 2014 -0700
+++ b/source/common/x86/pixel-util8.asm Wed Sep 03 16:36:59 2014 -0700
@@ -941,55 +941,47 @@
; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int
qBits, int add, int numCoeff);
;-
INIT_XMM sse4
-cglobal nquant, 4,5,8
+cglobal nquant, 3,5,8
movd m6, r4m
mov r4d, r5m
pxor m7, m7 ; m7 = numZero
-movd m5, r3d ; m5 = qbits
+movd m5, r3m ; m5 = qbits
pshufd m6, m6, 0 ; m6 = add
mov r3d, r4d ; r3 = numCoeff
shr r4d, 3
+
.loop:
movu m0, [r0] ; m0 = level
movu m1, [r0 + 16] ; m1 = level
-movu m2, [r1] ; m2 = qcoeff
-movu m3, [r1 + 16] ; m3 = qcoeff
+
+pabsd m2, m0
+pmulld m2, [r1]; m4 = tmpLevel1
+paddd m2, m6
+psrad m2, m5 ; m4 = level1
+psignd m2, m0 ; restore sign
+
+pabsd m3, m1
+pmulld m3, [r1 + 16] ; m4 = tmpLevel1
+paddd m3, m6
+psrad m3, m5 ; m4 = level1
+psignd m3, m1 ; restore sign
add r0, 32
add r1, 32
-pxor m4, m4
-pcmpgtd m4, m0 ; m4 = sign
-pabsd m0, m0
-pmulld m0, m2 ; m0 = tmpLevel1
-paddd m0, m6
-psrad m0, m5 ; m0 = level1
-pxor m0, m4
-psubd m0, m4
-
-pxor m4, m4
-pcmpgtd m4, m1 ; m4 = sign
-pabsd m1, m1
-pmulld m1, m3 ; m1 = tmpLevel1
-paddd m1, m6
-psrad m1, m5 ; m1 = level1
-pxor m1, m4
-psubd m1, m4
-
-packssdw m0, m0
-packssdw m1, m1
-pmovsxwd m0, m0
+packssdw m2, m3
+pmovsxwd m0, m2
+movhlps m1, m2
pmovsxwd m1, m1
-movu [r2], m0
+movu [r2 ], m0
movu [r2 + 16], m1
add r2, 32
+
+pxor m4, m4
+pcmpeqw m2, m4
+psubw m7, m2
+
dec r4d
-
-packssdw m0, m1
-pxor m4, m4
-pcmpeqw m0, m4
-psubw m7, m0
-
jnz .loop
packuswb m7, m7
@@ -997,10 +989,8 @@
mov eax, r3d
movd r4d, m7
sub eax, r4d ; numSig
-
RET
-
;-
; void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num,
int scale, int shift)
;-
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel