[x265] [PATCH] removed copy_cnt_4 avx2 asm code: SSE version is equally fast

2014-09-11 Thread praveen
# HG changeset patch
# User Praveen Tiwari
# Date 1410433904 -19800
# Node ID 5740ec22db67267bfca97fbba07ef9239802d2b0
# Parent  012f315d3eda8044f5a49865e15ba2943fbab094
removed copy_cnt_4 avx2 asm code: SSE version is equally fast

diff -r 012f315d3eda -r 5740ec22db67 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp  Wed Sep 10 17:27:20 2014 +0200
+++ b/source/common/x86/asm-primitives.cpp  Thu Sep 11 16:41:44 2014 +0530
@@ -1730,7 +1730,6 @@
 /* Need to update assembly code as per changed interface of the copy_cnt primitive, once
  * code is updated, avx2 version will be enabled */
 
-// p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_avx2;
 p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_avx2;
 // p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_avx2;
 // p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_avx2;
diff -r 012f315d3eda -r 5740ec22db67 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm  Wed Sep 10 17:27:20 2014 +0200
+++ b/source/common/x86/blockcopy8.asm  Thu Sep 11 16:41:44 2014 +0530
@@ -3987,35 +3987,6 @@
 %endif
 RET
 
-
-INIT_YMM avx2
-cglobal copy_cnt_4, 3,3,3
-add r2d, r2d
-xorpd   xm2, xm2
-
-; row 0  1
-movq    xm0, [r1]
-movhps  xm0, [r1 + r2]
-
-; row 2  3
-movq    xm1, [r1 + r2 * 2]
-lea r2, [r2 * 3]
-movhps  xm1, [r1 + r2]
-
-vinserti128 m0, m0, xm1, 1
-movu    [r0], m0
-
-vextractf128 xm1, m0, 1
-packsswb xm0, xm1
-pcmpeqb  xm0, xm2
-
-; get count
-pmovmskb eax, xm0
-not ax
-popcnt  ax, ax
-RET
-
-
 
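 ;-----------------------------------------------------------------------------
 ; uint32_t copy_cnt(int16_t *dst, int16_t *src, intptr_t stride);
 ;-----------------------------------------------------------------------------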
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] removed copy_cnt_4 avx2 asm code: SSE version is equally fast

2014-09-11 Thread Praveen Tiwari
Please ignore this patch; I need to correct the commit message.


Regards,
Praveen Tiwari

On Thu, Sep 11, 2014 at 4:41 PM, prav...@multicorewareinc.com wrote:

 # HG changeset patch
 # User Praveen Tiwari
 # Date 1410433904 -19800
 # Node ID 5740ec22db67267bfca97fbba07ef9239802d2b0
 # Parent  012f315d3eda8044f5a49865e15ba2943fbab094
 removed copy_cnt_4 avx2 asm code: SSE version is equally fast

 diff -r 012f315d3eda -r 5740ec22db67 source/common/x86/asm-primitives.cpp
 --- a/source/common/x86/asm-primitives.cpp  Wed Sep 10 17:27:20 2014
 +0200
 +++ b/source/common/x86/asm-primitives.cpp  Thu Sep 11 16:41:44 2014
 +0530
 @@ -1730,7 +1730,6 @@
  /* Need to update assembly code as per changed interface of the
 copy_cnt primitive, once
   * code is updated, avx2 version will be enabled */

 -// p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_avx2;
  p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_avx2;
  // p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_avx2;
  // p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_avx2;
 diff -r 012f315d3eda -r 5740ec22db67 source/common/x86/blockcopy8.asm
 --- a/source/common/x86/blockcopy8.asm  Wed Sep 10 17:27:20 2014 +0200
 +++ b/source/common/x86/blockcopy8.asm  Thu Sep 11 16:41:44 2014 +0530
 @@ -3987,35 +3987,6 @@
  %endif
  RET

 -
 -INIT_YMM avx2
 -cglobal copy_cnt_4, 3,3,3
 -add r2d, r2d
 -xorpd   xm2, xm2
 -
 -; row 0  1
 -movq    xm0, [r1]
 -movhps  xm0, [r1 + r2]
 -
 -; row 2  3
 -movq    xm1, [r1 + r2 * 2]
 -lea r2, [r2 * 3]
 -movhps  xm1, [r1 + r2]
 -
 -vinserti128 m0, m0, xm1, 1
 -movu    [r0], m0
 -
 -vextractf128 xm1, m0, 1
 -packsswb xm0, xm1
 -pcmpeqb  xm0, xm2
 -
 -; get count
 -pmovmskb eax, xm0
 -not ax
 -popcnt  ax, ax
 -RET
 -
 -

  
 ;--
  ; uint32_t copy_cnt(int16_t *dst, int16_t *src, intptr_t stride);

  
 ;--

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel