[x265] [PATCH] asm: 8bpp and 10bpp code for idct8x8

2014-03-05 Thread murugan
# HG changeset patch
# User Murugan Vairavel muru...@multicorewareinc.com
# Date 1394014787 -19800
#  Wed Mar 05 15:49:47 2014 +0530
# Node ID b190855462b4d2a721f52d7a5ae093c4742e3182
# Parent  5cad3652bee8130e8861f5b7b2f6ff1c8695cc1a
asm: 8bpp and 10bpp code for idct8x8

diff -r 5cad3652bee8 -r b190855462b4 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp  Tue Mar 04 20:53:17 2014 -0600
+++ b/source/common/x86/asm-primitives.cpp  Wed Mar 05 15:49:47 2014 +0530
@@ -909,6 +909,7 @@
 SETUP_INTRA_ANG32(34, 2, ssse3);
 
 p.dct[DST_4x4] = x265_dst4_ssse3;
+p.idct[IDCT_8x8] = x265_idct8_ssse3;
 }
 if (cpuMask & X265_CPU_SSE4)
 {
@@ -1234,6 +1235,7 @@
 p.chroma_p2s[X265_CSP_I444] = x265_luma_p2s_ssse3; // for i444 , 
chroma_p2s can be replaced by luma_p2s
 
 p.dct[DST_4x4] = x265_dst4_ssse3;
+p.idct[IDCT_8x8] = x265_idct8_ssse3;
 }
 if (cpuMask & X265_CPU_SSE4)
 {
diff -r 5cad3652bee8 -r b190855462b4 source/common/x86/dct8.asm
--- a/source/common/x86/dct8.asmTue Mar 04 20:53:17 2014 -0600
+++ b/source/common/x86/dct8.asmWed Mar 05 15:49:47 2014 +0530
@@ -61,8 +61,26 @@
 times 1 dd 50, -89, 18, 75
 times 1 dd 18, -50, 75, -89
 
+tab_idct8_3:times 4 dw 89, 75
+times 4 dw 50, 18
+times 4 dw 75, -18
+times 4 dw -89, -50
+times 4 dw 50, -89
+times 4 dw 18, 75
+times 4 dw 18, -50
+times 4 dw 75, -89
+
 pb_unpackhlw1:  db 0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15
 
+pb_idct8even:   db 0, 1, 8, 9, 4, 5, 12, 13, 0, 1, 8, 9, 4, 5, 12, 13
+
+tab_idct8_1:times 1 dw 64, -64, 36, -83, 64, 64, 83, 36
+
+tab_idct8_2:times 1 dw 89, 75, 50, 18, 75, -18, -89, -50
+times 1 dw 50, -89, 18, 75, 18, -50, 75, -89
+
+pb_idct8odd:db 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15
+
 SECTION .text
 cextern pd_1
 cextern pd_2
@@ -665,3 +683,187 @@
 dec r2
 jnz.pass2
 RET
+
+;---
+; void idct8(int32_t *src, int16_t *dst, intptr_t stride)
+;---
+INIT_XMM ssse3
+
+cglobal patial_butterfly_inverse_internal_pass1
+movum0, [r0]
+movum1, [r0 + 4 * 32]
+movum2, [r0 + 2 * 32]
+movum3, [r0 + 6 * 32]
+packssdwm0, m2
+packssdwm1, m3
+punpckhwd   m2, m0, m1  ; [2 6]
+punpcklwd   m0, m1  ; [0 4]
+pmaddwd m1, m0, [r6]; EE[0]
+pmaddwd m0, [r6 + 32]   ; EE[1]
+pmaddwd m3, m2, [r6 + 16]   ; EO[0]
+pmaddwd m2, [r6 + 48]   ; EO[1]
+
+paddd   m4, m1, m3  ; E[0]
+psubd   m1, m3  ; E[3]
+paddd   m3, m0, m2  ; E[1]
+psubd   m0, m2  ; E[2]
+
+;E[K] = E[k] + add
+movam5, [pd_64]
+paddd   m0, m5
+paddd   m1, m5
+paddd   m3, m5
+paddd   m4, m5
+
+movum2, [r0 + 32]
+movum5, [r0 + 5 * 32]
+packssdwm2, m5
+movum5, [r0 + 3 * 32]
+movum6, [r0 + 7 * 32]
+packssdwm5, m6
+punpcklwd   m6, m2, m5  ;[1 3]
+punpckhwd   m2, m5  ;[5 7]
+
+pmaddwd m5, m6, [r4]
+pmaddwd m7, m2, [r4 + 16]
+paddd   m5, m7  ; O[0]
+
+paddd   m7, m4, m5
+psrad   m7, 7
+
+psubd   m4, m5
+psrad   m4, 7
+
+packssdwm7, m4
+movh[r5 + 0 * 16], m7
+movhps  [r5 + 7 * 16], m7
+
+pmaddwd m5, m6, [r4 + 32]
+pmaddwd m4, m2, [r4 + 48]
+paddd   m5, m4  ; O[1]
+
+paddd   m4, m3, m5
+psrad   m4, 7
+
+psubd   m3, m5
+psrad   m3, 7
+
+packssdwm4, m3
+movh[r5 + 1 * 16], m4
+movhps  [r5 + 6 * 16], m4
+
+pmaddwd m5, m6, [r4 + 64]
+pmaddwd m4, m2, [r4 + 80]
+paddd   m5, m4  ; O[2]
+
+paddd   m4, m0, m5
+psrad   m4, 7
+
+psubd   m0, m5
+psrad   m0, 7
+
+packssdwm4, m0
+movh[r5 + 2 * 16], m4
+movhps  [r5 + 5 * 16], m4
+
+pmaddwd m5, m6, [r4 + 96]
+pmaddwd m4, m2, [r4 + 112]
+paddd   m5, m4  ; O[3]
+
+paddd   m4, m1, m5
+psrad   m4, 7
+
+psubd   m1, m5
+psrad   m1, 7
+
+packssdwm4, m1
+movh[r5 + 3 * 16], m4
+movhps  [r5 + 4 * 16], m4
+
+ret
+
+%macro PARTIAL_BUTTERFLY_PROCESS_ROW 1
+%if BIT_DEPTH == 10
+%define IDCT_SHIFT 10
+%elif BIT_DEPTH == 8
+%define IDCT_SHIFT 12
+%else
+%error Unsupported BIT_DEPTH!
+%endif
+pshufb 

[x265] [PATCH 1/2] weight prediction: use sqrtf

2014-03-05 Thread Rafaël Carré
---
 source/encoder/weightPrediction.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/encoder/weightPrediction.cpp 
b/source/encoder/weightPrediction.cpp
index c04361a..c0710fd 100644
--- a/source/encoder/weightPrediction.cpp
+++ b/source/encoder/weightPrediction.cpp
@@ -203,7 +203,7 @@ bool WeightPrediction::checkDenom(int denom)
 uint64_t fencVar = fenc->wp_ssd[yuv] + !ref->wp_ssd[yuv];
 uint64_t refVar  = ref->wp_ssd[yuv] + !ref->wp_ssd[yuv];
 if (fencVar && refVar)
-guessScale[yuv] = Clip3(-2.f, 1.8f, 
std::sqrt((float)fencVar / refVar));
+guessScale[yuv] = Clip3(-2.f, 1.8f, sqrtf((float)fencVar / 
refVar));
 else
 guessScale[yuv] = 1.8f;
 fencMean[yuv] = (float)fenc->wp_sum[yuv] / (height[yuv] * 
width[yuv]) / (1 << (X265_DEPTH - 8));
-- 
1.9.0

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH 2/2] Fix x86_64 build

2014-03-05 Thread Rafaël Carré
---
 source/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 2febfaa..d2fea1a 100644
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -21,7 +21,7 @@ configure_file(${PROJECT_SOURCE_DIR}/x265_config.h.in
 
 SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH})
 
-if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL i386 OR ${CMAKE_SYSTEM_PROCESSOR} 
STREQUAL x86)
+if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL i386 OR ${CMAKE_SYSTEM_PROCESSOR} 
STREQUAL x86 OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL x86_64)
 set(X86 1)
 add_definitions(-DX265_ARCH_X86=1)
 if(${CMAKE_SIZEOF_VOID_P} MATCHES 8)
-- 
1.9.0

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH] primitives: cleaned up asm_primitives of intra_pred_ang

2014-03-05 Thread murugan
# HG changeset patch
# User Murugan Vairavel muru...@multicorewareinc.com
# Date 1394017591 -19800
#  Wed Mar 05 16:36:31 2014 +0530
# Node ID 724fb2004447992ea22be5edcf3c8a908118d831
# Parent  b190855462b4d2a721f52d7a5ae093c4742e3182
primitives: cleaned up asm_primitives of intra_pred_ang

diff -r b190855462b4 -r 724fb2004447 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp  Wed Mar 05 15:49:47 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp  Wed Mar 05 16:36:31 2014 +0530
@@ -710,14 +710,113 @@
 SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 24, cpu); \
 SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 32, cpu); \
 
+#define SETUP_INTRA_ANG_COMMON(mode, fno, cpu) \
+p.intra_pred[BLOCK_4x4][mode] = x265_intra_pred_ang4_ ## fno ## _ ## cpu; \
+p.intra_pred[BLOCK_8x8][mode] = x265_intra_pred_ang8_ ## fno ## _ ## cpu; \
+p.intra_pred[BLOCK_16x16][mode] = x265_intra_pred_ang16_ ## fno ## _ ## 
cpu; \
+p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## 
cpu;
+
+#define SETUP_INTRA_ANG(mode, fno, cpu) \
+p.intra_pred[BLOCK_8x8][mode] = x265_intra_pred_ang8_ ## fno ## _ ## cpu; \
+p.intra_pred[BLOCK_16x16][mode] = x265_intra_pred_ang16_ ## fno ## _ ## 
cpu; \
+p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## 
cpu;
+
 #define SETUP_INTRA_ANG4(mode, fno, cpu) \
 p.intra_pred[BLOCK_4x4][mode] = x265_intra_pred_ang4_ ## fno ## _ ## cpu;
-#define SETUP_INTRA_ANG8(mode, fno, cpu) \
+
+#define SETUP_INTRA_ANG16_32(mode, fno, cpu) \
+p.intra_pred[BLOCK_16x16][mode] = x265_intra_pred_ang16_ ## fno ## _ ## 
cpu; \
+p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## 
cpu;
+
+#define SETUP_INTRA_ANG4_8(mode, fno, cpu) \
+p.intra_pred[BLOCK_4x4][mode] = x265_intra_pred_ang4_ ## fno ## _ ## cpu; \
 p.intra_pred[BLOCK_8x8][mode] = x265_intra_pred_ang8_ ## fno ## _ ## cpu;
-#define SETUP_INTRA_ANG16(mode, fno, cpu) \
-p.intra_pred[BLOCK_16x16][mode] = x265_intra_pred_ang16_ ## fno ## _ ## 
cpu;
-#define SETUP_INTRA_ANG32(mode, fno, cpu) \
-p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## 
cpu;
+
+#define INTRA_ANG_SSSE3(cpu) \
+SETUP_INTRA_ANG_COMMON(2, 2, cpu); \
+SETUP_INTRA_ANG_COMMON(34, 2, cpu);
+
+#define INTRA_ANG_SSE4_COMMON(cpu) \
+SETUP_INTRA_ANG_COMMON(3,  3,  cpu); \
+SETUP_INTRA_ANG_COMMON(4,  4,  cpu); \
+SETUP_INTRA_ANG_COMMON(5,  5,  cpu); \
+SETUP_INTRA_ANG_COMMON(6,  6,  cpu); \
+SETUP_INTRA_ANG_COMMON(7,  7,  cpu); \
+SETUP_INTRA_ANG_COMMON(8,  8,  cpu); \
+SETUP_INTRA_ANG_COMMON(9,  9,  cpu); \
+SETUP_INTRA_ANG_COMMON(10, 10, cpu); \
+SETUP_INTRA_ANG_COMMON(11, 11, cpu); \
+SETUP_INTRA_ANG_COMMON(12, 12, cpu); \
+SETUP_INTRA_ANG_COMMON(13, 13, cpu); \
+SETUP_INTRA_ANG_COMMON(14, 14, cpu); \
+SETUP_INTRA_ANG_COMMON(15, 15, cpu); \
+SETUP_INTRA_ANG_COMMON(16, 16, cpu); \
+SETUP_INTRA_ANG_COMMON(17, 17, cpu); \
+SETUP_INTRA_ANG_COMMON(18, 18, cpu);
+
+#define INTRA_ANG_SSE4_HIGH(cpu) \
+SETUP_INTRA_ANG(19, 19, cpu); \
+SETUP_INTRA_ANG(20, 20, cpu); \
+SETUP_INTRA_ANG(21, 21, cpu); \
+SETUP_INTRA_ANG(22, 22, cpu); \
+SETUP_INTRA_ANG(23, 23, cpu); \
+SETUP_INTRA_ANG(24, 24, cpu); \
+SETUP_INTRA_ANG(25, 25, cpu); \
+SETUP_INTRA_ANG(26, 26, cpu); \
+SETUP_INTRA_ANG(27, 27, cpu); \
+SETUP_INTRA_ANG(28, 28, cpu); \
+SETUP_INTRA_ANG(29, 29, cpu); \
+SETUP_INTRA_ANG(30, 30, cpu); \
+SETUP_INTRA_ANG(31, 31, cpu); \
+SETUP_INTRA_ANG(32, 32, cpu); \
+SETUP_INTRA_ANG(33, 33, cpu); \
+SETUP_INTRA_ANG4(19, 17, cpu); \
+SETUP_INTRA_ANG4(20, 16, cpu); \
+SETUP_INTRA_ANG4(21, 15, cpu); \
+SETUP_INTRA_ANG4(22, 14, cpu);\
+SETUP_INTRA_ANG4(23, 13, cpu); \
+SETUP_INTRA_ANG4(24, 12, cpu); \
+SETUP_INTRA_ANG4(25, 11, cpu); \
+SETUP_INTRA_ANG4(26, 26, cpu); \
+SETUP_INTRA_ANG4(27, 9, cpu); \
+SETUP_INTRA_ANG4(28, 8, cpu); \
+SETUP_INTRA_ANG4(29, 7, cpu); \
+SETUP_INTRA_ANG4(30, 6, cpu); \
+SETUP_INTRA_ANG4(31, 5, cpu); \
+SETUP_INTRA_ANG4(32, 4, cpu); \
+SETUP_INTRA_ANG4(33, 3, cpu);
+
+#define INTRA_ANG_SSE4(cpu) \
+SETUP_INTRA_ANG4_8(19, 17, cpu); \
+SETUP_INTRA_ANG4_8(20, 16, cpu); \
+SETUP_INTRA_ANG4_8(21, 15, cpu); \
+SETUP_INTRA_ANG4_8(22, 14, cpu);\
+SETUP_INTRA_ANG4_8(23, 13, cpu); \
+SETUP_INTRA_ANG4_8(24, 12, cpu); \
+SETUP_INTRA_ANG4_8(25, 11, cpu); \
+SETUP_INTRA_ANG4_8(26, 26, cpu); \
+SETUP_INTRA_ANG4_8(27, 9, cpu); \
+SETUP_INTRA_ANG4_8(28, 8, cpu); \
+SETUP_INTRA_ANG4_8(29, 7, cpu); \
+SETUP_INTRA_ANG4_8(30, 6, cpu); \
+SETUP_INTRA_ANG4_8(31, 5, cpu); \
+SETUP_INTRA_ANG4_8(32, 4, cpu); \
+SETUP_INTRA_ANG4_8(33, 3, cpu); \
+SETUP_INTRA_ANG16_32(19, 19, cpu); \
+SETUP_INTRA_ANG16_32(20, 20, cpu); \
+SETUP_INTRA_ANG16_32(21, 21, cpu); \
+SETUP_INTRA_ANG16_32(22, 22, cpu); \
+

Re: [x265] [PATCH 2/2] Fix x86_64 build

2014-03-05 Thread Deepthi Nandakumar
Can you convert this to an hg patch? Git patches don't apply cleanly


On Wed, Mar 5, 2014 at 4:08 PM, Rafaël Carré fun...@videolan.org wrote:

 ---
  source/CMakeLists.txt | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
 index 2febfaa..d2fea1a 100644
 --- a/source/CMakeLists.txt
 +++ b/source/CMakeLists.txt
 @@ -21,7 +21,7 @@ configure_file(${PROJECT_SOURCE_DIR}/x265_config.h.in

  SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake
 ${CMAKE_MODULE_PATH})

 -if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL i386 OR ${CMAKE_SYSTEM_PROCESSOR}
 STREQUAL x86)
 +if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL i386 OR ${CMAKE_SYSTEM_PROCESSOR}
 STREQUAL x86 OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL x86_64)
  set(X86 1)
  add_definitions(-DX265_ARCH_X86=1)
  if(${CMAKE_SIZEOF_VOID_P} MATCHES 8)
 --
 1.9.0

 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH 2/2] Fix x86_64 build

2014-03-05 Thread Rafaël Carré
I know, I sent patches to mercurial to properly apply git patches with utf8,
but never finished them :/

Can you e.g. apply the diff manually and fix my name after?

I'm using git hg clone for all my development so making a hg patch is
not easy for me.

If you have a suggestion I'm all ears though

On 03/05/14 12:15, Deepthi Nandakumar wrote:
 Can you convert this to an hg patch? Git patches don't apply cleanly
 
 
 On Wed, Mar 5, 2014 at 4:08 PM, Rafaël Carré fun...@videolan.org wrote:
 
 ---
  source/CMakeLists.txt | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
 index 2febfaa..d2fea1a 100644
 --- a/source/CMakeLists.txt
 +++ b/source/CMakeLists.txt
 @@ -21,7 +21,7 @@ configure_file(${PROJECT_SOURCE_DIR}/x265_config.h.in

  SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake
 ${CMAKE_MODULE_PATH})

 -if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL i386 OR ${CMAKE_SYSTEM_PROCESSOR}
 STREQUAL x86)
 +if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL i386 OR ${CMAKE_SYSTEM_PROCESSOR}
 STREQUAL x86 OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL x86_64)
  set(X86 1)
  add_definitions(-DX265_ARCH_X86=1)
  if(${CMAKE_SIZEOF_VOID_P} MATCHES 8)
 --
 1.9.0
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] Issue #35: Crash when encoding yuv420p10le/yuv420p10be video using ffmpeg (multicoreware/x265)

2014-03-05 Thread sfan5
New issue 35: Crash when encoding yuv420p10le/yuv420p10be video using ffmpeg
https://bitbucket.org/multicoreware/x265/issue/35/crash-when-encoding-yuv420p10le

sfan5:

Segfaults immediately (before ffmpeg outputs the frame=... status text) when 
using yuv420p10le.
Segfaults after encoding 25 frames when using yuv420p10be.
I'm not sure whether this is an ffmpeg bug or libx265 bug.

libx265 version: 31731a78d994
ffmpeg version: b2880a3de9e3

Backtrace:

```
#!none

Program received signal SIGSEGV, Segmentation fault.
0x723b63b8 in x265::TComPicYuv::copyFromPicture (this=0xa285220, 
pic=..., pad=0x66c870) at 
/tmp/build/src/libx265/source/Lib/TLibCommon/TComPicYuv.cpp:291
291 V[c] = (pixel)((v[c] >> shift) & mask);
(gdb) bt full
#0  0x723b62b5 in x265::TComPicYuv::copyFromPicture (this=0xa278c20, 
pic=..., pad=0x66c850)
at /tmp/build/src/libx265/source/Lib/TLibCommon/TComPicYuv.cpp:279
c = 832
r = 982
Y = 0x7fffdc518400
U = 0x130800e0
y = 0x7fffd904e980
u = 0x7fffd8d20a70
mask = 1023
V = 0x131df120
v = 0x7fffd8e2c090
shift = 0
pady = 9
padx = 1
width = 1920
height = 1080
rem = 8 '\b'
#1  0x723046e4 in x265::Encoder::encode (this=0x66bf30, flush=false, 
pic_in=0x7fffd470, pic_out=0x7fffd4c0, nalunits=0x7fffd410)
at /tmp/build/src/libx265/source/encoder/encoder.cpp:323
pic = 0x117ad820
curEncoder = 0x66f500
ret = 0
out = 0x74dd74d0 __GI___libc_malloc+96
#2  0x7230aaaf in x265_encoder_encode (enc=0x66bf30, 
pp_nal=0x7fffd468, pi_nal=0x7fffd464, pic_in=0x7fffd470, 
pic_out=0x7fffd4c0)
at /tmp/build/src/libx265/source/encoder/api.cpp:105
encoder = 0x66bf30
nalunits = {0x0, 0x0, 0x0, 0x0, 0x0}
numEncoded = 119
#3  0x767a558f in ?? ()
   from /home/stefan/Dokumente/ffmpeg-git/lib/libavcodec.so.55
No symbol table info available.
#4  0x76906672 in avcodec_encode_video2 ()
   from /home/stefan/Dokumente/ffmpeg-git/lib/libavcodec.so.55
No symbol table info available.
#5  0x0041e95e in ?? ()
No symbol table info available.
#6  0x00408117 in ?? ()
No symbol table info available.
#7  0x74d75de5 in __libc_start_main (main=0x406ec0, argc=12, 
ubp_av=0x7fffdea8, init=optimized out, fini=optimized out, 
rtld_fini=optimized out, stack_end=0x7fffde98) at libc-start.c:260
result = optimized out
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {0, -7112016030735285240, 
4234400, 140737488346784, 0, 0, 7112016031736120328, 
7111996038656410632}, mask_was_saved = 0}}, priv = {pad = {
  0x0, 0x0, 0x4244c0, 0x7fffdea8}, data = {prev = 0x0, 
  cleanup = 0x0, canceltype = 4342976}}}
not_first_call = optimized out
#8  0x00409cc9 in ?? ()
No symbol table info available.

```



___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] Issue #34: Cannot encode a raw YUV video (multicoreware/x265)

2014-03-05 Thread Jacques Boscq
New issue 34: Cannot encode a raw YUV video
https://bitbucket.org/multicoreware/x265/issue/34/cannot-encode-a-raw-yuv-video

Jacques Boscq:

Hello,

I tried to encode a raw YUV file into a HEVC raw file but it crashed:

```
#!c++

yuv  [info]: 1920x1080 fps 23980/1000 i420 frames 0 - 130425 of 130426
[New Thread 0x75f1a700 (LWP 31281)]
x265 [info]: using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX
x265 [info]: HEVC encoder version 0.7+375-6662df480e39
x265 [info]: build info [Linux][GCC 4.8.2][64 bit] 8bpp
x265 [info]: Main profile, Level-4 (Main tier)
[New Thread 0x75719700 (LWP 31282)]
[New Thread 0x74f18700 (LWP 31283)]
[New Thread 0x74717700 (LWP 31284)]
[New Thread 0x73f16700 (LWP 31285)]
x265 [info]: WPP streams / pool / frames : 17 / 4 / 2
x265 [info]: CU size : 64
x265 [info]: Max RQT depth inter / intra : 1 / 1
x265 [info]: ME / range / subpel / merge : hex / 57 / 2 / 2
x265 [info]: Keyframe min / max / scenecut   : 23 / 250 / 40
x265 [info]: Lookahead / bframes / badapt: 20 / 4 / 2
x265 [info]: b-pyramid / weightp / refs  : 1 / 1 / 3
x265 [info]: Rate Control / AQ-Strength / CUTree : CRF-28.0 / 1.0 / 1
x265 [info]: tools: rect amp rd=3 lft sao-lcu sign-hide 
[New Thread 0x72099700 (LWP 31286)]
[New Thread 0x700f2700 (LWP 31287)]
[4.5%] 5912/130426 frames, 2.14 fps, 849.07 kb/s, eta 16:09:46   
Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x74717700 (LWP 31284)]
0x00622e3e in x265::BitCost::bitcost (this=0x264f7e8, mv=...)
at /home/da/x265/source/encoder/bitcost.h:52
52  s_bitsizes[(abs(mv.y - m_mvp.y) << 1) + 
!!(mv.y < m_mvp.y)] + 0.5f);
(gdb) bt
#0  0x00622e3e in x265::BitCost::bitcost (this=0x264f7e8, mv=...)
at /home/da/x265/source/encoder/bitcost.h:52
#1  0x006196b7 in x265::TEncSearch::predInterSearch (this=0x264f658, 
cu=0x3fe9eb0, predYuv=0x3970d10, bUseMRG=false, bLuma=true, bChroma=false)
at /home/da/x265/source/Lib/TLibEncoder/TEncSearch.cpp:2707
#2  0x0062f6f2 in x265::TEncCu::xComputeCostInter (this=0x264fb58, 
outTempCU=0x3fe9eb0, outPredYuv=0x3970d10, partSize=x265::SIZE_Nx2N, 
bUseMRG=false) at /home/da/x265/source/encoder/compress.cpp:205
#3  0x00630ff8 in x265::TEncCu::xCompressInterCU (this=0x264fb58, 
outBestCU=@0x74716cb8: 0x3fd41f0, outTempCU=@0x1dbb190: 0x3fbe530, 
cu=@0x74716ca0: 0x2924aba0, depth=0, PartitionIndex=0, 
minDepth=0 '\000') at /home/da/x265/source/encoder/compress.cpp:447
#4  0x005f29a1 in x265::TEncCu::compressCU (this=0x264fb58, 
cu=0x2924aba0) at /home/da/x265/source/Lib/TLibEncoder/TEncCu.cpp:392
#5  0x0051e0e2 in x265::CTURow::processCU (this=0x264f178, 
cu=0x2924aba0, slice=0x27f94c90, bufferSbac=0x0, bSaveSBac=false)
at /home/da/x265/source/encoder/cturow.cpp:82
#6  0x00519121 in x265::FrameEncoder::processRowEncoder (
this=0x8cc788, row=14)
at /home/da/x265/source/encoder/frameencoder.cpp:1086
#7  0x0051b46b in x265::FrameEncoder::processRow (this=0x8cc788, 
row=28) at /home/da/x265/source/encoder/frameencoder.h:101
#8  0x005edad0 in x265::WaveFront::findJob (this=0x8cc788)
at /home/da/x265/source/common/wavefront.cpp:126
#9  0x0050ac2c in x265::PoolThread::threadMain (this=0x8c8600)
at /home/da/x265/source/common/threadpool.cpp:159
#10 0x00496003 in x265::ThreadShim (opaque=0x8c8600)
at /home/da/x265/source/common/threading.cpp:73
#11 0x77bc6e0e in start_thread (arg=0x74717700)
at pthread_create.c:311
#12 0x76edc0fd in clone ()
at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113
(gdb) p (abs(mv.y - m_mvp.y) << 1) + !!(mv.y < m_mvp.y)
$1 = 41217
(gdb) p s_bitsizes[41217]
Cannot access memory at address 0x77fff414
(gdb) p s_bitsizes[3]
$2 = 31.4634457
(gdb) p s_bitsizes[4]
$3 = -1.02578287e+34
(gdb) p (abs(mv.x - m_mvp.x) << 1)
$4 = 5376
```


Regards


___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] asm: added 10bpp primitives for chroma_vpp and chroma_vps filters

2014-03-05 Thread chen
At 2014-03-05 21:20:49,muru...@multicorewareinc.com wrote:
# HG changeset patch
# User Murugan Vairavel muru...@multicorewareinc.com
# Date 1394025626 -19800
#  Wed Mar 05 18:50:26 2014 +0530
# Node ID 820704b966e9c0fa3537a5789c92d897435ae304
# Parent  724fb2004447992ea22be5edcf3c8a908118d831
asm: added 10bpp primitives for  chroma_vpp and chroma_vps filters
There is no asm code in this patch, so using 'Enable' in the commit message would be better.
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH 2/2] Fix x86_64 build

2014-03-05 Thread Tim Walker
On 05 Mar 2014, at 11:38, Rafaël Carré fun...@videolan.org wrote:

 ---
 source/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

What was the issue fixed by this (build failure on x86_64, but caused by what)?
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH 2/2] Fix x86_64 build

2014-03-05 Thread Steve Borho
On Wed, Mar 5, 2014 at 4:38 AM, Rafaël Carré fun...@videolan.org wrote:
 ---
  source/CMakeLists.txt | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
 index 2febfaa..d2fea1a 100644
 --- a/source/CMakeLists.txt
 +++ b/source/CMakeLists.txt
 @@ -21,7 +21,7 @@ configure_file(${PROJECT_SOURCE_DIR}/x265_config.h.in

  SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH})

 -if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL i386 OR ${CMAKE_SYSTEM_PROCESSOR} 
 STREQUAL x86)
 +if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL i386 OR ${CMAKE_SYSTEM_PROCESSOR} 
 STREQUAL x86 OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL x86_64)
  set(X86 1)
  add_definitions(-DX265_ARCH_X86=1)
  if(${CMAKE_SIZEOF_VOID_P} MATCHES 8)


I suspect there is something wrong with your git clone.  This line now
looks like:

string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" SYSPROC)
if("${SYSPROC}" STREQUAL "i386"   OR "${SYSPROC}" STREQUAL "amd64" OR
   "${SYSPROC}" STREQUAL "x86_64" OR "${SYSPROC}" STREQUAL "x86"   OR
   "${SYSPROC}" STREQUAL "")
message(STATUS "Detected x86 system processor")

The line you have is over a month old.

-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] A new screenshots comparator and loads of new x264 x265 vp8 vp9 tests

2014-03-05 Thread Steve Borho
On Tue, Mar 4, 2014 at 8:04 PM, Niccolò Belli darkba...@linuxsystems.it wrote:
 http://www.linuxsystems.it/2014/03/new-screenshots-comparator-loads-new-x264-x265-vp8-vp9-tests/

 Congratulations to the x265 team: at low bitrates x265 completely CRUSH x264
 if you use a very high quality source like the Blu-ray of The Hobbit: An
 Unexpected Journey.
 Hopefully in the future it will improve at high bitrates too. hi10p is just
 too bugged to be useful right now.

Hi Niccolò,

I'd like to understand the steps you took to encode 10bit video. I
think it should show much better, particularly with the 0.8 tag or
later.  The two basic preconditions x265 needs are to be built with
HIGH_BIT_DEPTH enabled and then to be given input pixels that have at
least 10 bits of resolution and are in YUV 4:2:0 or 4:4:4 format (4:4:4 is
less well tested, so I suggest 4:2:0 for eval).

A 10bit build of x265 will happily encode an 8bit input video, it just
shifts up all the pixels by two bits.  Not necessarily a good
demonstration of 10bit encode, but it is useful for comparison
purposes.  Similarly, an 8bit build of x265 will happily encode a
10bit video if you ask it to (by downshifting).  Not necessarily
useful except for comparison.

-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH] api: change meaning of pic.stride to be in bytes rather than pixels (fixes #35)

2014-03-05 Thread Steve Borho
# HG changeset patch
# User Steve Borho st...@borho.org
# Date 1394045238 21600
#  Wed Mar 05 12:47:18 2014 -0600
# Node ID 59f0664b3d9060a56cd64187cb5e9d1d6b173f09
# Parent  6d55869ed5e29c97977b89aa0218a5c8510c671a
api: change meaning of pic.stride to be in bytes rather than pixels (fixes #35)

x264's pic.plane pointer is a uint8_t* so their input strides are byte based,
ffmpeg is currently assuming our input strides are byte based.  This commit
will make that assumption correct.

diff -r 6d55869ed5e2 -r 59f0664b3d90 source/Lib/TLibCommon/TComPicYuv.cpp
--- a/source/Lib/TLibCommon/TComPicYuv.cpp  Wed Mar 05 11:48:14 2014 -0600
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp  Wed Mar 05 12:47:18 2014 -0600
@@ -205,7 +205,7 @@
 }
 
 Y += getStride();
-y += pic.stride[0];
+y += pic.stride[0] / sizeof(pixel);
 }
 
 for (int r = 0; r < height >> m_vChromaShift; r++)
@@ -218,8 +218,8 @@
 
 U += getCStride();
 V += getCStride();
-u += pic.stride[1];
-v += pic.stride[2];
+u += pic.stride[1] / sizeof(*u);
+v += pic.stride[2] / sizeof(*v);
 }
 }
 else if (pic.bitDepth == 8)
@@ -240,7 +240,7 @@
 }
 
 Y += getStride();
-y += pic.stride[0];
+y += pic.stride[0] / sizeof(*y);
 }
 
 for (int r = 0; r < height >> m_vChromaShift; r++)
@@ -253,8 +253,8 @@
 
 U += getCStride();
 V += getCStride();
-u += pic.stride[1];
-v += pic.stride[2];
+u += pic.stride[1] / sizeof(*u);
+v += pic.stride[2] / sizeof(*v);
 }
 }
 else /* pic.bitDepth > 8 */
@@ -280,7 +280,7 @@
 }
 
 Y += getStride();
-y += pic.stride[0];
+y += pic.stride[0] / sizeof(*y);
 }
 
 for (int r = 0; r < height >> m_vChromaShift; r++)
@@ -293,8 +293,8 @@
 
 U += getCStride();
 V += getCStride();
-u += pic.stride[1];
-v += pic.stride[2];
+u += pic.stride[1] / sizeof(*u);
+v += pic.stride[2] / sizeof(*v);
 }
 }
 
diff -r 6d55869ed5e2 -r 59f0664b3d90 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cppWed Mar 05 11:48:14 2014 -0600
+++ b/source/encoder/encoder.cppWed Mar 05 12:47:18 2014 -0600
@@ -390,11 +390,11 @@
 }
 
 pic_out->planes[0] = recpic->getLumaAddr();
-pic_out->stride[0] = recpic->getStride();
+pic_out->stride[0] = recpic->getStride() * sizeof(pixel);
 pic_out->planes[1] = recpic->getCbAddr();
-pic_out->stride[1] = recpic->getCStride();
+pic_out->stride[1] = recpic->getCStride() * sizeof(pixel);
 pic_out->planes[2] = recpic->getCrAddr();
-pic_out->stride[2] = recpic->getCStride();
+pic_out->stride[2] = recpic->getCStride() * sizeof(pixel);
 }
 
 if (out->getSlice()->m_numWPRefs > 0)
diff -r 6d55869ed5e2 -r 59f0664b3d90 source/input/yuv.cpp
--- a/source/input/yuv.cpp  Wed Mar 05 11:48:14 2014 -0600
+++ b/source/input/yuv.cpp  Wed Mar 05 12:47:18 2014 -0600
@@ -219,11 +219,11 @@
 
 pic.colorSpace = colorSpace;
 pic.bitDepth = depth;
+pic.stride[0] = width * pixelbytes;
+pic.stride[1] = pic.stride[2] = pic.stride[0]  1;
 pic.planes[0] = buf[head];
-pic.planes[1] = (char*)(pic.planes[0]) + width * height * pixelbytes;
-pic.planes[2] = (char*)(pic.planes[1]) + ((width * height * pixelbytes)  
2);
-pic.stride[0] = width;
-pic.stride[1] = pic.stride[2] = pic.stride[0]  1;
+pic.planes[1] = (char*)(pic.planes[0]) + height * pic.stride[0];
+pic.planes[2] = (char*)(pic.planes[1]) + ((height * pic.stride[1])  2);
 
 head = (head + 1) % QUEUE_SIZE;
 notFull.trigger();
diff -r 6d55869ed5e2 -r 59f0664b3d90 source/output/y4m.cpp
--- a/source/output/y4m.cpp Wed Mar 05 11:48:14 2014 -0600
+++ b/source/output/y4m.cpp Wed Mar 05 12:47:18 2014 -0600
@@ -93,18 +93,19 @@
 }
 
 ofs.write(buf, width  x265_cli_csps[colorSpace].width[i]);
-src += pic.stride[i];
+src += pic.stride[i] / sizeof(*src);
 }
 }
 
 #else // if HIGH_BIT_DEPTH
+
 for (int i = 0; i  x265_cli_csps[colorSpace].planes; i++)
 {
 char *src = (char*)pic.planes[i];
 for (int h = 0; h  height  x265_cli_csps[colorSpace].height[i]; h++)
 {
 ofs.write(src, width  x265_cli_csps[colorSpace].width[i]);
-src += pic.stride[i];
+src += pic.stride[i] / sizeof(*src);
 }
 }
 
diff -r 6d55869ed5e2 -r 59f0664b3d90 source/output/yuv.cpp
--- a/source/output/yuv.cpp Wed Mar 05 11:48:14 2014 -0600
+++ b/source/output/yuv.cpp Wed Mar 05 12:47:18 2014 -0600
@@ -73,7 +73,7 @@
 }
 
 ofs.write(buf, width  

Re: [x265] [PATCH] api: change meaning of pic.stride to be in bytes rather than pixels (fixes #35)

2014-03-05 Thread Steve Borho
On Wed, Mar 5, 2014 at 12:47 PM, Steve Borho st...@borho.org wrote:
 # HG changeset patch
 # User Steve Borho st...@borho.org
 # Date 1394045238 21600
 #  Wed Mar 05 12:47:18 2014 -0600
 # Node ID 59f0664b3d9060a56cd64187cb5e9d1d6b173f09
 # Parent  6d55869ed5e29c97977b89aa0218a5c8510c671a
 api: change meaning of pic.stride to be in bytes rather than pixels (fixes 
 #35)

 x264's pic.plane pointer is a uint8_t*, so its input strides are byte based;
 ffmpeg is currently assuming our input strides are byte based as well.  This
 commit will make that assumption correct.

If no one objects, I'll push this with a bump to X265_BUILD


 diff -r 6d55869ed5e2 -r 59f0664b3d90 source/Lib/TLibCommon/TComPicYuv.cpp
 --- a/source/Lib/TLibCommon/TComPicYuv.cpp  Wed Mar 05 11:48:14 2014 -0600
 +++ b/source/Lib/TLibCommon/TComPicYuv.cpp  Wed Mar 05 12:47:18 2014 -0600
 @@ -205,7 +205,7 @@
  }

  Y += getStride();
 -y += pic.stride[0];
 +y += pic.stride[0] / sizeof(pixel);
  }

  for (int r = 0; r  height  m_vChromaShift; r++)
 @@ -218,8 +218,8 @@

  U += getCStride();
  V += getCStride();
 -u += pic.stride[1];
 -v += pic.stride[2];
 +u += pic.stride[1] / sizeof(*u);
 +v += pic.stride[2] / sizeof(*v);
  }
  }
  else if (pic.bitDepth == 8)
 @@ -240,7 +240,7 @@
  }

  Y += getStride();
 -y += pic.stride[0];
 +y += pic.stride[0] / sizeof(*y);
  }

  for (int r = 0; r  height  m_vChromaShift; r++)
 @@ -253,8 +253,8 @@

  U += getCStride();
  V += getCStride();
 -u += pic.stride[1];
 -v += pic.stride[2];
 +u += pic.stride[1] / sizeof(*u);
 +v += pic.stride[2] / sizeof(*v);
  }
  }
  else /* pic.bitDepth  8 */
 @@ -280,7 +280,7 @@
  }

  Y += getStride();
 -y += pic.stride[0];
 +y += pic.stride[0] / sizeof(*y);
  }

  for (int r = 0; r  height  m_vChromaShift; r++)
 @@ -293,8 +293,8 @@

  U += getCStride();
  V += getCStride();
 -u += pic.stride[1];
 -v += pic.stride[2];
 +u += pic.stride[1] / sizeof(*u);
 +v += pic.stride[2] / sizeof(*v);
  }
  }

 diff -r 6d55869ed5e2 -r 59f0664b3d90 source/encoder/encoder.cpp
 --- a/source/encoder/encoder.cppWed Mar 05 11:48:14 2014 -0600
 +++ b/source/encoder/encoder.cppWed Mar 05 12:47:18 2014 -0600
 @@ -390,11 +390,11 @@
  }

  pic_out-planes[0] = recpic-getLumaAddr();
 -pic_out-stride[0] = recpic-getStride();
 +pic_out-stride[0] = recpic-getStride() * sizeof(pixel);
  pic_out-planes[1] = recpic-getCbAddr();
 -pic_out-stride[1] = recpic-getCStride();
 +pic_out-stride[1] = recpic-getCStride() * sizeof(pixel);
  pic_out-planes[2] = recpic-getCrAddr();
 -pic_out-stride[2] = recpic-getCStride();
 +pic_out-stride[2] = recpic-getCStride() * sizeof(pixel);
  }

  if (out-getSlice()-m_numWPRefs  0)
 diff -r 6d55869ed5e2 -r 59f0664b3d90 source/input/yuv.cpp
 --- a/source/input/yuv.cpp  Wed Mar 05 11:48:14 2014 -0600
 +++ b/source/input/yuv.cpp  Wed Mar 05 12:47:18 2014 -0600
 @@ -219,11 +219,11 @@

  pic.colorSpace = colorSpace;
  pic.bitDepth = depth;
 +pic.stride[0] = width * pixelbytes;
 +pic.stride[1] = pic.stride[2] = pic.stride[0]  1;
  pic.planes[0] = buf[head];
 -pic.planes[1] = (char*)(pic.planes[0]) + width * height * pixelbytes;
 -pic.planes[2] = (char*)(pic.planes[1]) + ((width * height * pixelbytes) 
  2);
 -pic.stride[0] = width;
 -pic.stride[1] = pic.stride[2] = pic.stride[0]  1;
 +pic.planes[1] = (char*)(pic.planes[0]) + height * pic.stride[0];
 +pic.planes[2] = (char*)(pic.planes[1]) + ((height * pic.stride[1])  2);

  head = (head + 1) % QUEUE_SIZE;
  notFull.trigger();
 diff -r 6d55869ed5e2 -r 59f0664b3d90 source/output/y4m.cpp
 --- a/source/output/y4m.cpp Wed Mar 05 11:48:14 2014 -0600
 +++ b/source/output/y4m.cpp Wed Mar 05 12:47:18 2014 -0600
 @@ -93,18 +93,19 @@
  }

  ofs.write(buf, width  x265_cli_csps[colorSpace].width[i]);
 -src += pic.stride[i];
 +src += pic.stride[i] / sizeof(*src);
  }
  }

  #else // if HIGH_BIT_DEPTH
 +
  for (int i = 0; i  x265_cli_csps[colorSpace].planes; i++)
  {
  char *src = (char*)pic.planes[i];
  for (int h = 0; h  height  x265_cli_csps[colorSpace].height[i]; 
 h++)
  {
  ofs.write(src, width  x265_cli_csps[colorSpace].width[i]);
 -src += pic.stride[i];
 +src += pic.stride[i] / sizeof(*src);
  }
  }

 diff -r 6d55869ed5e2 -r 59f0664b3d90 

Re: [x265] new link

2014-03-05 Thread Steve Borho
On Wed, Mar 5, 2014 at 12:32 PM, Roger Pack rogerdpa...@gmail.com wrote:
 Hello.
 As a note, on this page:
 http://x265.org/developers.html

 possibly you could/should add a link to

 http://www.videolan.org/developers/x265.html

Oh right, our videolan page is finally live.  We should do that.

On a side note, the "Encoder features" list on the videolan.org page
appears to be a direct copy of the x264 list, which is, uhm, wishful
thinking in some spots and just wrong in others.  I suggest this list:

Full prediction and transform quad-tree recursion supported
Adaptive B-frame placement
B-frames as references / arbitrary frame order
CABAC entropy coding
Intra: all block types (32x32, 16x16, 8x8, 4x4, and PCM with all predictions)
Inter P: all partitions (from 64x64 down to 8x4)
Inter B: partitions from 64x64 down to 8x4 (including all merge modes
and biprediction)
Weighted prediction for P slices
Multiple reference frames
Ratecontrol: constant quantizer, constant quality, single pass ABR, optional VBV
Scenecut detection
Parallel encoding on multiple CPUs, both frame-level and wavefront parallelism

-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] A new screenshots comparator and loads of new x264 x265 vp8 vp9 tests

2014-03-05 Thread Niccolò Belli
In data mercoledì 5 marzo 2014 11:38:41, Steve Borho ha scritto:
 I'd like to understand the steps you took to encode 10bit video

I simply misunderstood the 16bpp flag: it doesn't enable high bit depth but 
only 16 bit variables.

Niccolò
-- 
www.linuxsystems.it
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] A new screenshots comparator and loads of new x264 x265 vp8 vp9 tests

2014-03-05 Thread Steve Borho
On Wed, Mar 5, 2014 at 2:51 PM, Niccolò Belli darkba...@linuxsystems.it wrote:
 In data mercoledì 5 marzo 2014 11:38:41, Steve Borho ha scritto:
 I'd like to understand the steps you took to encode 10bit video

 I simply misunderstood the 16bpp flag: it doesn't enable high bit depth but
 only 16 bit variables.

When you compile with HIGH_BIT_DEPTH, the encoder will encode 10bit
streams, just as x264 does.

There was an impedance mismatch between x265 and ffmpeg which was
preventing 10bit inputs from working properly.  I sent a patch to the
ML for it this morning.  I will push it later today if no one objects.

-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] cleanup m_sharedPredTransformSkip[]

2014-03-05 Thread Steve Borho
On Tue, Mar 4, 2014 at 4:40 AM, Satoshi Nakagawa nakagawa...@oki.com wrote:
 # HG changeset patch
 # User Satoshi Nakagawa nakagawa...@oki.com
 # Date 1393929339 -32400
 #  Tue Mar 04 19:35:39 2014 +0900
 # Node ID 7a61566806f691ddff84cbbc42801f6c2d46df88
 # Parent  3cbde0b893e34e5770cc311d3f4b6fe064c27774
 cleanup m_sharedPredTransformSkip[]

 The NEW_CALCRECON macro is a TODO marker for asm experts, to optimize register
 assignment.

Sorry I haven't responded to this yet; I would like Min to review it
before I push it.

 diff -r 3cbde0b893e3 -r 7a61566806f6 source/Lib/TLibEncoder/TEncSearch.cpp
 --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Mar 03 13:37:35 2014 -0600
 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Mar 04 19:35:39 2014 +0900
 @@ -63,7 +63,6 @@
  m_qtTempTUCoeffCr = NULL;
  for (int i = 0; i  3; i++)
  {
 -m_sharedPredTransformSkip[i] = NULL;
  m_qtTempTransformSkipFlag[i] = NULL;
  m_qtTempCbf[i] = NULL;
  }
 @@ -96,7 +95,6 @@
  for (uint32_t i = 0; i  3; ++i)
  {
  X265_FREE(m_qtTempCbf[i]);
 -X265_FREE(m_sharedPredTransformSkip[i]);
  X265_FREE(m_qtTempTransformSkipFlag[i]);
  }

 @@ -153,9 +151,6 @@
  CHECKED_MALLOC(m_qtTempTransformSkipFlag[1], uint8_t, numPartitions);
  CHECKED_MALLOC(m_qtTempTransformSkipFlag[2], uint8_t, numPartitions);

 -CHECKED_MALLOC(m_sharedPredTransformSkip[0], pixel, MAX_TS_WIDTH * 
 MAX_TS_HEIGHT);
 -CHECKED_MALLOC(m_sharedPredTransformSkip[1], pixel, MAX_TS_WIDTH * 
 MAX_TS_HEIGHT);
 -CHECKED_MALLOC(m_sharedPredTransformSkip[2], pixel, MAX_TS_WIDTH * 
 MAX_TS_HEIGHT);
  CHECKED_MALLOC(m_qtTempTUCoeffY, TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);
  CHECKED_MALLOC(m_qtTempTUCoeffCb, TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);
  CHECKED_MALLOC(m_qtTempTUCoeffCr, TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);
 @@ -414,7 +409,6 @@
  Pel* fenc = fencYuv-getLumaAddr(absPartIdx);
  Pel* pred = predYuv-getLumaAddr(absPartIdx);
  int16_t* residual = resiYuv-getLumaAddr(absPartIdx);
 -Pel* recon= predYuv-getLumaAddr(absPartIdx);
  int  chFmt= cu-getChromaFormat();
  int  part = partitionFromSizes(width, height);

 @@ -439,15 +433,6 @@
  cu-getPattern()-initAdiPattern(cu, absPartIdx, trDepth, m_predBuf, 
 m_predBufStride, m_predBufHeight, m_refAbove, m_refLeft, m_refAboveFlt, 
 m_refLeftFlt);
  //= get prediction signal =
  predIntraLumaAng(lumaPredMode, pred, stride, width);
 -// save prediction
 -if (default0Save1Load2 == 1)
 -{
 -primitives.luma_copy_pp[part](m_sharedPredTransformSkip[0], 
 width, pred, stride);
 -}
 -}
 -else
 -{
 -primitives.luma_copy_pp[part](pred, stride, 
 m_sharedPredTransformSkip[0], width);
  }

  //= get residual signal =
 @@ -491,12 +476,19 @@
  primitives.blockfill_s[size](resiTmp, stride, 0);
  }

 +assert(width = 32);
 +#if NEW_CALCRECON
  //= reconstruction =
 -assert(width = 32);
 +primitives.calcrecon[size](pred, residual, 0, reconQt, reconIPred, 
 stride, MAX_CU_SIZE, reconIPredStride);
 +//= update distortion =
 +outDist += primitives.sse_sp[part](reconQt, MAX_CU_SIZE, fenc, stride);
 +#else
 +ALIGN_VAR_32(pixel, recon[MAX_CU_SIZE * MAX_CU_SIZE]);
 +//= reconstruction =
  primitives.calcrecon[size](pred, residual, recon, reconQt, reconIPred, 
 stride, MAX_CU_SIZE, reconIPredStride);
 -
  //= update distortion =
  outDist += primitives.sse_pp[part](fenc, stride, recon, stride);
 +#endif
  }

  void TEncSearch::xIntraCodingChromaBlk(TComDataCU* cu,
 @@ -534,7 +526,6 @@
  Pel* fenc   = (chromaId  0 ? fencYuv-getCrAddr(absPartIdx) 
 : fencYuv-getCbAddr(absPartIdx));
  Pel* pred   = (chromaId  0 ? predYuv-getCrAddr(absPartIdx) 
 : predYuv-getCbAddr(absPartIdx));
  int16_t* residual   = (chromaId  0 ? resiYuv-getCrAddr(absPartIdx) 
 : resiYuv-getCbAddr(absPartIdx));
 -Pel* recon  = (chromaId  0 ? predYuv-getCrAddr(absPartIdx) 
 : predYuv-getCbAddr(absPartIdx));

  uint32_t qtlayer= 
 cu-getSlice()-getSPS()-getQuadtreeTULog2MaxSize() - trSizeLog2;
  uint32_t numCoeffPerInc = (cu-getSlice()-getSPS()-getMaxCUWidth() * 
 cu-getSlice()-getSPS()-getMaxCUHeight()  
 (cu-getSlice()-getSPS()-getMaxCUDepth()  1))  (m_hChromaShift + 
 m_vChromaShift);
 @@ -561,19 +552,6 @@

  //= get prediction signal =
  predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width, 
 height, chFmt);
 -
 -// save prediction
 -if (default0Save1Load2 == 1)
 -{
 -Pel* predbuf = m_sharedPredTransformSkip[1 + chromaId];
 -primitives.luma_copy_pp[part](predbuf, width, pred, stride);
 -}
 -}
 -else
 -{
 -// load 

[x265] Unfixed typo in patch 6408 (889edfd2c4c3): VBV occupancy

2014-03-05 Thread Mario *LigH* Rohkrämer
Just caught this while reading over the latest changes: the missing "u" was not
noticed during the clarification attempt in source/common/param.cpp


--
__

Fun and success!
Mario *LigH* Rohkrämer
mailto:cont...@ligh.de

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel