Package: release.debian.org Severity: normal User: release.debian....@packages.debian.org Usertags: unblock X-Debbugs-CC: sylves...@debian.org
Dear Release Team, I would like to make the attached changes to the openblas package. The debdiff fixes several important bugs related to crashes or wrong results in widely used functions (dot product and matrix-vector product). It also fixes support for 32-bit Athlon CPUs. I believe that this patch complies with the freeze policy, but since it is fairly large and difficult to review (x86 assembly), I would prefer to ask for pre-approval before uploading. Regards, -- .''`. Sébastien Villemot : :' : Debian Developer `. `' http://www.dynare.org/sebastien `- GPG Key: 4096R/381A7594
diff -Nru openblas-0.1.1/debian/changelog openblas-0.1.1/debian/changelog --- openblas-0.1.1/debian/changelog 2012-08-11 17:50:30.000000000 +0200 +++ openblas-0.1.1/debian/changelog 2013-01-31 15:21:20.000000000 +0100 @@ -1,3 +1,16 @@ +openblas (0.1.1-7) UNRELEASED; urgency=low + + * sgemv_uninitialized_buffer.diff: new patch taken from upstream, ensures that + vectorized sgemv does not use uninitialized data (Closes: #696000) + * dot_uninitialized_buffer.diff: new patch taken from upstream, ensures that + vectorized dot does not use uninitialized data + * gemv_crash_big_data.diff: new patch taken from upstream, fixes crashes of + gemv on big input data (Closes: #697231) + * 32bit_athlon.diff: new patch taken from upstream, fixes crashes on 32-bit + Athlon CPUs (Closes: #697233) + + -- Sébastien Villemot <sebast...@debian.org> Sat, 05 Jan 2013 14:13:23 +0100 + openblas (0.1.1-6) unstable; urgency=low * kill_threads_at_unload.diff: new patch, taken upstream (Closes: #673061) diff -Nru openblas-0.1.1/debian/patches/32bit_athlon.diff openblas-0.1.1/debian/patches/32bit_athlon.diff --- openblas-0.1.1/debian/patches/32bit_athlon.diff 1970-01-01 01:00:00.000000000 +0100 +++ openblas-0.1.1/debian/patches/32bit_athlon.diff 2013-01-05 15:07:32.000000000 +0100 @@ -0,0 +1,21 @@ +Description: Fix crash on 32-bit Athlon CPU +Origin: upstream, https://github.com/xianyi/OpenBLAS/commit/9fb341a9f8d94e4d532d51b1216d92e74a67a569 +Bug-Debian: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=697233 +Last-Update: 2013-01-04 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +--- a/kernel/setparam-ref.c ++++ b/kernel/setparam-ref.c +@@ -634,10 +634,10 @@ static void init_parameter(void) { + TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q; + #endif + +-#if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) ++#if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON) + + 
#ifdef DEBUG +- fprintf(stderr, "Katmai, Coppermine, Banias\n"); ++ fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n"); + #endif + + TABLE_NAME.sgemm_p = 64 * (l2 >> 7); diff -Nru openblas-0.1.1/debian/patches/dot_uninitialized_buffer.diff openblas-0.1.1/debian/patches/dot_uninitialized_buffer.diff --- openblas-0.1.1/debian/patches/dot_uninitialized_buffer.diff 1970-01-01 01:00:00.000000000 +0100 +++ openblas-0.1.1/debian/patches/dot_uninitialized_buffer.diff 2013-01-31 15:19:23.000000000 +0100 @@ -0,0 +1,75 @@ +Description: Ensure that vectorized dot product does not use uninitialized data +Origin: upstream, + https://github.com/xianyi/OpenBLAS/commit/d311236dfdefa41f31a2e7fefa548abf47f0461c +Bug: https://github.com/xianyi/OpenBLAS/issues/189 +Last-Update: 2013-01-31 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +--- a/kernel/x86_64/dot_sse.S ++++ b/kernel/x86_64/dot_sse.S +@@ -530,7 +530,7 @@ + #endif + movsd -32 * SIZE(Y), %xmm8 + +- pshufd $0x39, %xmm4, %xmm5 ++ pshufd $0x29, %xmm4, %xmm5 + + mulps %xmm8, %xmm5 + addps %xmm5, %xmm3 +@@ -750,7 +750,8 @@ + xorps %xmm5, %xmm5 + movhlps %xmm4, %xmm5 + +- mulps -32 * SIZE(Y), %xmm5 ++ movlps -32 * SIZE(Y), %xmm4 ++ mulps %xmm4, %xmm5 + addps %xmm5, %xmm0 + + addq $2 * SIZE, X +@@ -992,7 +993,7 @@ + movsd -32 * SIZE(Y), %xmm8 + + movss %xmm5, %xmm4 +- shufps $0x93, %xmm5, %xmm4 ++ shufps $0x93, %xmm4, %xmm4 + + mulps %xmm8, %xmm4 + addps %xmm4, %xmm3 +--- a/kernel/x86_64/zdot_sse.S ++++ b/kernel/x86_64/zdot_sse.S +@@ -699,7 +699,7 @@ + movsd -32 * SIZE(X), %xmm4 + + pshufd $0xb1, %xmm4, %xmm12 +- shufps $0x39, %xmm8, %xmm8 ++ shufps $0x59, %xmm8, %xmm8 + mulps %xmm8, %xmm4 + addps %xmm4, %xmm0 + mulps %xmm8, %xmm12 +@@ -1336,7 +1336,7 @@ + + movss %xmm9, %xmm8 + pshufd $0xb1, %xmm4, %xmm12 +- shufps $0x93, %xmm8, %xmm8 ++ shufps $0x03, %xmm8, %xmm8 + mulps %xmm8, %xmm4 + addps %xmm4, %xmm0 + mulps %xmm8, %xmm12 +@@ -1697,7 +1697,7 @@ + movsd -32 * SIZE(Y), %xmm4 + + pshufd $0xb1, %xmm4, 
%xmm12 +- shufps $0x39, %xmm8, %xmm8 ++ shufps $0xa9, %xmm8, %xmm8 + mulps %xmm8, %xmm4 + addps %xmm4, %xmm0 + mulps %xmm8, %xmm12 +@@ -2024,7 +2024,7 @@ + + movss %xmm9, %xmm8 + pshufd $0xb1, %xmm4, %xmm12 +- shufps $0x93, %xmm8, %xmm8 ++ shufps $0x03, %xmm8, %xmm8 + mulps %xmm8, %xmm4 + addps %xmm4, %xmm0 + mulps %xmm8, %xmm12 diff -Nru openblas-0.1.1/debian/patches/gemv_crash_big_data.diff openblas-0.1.1/debian/patches/gemv_crash_big_data.diff --- openblas-0.1.1/debian/patches/gemv_crash_big_data.diff 1970-01-01 01:00:00.000000000 +0100 +++ openblas-0.1.1/debian/patches/gemv_crash_big_data.diff 2013-01-31 15:14:36.000000000 +0100 @@ -0,0 +1,685 @@ +Description: Fix crashes of gemv on big input data +Origin: upstream, + https://github.com/xianyi/OpenBLAS/commit/fd3046b32a1f7049fcb2bfb255d72e4204e5522e + https://github.com/xianyi/OpenBLAS/commit/0d1518add98bc3c0e83887be74cda3b23c8937ee + https://github.com/xianyi/OpenBLAS/commit/69200884e13e98b79487cfd1c78faf054278ec2f + https://github.com/xianyi/OpenBLAS/commit/5f0117385e1d4f986ad75fa66b873b014a7792c2 + https://github.com/xianyi/OpenBLAS/commit/cea1a885b5cd38bea67feb6437ef0c3622a96c58 + https://github.com/xianyi/OpenBLAS/commit/0b08f7479e26ce0ef8e076185bb89f16479335e9 +Bug: https://github.com/xianyi/OpenBLAS/issues/154 + https://github.com/xianyi/OpenBLAS/issues/173 +Bug-Debian: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=697231 +Last-Update: 2013-01-31 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +--- a/kernel/x86/gemv_t_sse.S ++++ b/kernel/x86/gemv_t_sse.S +@@ -89,17 +89,24 @@ + #endif + + #define STACKSIZE 16 ++#define ARGS 20 + +-#define M 4 + STACKSIZE(%esp) +-#define N 8 + STACKSIZE(%esp) +-#define ALPHA 16 + STACKSIZE(%esp) +-#define A 20 + STACKSIZE(%esp) +-#define STACK_LDA 24 + STACKSIZE(%esp) +-#define STACK_X 28 + STACKSIZE(%esp) +-#define STACK_INCX 32 + STACKSIZE(%esp) +-#define Y 36 + STACKSIZE(%esp) +-#define STACK_INCY 40 + STACKSIZE(%esp) +-#define BUFFER 44 + 
STACKSIZE(%esp) ++#define M 4 + STACKSIZE+ARGS(%esp) ++#define N 8 + STACKSIZE+ARGS(%esp) ++#define ALPHA 16 + STACKSIZE+ARGS(%esp) ++#define A 20 + STACKSIZE+ARGS(%esp) ++#define STACK_LDA 24 + STACKSIZE+ARGS(%esp) ++#define STACK_X 28 + STACKSIZE+ARGS(%esp) ++#define STACK_INCX 32 + STACKSIZE+ARGS(%esp) ++#define Y 36 + STACKSIZE+ARGS(%esp) ++#define STACK_INCY 40 + STACKSIZE+ARGS(%esp) ++#define BUFFER 44 + STACKSIZE+ARGS(%esp) ++ ++#define MMM 0+STACKSIZE(%esp) ++#define NN 4+STACKSIZE(%esp) ++#define AA 8+STACKSIZE(%esp) ++#define LDAX 12+STACKSIZE(%esp) ++#define XX 16+STACKSIZE(%esp) + + #define I %eax + #define J %ebx +@@ -114,6 +121,7 @@ + + PROLOGUE + ++ subl $ARGS,%esp + pushl %ebp + pushl %edi + pushl %esi +@@ -122,7 +130,42 @@ + PROFCODE + + movl STACK_LDA, LDA ++ movl LDA,LDAX # backup LDA + movl STACK_X, X ++ movl X,XX ++ movl N,J ++ movl J,NN # backup N ++ movl A,J ++ movl J,AA # backup A ++ movl M,J ++ movl J,MMM # mov M to MMM ++.L0t: ++ xorl J,J ++ addl $1,J ++ sall $22,J # J=2^24*sizeof(float)=buffer size(16MB) ++ subl $8, J # Don't use last 8 float in the buffer. 
++ # Now, split M by block J ++ subl J,MMM # MMM=MMM-J ++ movl J,M ++ jge .L00t ++ ALIGN_4 ++ ++ movl MMM,%eax ++ addl J,%eax ++ jle .L999x ++ movl %eax,M ++ ++.L00t: ++ movl AA,%eax ++ movl %eax,A # mov AA to A ++ ++ movl NN,%eax ++ movl %eax,N # reset N ++ ++ ++ movl LDAX, LDA # reset LDA ++ movl XX,X ++ + movl STACK_INCX, INCX + movl STACK_INCY, INCY + +@@ -642,10 +685,22 @@ + ALIGN_4 + + .L999: ++ movl M,J ++ leal (,J,SIZE),%eax ++ addl %eax,AA ++ movl XX,J ++ addl %eax,J ++ movl J,XX ++ jmp .L0t ++ ALIGN_4 ++ ++.L999x: + popl %ebx + popl %esi + popl %edi + popl %ebp ++ ++ addl $ARGS,%esp + ret + + EPILOGUE +--- a/kernel/x86/gemv_t_sse2.S ++++ b/kernel/x86/gemv_t_sse2.S +@@ -76,18 +76,24 @@ + #endif + + #define STACKSIZE 16 ++#define ARGS 16 ++ ++#define M 4 + STACKSIZE+ARGS(%esp) ++#define N 8 + STACKSIZE+ARGS(%esp) ++#define ALPHA 16 + STACKSIZE+ARGS(%esp) ++#define A 24 + STACKSIZE+ARGS(%esp) ++#define STACK_LDA 28 + STACKSIZE+ARGS(%esp) ++#define STACK_X 32 + STACKSIZE+ARGS(%esp) ++#define STACK_INCX 36 + STACKSIZE+ARGS(%esp) ++#define Y 40 + STACKSIZE+ARGS(%esp) ++#define STACK_INCY 44 + STACKSIZE+ARGS(%esp) ++#define BUFFER 48 + STACKSIZE+ARGS(%esp) ++ ++#define MMM 0+STACKSIZE(%esp) ++#define AA 4+STACKSIZE(%esp) ++#define LDAX 8+STACKSIZE(%esp) ++#define NN 12+STACKSIZE(%esp) + +-#define M 4 + STACKSIZE(%esp) +-#define N 8 + STACKSIZE(%esp) +-#define ALPHA 16 + STACKSIZE(%esp) +-#define A 24 + STACKSIZE(%esp) +-#define STACK_LDA 28 + STACKSIZE(%esp) +-#define STACK_X 32 + STACKSIZE(%esp) +-#define STACK_INCX 36 + STACKSIZE(%esp) +-#define Y 40 + STACKSIZE(%esp) +-#define STACK_INCY 44 + STACKSIZE(%esp) +-#define BUFFER 48 + STACKSIZE(%esp) +- + #define I %eax + #define J %ebx + +@@ -101,6 +107,8 @@ + + PROLOGUE + ++ subl $ARGS,%esp ++ + pushl %ebp + pushl %edi + pushl %esi +@@ -108,7 +116,40 @@ + + PROFCODE + ++ + movl STACK_LDA, LDA ++ movl LDA,LDAX # backup LDA ++ movl N,J ++ movl J,NN # backup N ++ movl A,J ++ movl J,AA # backup A ++ movl M,J ++ movl 
J,MMM # mov M to MMM ++.L0t: ++ xorl J,J ++ addl $1,J ++ sall $21,J # J=2^21*sizeof(double)=buffer size(16MB) ++ subl $4, J # Don't use last 4 double in the buffer. ++ # Now, split M by block J ++ subl J,MMM # MMM=MMM-J ++ movl J,M ++ jge .L00t ++ ALIGN_4 ++ ++ movl MMM,%eax ++ addl J,%eax ++ jle .L999x ++ movl %eax,M ++ ++.L00t: ++ movl AA,%eax ++ movl %eax,A # mov AA to A ++ ++ movl NN,%eax ++ movl %eax,N # reset N ++ ++ ++ movl LDAX, LDA # reset LDA + movl STACK_X, X + movl STACK_INCX, INCX + movl STACK_INCY, INCY +@@ -117,6 +158,7 @@ + leal (,INCY, SIZE), INCY + leal (,LDA, SIZE), LDA + ++ + subl $-16 * SIZE, A + + cmpl $0, N +@@ -560,10 +602,19 @@ + ALIGN_4 + + .L999: ++ movl M,J ++ leal (,J,SIZE),%eax ++ addl %eax,AA ++ jmp .L0t ++ ALIGN_4 ++ ++.L999x: + popl %ebx + popl %esi + popl %edi + popl %ebp ++ ++ addl $ARGS,%esp + ret + + EPILOGUE +--- a/kernel/x86_64/sgemv_t.S ++++ b/kernel/x86_64/sgemv_t.S +@@ -47,7 +47,7 @@ + + #ifndef WINDOWS_ABI + +-#define STACKSIZE 64 ++#define STACKSIZE 128 + + #define OLD_M %rdi + #define OLD_N %rsi +@@ -57,6 +57,10 @@ + #define STACK_Y 16 + STACKSIZE(%rsp) + #define STACK_INCY 24 + STACKSIZE(%rsp) + #define STACK_BUFFER 32 + STACKSIZE(%rsp) ++#define MMM 56(%rsp) ++#define NN 64(%rsp) ++#define AA 72(%rsp) ++#define LDAX 80(%rsp) + + #else + +@@ -71,6 +75,10 @@ + #define STACK_Y 72 + STACKSIZE(%rsp) + #define STACK_INCY 80 + STACKSIZE(%rsp) + #define STACK_BUFFER 88 + STACKSIZE(%rsp) ++#defien MMM 216(%rsp) ++#defien NN 224(%rsp) ++#define AA 232(%rsp) ++#define LDAX 240(%rsp) + + #endif + +@@ -127,29 +135,46 @@ + movups %xmm14, 192(%rsp) + movups %xmm15, 208(%rsp) + +- movq OLD_M, M +- movq OLD_N, N +- movq OLD_A, A +- movq OLD_LDA, LDA ++ movq OLD_M, MMM ++ movq OLD_N, NN ++ movq OLD_A, AA ++ movq OLD_LDA, LDAX + movq OLD_X, X + #else +- movq OLD_M, M +- movq OLD_N, N +- movq OLD_A, A +- movq OLD_LDA, LDA ++ movq OLD_M, MMM ++ movq OLD_N, NN ++ movq OLD_A, AA ++ movq OLD_LDA, LDAX + #endif +- +- movq STACK_INCX, INCX +- 
movq STACK_Y, Y +- movq STACK_INCY, INCY +- movq STACK_BUFFER, BUFFER +- + #ifndef WINDOWS_ABI + pshufd $0, %xmm0, ALPHA + #else + pshufd $0, %xmm3, ALPHA + #endif + ++ ++.L0t: ++ xorq M,M ++ addq $1,M ++ salq $22,M ++ subq M,MMM ++ jge .L00t ++ ALIGN_4 ++ ++ movq MMM,%rax ++ addq M,%rax ++ jle .L999x ++ movq %rax,M ++ ++.L00t: ++ movq LDAX,LDA ++ movq NN,N ++ movq AA,A ++ movq STACK_INCX, INCX ++ movq STACK_Y, Y ++ movq STACK_INCY, INCY ++ movq STACK_BUFFER, BUFFER ++ + leaq (,INCX, SIZE), INCX + leaq (,INCY, SIZE), INCY + leaq (,LDA, SIZE), LDA +@@ -6341,6 +6366,12 @@ + ALIGN_4 + + .L999: ++ leaq (,M,SIZE),%rax ++ addq %rax,AA ++ jmp .L0t ++ ALIGN_4 ++ ++.L999x: + movq 0(%rsp), %rbx + movq 8(%rsp), %rbp + movq 16(%rsp), %r12 +--- a/kernel/x86/gemv_n_sse.S ++++ b/kernel/x86/gemv_n_sse.S +@@ -89,17 +89,22 @@ + #endif + + #define STACKSIZE 16 ++#define ARGS 16 + +-#define M 4 + STACKSIZE(%esp) +-#define N 8 + STACKSIZE(%esp) +-#define ALPHA 16 + STACKSIZE(%esp) +-#define A 20 + STACKSIZE(%esp) +-#define STACK_LDA 24 + STACKSIZE(%esp) +-#define STACK_X 28 + STACKSIZE(%esp) +-#define STACK_INCX 32 + STACKSIZE(%esp) +-#define Y 36 + STACKSIZE(%esp) +-#define STACK_INCY 40 + STACKSIZE(%esp) +-#define BUFFER 44 + STACKSIZE(%esp) ++#define M 4 + STACKSIZE+ARGS(%esp) ++#define N 8 + STACKSIZE+ARGS(%esp) ++#define ALPHA 16 + STACKSIZE+ARGS(%esp) ++#define A 20 + STACKSIZE+ARGS(%esp) ++#define STACK_LDA 24 + STACKSIZE+ARGS(%esp) ++#define STACK_X 28 + STACKSIZE+ARGS(%esp) ++#define STACK_INCX 32 + STACKSIZE+ARGS(%esp) ++#define Y 36 + STACKSIZE+ARGS(%esp) ++#define STACK_INCY 40 + STACKSIZE+ARGS(%esp) ++#define BUFFER 44 + STACKSIZE+ARGS(%esp) ++#define MMM 0+ARGS(%esp) ++#define YY 4+ARGS(%esp) ++#define AA 8+ARGS(%esp) ++#define LDAX 12+ARGS(%esp) + + #define I %eax + #define J %ebx +@@ -114,6 +119,7 @@ + + PROLOGUE + ++ subl $ARGS,%esp + pushl %ebp + pushl %edi + pushl %esi +@@ -121,7 +127,34 @@ + + PROFCODE + ++ movl Y,J ++ movl J,YY # backup Y ++ movl A,J ++ movl J,AA # 
backup A ++ movl M,J ++ movl J,MMM # backup MM ++.L0t: ++ xorl J,J ++ addl $1,J ++ sall $21,J ++ subl J,MMM ++ movl J,M ++ jge .L00t ++ ALIGN_4 ++ ++ movl MMM,%eax ++ addl J,%eax ++ jle .L999x ++ movl %eax,M ++ ++.L00t: ++ movl AA,%eax ++ movl %eax,A ++ ++ movl YY,J ++ movl J,Y + movl STACK_LDA, LDA ++ + movl STACK_X, X + movl STACK_INCX, INCX + +@@ -651,12 +684,22 @@ + addss 0 * SIZE(X), %xmm0 + movss %xmm0, (Y1) + ALIGN_3 +- + .L999: ++ movl M,J ++ leal (,J,SIZE),%eax ++ addl %eax,AA ++ movl YY,J ++ addl %eax,J ++ movl J,YY ++ jmp .L0t ++ ALIGN_4 ++ ++.L999x: + popl %ebx + popl %esi + popl %edi + popl %ebp ++ addl $ARGS,%esp + ret + + EPILOGUE +--- a/kernel/x86/gemv_n_sse2.S ++++ b/kernel/x86/gemv_n_sse2.S +@@ -76,17 +76,22 @@ + #endif + + #define STACKSIZE 16 ++#define ARGS 16 + +-#define M 4 + STACKSIZE(%esp) +-#define N 8 + STACKSIZE(%esp) +-#define ALPHA 16 + STACKSIZE(%esp) +-#define A 24 + STACKSIZE(%esp) +-#define STACK_LDA 28 + STACKSIZE(%esp) +-#define STACK_X 32 + STACKSIZE(%esp) +-#define STACK_INCX 36 + STACKSIZE(%esp) +-#define Y 40 + STACKSIZE(%esp) +-#define STACK_INCY 44 + STACKSIZE(%esp) +-#define BUFFER 48 + STACKSIZE(%esp) ++#define M 4 + STACKSIZE+ARGS(%esp) ++#define N 8 + STACKSIZE+ARGS(%esp) ++#define ALPHA 16 + STACKSIZE+ARGS(%esp) ++#define A 24 + STACKSIZE+ARGS(%esp) ++#define STACK_LDA 28 + STACKSIZE+ARGS(%esp) ++#define STACK_X 32 + STACKSIZE+ARGS(%esp) ++#define STACK_INCX 36 + STACKSIZE+ARGS(%esp) ++#define Y 40 + STACKSIZE+ARGS(%esp) ++#define STACK_INCY 44 + STACKSIZE+ARGS(%esp) ++#define BUFFER 48 + STACKSIZE+ARGS(%esp) ++ ++#define MMM 0+ARGS(%esp) ++#define YY 4+ARGS(%esp) ++#define AA 8+ARGS(%esp) + + #define I %eax + #define J %ebx +@@ -101,6 +106,8 @@ + + PROLOGUE + ++ ++ subl $ARGS,%esp + pushl %ebp + pushl %edi + pushl %esi +@@ -108,6 +115,33 @@ + + PROFCODE + ++ movl Y,J ++ movl J,YY # backup Y ++ movl A,J ++ movl J,AA # backup A ++ movl M,J ++ movl J,MMM # backup MM ++.L0t: ++ xorl J,J ++ addl $1,J ++ sall $20,J ++ subl 
J,MMM ++ movl J,M ++ jge .L00t ++ ALIGN_4 ++ ++ movl MMM,%eax ++ addl J,%eax ++ jle .L999x ++ movl %eax,M ++ ++.L00t: ++ movl AA,%eax ++ movl %eax,A ++ ++ movl YY,J ++ movl J,Y ++ + movl STACK_LDA, LDA + movl STACK_X, X + movl STACK_INCX, INCX +@@ -677,10 +711,22 @@ + ALIGN_3 + + .L999: ++ movl M,J ++ leal (,J,SIZE),%eax ++ addl %eax,AA ++ movl YY,J ++ addl %eax,J ++ movl J,YY ++ jmp .L0t ++ ALIGN_4 ++ ++.L999x: ++ + popl %ebx + popl %esi + popl %edi + popl %ebp ++ addl $ARGS,%esp + ret + + EPILOGUE +--- a/kernel/x86_64/dgemv_t.S ++++ b/kernel/x86_64/dgemv_t.S +@@ -47,7 +47,7 @@ + + #ifndef WINDOWS_ABI + +-#define STACKSIZE 64 ++#define STACKSIZE 128 + + #define OLD_M %rdi + #define OLD_N %rsi +@@ -57,7 +57,10 @@ + #define STACK_Y 16 + STACKSIZE(%rsp) + #define STACK_INCY 24 + STACKSIZE(%rsp) + #define STACK_BUFFER 32 + STACKSIZE(%rsp) +- ++#define MMM 56(%rsp) ++#define NN 64(%rsp) ++#define AA 72(%rsp) ++#define LDAX 80(%rsp) + #else + + #define STACKSIZE 256 +@@ -71,6 +74,11 @@ + #define STACK_Y 72 + STACKSIZE(%rsp) + #define STACK_INCY 80 + STACKSIZE(%rsp) + #define STACK_BUFFER 88 + STACKSIZE(%rsp) ++//Temp variables for M,N,A,LDA ++#define MMM 224(%rsp) ++#define NN 232(%rsp) ++#define AA 240(%rsp) ++#define LDAX 248(%rsp) + + #endif + +@@ -131,13 +139,51 @@ + movq OLD_A, A + movq OLD_LDA, LDA + movq OLD_X, X ++ ++ movq M, MMM ++ movq N, NN ++ movq A, AA ++ movq LDA, LDAX ++ + #else +- movq OLD_M, M +- movq OLD_N, N +- movq OLD_A, A +- movq OLD_LDA, LDA ++ movq OLD_M, MMM ++ movq OLD_N, NN ++ movq OLD_A, AA ++ movq OLD_LDA, LDAX ++#endif ++#ifdef HAVE_SSE3 ++#ifndef WINDOWS_ABI ++ movddup %xmm0, ALPHA ++#else ++ movddup %xmm3, ALPHA ++#endif ++#else ++#ifndef WINDOWS_ABI ++ movapd %xmm0, ALPHA ++#else ++ movapd %xmm3, ALPHA ++#endif ++ unpcklpd ALPHA, ALPHA + #endif + ++ ++ ++.L0x: ++ xorq M,M ++ addq $1,M ++ salq $22,M ++ subq M,MMM ++ jge .L00 ++ ++ movq MMM,%rax ++ addq M,%rax ++ jle .L999x ++ movq %rax,M ++ ++.L00: ++ movq LDAX,LDA ++ movq NN,N ++ movq 
AA,A + movq STACK_INCX, INCX + movq STACK_Y, Y + movq STACK_INCY, INCY +@@ -153,21 +199,6 @@ + + subq $-16 * SIZE, A + +-#ifdef HAVE_SSE3 +-#ifndef WINDOWS_ABI +- movddup %xmm0, ALPHA +-#else +- movddup %xmm3, ALPHA +-#endif +-#else +-#ifndef WINDOWS_ABI +- movapd %xmm0, ALPHA +-#else +- movapd %xmm3, ALPHA +-#endif +- unpcklpd ALPHA, ALPHA +-#endif +- + testq M, M + jle .L999 + testq N, N +@@ -854,7 +885,6 @@ + + .L21: + #endif +- + subq $4, N + + leaq 16 * SIZE(BUFFER), X1 +@@ -2461,6 +2491,12 @@ + ALIGN_4 + + .L999: ++ leaq (, M, SIZE), %rax ++ addq %rax,AA ++ jmp .L0x; ++ ALIGN_4 ++ ++.L999x: + movq 0(%rsp), %rbx + movq 8(%rsp), %rbp + movq 16(%rsp), %r12 diff -Nru openblas-0.1.1/debian/patches/series openblas-0.1.1/debian/patches/series --- openblas-0.1.1/debian/patches/series 2012-08-11 17:50:07.000000000 +0200 +++ openblas-0.1.1/debian/patches/series 2013-01-31 15:21:20.000000000 +0100 @@ -3,3 +3,7 @@ hurd.diff generic_profile.diff kill_threads_at_unload.diff +32bit_athlon.diff +sgemv_uninitialized_buffer.diff +gemv_crash_big_data.diff +dot_uninitialized_buffer.diff diff -Nru openblas-0.1.1/debian/patches/sgemv_uninitialized_buffer.diff openblas-0.1.1/debian/patches/sgemv_uninitialized_buffer.diff --- openblas-0.1.1/debian/patches/sgemv_uninitialized_buffer.diff 1970-01-01 01:00:00.000000000 +0100 +++ openblas-0.1.1/debian/patches/sgemv_uninitialized_buffer.diff 2013-01-31 15:21:20.000000000 +0100 @@ -0,0 +1,30 @@ +Description: Ensure that vectorized sgemv does not use uninitialized data +Origin: upstream, https://github.com/xianyi/OpenBLAS/commit/91ed4e4450ceabd71493e0bf80e7455df414bebf +Bug: https://github.com/xianyi/OpenBLAS/issues/171 +Bug-Debian: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=696000 +Last-Update: 2013-01-04 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +--- a/kernel/x86/gemv_t_sse.S ++++ b/kernel/x86/gemv_t_sse.S +@@ -198,6 +198,20 @@ + jg .L06 + ALIGN_4 + ++//Padding zero to prevent loading the dirty number 
from buffer. ++ movl M, I ++ movl $8, J ++ andl $7, I ++ xorps %xmm0, %xmm0 ++ subl I, J ++ ALIGN_2 ++.L07: ++ movss %xmm0, 0 * SIZE(Y1) ++ addl $SIZE, Y1 ++ decl J ++ jg .L07 ++ ALIGN_4 ++ + .L10: + movl Y, Y1 +
signature.asc
Description: Digital signature