Andreas Beckmann wrote:
> I need more instructions to reproduce this ...

# apt-get install python3-pygpu python3-nose python3-scipy libclblas-dev ocl-icd-opencl-dev pocl-opencl-icd
$ DEVICE=opencl0:0 python3 /usr/bin/nosetests3 -v pygpu.tests.test_blas

You may need to run the second command twice. (If it gets to test_ger and fails with "free(): invalid next size (normal)" or a similar memory corruption error, that is a different bug, which I am currently investigating.) You do not need any source packages: the pygpu tests are included in the binary package.

> please try pocl from experimental which is built against llvm-7

That doesn't fix it, but it possibly makes it rarer (the pygpu case now requires running the test suite twice).

Using LLVM 7 also avoids #914021, allowing a backtrace:

$ DEVICE=opencl0:0 gdb --args python3 /usr/bin/nosetests3 -v pygpu.tests.test_blas
(gdb) break lib/IR/Verifier.cpp:1397
No source file named lib/IR/Verifier.cpp.
Make breakpoint pending on future shared library load? (y or [n]) y
Breakpoint 1 (lib/IR/Verifier.cpp:1397) pending.
(gdb) bt full
#0  visitModuleFlag ()
at /build/llvm-toolchain-7-Ael5Ob/llvm-toolchain-7-7.0.1/lib/IR/Verifier.cpp:1398
No locals.
#1  visitModuleFlags ()
at /build/llvm-toolchain-7-Ael5Ob/llvm-toolchain-7-7.0.1/lib/IR/Verifier.cpp:1306
No locals.
#2  verify ()
at /build/llvm-toolchain-7-Ael5Ob/llvm-toolchain-7-7.0.1/lib/IR/Verifier.cpp:384
No locals.
#3  0x00007ffff0db15db in verifyModule ()
at /build/llvm-toolchain-7-Ael5Ob/llvm-toolchain-7-7.0.1/lib/IR/Verifier.cpp:4689
No locals.
#4  0x00007ffff0cbcf5d in UpgradeDebugInfo ()
at /build/llvm-toolchain-7-Ael5Ob/llvm-toolchain-7-7.0.1/lib/IR/AutoUpgrade.cpp:3521
No locals.
#5  0x00007ffff12c001f in materializeModule ()
at /build/llvm-toolchain-7-Ael5Ob/llvm-toolchain-7-7.0.1/lib/Bitcode/Reader/BitcodeReader.cpp:4803
No locals.
#6  0x00007ffff0d890ea in materializeAll ()
at /build/llvm-toolchain-7-Ael5Ob/llvm-toolchain-7-7.0.1/lib/IR/Module.cpp:397
No locals.
#7  0x00007ffff12b806e in getModuleImpl ()
at /build/llvm-toolchain-7-Ael5Ob/llvm-toolchain-7-7.0.1/lib/Bitcode/Reader/BitcodeReader.cpp:5769
No locals.
#8  0x00007ffff12bd562 in parseModule ()
at /build/llvm-toolchain-7-Ael5Ob/llvm-toolchain-7-7.0.1/lib/Bitcode/Reader/BitcodeReader.cpp:5883
No locals.
#9  parseBitcodeFile ()
at /build/llvm-toolchain-7-Ael5Ob/llvm-toolchain-7-7.0.1/lib/Bitcode/Reader/BitcodeReader.cpp:5894
No locals.
#10 0x00007ffff0ddb2cf in parseIR ()
at /build/llvm-toolchain-7-Ael5Ob/llvm-toolchain-7-7.0.1/lib/IRReader/IRReader.cpp:79
No locals.
#11 0x00007ffff0ddb693 in parseIRFile ()
at /build/llvm-toolchain-7-Ael5Ob/llvm-toolchain-7-7.0.1/lib/IRReader/IRReader.cpp:108
No locals.
#12 0x00007ffff3f93f6b in parseModuleIR (
path=path@entry=0x7fffffff9360 "/home/test1/.cache/pocl/kcache/JG/ABBLKFOBNDALGEFMHDFBNNLBFBJABFEGHNMOO/program.bc")
    at /usr/lib/llvm-7/include/llvm/ADT/StringRef.h:85
        Err = {SM = 0x0, Loc = {Ptr = 0x0}, Filename = "", LineNo = 0,
          ColumnNo = 0, Kind = llvm::SourceMgr::DK_Error, Message = "",
          LineContents = "", Ranges = std::vector of length 0, capacity 0,
FixIts = {<llvm::SmallVectorImpl<llvm::SMFixIt>> = {<llvm::SmallVectorTemplateBase<llvm::SMFixIt, false>> = {<llvm::SmallVectorTemplateCommon<llvm::SMFixIt, void>> = {<llvm::SmallVectorBase> = {BeginX = 0x7fffffff9268, Size = 0, Capacity = 4}, <No data fields>}, <No data fields>}, <No data fields>}, <llvm::SmallVectorStorage<llvm::SMFixIt, 4>> = {InlineElts = {
                {<llvm::AlignedCharArray<8, 48>> = {
buffer = "@\223\377\377\377\177\000\000\200\222\377\377\377\177\000\000\000\261\245\353\227\346\350\327\020\223\377\377\377\177\000\000\321\223T\366\377\177\000\000\020\000\000\000\000\000\000"}, <No data fields>},
                {<llvm::AlignedCharArray<8, 48>> = {
buffer = "\002\000\000\000\000\000\000\000\000\000\022@\000\000\022@\300\v(\365\377\177\000\000\060\222\377\377\377\177\000\000\000\261\245\353\227\346\350\327\340\370S\a\000\000\000"}, <No data fields>},
                {<llvm::AlignedCharArray<8, 48>> = {
buffer = "\260\226\377\377\377\177\000\000\002\000\000\000\000\000\000\000\360\276\352\000\000\000\000\000\320\206P\a\000\000\000\000\340\206P\a\000\000\000\000\260\225\377\377\377\177\000"}, <No data fields>},
                {<llvm::AlignedCharArray<8, 48>> = {
buffer = "J\206W\366\377\177", '\000' <repeats 18 times>, "`\223\377\377\377\177\000\000\353\375\365\363\377\177\000\000\001\000\000\000\000\000\000"}, <No data fields>}}}, <No data fields>}}
#13 0x00007ffff3f95d2d in pocl_update_program_llvm_irs_unlocked (
    program=program@entry=0x750fe50, device_i=device_i@entry=0)
    at ./lib/CL/pocl_llvm_wg.cc:516
        program_bc_path = "/home/test1/.cache/pocl/kcache/JG/ABBLKFOBNDALGEFMHDFBNNLBFBJABFEGHNMOO/program.bc", '\000' <repeats 374 times>...
        __PRETTY_FUNCTION__ = "int pocl_update_program_llvm_irs_unlocked(cl_program, unsigned int)"
#14 0x00007ffff3f95e3d in pocl_update_program_llvm_irs (
    program=program@entry=0x750fe50, device_i=device_i@entry=0)
    at ./lib/CL/pocl_llvm_wg.cc:526
        lockHolder = {<No data fields>}
#15 0x00007ffff3f1b04b in compile_and_link_program (
    compile_program=compile_program@entry=1,
    link_program=link_program@entry=1, program=0x750fe50, num_devices=1,
    device_list=0x162dd40, options=<optimized out>, num_input_headers=0,
    input_headers=0x0, header_include_names=0x0, num_input_programs=0,
    input_programs=0x0, pfn_notify=0x0, user_data=0x0)
    at ./lib/CL/pocl_build.c:708
        device = <optimized out>
        found = <optimized out>
program_bc_path = "/home/test1/.cache/pocl/kcache/JG/ABBLKFOBNDALGEFMHDFBNNLBFBJABFEGHNMOO/program.bc\000\000\000\000\000\000\350$A\354\377\177\000\000\026$A\354\377\177\000\000H$A\354\377\177", '\000' <repeats 17 times>, "\001p$A\354\377\177\000\000\340$A\354\377\177\000\000\310$A\354\377\177\000\000\000%A\354\377\177\000\000\001\000\000\000\000\000\000\000`\233\377\377\377\177", '\000' <repeats 11 times>, "\261\245\353\227\346\350\327\340$A\354\377\177\000\000"... link_options = "\000#A\354\377\177\000\000\000\261\245\353\227\346\350\327($A\354\377\177\000\000`\233\377\377", '\000' <repeats 12 times>, "@\242\377\377\377\177\000\000\000\232\377\377\377\177\000\000\377\377\377\377\377\377\377\377\000\000\000\000\000\000\000\000\070\272U\366\377\177", '\000' <repeats 42 times>, "\001\000\000\000\000\000\000\000@(~\366\377\177\000\000`w\272\367\377\177\000\000h\r\000\000\000\000\000\000\376uc\344\377\177\000\000\000\000\000\000\000\000\000\000\370uc\344\377\177\000\000\000\000\000\000\000\000\000\000\376uc\344\377\177\000\000\330#A\354\377\177\000\000\000\000\000\000\000\000\000\000"...
        errcode = <optimized out>
        error = <optimized out>
        create_library = 0
        requires_cr_sqrt_div = 0
        spir_build = 0
        flush_denorms = <optimized out>
        fsize = 35924
        unique_devlist = 0x162dd40
        binary = 0x71fd0f0 "BC\300\336\065\024"
        device_i = <optimized out>
        actually_built = <optimized out>
        i = <optimized out>
        j = <optimized out>
        build_error_code = -11
        __PRETTY_FUNCTION__ = "compile_and_link_program"
#16 0x00007ffff3f1a00c in POclBuildProgram (program=<optimized out>,
    num_devices=<optimized out>, device_list=<optimized out>,
    options=<optimized out>, pfn_notify=<optimized out>,
    user_data=<optimized out>) at ./lib/CL/clBuildProgram.c:37
No locals.
#17 0x00007fffe44cf645 in buildClProgram (source=<optimized out>,
source@entry=0x76c7fc0 "typedef union GPtr {\n __global float *f;\n __global float2 *f2v;\n __global float4 *f4v;\n __global float8 *f8v;\n __global float16 *f16v;\n} GPtr;\n\ntypedef union LPtr {\n __local float *f"..., buildOpts=buildOpts@entry=0x7fffffffa670 "-g", ctx=ctx@entry=0xf9f7d0,
    devID=<optimized out>, devID@entry=0xf9b050,
    logBuf=logBuf@entry=0x7722cf0 "", logBufSize=logBufSize@entry=65536,
    status=0x7fffffff9fe4) at ./src/library/common/clkern.c:179
        program = 0x750fe50
        stat = 0
#18 0x00007fffe450617f in makeKernel (device=device@entry=0xf9b050,
    context=context@entry=0xf9f7d0,
    kernelGenerator=kernelGenerator@entry=0x7fffe451abd0 <generator>,
    program=program@entry=0x0, dims=dims@entry=0x70e9d10,
    pgran=pgran@entry=0x70e9d88, extra=0x7fffffffa410,
    buildOpts=0x7fffffffa670 "-g", error=0x7fffffffa270)
    at ./src/library/blas/generic/common.c:476
        err = 0
source = 0x76c7fc0 "typedef union GPtr {\n __global float *f;\n __global float2 *f2v;\n __global float4 *f4v;\n __global float8 *f8v;\n __global float16 *f16v;\n} GPtr;\n\ntypedef union LPtr {\n __local float *f"...
        size = <optimized out>
        kernel = 0x7314750
        log = 0x7722cf0 ""
#19 0x00007fffe4506bc0 in makeKernelCached (device=<optimized out>,
    context=0xf9f7d0, sid=sid@entry=0, key=key@entry=0x7fffffffa380,
    kernelGenerator=<optimized out>, dims=dims@entry=0x70e9d10,
pgran=<optimized out>, extra=<optimized out>, buildOpts=<optimized out>,
    error=<optimized out>) at ./src/library/blas/generic/common2.cc:78
        kernel = <optimized out>
        name = "solver0\000v\000\000\000\000\000\000\000X\216c\344"
        bl = {m_cache_entry_name = "solver0", m_path = "", m_header = {
            magic_key = "\004\r\"F", whole_file_size = 714668120,
            header_size = 16363600, binary_size = 0,
            signature_size = 16381904}, m_context = 0xf9f7d0,
          m_device = 0xf9b050, m_program = 0x0, m_binary = 0x0,
          m_signature = 0x0,
          m_variants = std::vector of length 4, capacity 4 = {{
m_kind = BinaryLookup::INT, m_size = 4, m_data = 0x781bba0 ""}, {
              m_kind = BinaryLookup::INT, m_size = 4,
              m_data = 0x1c9f440 "\002"}, {m_kind = BinaryLookup::DATA,
              m_size = 80, m_data = 0x6e39190 "\001"}, {
              m_kind = BinaryLookup::DATA, m_size = 300,
              m_data = 0x6eab760 ""}}, m_cache_enabled = false}
#20 0x00007fffe450b825 in makeSolutionSeq (funcID=funcID@entry=CLBLAS_GEMV,
args=args@entry=0x7fffffffa8d0, numCommandQueues=numCommandQueues@entry=1,
    commandQueues=commandQueues@entry=0xa70a30,
    numEventsInWaitList=numEventsInWaitList@entry=0,
    eventWaitList=eventWaitList@entry=0x0, events=0x7fffffffaaa8,
seq=0x7fffffffa800) at ./src/library/blas/generic/solution_seq_make.c:587
        dims = 0x70e9d10
        prepDims = {{x = 0, y = 0, bwidth = 10, itemX = 5, itemY = 3}, {
            x = 5775357, y = 140737308720504, bwidth = 140737159574656,
            itemX = 0, itemY = 0}}
        ident = 0x70e9cd0
        err = 0
        j = <optimized out>
        totalCUs = <optimized out>
        numDevicesWithoutDoubles = <optimized out>
        hasDouble = <optimized out>
        step = 0x70e9b60
        extra = {dtype = TYPE_FLOAT,
flags = (KEXTRA_COLUMN_MAJOR | KEXTRA_UPPER_TRIANG | KEXTRA_UNIT_DIAGONAL | KEXTRA_TAILS_M | KEXTRA_BETA_ZERO | KEXTRA_INCX_ONE | KEXTRA_INCY_ONE),
          kernType = CLBLAS_COMPUTING_KERNEL, vecLen = 4, vecLenA = 4,
vecLenB = 4, vecLenC = 4, solverPriv = '\000' <repeats 15 times>,
          buildOptions = "-g", '\000' <repeats 253 times>}
        i = 0x70e9da8
        pattern = 0x7fffe57f9ce0 <clblasSolvers>
        sid = 0
key = {device = 0xf9b050, context = 0xf9f7d0, nrDims = 2, subdims = {{
              x = 1, y = 32, bwidth = 32, itemX = 1, itemY = 32}, {x = 1,
              y = 4, bwidth = 4, itemX = 1, itemY = 4}, {x = 0, y = 0,
              bwidth = 0, itemX = 0, itemY = 0}}}
        need = {true, false, false}
        ktype = CLBLAS_COMPUTING_KERNEL
        kernel = <optimized out>
        loadData = false
        buffer = {0x0, 0x0, 0x0}
        sizeBuffer = {140737328952632, 17561568, 140737328952632}
        bopts = "-g", '\000' <repeats 253 times>
        ik = <optimized out>
        firstDimIdx = <optimized out>
#21 0x00007fffe44d79bb in doGemv (events=0x7fffffffaaa8, eventWaitList=0x0,
numEventsInWaitList=0, commandQueues=0xa70a30, numCommandQueues=1, incy=1,
    offy=0, y=0x75113e0, incx=1, offx=0, x=0x14fc830, lda=100, offA=0,
    A=<optimized out>, N=128, M=100, transA=clblasNoTrans,
    order=clblasColumnMajor, kargs=0x7fffffffa8d0)
    at ./src/library/blas/xgemv.c:94
        err = <optimized out>
        seq = {prev = 0x70e9da8, next = 0x70e9da8}
        sizev = 100
        retCode = <optimized out>
        err = <optimized out>
        seq = <optimized out>
        sizev = <optimized out>
        retCode = <optimized out>
#22 doGemv (kargs=0x7fffffffa8d0, order=clblasColumnMajor,
transA=clblasNoTrans, M=100, N=128, A=<optimized out>, offA=0, lda=100,
    x=0x14fc830, offx=0, incx=1, y=0x75113e0, offy=0, incy=1,
    numCommandQueues=1, commandQueues=0xa70a30, numEventsInWaitList=0,
eventWaitList=0x0, events=0x7fffffffaaa8) at ./src/library/blas/xgemv.c:26
        err = <optimized out>
        seq = <optimized out>
        sizev = <optimized out>
        retCode = <optimized out>
#23 0x00007fffe44d7aef in clblasSgemv (order=<optimized out>,
transA=<optimized out>, M=<optimized out>, N=128, alpha=<optimized out>,
    A=<optimized out>, offA=<optimized out>, lda=100, x=0x14fc830, offx=0,
    incx=1, beta=<optimized out>, y=0x75113e0, offy=0, incy=1,
    numCommandQueues=1, commandQueues=0xa70a30, numEventsInWaitList=0,
eventWaitList=0x0, events=0x7fffffffaaa8) at ./src/library/blas/xgemv.c:135
        kargs = {pigFuncID = CLBLAS_GEMV, kernType = CLBLAS_COMPUTING_KERNEL, dtype = TYPE_FLOAT, order = clblasColumnMajor, side = clblasLeft, uplo = clblasUpper, transA = clblasNoTrans, transB = clblasNoTrans, diag = clblasUnit, M = 100, N = 128, K = 100, alpha = {argFloat = 1, argDouble = 5.2635442471208903e-315, argFloatComplex = {s = {1,
                0}, v2 = {1, 0}}, argDoubleComplex = {s = {
5.2635442471208903e-315, 0}, v2 = {5.2635442471208903e-315,
                0}}}, A = 0x70cafe0, lda = {matrix = 100, Vector = 100},
B = 0x14fc830, ldb = {matrix = 1, Vector = 1}, beta = {argFloat = 0,
            argDouble = 0, argFloatComplex = {s = {0, 0}, v2 = {0, 0}},
            argDoubleComplex = {s = {0, 0}, v2 = {0, 0}}}, C = 0x75113e0,
          ldc = {matrix = 1, Vector = 1}, D = 0x0, E = 0x0, addrBits = 0,
          offsetM = 0, offsetN = 0, offsetK = 0, scimage = {0x0, 0x0},
          offA = 0, offBX = 0, offCY = 0, offa = 0, offb = 0, offc = 0,
          offd = 0, offe = 0, startRow = 0, endRow = 0, tailStartM = 0,
          tailStartN = 0, KL = 0, KU = 0, redctnType = REDUCE_BY_SUM}
#24 0x00007ffff6886f7e in ?? () from /usr/lib/libgpuarray.so.2
No symbol table info available.
#25 0x00007ffff686d743 in GpuArray_rgemv () from /usr/lib/libgpuarray.so.2
No symbol table info available.
#26 0x00007ffff6c696d2 in ?? ()
from /usr/lib/python3/dist-packages/pygpu/blas.cpython-37m-x86_64-linux-gnu.so
No symbol table info available.

Reply via email to