Control: reassign -1 libllvm10,libllvm11
Control: found -1 1:10.0.1-8
Control: found -1 1:11.0.0-5
Control: retitle -1 libgpuarray autopkgtest using pocl on armhf triggers 
segfault in LLVM
Control: affects -1 + src:pocl

This bug is reproducible with pocl built against llvm-10 (in sid)
and pocl built against llvm-11 (in experimental), while no error
occurrs with pocl built against llvm-9 (in testing).

I managed to create a reproducer in C with an embedded OpenCL kernel
that hopefully helps debugging the issue. (Removing python,
libgpuarray, libclblas, ... from the path triggering the issue.)

Installing the following packages should be sufficient:
  pocl-opencl-icd libpocl2-dbgsym libllvm10-dbgsym ocl-icd-opencl-dev 
ocl-icd-libopencl1-dbgsym

The 975931.sh script builds the ./975931 binary and runs it with an
empty pocl kernel cache, resulting in a segmentation fault with this backtrace:

#0  getEmissionKind () at 
/build/llvm-toolchain-10-cW4tHW/llvm-toolchain-10-10.0.1/llvm/include/llvm/IR/DebugInfoMetadata.h:1244
#1  initialize () at 
/build/llvm-toolchain-10-cW4tHW/llvm-toolchain-10-10.0.1/llvm/lib/CodeGen/LexicalScopes.cpp:53
#2  0xb14102f0 in computeIntervals () at 
/build/llvm-toolchain-10-cW4tHW/llvm-toolchain-10-10.0.1/llvm/lib/CodeGen/LiveDebugVariables.cpp:979
#3  runOnMachineFunction () at 
/build/llvm-toolchain-10-cW4tHW/llvm-toolchain-10-10.0.1/llvm/lib/CodeGen/LiveDebugVariables.cpp:996
#4  runOnMachineFunction () at 
/build/llvm-toolchain-10-cW4tHW/llvm-toolchain-10-10.0.1/llvm/lib/CodeGen/LiveDebugVariables.cpp:1023
#5  0xb14856c8 in runOnFunction () at 
/build/llvm-toolchain-10-cW4tHW/llvm-toolchain-10-10.0.1/llvm/lib/CodeGen/MachineFunctionPass.cpp:73
#6  0xb12ff494 in runOnFunction () at 
/build/llvm-toolchain-10-cW4tHW/llvm-toolchain-10-10.0.1/llvm/lib/IR/LegacyPassManager.cpp:1481
#7  0xb12ff750 in runOnModule () at 
/build/llvm-toolchain-10-cW4tHW/llvm-toolchain-10-10.0.1/llvm/lib/IR/LegacyPassManager.cpp:1517
#8  0xb12ffba8 in runOnModule () at 
/build/llvm-toolchain-10-cW4tHW/llvm-toolchain-10-10.0.1/llvm/lib/IR/LegacyPassManager.cpp:1582
#9  run () at 
/build/llvm-toolchain-10-cW4tHW/llvm-toolchain-10-10.0.1/llvm/lib/IR/LegacyPassManager.cpp:1694
#10 0xb6e64c82 in pocl_llvm_codegen (Device=Device@entry=0xdb0010, 
Modp=0x1361838, Output=Output@entry=0xbefde86c, 
OutputSize=OutputSize@entry=0xbefde880) at ./lib/CL/pocl_llvm_wg.cc:624
#11 0xb6e291de in llvm_codegen (output=output@entry=0xdeb898 
"pocl-kernel-cache-2020-12-10T00:06:19+00:00-hPVZwM/AP/PNFEAPBKBFEAKGGNMALGHGJEEKGMJFBFBMDHA/Sdot_kernel/0-0-0/Sdot_kernel.so",
 device_i=device_i@entry=0, kernel=kernel@entry=0xbefe0240, 
    device=0xdb0010, command=command@entry=0xbefe0278, 
specialize=specialize@entry=0) at ./lib/CL/devices/common.c:158
#12 0xb6e2ae44 in pocl_check_kernel_disk_cache 
(command=command@entry=0xbefe0278, specialized=specialized@entry=0) at 
./lib/CL/devices/common.c:958
#13 0xb6e2b262 in pocl_check_kernel_dlhandle_cache (command=0xbefe0278, 
initial_refcount=0, specialize=0) at ./lib/CL/devices/common.c:1081
#14 0xb6e033d4 in program_compile_dynamic_wg_binaries 
(program=program@entry=0xd8ab88) at ./lib/CL/pocl_build.c:179
#15 0xb6e13f20 in get_binary_sizes (sizes=0xbefe0384, program=0xd8ab88) at 
./lib/CL/clGetProgramInfo.c:36
#16 POclGetProgramInfo (program=0xd8ab88, param_name=4453, 
param_value_size=128, param_value=0xbefe0384, param_value_size_ret=0xbefe0380) 
at ./lib/CL/clGetProgramInfo.c:115
#17 0x00473070 in main () at 975931.c:238

Then it runs the binary again, this time with the pocl kernel cache contents
from previous failure, resulting in

inlinable function call in a function with debug info must have a !dbg location
  %11 = call i32 @_Z12get_local_idj(i32 0)
inlinable function call in a function with debug info must have a !dbg location
  %19 = call i32 @_Z12get_local_idj(i32 1)
inlinable function call in a function with debug info must have a !dbg location
  %27 = call i32 @_Z12get_local_idj(i32 2)
binary size: 52077
OK

It may well be that pocl calls llvm with some invalid input
(the fact that the second run does not segfault seems to
indicate something like this), but still a compiler (library)
should not segfault in this case.
I hope you can shed some light on whether llvm or pocl is to
blame here.


Andreas
#define CL_TARGET_OPENCL_VERSION 220

#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

const char source[] =
"#ifdef DOUBLE_PRECISION\n"
"    #ifdef cl_khr_fp64\n"
"    #pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"    #else\n"
"    #pragma OPENCL EXTENSION cl_amd_fp64 : enable\n"
"    #endif\n"
"#endif\n"
"\n"
"__kernel void Sdot_kernel( __global float *_X, __global float *_Y, __global float *scratchBuff,\n"
"                                        uint N, uint offx, int incx, uint offy, int incy, int doConj )\n"
"{\n"
"    __global float *X = _X + offx;\n"
"    __global float *Y = _Y + offy;\n"
"    float dotP = (float) 0.0;\n"
"\n"
"    if ( incx < 0 ) {\n"
"        X = X + (N - 1) * abs(incx);\n"
"    }\n"
"    if ( incy < 0 ) {\n"
"        Y = Y + (N - 1) * abs(incy);\n"
"    }\n"
"\n"
"    int gOffset;\n"
"    for( gOffset=(get_global_id(0) * 4); (gOffset + 4 - 1)<N; gOffset+=( get_global_size(0) * 4 ) )\n"
"    {\n"
"        float4 vReg1, vReg2, res;\n"
"\n"
"        #ifdef INCX_NONUNITY\n"
"             vReg1 = (float4)(  (X + (gOffset*incx))[0 + ( incx * 0)],  (X + (gOffset*incx))[0 + ( incx * 1)],  (X + (gOffset*incx))[0 + ( incx * 2)],  (X + (gOffset*incx))[0 + ( incx * 3)]);\n"
"        #else\n"
"            vReg1 = vload4(  0, (__global float *) (X + gOffset) );\n"
"        #endif\n"
"\n"
"        #ifdef INCY_NONUNITY\n"
"             vReg2 = (float4)(  (Y + (gOffset*incy))[0 + ( incy * 0)],  (Y + (gOffset*incy))[0 + ( incy * 1)],  (Y + (gOffset*incy))[0 + ( incy * 2)],  (Y + (gOffset*incy))[0 + ( incy * 3)]);\n"
"        #else\n"
"            vReg2 = vload4(  0, (__global float *) (Y + gOffset) );\n"
"        #endif\n"
"\n"
"        ;\n"
"         res =  vReg1 *  vReg2 ;\n"
"        dotP +=  res .S0 +  res .S1 +  res .S2 +  res .S3;\n"
";          // Add-up elements in the vector to give a scalar\n"
"    }\n"
"\n"
"    // Loop for the last thread to handle the tail part of the vector\n"
"    // Using the same gOffset used above\n"
"    for( ; gOffset<N; gOffset++ )\n"
"    {\n"
"        float sReg1, sReg2, res;\n"
"        sReg1 = X[gOffset * incx];\n"
"        sReg2 = Y[gOffset * incy];\n"
"\n"
"        ;\n"
"             res =  sReg1 *  sReg2 ;\n"
"             dotP =  dotP +  res ;\n"
"        }\n"
"\n"
"    // Note: this has to be called outside any if-conditions- because REDUCTION uses barrier\n"
"    // dotP of work-item 0 will have the final reduced item of the work-group\n"
"    __local float p1753 [ 64 ];\n"
"	uint QKiD0 = get_local_id(0);\n"
"	 p1753 [ QKiD0 ] =  dotP ;\n"
"	barrier(CLK_LOCAL_MEM_FENCE);\n"
"\n"
"	if( QKiD0 < 32 ) {\n"
"		 p1753 [ QKiD0 ] = p1753 [ QKiD0 ] + p1753 [ QKiD0 + 32 ];\n"
"	}\n"
"	barrier(CLK_LOCAL_MEM_FENCE);\n"
"\n"
"	if( QKiD0 < 16 ) {\n"
"		 p1753 [ QKiD0 ] = p1753 [ QKiD0 ] + p1753 [ QKiD0 + 16 ];\n"
"	}\n"
"	barrier(CLK_LOCAL_MEM_FENCE);\n"
"\n"
"	if( QKiD0 < 8 ) {\n"
"		 p1753 [ QKiD0 ] = p1753 [ QKiD0 ] + p1753 [ QKiD0 + 8 ];\n"
"	}\n"
"	barrier(CLK_LOCAL_MEM_FENCE);\n"
"\n"
"	if( QKiD0 < 4 ) {\n"
"		 p1753 [ QKiD0 ] = p1753 [ QKiD0 ] + p1753 [ QKiD0 + 4 ];\n"
"	}\n"
"	barrier(CLK_LOCAL_MEM_FENCE);\n"
"\n"
"	if( QKiD0 < 2 ) {\n"
"		 p1753 [ QKiD0 ] = p1753 [ QKiD0 ] + p1753 [ QKiD0 + 2 ];\n"
"	}\n"
"	barrier(CLK_LOCAL_MEM_FENCE);\n"
"\n"
"	if( QKiD0 == 0 ) {\n"
"	 dotP  = p1753 [0] + p1753 [1];\n"
"	}\n"
"\n"
"    if( (get_local_id(0)) == 0 ) {\n"
"        scratchBuff[ get_group_id(0) ] = dotP;\n"
"    }\n"
"}\n"
"\n"
;

#define MAX_PLATFORMS 32
#define MAX_DEVICES   32
#define MAX_BINARIES  32

#define OPENCL_ERROR_CASE(ERR) \
  case ERR:                                                             \
  { fprintf (stderr, "" #ERR " in %s on line %i\n", func_name, line);   \
    return 1; }

int
check_cl_error (cl_int cl_err, int line, const char* func_name) {

  switch (cl_err)
    {
    case CL_SUCCESS: return 0;

      OPENCL_ERROR_CASE (CL_DEVICE_NOT_FOUND)
        OPENCL_ERROR_CASE (CL_DEVICE_NOT_AVAILABLE)
        OPENCL_ERROR_CASE (CL_COMPILER_NOT_AVAILABLE)
        OPENCL_ERROR_CASE (CL_MEM_OBJECT_ALLOCATION_FAILURE)
        OPENCL_ERROR_CASE (CL_OUT_OF_RESOURCES)
        OPENCL_ERROR_CASE (CL_OUT_OF_HOST_MEMORY)
        OPENCL_ERROR_CASE (CL_PROFILING_INFO_NOT_AVAILABLE)
        OPENCL_ERROR_CASE (CL_MEM_COPY_OVERLAP)
        OPENCL_ERROR_CASE (CL_IMAGE_FORMAT_MISMATCH)
        OPENCL_ERROR_CASE (CL_IMAGE_FORMAT_NOT_SUPPORTED)
        OPENCL_ERROR_CASE (CL_BUILD_PROGRAM_FAILURE)
        OPENCL_ERROR_CASE (CL_MAP_FAILURE)
        OPENCL_ERROR_CASE (CL_MISALIGNED_SUB_BUFFER_OFFSET)
        OPENCL_ERROR_CASE (CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
        OPENCL_ERROR_CASE (CL_COMPILE_PROGRAM_FAILURE)
        OPENCL_ERROR_CASE (CL_LINKER_NOT_AVAILABLE)
        OPENCL_ERROR_CASE (CL_LINK_PROGRAM_FAILURE)
        OPENCL_ERROR_CASE (CL_DEVICE_PARTITION_FAILED)
        OPENCL_ERROR_CASE (CL_KERNEL_ARG_INFO_NOT_AVAILABLE)
        OPENCL_ERROR_CASE (CL_INVALID_VALUE)
        OPENCL_ERROR_CASE (CL_INVALID_DEVICE_TYPE)
        OPENCL_ERROR_CASE (CL_INVALID_PLATFORM)
        OPENCL_ERROR_CASE (CL_INVALID_DEVICE)
        OPENCL_ERROR_CASE (CL_INVALID_CONTEXT)
        OPENCL_ERROR_CASE (CL_INVALID_QUEUE_PROPERTIES)
        OPENCL_ERROR_CASE (CL_INVALID_COMMAND_QUEUE)
        OPENCL_ERROR_CASE (CL_INVALID_HOST_PTR)
        OPENCL_ERROR_CASE (CL_INVALID_MEM_OBJECT)
        OPENCL_ERROR_CASE (CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
        OPENCL_ERROR_CASE (CL_INVALID_IMAGE_SIZE)
        OPENCL_ERROR_CASE (CL_INVALID_SAMPLER)
        OPENCL_ERROR_CASE (CL_INVALID_BINARY)
        OPENCL_ERROR_CASE (CL_INVALID_BUILD_OPTIONS)
        OPENCL_ERROR_CASE (CL_INVALID_PROGRAM)
        OPENCL_ERROR_CASE (CL_INVALID_PROGRAM_EXECUTABLE)
        OPENCL_ERROR_CASE (CL_INVALID_KERNEL_NAME)
        OPENCL_ERROR_CASE (CL_INVALID_KERNEL_DEFINITION)
        OPENCL_ERROR_CASE (CL_INVALID_KERNEL)
        OPENCL_ERROR_CASE (CL_INVALID_ARG_INDEX)
        OPENCL_ERROR_CASE (CL_INVALID_ARG_VALUE)
        OPENCL_ERROR_CASE (CL_INVALID_ARG_SIZE)
        OPENCL_ERROR_CASE (CL_INVALID_KERNEL_ARGS)
        OPENCL_ERROR_CASE (CL_INVALID_WORK_DIMENSION)
        OPENCL_ERROR_CASE (CL_INVALID_WORK_GROUP_SIZE)
        OPENCL_ERROR_CASE (CL_INVALID_WORK_ITEM_SIZE)
        OPENCL_ERROR_CASE (CL_INVALID_GLOBAL_OFFSET)
        OPENCL_ERROR_CASE (CL_INVALID_EVENT_WAIT_LIST)
        OPENCL_ERROR_CASE (CL_INVALID_EVENT)
        OPENCL_ERROR_CASE (CL_INVALID_OPERATION)
        OPENCL_ERROR_CASE (CL_INVALID_GL_OBJECT)
        OPENCL_ERROR_CASE (CL_INVALID_BUFFER_SIZE)
        OPENCL_ERROR_CASE (CL_INVALID_MIP_LEVEL)
        OPENCL_ERROR_CASE (CL_INVALID_GLOBAL_WORK_SIZE)
        OPENCL_ERROR_CASE (CL_INVALID_PROPERTY)
        OPENCL_ERROR_CASE (CL_INVALID_IMAGE_DESCRIPTOR)
        OPENCL_ERROR_CASE (CL_INVALID_COMPILER_OPTIONS)
        OPENCL_ERROR_CASE (CL_INVALID_LINKER_OPTIONS)
        OPENCL_ERROR_CASE (CL_INVALID_DEVICE_PARTITION_COUNT)

    default:
      printf ("Unknown OpenCL error %i in %s on line %i\n", cl_err, func_name,
              line);
      return 1;
    }
}

#define _POCLU_CHECK_CL_ERROR_INNER(cond, func, line)                         \
  do                                                                          \
    {                                                                         \
      if (check_cl_error (cond, line, func))                                  \
        return (EXIT_FAILURE);                                                \
    }                                                                         \
  while (0)

#define CHECK_CL_ERROR(cond) _POCLU_CHECK_CL_ERROR_INNER(cond, __PRETTY_FUNCTION__, __LINE__)

#define CHECK_OPENCL_ERROR_IN(message) _POCLU_CHECK_CL_ERROR_INNER(err, message, __LINE__)

#define TEST_ASSERT(EXP)                                                \
do {                                                                    \
  if (!(EXP)) {                                                         \
    fprintf(stderr, "Assertion: \n" #EXP "\nfailed on %s:%i\n",         \
        __FILE__, __LINE__);                                            \
    return EXIT_FAILURE;                                                \
  }                                                                     \
} while (0)

int main(){
  cl_int err;
  cl_platform_id platforms[MAX_PLATFORMS];
  cl_uint nplatforms;
  cl_device_id devices[MAX_DEVICES];
  cl_uint ndevices;
  cl_program program = NULL;
  size_t binsizes[MAX_BINARIES];
  size_t nbinaries;

  CHECK_CL_ERROR(clGetPlatformIDs(MAX_PLATFORMS, platforms, &nplatforms));
  TEST_ASSERT(nplatforms > 0);

  CHECK_CL_ERROR(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &ndevices));
  TEST_ASSERT(ndevices > 0);

  cl_context context = clCreateContext(NULL, 1, devices, NULL, NULL, &err);
  CHECK_OPENCL_ERROR_IN("clCreateContext");

  const char * src[] = {source};
  program = clCreateProgramWithSource(context, 1, src, NULL, &err);
  CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource");

  CHECK_CL_ERROR(clBuildProgram(program, 1, devices, "-g -DINCX_NONUNITY -DINCY_NONUNITY", NULL, NULL));

  CHECK_CL_ERROR(clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(binsizes), binsizes, &nbinaries));
  printf("binary size: %zd\n", binsizes[0]);

  CHECK_CL_ERROR(clReleaseProgram(program));

  CHECK_CL_ERROR (clReleaseContext (context));

  printf ("OK\n");

  return EXIT_SUCCESS;
}

Attachment: 975931.sh
Description: Bourne shell script

Reply via email to