Author: AlexeySotkin Date: Thu Feb 22 03:54:14 2018 New Revision: 325771 URL: http://llvm.org/viewvc/llvm-project?rev=325771&view=rev Log: [OpenCL] Add '-cl-uniform-work-group-size' compile option
Summary: The OpenCL 2.0 specification defines the '-cl-uniform-work-group-size' option, which requires that the global work-size be a multiple of the work-group size specified to clEnqueueNDRangeKernel and allows optimizations that are made possible by this restriction. The patch introduces support for this option. To keep information about whether an OpenCL kernel has uniform work group size or not, clang generates the 'uniform-work-group-size' function attribute for every kernel: - "uniform-work-group-size"="true" for OpenCL 1.2 and lower, - "uniform-work-group-size"="true" for OpenCL 2.0 and higher if the '-cl-uniform-work-group-size' option was specified, - "uniform-work-group-size"="false" for OpenCL 2.0 and higher if no '-cl-uniform-work-group-size' option was specified. If the function is not an OpenCL kernel, the 'uniform-work-group-size' attribute isn't generated. Patch by: krisb Reviewers: yaxunl, Anastasia, b-sumner Reviewed By: yaxunl, Anastasia Subscribers: nhaehnle, yaxunl, Anastasia, cfe-commits Differential Revision: https://reviews.llvm.org/D43570 Added: cfe/trunk/test/CodeGenOpenCL/cl-uniform-wg-size.cl Modified: cfe/trunk/include/clang/Driver/Options.td cfe/trunk/include/clang/Frontend/CodeGenOptions.def cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/lib/Driver/ToolChains/Clang.cpp cfe/trunk/lib/Frontend/CompilerInvocation.cpp cfe/trunk/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl cfe/trunk/test/CodeGenOpenCL/convergent.cl cfe/trunk/test/Driver/opencl.cl Modified: cfe/trunk/include/clang/Driver/Options.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=325771&r1=325770&r2=325771&view=diff ============================================================================== --- cfe/trunk/include/clang/Driver/Options.td (original) +++ cfe/trunk/include/clang/Driver/Options.td Thu Feb 22 03:54:14 2018 @@ -518,6 +518,8 @@ def cl_denorms_are_zero : Flag<["-"], "c HelpText<"OpenCL only. 
Allow denormals to be flushed to zero.">; def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group<opencl_Group>, Flags<[CC1Option]>, HelpText<"OpenCL only. Specify that single precision floating-point divide and sqrt used in the program source are correctly rounded.">; +def cl_uniform_work_group_size : Flag<["-"], "cl-uniform-work-group-size">, Group<opencl_Group>, Flags<[CC1Option]>, + HelpText<"OpenCL only. Defines that the global work-size be a multiple of the work-group size specified to clEnqueueNDRangeKernel">; def client__name : JoinedOrSeparate<["-"], "client_name">; def combine : Flag<["-", "--"], "combine">, Flags<[DriverOption, Unsupported]>; def compatibility__version : JoinedOrSeparate<["-"], "compatibility_version">; Modified: cfe/trunk/include/clang/Frontend/CodeGenOptions.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Frontend/CodeGenOptions.def?rev=325771&r1=325770&r2=325771&view=diff ============================================================================== --- cfe/trunk/include/clang/Frontend/CodeGenOptions.def (original) +++ cfe/trunk/include/clang/Frontend/CodeGenOptions.def Thu Feb 22 03:54:14 2018 @@ -128,6 +128,7 @@ CODEGENOPT(NoTrappingMath , 1, 0) /// CODEGENOPT(NoNaNsFPMath , 1, 0) ///< Assume FP arguments, results not NaN. CODEGENOPT(FlushDenorm , 1, 0) ///< Allow FP denorm numbers to be flushed to zero CODEGENOPT(CorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt +CODEGENOPT(UniformWGSize , 1, 0) ///< -cl-uniform-work-group-size CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss. /// \brief Method of Objective-C dispatch to use. 
ENUM_CODEGENOPT(ObjCDispatchMethod, ObjCDispatchMethodKind, 2, Legacy) Modified: cfe/trunk/lib/CodeGen/CGCall.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=325771&r1=325770&r2=325771&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGCall.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCall.cpp Thu Feb 22 03:54:14 2018 @@ -1870,6 +1870,21 @@ void CodeGenModule::ConstructAttributeLi } } + if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>()) { + if (getLangOpts().OpenCLVersion <= 120) { + // OpenCL v1.2 Work groups are always uniform + FuncAttrs.addAttribute("uniform-work-group-size", "true"); + } else { + // OpenCL v2.0 Work groups may be whether uniform or not. + // '-cl-uniform-work-group-size' compile option gets a hint + // to the compiler that the global work-size be a multiple of + // the work-group size specified to clEnqueueNDRangeKernel + // (i.e. work groups are uniform). + FuncAttrs.addAttribute("uniform-work-group-size", + llvm::toStringRef(CodeGenOpts.UniformWGSize)); + } + } + if (!AttrOnCallSite) { bool DisableTailCalls = CodeGenOpts.DisableTailCalls || Modified: cfe/trunk/lib/Driver/ToolChains/Clang.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Clang.cpp?rev=325771&r1=325770&r2=325771&view=diff ============================================================================== --- cfe/trunk/lib/Driver/ToolChains/Clang.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/Clang.cpp Thu Feb 22 03:54:14 2018 @@ -2379,6 +2379,7 @@ static void RenderOpenCLOptions(const Ar options::OPT_cl_no_signed_zeros, options::OPT_cl_denorms_are_zero, options::OPT_cl_fp32_correctly_rounded_divide_sqrt, + options::OPT_cl_uniform_work_group_size }; if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) { Modified: cfe/trunk/lib/Frontend/CompilerInvocation.cpp URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/CompilerInvocation.cpp?rev=325771&r1=325770&r2=325771&view=diff ============================================================================== --- cfe/trunk/lib/Frontend/CompilerInvocation.cpp (original) +++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp Thu Feb 22 03:54:14 2018 @@ -659,6 +659,8 @@ static bool ParseCodeGenArgs(CodeGenOpti Opts.FlushDenorm = Args.hasArg(OPT_cl_denorms_are_zero); Opts.CorrectlyRoundedDivSqrt = Args.hasArg(OPT_cl_fp32_correctly_rounded_divide_sqrt); + Opts.UniformWGSize = + Args.hasArg(OPT_cl_uniform_work_group_size); Opts.Reciprocals = Args.getAllArgValues(OPT_mrecip_EQ); Opts.ReciprocalMath = Args.hasArg(OPT_freciprocal_math); Opts.NoTrappingMath = Args.hasArg(OPT_fno_trapping_math); Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl?rev=325771&r1=325770&r2=325771&view=diff ============================================================================== --- cfe/trunk/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl Thu Feb 22 03:54:14 2018 @@ -425,7 +425,7 @@ struct_char_arr32 func_ret_struct_char_a return s; } -// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #0 { +// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #1 { // CHECK: ret i32 0 transparent_u func_transparent_union_ret() { Added: cfe/trunk/test/CodeGenOpenCL/cl-uniform-wg-size.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/cl-uniform-wg-size.cl?rev=325771&view=auto ============================================================================== --- cfe/trunk/test/CodeGenOpenCL/cl-uniform-wg-size.cl (added) +++ cfe/trunk/test/CodeGenOpenCL/cl-uniform-wg-size.cl Thu Feb 22 03:54:14 2018 @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | 
FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM +// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM +// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM + +kernel void ker() {}; +// CHECK: define{{.*}}@ker() #0 + +void foo() {}; +// CHECK: define{{.*}}@foo() #1 + +// CHECK-LABEL: attributes #0 +// CHECK-UNIFORM: "uniform-work-group-size"="true" +// CHECK-NONUNIFORM: "uniform-work-group-size"="false" + +// CHECK-LABEL: attributes #1 +// CHECK-NOT: uniform-work-group-size Modified: cfe/trunk/test/CodeGenOpenCL/convergent.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/convergent.cl?rev=325771&r1=325770&r2=325771&view=diff ============================================================================== --- cfe/trunk/test/CodeGenOpenCL/convergent.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/convergent.cl Thu Feb 22 03:54:14 2018 @@ -127,7 +127,7 @@ void test_not_unroll() { // CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]] // CHECK-LABEL: @assume_convergent_asm -// CHECK: tail call void asm sideeffect "s_barrier", ""() #4 +// CHECK: tail call void asm sideeffect "s_barrier", ""() #5 kernel void assume_convergent_asm() { __asm__ volatile("s_barrier"); @@ -138,4 +138,5 @@ kernel void assume_convergent_asm() // CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} } // CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} } // CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} } -// CHECK: attributes #5 = { {{[^}]*}}convergent noduplicate{{[^}]*}} } +// CHECK: attributes #5 = { {{[^}]*}}convergent{{[^}]*}} } +// CHECK: attributes #6 = { {{[^}]*}}convergent noduplicate{{[^}]*}} } Modified: cfe/trunk/test/Driver/opencl.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/opencl.cl?rev=325771&r1=325770&r2=325771&view=diff 
============================================================================== --- cfe/trunk/test/Driver/opencl.cl (original) +++ cfe/trunk/test/Driver/opencl.cl Thu Feb 22 03:54:14 2018 @@ -13,6 +13,7 @@ // RUN: %clang -S -### -cl-no-signed-zeros %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SIGNED-ZEROS %s // RUN: %clang -S -### -cl-denorms-are-zero %s 2>&1 | FileCheck --check-prefix=CHECK-DENORMS-ARE-ZERO %s // RUN: %clang -S -### -cl-fp32-correctly-rounded-divide-sqrt %s 2>&1 | FileCheck --check-prefix=CHECK-ROUND-DIV %s +// RUN: %clang -S -### -cl-uniform-work-group-size %s 2>&1 | FileCheck --check-prefix=CHECK-UNIFORM-WG %s // RUN: not %clang -cl-std=c99 -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-C99 %s // RUN: not %clang -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s @@ -31,6 +32,7 @@ // CHECK-NO-SIGNED-ZEROS: "-cc1" {{.*}} "-cl-no-signed-zeros" // CHECK-DENORMS-ARE-ZERO: "-cc1" {{.*}} "-cl-denorms-are-zero" // CHECK-ROUND-DIV: "-cc1" {{.*}} "-cl-fp32-correctly-rounded-divide-sqrt" +// CHECK-UNIFORM-WG: "-cc1" {{.*}} "-cl-uniform-work-group-size" // CHECK-C99: error: invalid value 'c99' in '-cl-std=c99' // CHECK-INVALID: error: invalid value 'invalid' in '-cl-std=invalid' _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits