jhuber6 wrote: > > ```llvm > > = load i32, ptr %.capture_expr., align 4 > > ``` > > Why do you think it reads beyond __context? %2 = getelementptr inbounds > %struct.anon, ptr %1, i32 0, i32 0 points to the first element in the > __context, if I'm not missing something. If it has the wrong value, looks > like it is not written correctly
I think I copied the wrong code somehow, ```llvm ; Function Attrs: convergent noinline norecurse nounwind optnone uwtable define weak_odr protected void @__omp_offloading_10302_adc9471_main_l10(ptr noalias noundef %dyn_ptr, ptr noalias noundef %__context) #0 { entry: %dyn_ptr.addr = alloca ptr, align 8 %__context.addr = alloca ptr, align 8 %Teams = alloca i32, align 4 %Threads = alloca i32, align 4 %.capture_expr. = alloca i32, align 4 %.capture_expr.1 = alloca i32, align 4 %Teams.casted = alloca i64, align 8 %Threads.casted = alloca i64, align 8 %0 = call i32 @__kmpc_global_thread_num(ptr @3) store ptr %dyn_ptr, ptr %dyn_ptr.addr, align 8 store ptr %__context, ptr %__context.addr, align 8 %1 = load ptr, ptr %__context.addr, align 8 %2 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 0 %3 = load i32, ptr %2, align 4 store i32 %3, ptr %Teams, align 4 %4 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 1 %5 = load i32, ptr %4, align 4 store i32 %5, ptr %Threads, align 4 %6 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 2 %7 = load i32, ptr %6, align 4 store i32 %7, ptr %.capture_expr., align 4 %8 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 3 %9 = load i32, ptr %8, align 4 store i32 %9, ptr %.capture_expr.1, align 4 %10 = load i32, ptr %.capture_expr., align 4 %11 = load i32, ptr %.capture_expr.1, align 4 call void @__kmpc_push_num_teams(ptr @3, i32 %0, i32 %10, i32 %11) %12 = load i32, ptr %Teams, align 4 store i32 %12, ptr %Teams.casted, align 4 %13 = load i64, ptr %Teams.casted, align 8 %14 = load i32, ptr %Threads, align 4 store i32 %14, ptr %Threads.casted, align 4 %15 = load i64, ptr %Threads.casted, align 8 call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @3, i32 2, ptr @__omp_offloading_10302_adc9471_main_l10.omp_outlined, i64 %13, i64 %15) ret void } ``` This is what I get from the corresponding C code. 
```c #include <stdio.h> #include <assert.h> #include <stdlib.h> int main() { int Threads = 6; int Teams = 10; long unsigned s = 0; #pragma omp target teams distribute parallel for num_teams(Teams) \ thread_limit(Threads) for (int i = 0; i < Threads * Teams; ++i) { assert(Teams == 10); } return 0; } ``` When I compile and run it, I get the following. It warns about a nonsense team value (it gets even more corrupt in other cases, but this was the simplest reproducer I could get). ```console > clang malloc.c -fopenmp -fopenmp-targets=x86_64-pc-linux-gnu > > ./a.out OMP: Warning #96: Cannot form a team with 48 threads, using 21 instead. OMP: Hint Consider unsetting KMP_DEVICE_THREAD_LIMIT (KMP_ALL_THREADS), KMP_TEAMS_THREAD_LIMIT, and OMP_THREAD_LIMIT (if any are set). ``` The LLVM-IR is confusing to me because it does a GEP up to index 3, which suggests that the Teams / Threads values are appended, but the number of arguments isn't expected to be that big. https://github.com/llvm/llvm-project/pull/91264 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits