jhuber6 wrote:

> > ```llvm
> > = load i32, ptr %.capture_expr., align 4
> > ```
> 
> Why do you think it reads beyond __context? %2 = getelementptr inbounds 
> %struct.anon, ptr %1, i32 0, i32 0 points to the first element in the 
> __context, if I'm not missing something. If it has the wrong value, looks 
> like it is not written correctly

I think I copied the wrong code somehow. Here is the IR:
```llvm
; Function Attrs: convergent noinline norecurse nounwind optnone uwtable
define weak_odr protected void @__omp_offloading_10302_adc9471_main_l10(ptr 
noalias noundef %dyn_ptr, ptr noalias noundef %__context) #0 {
entry:
  %dyn_ptr.addr = alloca ptr, align 8
  %__context.addr = alloca ptr, align 8
  %Teams = alloca i32, align 4
  %Threads = alloca i32, align 4
  %.capture_expr. = alloca i32, align 4
  %.capture_expr.1 = alloca i32, align 4
  %Teams.casted = alloca i64, align 8
  %Threads.casted = alloca i64, align 8
  %0 = call i32 @__kmpc_global_thread_num(ptr @3)
  store ptr %dyn_ptr, ptr %dyn_ptr.addr, align 8
  store ptr %__context, ptr %__context.addr, align 8
  %1 = load ptr, ptr %__context.addr, align 8
  %2 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 0
  %3 = load i32, ptr %2, align 4
  store i32 %3, ptr %Teams, align 4
  %4 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 1
  %5 = load i32, ptr %4, align 4
  store i32 %5, ptr %Threads, align 4
  %6 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 2
  %7 = load i32, ptr %6, align 4
  store i32 %7, ptr %.capture_expr., align 4
  %8 = getelementptr inbounds %struct.anon, ptr %1, i32 0, i32 3
  %9 = load i32, ptr %8, align 4
  store i32 %9, ptr %.capture_expr.1, align 4
  %10 = load i32, ptr %.capture_expr., align 4
  %11 = load i32, ptr %.capture_expr.1, align 4
  call void @__kmpc_push_num_teams(ptr @3, i32 %0, i32 %10, i32 %11)
  %12 = load i32, ptr %Teams, align 4
  store i32 %12, ptr %Teams.casted, align 4
  %13 = load i64, ptr %Teams.casted, align 8
  %14 = load i32, ptr %Threads, align 4
  store i32 %14, ptr %Threads.casted, align 4
  %15 = load i64, ptr %Threads.casted, align 8
  call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @3, i32 2, ptr 
@__omp_offloading_10302_adc9471_main_l10.omp_outlined, i64 %13, i64 %15)
  ret void
}
```
This is what I get from the corresponding C code.
```c
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>

int main() {
  int Threads = 6;
  int Teams = 10;

  long unsigned s = 0;
#pragma omp target teams distribute parallel for num_teams(Teams)              \
    thread_limit(Threads)
  for (int i = 0; i < Threads * Teams; ++i) {
    assert(Teams == 10);
  }

  return 0;
}
```
When I compile and run it, I get the following. It warns about a nonsense team 
value (it is even more corrupt in other cases, but this was the simplest 
reproducer I could get).
```console
> clang malloc.c -fopenmp -fopenmp-targets=x86_64-pc-linux-gnu                  
>                        
> ./a.out 
OMP: Warning #96: Cannot form a team with 48 threads, using 21 instead.
OMP: Hint Consider unsetting KMP_DEVICE_THREAD_LIMIT (KMP_ALL_THREADS), 
KMP_TEAMS_THREAD_LIMIT, and OMP_THREAD_LIMIT (if any are set).
```
The LLVM-IR is confusing to me because it does a GEP up to index 3, which 
suggests that the Teams / Threads values are appended, but the number of 
arguments isn't expected to be that large.

https://github.com/llvm/llvm-project/pull/91264
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to