[PATCH] D64537: [WebAssembly] Implement thread-local storage for non-PIC cases

2019-07-11 Thread Guanzhong Chen via Phabricator via cfe-commits
quantum updated this revision to Diff 209304.
quantum added a comment.

Treat global.get of an immutable global as a memory access for now.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64537/new/

https://reviews.llvm.org/D64537

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-wasm.c
  lld/test/wasm/data-layout.ll
  lld/test/wasm/data-segment-merging.ll
  lld/test/wasm/data-segments.ll
  lld/test/wasm/gc-imports.ll
  lld/test/wasm/gc-sections.ll
  lld/test/wasm/load-undefined.test
  lld/test/wasm/tls.ll
  lld/wasm/Driver.cpp
  lld/wasm/Symbols.cpp
  lld/wasm/Symbols.h
  lld/wasm/Writer.cpp
  llvm/include/llvm/BinaryFormat/Wasm.h
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/include/llvm/MC/MCSectionWasm.h
  llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISD.def
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
  llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
  llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
  llvm/test/CodeGen/WebAssembly/target-features-tls.ll
  llvm/test/CodeGen/WebAssembly/tls.ll

Index: llvm/test/CodeGen/WebAssembly/tls.ll
===
--- llvm/test/CodeGen/WebAssembly/tls.ll
+++ llvm/test/CodeGen/WebAssembly/tls.ll
@@ -1,17 +1,61 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck --check-prefix=SINGLE %s
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck %s
+; RUN: llc -O0 < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck %s
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-; SINGLE-LABEL: address_of_tls:
+; CHECK-LABEL: address_of_tls:
 define i32 @address_of_tls() {
-  ; SINGLE: i32.const $push0=, tls
-  ; SINGLE-NEXT: return $pop0
+  ; CHECK-DAG: global.get __tls_base
+  ; CHECK-DAG: i32.const tls
+  ; CHECK-NEXT: i32.add
+  ; CHECK-NEXT: return
   ret i32 ptrtoint(i32* @tls to i32)
 }
 
-; SINGLE: .type	tls,@object
-; SINGLE-NEXT: .section	.bss.tls,"",@
-; SINGLE-NEXT: .p2align 2
-; SINGLE-NEXT: tls:
-; SINGLE-NEXT: .int32 0
-@tls = internal thread_local global i32 0
+; CHECK-LABEL: ptr_to_tls
+define i32* @ptr_to_tls() {
+  ; CHECK-DAG: global.get __tls_base
+  ; CHECK-DAG: i32.const tls
+  ; CHECK-NEXT: i32.add
+  ; CHECK-NEXT: return
+  ret i32* @tls
+}
+
+; CHECK-LABEL: tls_load
+define i32 @tls_load() {
+  ; CHECK-DAG: global.get __tls_base
+  ; CHECK-DAG: i32.const tls
+  ; CHECK-NEXT: i32.add
+  ; CHECK-NEXT: i32.load 0
+  ; CHECK-NEXT: return
+  %tmp = load i32, i32* @tls, align 4
+  ret i32 %tmp
+}
+
+; CHECK-LABEL: tls_store
+define void @tls_store(i32 %x) {
+  ; CHECK-DAG: global.get __tls_base
+  ; CHECK-DAG: i32.const tls
+  ; CHECK-NEXT: i32.add
+  ; CHECK-NEXT: i32.store 0
+  ; CHECK-NEXT: return
+  store i32 %x, i32* @tls, align 4
+  ret void
+}
+
+; CHECK-LABEL: tls_size:
+; CHECK-NEXT: .functype tls_size () -> (i32)
+; CHECK-NEXT: global.get __tls_size
+; CHECK-NEXT: return
+declare i32 @llvm.wasm.tls.size.i32()
+define i32 @tls_size() {
+  %1 = call i32 @llvm.wasm.tls.size.i32()
+  ret i32 %1
+}
+
+; CHECK: .type tls,@object
+; CHECK-NEXT: .section .tbss.tls,"",@
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: tls:
+; CHECK-NEXT: .int32 0
+@tls = internal thread_local(localexec) global i32 0
Index: llvm/test/CodeGen/WebAssembly/target-features-tls.ll
===
--- llvm/test/CodeGen/WebAssembly/target-features-tls.ll
+++ llvm/test/CodeGen/WebAssembly/target-features-tls.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes CHECK,NO-ATOMICS
-; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes CHECK,ATOMICS
+; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes NO-ATOMICS
+; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes ATOMICS
 
 ; Test that the target features section contains -atomics or +atomics
 ; for modules that have thread local storage in their source.
@@ -9,16 +9,13 @@
 
 @foo = internal thread_local global i32 0
 
-; CHECK-LABEL: .custom_section.target_features,"",@
 
 ; -atomics
-; NO-ATOMICS-NEXT: .int8 1
-; NO-ATOMICS-NEXT: .int8 45
-; NO-ATOMICS-NEXT: .int8 7
-; NO-ATOMICS-NEXT: .ascii "atomics"
-; NO-ATOMICS-NEXT: .bss.foo,"",@
+; NO-ATOMICS-NOT: .custom_section.target_features,"",@
+; NO-ATOMICS: .tbss.foo,"",@
 
 ; +atomics
+; ATOMICS-LABEL: .custom_section.target_features,"",@
 ; ATOMICS-NEXT: .int8 1
 ; ATOMICS-NEXT: .int8 43
 ; ATOMICS-NEXT: .int8 7
Index: llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp

[PATCH] D64537: [WebAssembly] Implement thread-local storage for non-PIC cases

2019-07-11 Thread Guanzhong Chen via Phabricator via cfe-commits
quantum marked 5 inline comments as done.
quantum added inline comments.



Comment at: clang/include/clang/Basic/BuiltinsWebAssembly.def:33
+// Thread-local storage
+TARGET_BUILTIN(__builtin_wasm_tls_size, "z", "nc", "bulk-memory")
+

quantum wrote:
> aheejin wrote:
> > Why is it `c`(const)? According to [[ 
> > https://github.com/llvm/llvm-project/blob/e6695821e592f95bffe1340b28be7bcfcce04284/clang/include/clang/Basic/Builtins.h#L104-L108
> >  | this comment ]], this is true if this function has no side effects and 
> > doesn't read memory, i.e., the result should be only dependent on its 
> > arguments. Can't wasm globals be memory locations in machines?
> I was thinking that since the global is immutable, so the value is always a 
> constant.
According to @tlively, there is no solid definition on whether a global 
(especially a constant one), counts as memory access. For now, I am going to 
change this to not constant. We can always change it back later.



Comment at: llvm/include/llvm/IR/IntrinsicsWebAssembly.td:134
+[],
+[IntrNoMem, IntrSpeculatable]>;
+

quantum wrote:
> aheejin wrote:
> > - Why is it speculatable?
> > - I'm not sure if it's `InstNoMem`, because wasm globals can be memory 
> > locations after all. What do other people think?
> @tlively suggested that I do this. It should be speculatable because it has 
> no side effects (since it returns a constant). As for `InstrNoMem`, I am not 
> sure if globals are memory, and whether reading a constant counts as memory 
> access.
I think I am going to remove these attributes for now. We can add them back if 
we end up deciding that immutable globals are not memory accesses.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64537/new/

https://reviews.llvm.org/D64537



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64537: [WebAssembly] Implement thread-local storage for non-PIC cases

2019-07-11 Thread Guanzhong Chen via Phabricator via cfe-commits
quantum updated this revision to Diff 209274.
quantum added a comment.

Clean up table gen.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64537/new/

https://reviews.llvm.org/D64537

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-wasm.c
  lld/test/wasm/data-layout.ll
  lld/test/wasm/data-segment-merging.ll
  lld/test/wasm/data-segments.ll
  lld/test/wasm/gc-imports.ll
  lld/test/wasm/gc-sections.ll
  lld/test/wasm/load-undefined.test
  lld/test/wasm/tls.ll
  lld/wasm/Driver.cpp
  lld/wasm/Symbols.cpp
  lld/wasm/Symbols.h
  lld/wasm/Writer.cpp
  llvm/include/llvm/BinaryFormat/Wasm.h
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/include/llvm/MC/MCSectionWasm.h
  llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISD.def
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
  llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
  llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
  llvm/test/CodeGen/WebAssembly/target-features-tls.ll
  llvm/test/CodeGen/WebAssembly/tls.ll

Index: llvm/test/CodeGen/WebAssembly/tls.ll
===
--- llvm/test/CodeGen/WebAssembly/tls.ll
+++ llvm/test/CodeGen/WebAssembly/tls.ll
@@ -1,17 +1,61 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck --check-prefix=SINGLE %s
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck %s
+; RUN: llc -O0 < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck %s
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-; SINGLE-LABEL: address_of_tls:
+; CHECK-LABEL: address_of_tls:
 define i32 @address_of_tls() {
-  ; SINGLE: i32.const $push0=, tls
-  ; SINGLE-NEXT: return $pop0
+  ; CHECK-DAG: global.get __tls_base
+  ; CHECK-DAG: i32.const tls
+  ; CHECK-NEXT: i32.add
+  ; CHECK-NEXT: return
   ret i32 ptrtoint(i32* @tls to i32)
 }
 
-; SINGLE: .type	tls,@object
-; SINGLE-NEXT: .section	.bss.tls,"",@
-; SINGLE-NEXT: .p2align 2
-; SINGLE-NEXT: tls:
-; SINGLE-NEXT: .int32 0
-@tls = internal thread_local global i32 0
+; CHECK-LABEL: ptr_to_tls
+define i32* @ptr_to_tls() {
+  ; CHECK-DAG: global.get __tls_base
+  ; CHECK-DAG: i32.const tls
+  ; CHECK-NEXT: i32.add
+  ; CHECK-NEXT: return
+  ret i32* @tls
+}
+
+; CHECK-LABEL: tls_load
+define i32 @tls_load() {
+  ; CHECK-DAG: global.get __tls_base
+  ; CHECK-DAG: i32.const tls
+  ; CHECK-NEXT: i32.add
+  ; CHECK-NEXT: i32.load 0
+  ; CHECK-NEXT: return
+  %tmp = load i32, i32* @tls, align 4
+  ret i32 %tmp
+}
+
+; CHECK-LABEL: tls_store
+define void @tls_store(i32 %x) {
+  ; CHECK-DAG: global.get __tls_base
+  ; CHECK-DAG: i32.const tls
+  ; CHECK-NEXT: i32.add
+  ; CHECK-NEXT: i32.store 0
+  ; CHECK-NEXT: return
+  store i32 %x, i32* @tls, align 4
+  ret void
+}
+
+; CHECK-LABEL: tls_size:
+; CHECK-NEXT: .functype tls_size () -> (i32)
+; CHECK-NEXT: global.get __tls_size
+; CHECK-NEXT: return
+declare i32 @llvm.wasm.tls.size.i32()
+define i32 @tls_size() {
+  %1 = call i32 @llvm.wasm.tls.size.i32()
+  ret i32 %1
+}
+
+; CHECK: .type tls,@object
+; CHECK-NEXT: .section .tbss.tls,"",@
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: tls:
+; CHECK-NEXT: .int32 0
+@tls = internal thread_local(localexec) global i32 0
Index: llvm/test/CodeGen/WebAssembly/target-features-tls.ll
===
--- llvm/test/CodeGen/WebAssembly/target-features-tls.ll
+++ llvm/test/CodeGen/WebAssembly/target-features-tls.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes CHECK,NO-ATOMICS
-; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes CHECK,ATOMICS
+; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes NO-ATOMICS
+; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes ATOMICS
 
 ; Test that the target features section contains -atomics or +atomics
 ; for modules that have thread local storage in their source.
@@ -9,16 +9,13 @@
 
 @foo = internal thread_local global i32 0
 
-; CHECK-LABEL: .custom_section.target_features,"",@
 
 ; -atomics
-; NO-ATOMICS-NEXT: .int8 1
-; NO-ATOMICS-NEXT: .int8 45
-; NO-ATOMICS-NEXT: .int8 7
-; NO-ATOMICS-NEXT: .ascii "atomics"
-; NO-ATOMICS-NEXT: .bss.foo,"",@
+; NO-ATOMICS-NOT: .custom_section.target_features,"",@
+; NO-ATOMICS: .tbss.foo,"",@
 
 ; +atomics
+; ATOMICS-LABEL: .custom_section.target_features,"",@
 ; ATOMICS-NEXT: .int8 1
 ; ATOMICS-NEXT: .int8 43
 ; ATOMICS-NEXT: .int8 7
Index: llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp

[PATCH] D64537: [WebAssembly] Implement thread-local storage for non-PIC cases

2019-07-11 Thread Guanzhong Chen via Phabricator via cfe-commits
quantum updated this revision to Diff 209271.
quantum marked 6 inline comments as done.
quantum added a comment.

Update comments regarding multable globals. Use `CHECK-DAG`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64537/new/

https://reviews.llvm.org/D64537

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-wasm.c
  lld/test/wasm/data-layout.ll
  lld/test/wasm/data-segment-merging.ll
  lld/test/wasm/data-segments.ll
  lld/test/wasm/gc-imports.ll
  lld/test/wasm/gc-sections.ll
  lld/test/wasm/load-undefined.test
  lld/test/wasm/tls.ll
  lld/wasm/Driver.cpp
  lld/wasm/Symbols.cpp
  lld/wasm/Symbols.h
  lld/wasm/Writer.cpp
  llvm/include/llvm/BinaryFormat/Wasm.h
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/include/llvm/MC/MCSectionWasm.h
  llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISD.def
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
  llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
  llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
  llvm/test/CodeGen/WebAssembly/target-features-tls.ll
  llvm/test/CodeGen/WebAssembly/tls.ll

Index: llvm/test/CodeGen/WebAssembly/tls.ll
===
--- llvm/test/CodeGen/WebAssembly/tls.ll
+++ llvm/test/CodeGen/WebAssembly/tls.ll
@@ -1,17 +1,61 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck --check-prefix=SINGLE %s
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck %s
+; RUN: llc -O0 < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck %s
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-; SINGLE-LABEL: address_of_tls:
+; CHECK-LABEL: address_of_tls:
 define i32 @address_of_tls() {
-  ; SINGLE: i32.const $push0=, tls
-  ; SINGLE-NEXT: return $pop0
+  ; CHECK-DAG: global.get __tls_base
+  ; CHECK-DAG: i32.const tls
+  ; CHECK-NEXT: i32.add
+  ; CHECK-NEXT: return
   ret i32 ptrtoint(i32* @tls to i32)
 }
 
-; SINGLE: .type	tls,@object
-; SINGLE-NEXT: .section	.bss.tls,"",@
-; SINGLE-NEXT: .p2align 2
-; SINGLE-NEXT: tls:
-; SINGLE-NEXT: .int32 0
-@tls = internal thread_local global i32 0
+; CHECK-LABEL: ptr_to_tls
+define i32* @ptr_to_tls() {
+  ; CHECK-DAG: global.get __tls_base
+  ; CHECK-DAG: i32.const tls
+  ; CHECK-NEXT: i32.add
+  ; CHECK-NEXT: return
+  ret i32* @tls
+}
+
+; CHECK-LABEL: tls_load
+define i32 @tls_load() {
+  ; CHECK-DAG: global.get __tls_base
+  ; CHECK-DAG: i32.const tls
+  ; CHECK-NEXT: i32.add
+  ; CHECK-NEXT: i32.load 0
+  ; CHECK-NEXT: return
+  %tmp = load i32, i32* @tls, align 4
+  ret i32 %tmp
+}
+
+; CHECK-LABEL: tls_store
+define void @tls_store(i32 %x) {
+  ; CHECK-DAG: global.get __tls_base
+  ; CHECK-DAG: i32.const tls
+  ; CHECK-NEXT: i32.add
+  ; CHECK-NEXT: i32.store 0
+  ; CHECK-NEXT: return
+  store i32 %x, i32* @tls, align 4
+  ret void
+}
+
+; CHECK-LABEL: tls_size:
+; CHECK-NEXT: .functype tls_size () -> (i32)
+; CHECK-NEXT: global.get __tls_size
+; CHECK-NEXT: return
+declare i32 @llvm.wasm.tls.size.i32()
+define i32 @tls_size() {
+  %1 = call i32 @llvm.wasm.tls.size.i32()
+  ret i32 %1
+}
+
+; CHECK: .type tls,@object
+; CHECK-NEXT: .section .tbss.tls,"",@
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: tls:
+; CHECK-NEXT: .int32 0
+@tls = internal thread_local(localexec) global i32 0
Index: llvm/test/CodeGen/WebAssembly/target-features-tls.ll
===
--- llvm/test/CodeGen/WebAssembly/target-features-tls.ll
+++ llvm/test/CodeGen/WebAssembly/target-features-tls.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes CHECK,NO-ATOMICS
-; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes CHECK,ATOMICS
+; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes NO-ATOMICS
+; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes ATOMICS
 
 ; Test that the target features section contains -atomics or +atomics
 ; for modules that have thread local storage in their source.
@@ -9,16 +9,13 @@
 
 @foo = internal thread_local global i32 0
 
-; CHECK-LABEL: .custom_section.target_features,"",@
 
 ; -atomics
-; NO-ATOMICS-NEXT: .int8 1
-; NO-ATOMICS-NEXT: .int8 45
-; NO-ATOMICS-NEXT: .int8 7
-; NO-ATOMICS-NEXT: .ascii "atomics"
-; NO-ATOMICS-NEXT: .bss.foo,"",@
+; NO-ATOMICS-NOT: .custom_section.target_features,"",@
+; NO-ATOMICS: .tbss.foo,"",@
 
 ; +atomics
+; ATOMICS-LABEL: .custom_section.target_features,"",@
 ; ATOMICS-NEXT: .int8 1
 ; ATOMICS-NEXT: .int8 43
 ; ATOMICS-NEXT: .int8 7
Index: 

[PATCH] D64537: [WebAssembly] Implement thread-local storage for non-PIC cases

2019-07-11 Thread Guanzhong Chen via Phabricator via cfe-commits
quantum marked 20 inline comments as done.
quantum added a comment.

I'll clean up the table gen stuff.




Comment at: clang/include/clang/Basic/BuiltinsWebAssembly.def:33
+// Thread-local storage
+TARGET_BUILTIN(__builtin_wasm_tls_size, "z", "nc", "bulk-memory")
+

aheejin wrote:
> Why is it `c`(const)? According to [[ 
> https://github.com/llvm/llvm-project/blob/e6695821e592f95bffe1340b28be7bcfcce04284/clang/include/clang/Basic/Builtins.h#L104-L108
>  | this comment ]], this is true if this function has no side effects and 
> doesn't read memory, i.e., the result should be only dependent on its 
> arguments. Can't wasm globals be memory locations in machines?
I was thinking that since the global is immutable, so the value is always a 
constant.



Comment at: lld/test/wasm/tls.ll:57
+;   memory.init 0, 0
+;   end
+

aheejin wrote:
> Hmm, I think there might be a way to actually print disassembly results. 
> There are '*.test' files in test directory, in which we have some examples. 
> For example, [[ 
> https://github.com/llvm/llvm-project/blob/master/lld/test/wasm/export-table.test
>  | this test ]] has a sequence of commands you want to run, and you can put 
> input files in a separate directory. That test uses `obj2yaml`, but can we 
> possibly use `llvm-objdump` or something to disassemble?
We already know that we can do something like

Run: obj2yaml %t.wasm | sed -n '/Body:/{s/^\s*Body:\s*//;s/../0x& /gp}'  | 
llvm-mc -disassemble -triple=wasm32

to compare the actual assembly. As for `llvm-objdump`, it seems to be unable to 
disassemble the WASM properly:


```
.../tools/lld/test/wasm/Output/tls.ll.tmp.wasm: file format WASM


Disassembly of section CODE:

 CODE:
# 4 functions in section.
   1: 02 00 block   invalid_type
   3: 0bend
   4: 10 00 call0
   6: 20 00 local.get   0
   8: 24 01 global.set  1
   a: 20 00 local.get   0
   c: 41 00 i32.const   0
   e: 41 08 i32.const   8
  10: fc 08 00 00   memory.init 0, 0
  14: 0bend
  15: 0freturn
  16: 00llvm-objdump: 
lib/Target/WebAssembly/WebAssemblyGenAsmWriter.inc:2032: void 
llvm::WebAssemblyInstPrinter::printInstruction(const llvm::MCInst *, 
llvm::raw_ostream &): Assertion `Bits != 0 && "Cannot print this instruction."' 
failed.

```



Comment at: lld/wasm/Driver.cpp:543
+  "__wasm_init_tls", WASM_SYMBOL_VISIBILITY_HIDDEN,
+  make(I32ArgSignature, "__wasm_init_tls"));
+

aheejin wrote:
> Does this TLS thing work when `Config->Shared == true`, i.e., do we create 
> TLS globals even when this is a library?
Since TLS is module specific (we can't allocate memory for other modules), we 
must in fact generate this symbol for every module. Shared library support will 
not be implemented in this diff, however.



Comment at: lld/wasm/Writer.cpp:431
+error("'bulk-memory' feature must be used in order to use thread-local "
+  "storage");
+

aheejin wrote:
> Should we check for "mutable-global" feature too?
Do we need to? I thought it's always available since we use it for the stack 
pointer.



Comment at: lld/wasm/Writer.cpp:514
   if (G->getGlobalType()->Mutable) {
 // Only the __stack_pointer should ever be create as mutable.
+assert(G == WasmSym::StackPointer || G == WasmSym::TLSBase);

aheejin wrote:
> Now `__tls_base` is also mutable, I think we should fix the comment
Will do.



Comment at: lld/wasm/Writer.cpp:769
+  std::string BodyContent;
+  {
+raw_string_ostream OS(BodyContent);

aheejin wrote:
> Any reason for the new block scope?
Yes, the `raw_string_ostream` must destruct by the end of the scope. This is 
the pattern used in other functions above.



Comment at: lld/wasm/Writer.cpp:777
+  break;
+}
+

aheejin wrote:
> Is it guaranteed that there's only one TLS segment?
Yes, all the TLS input segments will be merged into `.tdata`.



Comment at: llvm/include/llvm/IR/IntrinsicsWebAssembly.td:134
+[],
+[IntrNoMem, IntrSpeculatable]>;
+

aheejin wrote:
> - Why is it speculatable?
> - I'm not sure if it's `InstNoMem`, because wasm globals can be memory 
> locations after all. What do other people think?
@tlively suggested that I do this. It should be speculatable because it has no 
side effects (since it returns a constant). As for `InstrNoMem`, I am not sure 
if globals are memory, and whether reading a constant counts as 

[PATCH] D64537: [WebAssembly] Implement thread-local storage for non-PIC cases

2019-07-11 Thread Guanzhong Chen via Phabricator via cfe-commits
quantum updated this revision to Diff 209258.
quantum added a comment.

Add error for non-local-exec TLS models. Fix unit tests.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64537/new/

https://reviews.llvm.org/D64537

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-wasm.c
  lld/test/wasm/data-layout.ll
  lld/test/wasm/data-segment-merging.ll
  lld/test/wasm/data-segments.ll
  lld/test/wasm/gc-imports.ll
  lld/test/wasm/gc-sections.ll
  lld/test/wasm/load-undefined.test
  lld/test/wasm/tls.ll
  lld/wasm/Driver.cpp
  lld/wasm/Symbols.cpp
  lld/wasm/Symbols.h
  lld/wasm/Writer.cpp
  llvm/include/llvm/BinaryFormat/Wasm.h
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/include/llvm/MC/MCSectionWasm.h
  llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISD.def
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
  llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
  llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
  llvm/test/CodeGen/WebAssembly/target-features-tls.ll
  llvm/test/CodeGen/WebAssembly/tls.ll

Index: llvm/test/CodeGen/WebAssembly/tls.ll
===
--- llvm/test/CodeGen/WebAssembly/tls.ll
+++ llvm/test/CodeGen/WebAssembly/tls.ll
@@ -1,17 +1,87 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck --check-prefix=SINGLE %s
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck --check-prefixes=CHECK,O2 %s
+; RUN: llc -O0 < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck --check-prefixes=CHECK,O0 %s
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-; SINGLE-LABEL: address_of_tls:
+; O0-LABEL: address_of_tls:
+; O2-LABEL: address_of_tls:
 define i32 @address_of_tls() {
-  ; SINGLE: i32.const $push0=, tls
-  ; SINGLE-NEXT: return $pop0
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: return
   ret i32 ptrtoint(i32* @tls to i32)
 }
 
-; SINGLE: .type	tls,@object
-; SINGLE-NEXT: .section	.bss.tls,"",@
-; SINGLE-NEXT: .p2align 2
-; SINGLE-NEXT: tls:
-; SINGLE-NEXT: .int32 0
-@tls = internal thread_local global i32 0
+; O0-LABEL: ptr_to_tls
+; O2-LABEL: ptr_to_tls
+define i32* @ptr_to_tls() {
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: return
+  ret i32* @tls
+}
+
+; O0-LABEL: tls_load
+; O2-LABEL: tls_load
+define i32 @tls_load() {
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: i32.load 0
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: i32.load 0
+  ; O2-NEXT: return
+  %tmp = load i32, i32* @tls, align 4
+  ret i32 %tmp
+}
+
+; O0-LABEL: tls_store
+; O2-LABEL: tls_store
+define void @tls_store(i32 %x) {
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: i32.store 0
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: i32.store 0
+  ; O2-NEXT: return
+  store i32 %x, i32* @tls, align 4
+  ret void
+}
+
+; CHECK-LABEL: tls_size:
+; CHECK-NEXT: .functype tls_size () -> (i32)
+; CHECK-NEXT: global.get __tls_size
+; CHECK-NEXT: return
+declare i32 @llvm.wasm.tls.size.i32()
+define i32 @tls_size() {
+  %1 = call i32 @llvm.wasm.tls.size.i32()
+  ret i32 %1
+}
+
+; CHECK: .type tls,@object
+; CHECK-NEXT: .section .tbss.tls,"",@
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: tls:
+; CHECK-NEXT: .int32 0
+@tls = internal thread_local(localexec) global i32 0
Index: llvm/test/CodeGen/WebAssembly/target-features-tls.ll
===
--- llvm/test/CodeGen/WebAssembly/target-features-tls.ll
+++ llvm/test/CodeGen/WebAssembly/target-features-tls.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes CHECK,NO-ATOMICS
-; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes CHECK,ATOMICS
+; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes NO-ATOMICS
+; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes ATOMICS
 
 ; Test that the target features section contains -atomics or +atomics
 ; for modules that have thread local storage in their source.
@@ -9,16 +9,13 @@
 
 @foo = internal thread_local 

[PATCH] D64537: [WebAssembly] Implement thread-local storage for non-PIC cases

2019-07-11 Thread Heejin Ahn via Phabricator via cfe-commits
aheejin added a comment.

Nice!
Then where should we call `__wasm_init_tls`, in case within a library?




Comment at: clang/include/clang/Basic/BuiltinsWebAssembly.def:33
+// Thread-local storage
+TARGET_BUILTIN(__builtin_wasm_tls_size, "z", "nc", "bulk-memory")
+

Why is it `c`(const)? According to [[ 
https://github.com/llvm/llvm-project/blob/e6695821e592f95bffe1340b28be7bcfcce04284/clang/include/clang/Basic/Builtins.h#L104-L108
 | this comment ]], this is true if this function has no side effects and 
doesn't read memory, i.e., the result should be only dependent on its 
arguments. Can't wasm globals be memory locations in machines?



Comment at: lld/test/wasm/tls.ll:57
+;   memory.init 0, 0
+;   end
+

Hmm, I think there might be a way to actually print disassembly results. There 
are '*.test' files in test directory, in which we have some examples. For 
example, [[ 
https://github.com/llvm/llvm-project/blob/master/lld/test/wasm/export-table.test
 | this test ]] has a sequence of commands you want to run, and you can put 
input files in a separate directory. That test uses `obj2yaml`, but can we 
possibly use `llvm-objdump` or something to disassemble?



Comment at: lld/wasm/Driver.cpp:543
+  "__wasm_init_tls", WASM_SYMBOL_VISIBILITY_HIDDEN,
+  make(I32ArgSignature, "__wasm_init_tls"));
+

Does this TLS thing work when `Config->Shared == true`, i.e., do we create TLS 
globals even when this is a library?



Comment at: lld/wasm/Writer.cpp:431
+error("'bulk-memory' feature must be used in order to use thread-local "
+  "storage");
+

Should we check for "mutable-global" feature too?



Comment at: lld/wasm/Writer.cpp:514
   if (G->getGlobalType()->Mutable) {
 // Only the __stack_pointer should ever be create as mutable.
+assert(G == WasmSym::StackPointer || G == WasmSym::TLSBase);

Now `__tls_base` is also mutable, I think we should fix the comment



Comment at: lld/wasm/Writer.cpp:769
+  std::string BodyContent;
+  {
+raw_string_ostream OS(BodyContent);

Any reason for the new block scope?



Comment at: lld/wasm/Writer.cpp:777
+  break;
+}
+

Is it guaranteed that there's only one TLS segment?



Comment at: llvm/include/llvm/IR/IntrinsicsWebAssembly.td:134
+[],
+[IntrNoMem, IntrSpeculatable]>;
+

- Why is it speculatable?
- I'm not sure if it's `InstNoMem`, because wasm globals can be memory 
locations after all. What do other people think?



Comment at: llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp:1095
+WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG ) const {
+  SDLoc DL(Op);

If you do the conversion in `WebAssemblyISelDAGToDAG.cpp`, you don't need to 
create `WebAssemblyISD` node,  `SDTypeProfile`, and `SDNode` for every single 
instruction you want to generate. This `WebAssemblyISelLowering.cpp` is a part 
of legalization so it cannot generate machine instructions directly, whereas 
`WebAssemblyISelDAGToDAG.cpp` is a part of instruction selection so you have 
direct access to machine instructions. I think moving routines there can be 
cleaner?



Comment at: llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp:1109
+  WebAssemblyISD::CONST_I32, DL, VT,
+  DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(), 0));
+

Does this offset work when there are non-thread-local globals too?



Comment at: llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td:86
+def SDT_WebAssemblyConstI32   : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+   SDTCisVT<1, i32>]>;
 

Nit: indentation



Comment at: llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td:124
+def WebAssemblyconsti32 : SDNode<"WebAssemblyISD::CONST_I32",
+SDT_WebAssemblyConstI32>;
 

Nit: indentations look weird for both defs



Comment at: llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp:192
   Stripped |= stripAtomics(M);
-  Stripped |= stripThreadLocals(M);
 }

Looks like this feature requires both bulk memory and mutable global. Shouldn't 
we strip thread locals if either is not enabled?



Comment at: llvm/test/CodeGen/WebAssembly/target-features-tls.ll:23
 ; ATOMICS-NEXT: .ascii "atomics"
 ; ATOMICS-NEXT: .tbss.foo,"",@

Is thread local feature still dependent on atomics? If not, do we still need to 
test this with both atomics on and off? This now looks rather dependent on bulk 
memory and mutable global. 

[PATCH] D64537: [WebAssembly] Implement thread-local storage for non-PIC cases

2019-07-10 Thread Guanzhong Chen via Phabricator via cfe-commits
quantum marked an inline comment as done.
quantum added a comment.

In D64537#1579626 , @sunfish wrote:

> This looks nice!
>
> > __wasm_init_tls(calloc(__builtin_wasm_tls_size(), 1));
>
> Would it make sense to change the API contract for `__wasm_init_tls` to 
> guarantee that initializes all bytes (with zeros as needed)? 
> `__wasm_init_tls` knows what bytes it's initializing, so we could use plain 
> `malloc` instead of `calloc` and avoid redundant zero initialization of those 
> bytes.


Updated the summary. I just realized that the way the code is written, the 
plain `malloc` would already work.




Comment at: llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp:
+
+  return DAG.getNode(ISD::ADD, DL, VT, TLSBase, TLSOffset);
+}

sunfish wrote:
> It looks like this supports local-exec, but would need to be extended to 
> handle initial-exec or the other TLS models. Assuming this is correct, could 
> you add a check for the TLS model and `report_fatal_error` on unsupported 
> models?
I'll add an error for non-local-exec for now.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64537/new/

https://reviews.llvm.org/D64537



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64537: [WebAssembly] Implement thread-local storage for non-PIC cases

2019-07-10 Thread Guanzhong Chen via Phabricator via cfe-commits
quantum updated this revision to Diff 209102.
quantum added a comment.

Fix linker warning.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64537/new/

https://reviews.llvm.org/D64537

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-wasm.c
  lld/test/wasm/data-layout.ll
  lld/test/wasm/gc-sections.ll
  lld/test/wasm/tls.ll
  lld/wasm/Driver.cpp
  lld/wasm/Symbols.cpp
  lld/wasm/Symbols.h
  lld/wasm/Writer.cpp
  llvm/include/llvm/BinaryFormat/Wasm.h
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/include/llvm/MC/MCSectionWasm.h
  llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISD.def
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
  llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
  llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
  llvm/test/CodeGen/WebAssembly/target-features-tls.ll
  llvm/test/CodeGen/WebAssembly/tls.ll

Index: llvm/test/CodeGen/WebAssembly/tls.ll
===
--- llvm/test/CodeGen/WebAssembly/tls.ll
+++ llvm/test/CodeGen/WebAssembly/tls.ll
@@ -1,17 +1,87 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck --check-prefix=SINGLE %s
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck --check-prefixes=CHECK,O2 %s
+; RUN: llc -O0 < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck --check-prefixes=CHECK,O0 %s
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-; SINGLE-LABEL: address_of_tls:
+; O0-LABEL: address_of_tls:
+; O2-LABEL: address_of_tls:
 define i32 @address_of_tls() {
-  ; SINGLE: i32.const $push0=, tls
-  ; SINGLE-NEXT: return $pop0
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: return
   ret i32 ptrtoint(i32* @tls to i32)
 }
 
-; SINGLE: .type	tls,@object
-; SINGLE-NEXT: .section	.bss.tls,"",@
-; SINGLE-NEXT: .p2align 2
-; SINGLE-NEXT: tls:
-; SINGLE-NEXT: .int32 0
+; O0-LABEL: ptr_to_tls
+; O2-LABEL: ptr_to_tls
+define i32* @ptr_to_tls() {
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: return
+  ret i32* @tls
+}
+
+; O0-LABEL: tls_load
+; O2-LABEL: tls_load
+define i32 @tls_load() {
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: i32.load 0
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: i32.load 0
+  ; O2-NEXT: return
+  %tmp = load i32, i32* @tls, align 4
+  ret i32 %tmp
+}
+
+; O0-LABEL: tls_store
+; O2-LABEL: tls_store
+define void @tls_store(i32 %x) {
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: i32.store 0
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: i32.store 0
+  ; O2-NEXT: return
+  store i32 %x, i32* @tls, align 4
+  ret void
+}
+
+; CHECK-LABEL: tls_size:
+; CHECK-NEXT: .functype tls_size () -> (i32)
+; CHECK-NEXT: global.get __tls_size
+; CHECK-NEXT: return
+declare i32 @llvm.wasm.tls.size.i32()
+define i32 @tls_size() {
+  %1 = call i32 @llvm.wasm.tls.size.i32()
+  ret i32 %1
+}
+
+; CHECK: .type tls,@object
+; CHECK-NEXT: .section .tbss.tls,"",@
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: tls:
+; CHECK-NEXT: .int32 0
 @tls = internal thread_local global i32 0
Index: llvm/test/CodeGen/WebAssembly/target-features-tls.ll
===
--- llvm/test/CodeGen/WebAssembly/target-features-tls.ll
+++ llvm/test/CodeGen/WebAssembly/target-features-tls.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes CHECK,NO-ATOMICS
-; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes CHECK,ATOMICS
+; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes NO-ATOMICS
+; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes ATOMICS
 
 ; Test that the target features section contains -atomics or +atomics
 ; for modules that have thread local storage in their source.
@@ -9,16 +9,13 @@
 
 @foo = internal thread_local global i32 0
 
-; CHECK-LABEL: .custom_section.target_features,"",@
 
 ; -atomics
-; NO-ATOMICS-NEXT: .int8 1
-; NO-ATOMICS-NEXT: .int8 45
-; NO-ATOMICS-NEXT: .int8 7
-; NO-ATOMICS-NEXT: .ascii "atomics"
-; NO-ATOMICS-NEXT: 

[PATCH] D64537: [WebAssembly] Implement thread-local storage for non-PIC cases

2019-07-10 Thread Dan Gohman via Phabricator via cfe-commits
sunfish added a comment.

This looks nice!

> __wasm_init_tls(calloc(__builtin_wasm_tls_size(), 1));

Would it make sense to change the API contract for `__wasm_init_tls` to 
guarantee that initializes all bytes (with zeros as needed)? `__wasm_init_tls` 
knows what bytes it's initializing, so we could use plain `malloc` instead of 
`calloc` and avoid redundant zero initialization of those bytes.




Comment at: llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp:
+
+  return DAG.getNode(ISD::ADD, DL, VT, TLSBase, TLSOffset);
+}

It looks like this supports local-exec, but would need to be extended to handle 
initial-exec or the other TLS models. Assuming this is correct, could you add a 
check for the TLS model and `report_fatal_error` on unsupported models?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64537/new/

https://reviews.llvm.org/D64537



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64537: [WebAssembly] Implement thread-local storage for non-PIC cases

2019-07-10 Thread Guanzhong Chen via Phabricator via cfe-commits
quantum updated this revision to Diff 209090.
quantum added a comment.

Fix `fail` call.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64537/new/

https://reviews.llvm.org/D64537

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-wasm.c
  lld/test/wasm/data-layout.ll
  lld/test/wasm/gc-sections.ll
  lld/test/wasm/tls.ll
  lld/wasm/Driver.cpp
  lld/wasm/Symbols.cpp
  lld/wasm/Symbols.h
  lld/wasm/Writer.cpp
  llvm/include/llvm/BinaryFormat/Wasm.h
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/include/llvm/MC/MCSectionWasm.h
  llvm/lib/MC/WasmObjectWriter.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISD.def
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
  llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
  llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
  llvm/test/CodeGen/WebAssembly/target-features-tls.ll
  llvm/test/CodeGen/WebAssembly/tls.ll

Index: llvm/test/CodeGen/WebAssembly/tls.ll
===
--- llvm/test/CodeGen/WebAssembly/tls.ll
+++ llvm/test/CodeGen/WebAssembly/tls.ll
@@ -1,17 +1,87 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck --check-prefix=SINGLE %s
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck --check-prefixes=CHECK,O2 %s
+; RUN: llc -O0 < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck --check-prefixes=CHECK,O0 %s
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-; SINGLE-LABEL: address_of_tls:
+; O0-LABEL: address_of_tls:
+; O2-LABEL: address_of_tls:
 define i32 @address_of_tls() {
-  ; SINGLE: i32.const $push0=, tls
-  ; SINGLE-NEXT: return $pop0
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: return
   ret i32 ptrtoint(i32* @tls to i32)
 }
 
-; SINGLE: .type	tls,@object
-; SINGLE-NEXT: .section	.bss.tls,"",@
-; SINGLE-NEXT: .p2align 2
-; SINGLE-NEXT: tls:
-; SINGLE-NEXT: .int32 0
+; O0-LABEL: ptr_to_tls
+; O2-LABEL: ptr_to_tls
+define i32* @ptr_to_tls() {
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: return
+  ret i32* @tls
+}
+
+; O0-LABEL: tls_load
+; O2-LABEL: tls_load
+define i32 @tls_load() {
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: i32.load 0
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: i32.load 0
+  ; O2-NEXT: return
+  %tmp = load i32, i32* @tls, align 4
+  ret i32 %tmp
+}
+
+; O0-LABEL: tls_store
+; O2-LABEL: tls_store
+define void @tls_store(i32 %x) {
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: i32.store 0
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: i32.store 0
+  ; O2-NEXT: return
+  store i32 %x, i32* @tls, align 4
+  ret void
+}
+
+; CHECK-LABEL: tls_size:
+; CHECK-NEXT: .functype tls_size () -> (i32)
+; CHECK-NEXT: global.get __tls_size
+; CHECK-NEXT: return
+declare i32 @llvm.wasm.tls.size.i32()
+define i32 @tls_size() {
+  %1 = call i32 @llvm.wasm.tls.size.i32()
+  ret i32 %1
+}
+
+; CHECK: .type tls,@object
+; CHECK-NEXT: .section .tbss.tls,"",@
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: tls:
+; CHECK-NEXT: .int32 0
 @tls = internal thread_local global i32 0
Index: llvm/test/CodeGen/WebAssembly/target-features-tls.ll
===
--- llvm/test/CodeGen/WebAssembly/target-features-tls.ll
+++ llvm/test/CodeGen/WebAssembly/target-features-tls.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes CHECK,NO-ATOMICS
-; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes CHECK,ATOMICS
+; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes NO-ATOMICS
+; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes ATOMICS
 
 ; Test that the target features section contains -atomics or +atomics
 ; for modules that have thread local storage in their source.
@@ -9,16 +9,13 @@
 
 @foo = internal thread_local global i32 0
 
-; CHECK-LABEL: .custom_section.target_features,"",@
 
 ; -atomics
-; NO-ATOMICS-NEXT: .int8 1
-; NO-ATOMICS-NEXT: .int8 45
-; NO-ATOMICS-NEXT: .int8 7
-; NO-ATOMICS-NEXT: .ascii "atomics"

[PATCH] D64537: [WebAssembly] Implement thread-local storage for non-PIC cases

2019-07-10 Thread Guanzhong Chen via Phabricator via cfe-commits
quantum updated this revision to Diff 209084.
quantum added a comment.

Add tests for `__builtin_wasm_tls_size`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64537/new/

https://reviews.llvm.org/D64537

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-wasm.c
  lld/test/wasm/data-layout.ll
  lld/test/wasm/gc-sections.ll
  lld/test/wasm/tls.ll
  lld/wasm/Driver.cpp
  lld/wasm/Symbols.cpp
  lld/wasm/Symbols.h
  lld/wasm/Writer.cpp
  llvm/include/llvm/BinaryFormat/Wasm.h
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/include/llvm/MC/MCSectionWasm.h
  llvm/lib/MC/WasmObjectWriter.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISD.def
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
  llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
  llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
  llvm/test/CodeGen/WebAssembly/target-features-tls.ll
  llvm/test/CodeGen/WebAssembly/tls.ll

Index: llvm/test/CodeGen/WebAssembly/tls.ll
===
--- llvm/test/CodeGen/WebAssembly/tls.ll
+++ llvm/test/CodeGen/WebAssembly/tls.ll
@@ -1,17 +1,87 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck --check-prefix=SINGLE %s
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck --check-prefixes=CHECK,O2 %s
+; RUN: llc -O0 < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck --check-prefixes=CHECK,O0 %s
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-; SINGLE-LABEL: address_of_tls:
+; O0-LABEL: address_of_tls:
+; O2-LABEL: address_of_tls:
 define i32 @address_of_tls() {
-  ; SINGLE: i32.const $push0=, tls
-  ; SINGLE-NEXT: return $pop0
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: return
   ret i32 ptrtoint(i32* @tls to i32)
 }
 
-; SINGLE: .type	tls,@object
-; SINGLE-NEXT: .section	.bss.tls,"",@
-; SINGLE-NEXT: .p2align 2
-; SINGLE-NEXT: tls:
-; SINGLE-NEXT: .int32 0
+; O0-LABEL: ptr_to_tls
+; O2-LABEL: ptr_to_tls
+define i32* @ptr_to_tls() {
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: return
+  ret i32* @tls
+}
+
+; O0-LABEL: tls_load
+; O2-LABEL: tls_load
+define i32 @tls_load() {
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: i32.load 0
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: i32.load 0
+  ; O2-NEXT: return
+  %tmp = load i32, i32* @tls, align 4
+  ret i32 %tmp
+}
+
+; O0-LABEL: tls_store
+; O2-LABEL: tls_store
+define void @tls_store(i32 %x) {
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: i32.store 0
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: i32.store 0
+  ; O2-NEXT: return
+  store i32 %x, i32* @tls, align 4
+  ret void
+}
+
+; CHECK-LABEL: tls_size:
+; CHECK-NEXT: .functype tls_size () -> (i32)
+; CHECK-NEXT: global.get __tls_size
+; CHECK-NEXT: return
+declare i32 @llvm.wasm.tls.size.i32()
+define i32 @tls_size() {
+  %1 = call i32 @llvm.wasm.tls.size.i32()
+  ret i32 %1
+}
+
+; CHECK: .type tls,@object
+; CHECK-NEXT: .section .tbss.tls,"",@
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: tls:
+; CHECK-NEXT: .int32 0
 @tls = internal thread_local global i32 0
Index: llvm/test/CodeGen/WebAssembly/target-features-tls.ll
===
--- llvm/test/CodeGen/WebAssembly/target-features-tls.ll
+++ llvm/test/CodeGen/WebAssembly/target-features-tls.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes CHECK,NO-ATOMICS
-; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes CHECK,ATOMICS
+; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes NO-ATOMICS
+; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes ATOMICS
 
 ; Test that the target features section contains -atomics or +atomics
 ; for modules that have thread local storage in their source.
@@ -9,16 +9,13 @@
 
 @foo = internal thread_local global i32 0
 
-; CHECK-LABEL: .custom_section.target_features,"",@
 
 ; -atomics
-; NO-ATOMICS-NEXT: .int8 1
-; NO-ATOMICS-NEXT: .int8 45
-; NO-ATOMICS-NEXT: .int8 7
-; 

[PATCH] D64537: [WebAssembly] Implement thread-local storage for non-PIC cases

2019-07-10 Thread Guanzhong Chen via Phabricator via cfe-commits
quantum planned changes to this revision.
quantum added a comment.

Need to add tests for the compiler intrinsic `__builtin_wasm_tls_size`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64537/new/

https://reviews.llvm.org/D64537



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64537: [WebAssembly] Implement thread-local storage for non-PIC cases

2019-07-10 Thread Guanzhong Chen via Phabricator via cfe-commits
quantum created this revision.
quantum added reviewers: tlively, aheejin, kripken, sbc100.
Herald added subscribers: llvm-commits, cfe-commits, jfb, sunfish, hiraditya, 
jgravelle-google, dschuff.
Herald added projects: clang, LLVM.
quantum planned changes to this revision.
quantum added a comment.

Need to add tests for the compiler intrinsic `__builtin_wasm_tls_size`.


Thread local variables are placed inside a `.tdata` segment. Their symbols are
offsets from the start of the segment. The address of a thread local variable
is computed as `__tls_base` + the offset from the start of the segment.

`.tdata` segment is a passive segment and `memory.init` is used once per thread
to initialize the thread local storage.

`__tls_base` is a wasm global. Since each thread has its own wasm instance,
it is effectively thread local. Currently, `__tls_base` must be initialized
at thread startup, and so cannot be used with dynamic libraries.

`__tls_base` is to be initialized with a new linker-synthesized function,
`__wasm_init_tls`, which takes as an argument a block of memory to use as the
storage for thread locals. It then initializes the block of memory and sets
`__tls_base`.

To help allocating memory for thread-local storage, a new compiler intrinsic
is introduced: `__builtin_wasm_tls_size()`. This instrinsic function returns
the size of the thread-local storage for the current function.

The expected usage is to run something like the following upon thread startup:

  __wasm_init_tls(calloc(__builtin_wasm_tls_size(), 1));


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D64537

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  lld/test/wasm/data-layout.ll
  lld/test/wasm/gc-sections.ll
  lld/test/wasm/tls.ll
  lld/wasm/Driver.cpp
  lld/wasm/Symbols.cpp
  lld/wasm/Symbols.h
  lld/wasm/Writer.cpp
  llvm/include/llvm/BinaryFormat/Wasm.h
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/include/llvm/MC/MCSectionWasm.h
  llvm/lib/MC/WasmObjectWriter.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISD.def
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
  llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
  llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
  llvm/test/CodeGen/WebAssembly/target-features-tls.ll
  llvm/test/CodeGen/WebAssembly/tls.ll

Index: llvm/test/CodeGen/WebAssembly/tls.ll
===
--- llvm/test/CodeGen/WebAssembly/tls.ll
+++ llvm/test/CodeGen/WebAssembly/tls.ll
@@ -1,17 +1,77 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck --check-prefix=SINGLE %s
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck --check-prefixes=CHECK,O2 %s
+; RUN: llc -O0 < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck --check-prefixes=CHECK,O0 %s
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-; SINGLE-LABEL: address_of_tls:
+; O0-LABEL: address_of_tls:
+; O2-LABEL: address_of_tls:
 define i32 @address_of_tls() {
-  ; SINGLE: i32.const $push0=, tls
-  ; SINGLE-NEXT: return $pop0
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: return
   ret i32 ptrtoint(i32* @tls to i32)
 }
 
-; SINGLE: .type	tls,@object
-; SINGLE-NEXT: .section	.bss.tls,"",@
-; SINGLE-NEXT: .p2align 2
-; SINGLE-NEXT: tls:
-; SINGLE-NEXT: .int32 0
+; O0-LABEL: ptr_to_tls
+; O2-LABEL: ptr_to_tls
+define i32* @ptr_to_tls() {
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: return
+  ret i32* @tls
+}
+
+; O0-LABEL: tls_load
+; O2-LABEL: tls_load
+define i32 @tls_load() {
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: i32.load 0
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: i32.load 0
+  ; O2-NEXT: return
+  %tmp = load i32, i32* @tls, align 4
+  ret i32 %tmp
+}
+
+; O0-LABEL: tls_store
+; O2-LABEL: tls_store
+define void @tls_store(i32 %x) {
+  ; O0: i32.const tls
+  ; O0-NEXT: global.get __tls_base
+  ; O0-NEXT: i32.add
+  ; O0-NEXT: i32.store 0
+  ; O0-NEXT: return
+
+  ; O2: global.get __tls_base
+  ; O2-NEXT: i32.const tls
+  ; O2-NEXT: i32.add
+  ; O2-NEXT: i32.store 0
+  ; O2-NEXT: return
+  store i32 %x, i32* @tls, align 4
+  ret void
+}
+
+; CHECK: .type tls,@object
+; CHECK-NEXT: