[PATCH] D74531: [WebAssembly] Emit clangast in custom section aligned by 4 bytes

2021-10-19 Thread Sam Clegg via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG1813fde9cc0b: [WebAssembly] Emit clangast in custom section 
aligned by 4 bytes (authored by kateinoigakukun, committed by sbc100).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74531/new/

https://reviews.llvm.org/D74531

Files:
  clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
  clang/test/PCH/pch-wasm.c
  llvm/lib/MC/WasmObjectWriter.cpp
  llvm/test/MC/WebAssembly/custom-section-alignment.ll

Index: llvm/test/MC/WebAssembly/custom-section-alignment.ll
===
--- /dev/null
+++ llvm/test/MC/WebAssembly/custom-section-alignment.ll
@@ -0,0 +1,10 @@
+; RUN: llc -filetype=obj %s -o - | od -t x1 -v | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+!0 = !{ !"before", !"\de\ad\be\ef" }
+!1 = !{ !"__clangast", !"\fe\ed\fa\ce" }
+!wasm.custom_sections = !{ !0, !1 }
+
+; Ensure that __clangast content is aligned by 4 bytes
+; CHECK: {{(([0-9a-f]{2} ){4})*}}fe ed fa ce
Index: llvm/lib/MC/WasmObjectWriter.cpp
===
--- llvm/lib/MC/WasmObjectWriter.cpp
+++ llvm/lib/MC/WasmObjectWriter.cpp
@@ -292,6 +292,8 @@
 W->OS << Str;
   }
 
+  void writeStringWithAlignment(const StringRef Str, unsigned Alignment);
+
   void writeI32(int32_t val) {
 char Buffer[4];
 support::endian::write32le(Buffer, val);
@@ -362,6 +364,28 @@
   Section.Index = SectionCount++;
 }
 
+// Write a string with extra paddings for trailing alignment
+// TODO: support alignment at asm and llvm level?
+void WasmObjectWriter::writeStringWithAlignment(const StringRef Str,
+unsigned Alignment) {
+
+  // Calculate the encoded size of str length and add pads based on it and
+  // alignment.
+  raw_null_ostream NullOS;
+  uint64_t StrSizeLength = encodeULEB128(Str.size(), NullOS);
+  uint64_t Offset = W->OS.tell() + StrSizeLength + Str.size();
+  uint64_t Paddings = offsetToAlignment(Offset, Align(Alignment));
+  Offset += Paddings;
+
+  // LEB128 greater than 5 bytes is invalid
+  assert((StrSizeLength + Paddings) <= 5 && "too long string to align");
+
+  encodeSLEB128(Str.size(), W->OS, StrSizeLength + Paddings);
+  W->OS << Str;
+
+  assert(W->OS.tell() == Offset && "invalid padding");
+}
+
 void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section,
   StringRef Name) {
   LLVM_DEBUG(dbgs() << "startCustomSection " << Name << "\n");
@@ -371,7 +395,12 @@
   Section.PayloadOffset = W->OS.tell();
 
   // Custom sections in wasm also have a string identifier.
-  writeString(Name);
+  if (Name != "__clangast") {
+writeString(Name);
+  } else {
+// The on-disk hashtable in clangast needs to be aligned by 4 bytes.
+writeStringWithAlignment(Name, 4);
+  }
 
   // The position where the custom section starts.
   Section.ContentsOffset = W->OS.tell();
Index: clang/test/PCH/pch-wasm.c
===
--- /dev/null
+++ clang/test/PCH/pch-wasm.c
@@ -0,0 +1,7 @@
+// REQUIRES: webassembly-registered-target
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown-wasm -emit-pch -fmodule-format=obj %S/pchpch1.h -o - | llvm-objdump --section-headers - | FileCheck %s
+
+// Ensure that clangast section should be emitted in a section for wasm object file
+
+// CHECK: file format wasm
+// CHECK: __clangast   {{[0-9a-f]+}} {{[0-9a-f]+}}
Index: clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
===
--- clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -270,25 +270,42 @@
 assert(Buffer->IsComplete && "serialization did not complete");
 auto &SerializedAST = Buffer->Data;
 auto Size = SerializedAST.size();
-auto Int8Ty = llvm::Type::getInt8Ty(*VMContext);
-auto *Ty = llvm::ArrayType::get(Int8Ty, Size);
-auto *Data = llvm::ConstantDataArray::getString(
-*VMContext, StringRef(SerializedAST.data(), Size),
-/*AddNull=*/false);
-auto *ASTSym = new llvm::GlobalVariable(
-*M, Ty, /*constant*/ true, llvm::GlobalVariable::InternalLinkage, Data,
-"__clang_ast");
-// The on-disk hashtable needs to be aligned.
-ASTSym->setAlignment(llvm::Align(8));
-
-// Mach-O also needs a segment name.
-if (Triple.isOSBinFormatMachO())
-  ASTSym->setSection("__CLANG,__clangast");
-// COFF has an eight character length limit.
-else if (Triple.isOSBinFormatCOFF())
-  ASTSym->setSection("clangast");
-else
-  ASTSym->setSection("__clangast");
+
+if (Triple.isOSBinFormatWasm()) {
+  // Emit __clangast in custom section instead of named data segment
+  // to find it while iterating sections.
+  // This could be avoided if al

[PATCH] D74531: [WebAssembly] Emit clangast in custom section aligned by 4 bytes

2021-10-19 Thread Yuta Saito via Phabricator via cfe-commits
kateinoigakukun added a comment.

Thanks for reviewing! Can you merge this patch? I have no commit access.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74531/new/

https://reviews.llvm.org/D74531

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D74531: [WebAssembly] Emit clangast in custom section aligned by 4 bytes

2021-10-18 Thread Yuta Saito via Phabricator via cfe-commits
kateinoigakukun updated this revision to Diff 380554.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74531/new/

https://reviews.llvm.org/D74531

Files:
  clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
  clang/test/PCH/pch-wasm.c
  llvm/lib/MC/WasmObjectWriter.cpp
  llvm/test/MC/WebAssembly/custom-section-alignment.ll

Index: llvm/test/MC/WebAssembly/custom-section-alignment.ll
===
--- /dev/null
+++ llvm/test/MC/WebAssembly/custom-section-alignment.ll
@@ -0,0 +1,10 @@
+; RUN: llc -filetype=obj %s -o - | od -t x1 -v | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+!0 = !{ !"before", !"\de\ad\be\ef" }
+!1 = !{ !"__clangast", !"\fe\ed\fa\ce" }
+!wasm.custom_sections = !{ !0, !1 }
+
+; Ensure that __clangast content is aligned by 4 bytes
+; CHECK: {{(([0-9a-f]{2} ){4})*}}fe ed fa ce
Index: llvm/lib/MC/WasmObjectWriter.cpp
===
--- llvm/lib/MC/WasmObjectWriter.cpp
+++ llvm/lib/MC/WasmObjectWriter.cpp
@@ -292,6 +292,8 @@
 W->OS << Str;
   }
 
+  void writeStringWithAlignment(const StringRef Str, unsigned Alignment);
+
   void writeI32(int32_t val) {
 char Buffer[4];
 support::endian::write32le(Buffer, val);
@@ -362,6 +364,28 @@
   Section.Index = SectionCount++;
 }
 
+// Write a string with extra paddings for trailing alignment
+// TODO: support alignment at asm and llvm level?
+void WasmObjectWriter::writeStringWithAlignment(const StringRef Str,
+unsigned Alignment) {
+
+  // Calculate the encoded size of str length and add pads based on it and
+  // alignment.
+  raw_null_ostream NullOS;
+  uint64_t StrSizeLength = encodeULEB128(Str.size(), NullOS);
+  uint64_t Offset = W->OS.tell() + StrSizeLength + Str.size();
+  uint64_t Paddings = offsetToAlignment(Offset, Align(Alignment));
+  Offset += Paddings;
+
+  // LEB128 greater than 5 bytes is invalid
+  assert((StrSizeLength + Paddings) <= 5 && "too long string to align");
+
+  encodeSLEB128(Str.size(), W->OS, StrSizeLength + Paddings);
+  W->OS << Str;
+
+  assert(W->OS.tell() == Offset && "invalid padding");
+}
+
 void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section,
   StringRef Name) {
   LLVM_DEBUG(dbgs() << "startCustomSection " << Name << "\n");
@@ -371,7 +395,12 @@
   Section.PayloadOffset = W->OS.tell();
 
   // Custom sections in wasm also have a string identifier.
-  writeString(Name);
+  if (Name != "__clangast") {
+writeString(Name);
+  } else {
+// The on-disk hashtable in clangast needs to be aligned by 4 bytes.
+writeStringWithAlignment(Name, 4);
+  }
 
   // The position where the custom section starts.
   Section.ContentsOffset = W->OS.tell();
Index: clang/test/PCH/pch-wasm.c
===
--- /dev/null
+++ clang/test/PCH/pch-wasm.c
@@ -0,0 +1,7 @@
+// REQUIRES: webassembly-registered-target
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown-wasm -emit-pch -fmodule-format=obj %S/pchpch1.h -o - | llvm-objdump --section-headers - | FileCheck %s
+
+// Ensure that clangast section should be emitted in a section for wasm object file
+
+// CHECK: file format wasm
+// CHECK: __clangast   {{[0-9a-f]+}} {{[0-9a-f]+}}
Index: clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
===
--- clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -270,25 +270,42 @@
 assert(Buffer->IsComplete && "serialization did not complete");
 auto &SerializedAST = Buffer->Data;
 auto Size = SerializedAST.size();
-auto Int8Ty = llvm::Type::getInt8Ty(*VMContext);
-auto *Ty = llvm::ArrayType::get(Int8Ty, Size);
-auto *Data = llvm::ConstantDataArray::getString(
-*VMContext, StringRef(SerializedAST.data(), Size),
-/*AddNull=*/false);
-auto *ASTSym = new llvm::GlobalVariable(
-*M, Ty, /*constant*/ true, llvm::GlobalVariable::InternalLinkage, Data,
-"__clang_ast");
-// The on-disk hashtable needs to be aligned.
-ASTSym->setAlignment(llvm::Align(8));
-
-// Mach-O also needs a segment name.
-if (Triple.isOSBinFormatMachO())
-  ASTSym->setSection("__CLANG,__clangast");
-// COFF has an eight character length limit.
-else if (Triple.isOSBinFormatCOFF())
-  ASTSym->setSection("clangast");
-else
-  ASTSym->setSection("__clangast");
+
+if (Triple.isOSBinFormatWasm()) {
+  // Emit __clangast in custom section instead of named data segment
+  // to find it while iterating sections.
+  // This could be avoided if all data segements (the wasm sense) were
+  // represented as their own sections (in the llvm sense).
+  // TODO: https://github.com/WebAssembly/tool-conventions/iss

[PATCH] D74531: [WebAssembly] Emit clangast in custom section aligned by 4 bytes

2021-10-18 Thread Yuta Saito via Phabricator via cfe-commits
kateinoigakukun updated this revision to Diff 380551.
kateinoigakukun added a comment.

Thank you for taking a look again. OK, I extracted `writeStringWithAlignment` 
and updated comment lines.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74531/new/

https://reviews.llvm.org/D74531

Files:
  clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
  clang/test/PCH/pch-wasm.c
  llvm/lib/MC/WasmObjectWriter.cpp
  llvm/test/MC/WebAssembly/custom-section-alignment.ll

Index: llvm/test/MC/WebAssembly/custom-section-alignment.ll
===
--- /dev/null
+++ llvm/test/MC/WebAssembly/custom-section-alignment.ll
@@ -0,0 +1,10 @@
+; RUN: llc -filetype=obj %s -o - | od -t x1 -v | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+!0 = !{ !"before", !"\de\ad\be\ef" }
+!1 = !{ !"__clangast", !"\fe\ed\fa\ce" }
+!wasm.custom_sections = !{ !0, !1 }
+
+; Ensure that __clangast content is aligned by 4 bytes
+; CHECK: {{(([0-9a-f]{2} ){4})*}}fe ed fa ce
Index: llvm/lib/MC/WasmObjectWriter.cpp
===
--- llvm/lib/MC/WasmObjectWriter.cpp
+++ llvm/lib/MC/WasmObjectWriter.cpp
@@ -292,6 +292,8 @@
 W->OS << Str;
   }
 
+  void writeStringWithAlignment(const StringRef Str, unsigned Alignment);
+
   void writeI32(int32_t val) {
 char Buffer[4];
 support::endian::write32le(Buffer, val);
@@ -362,6 +364,28 @@
   Section.Index = SectionCount++;
 }
 
+// Write a string with extra paddings for trailing alignment
+// TODO: support alignment at asm and llvm level?
+void WasmObjectWriter::writeStringWithAlignment(const StringRef Str,
+unsigned Alignment) {
+
+  // Calculate the encoded size of str length and add pads based on it and
+  // alignment.
+  raw_null_ostream NullOS;
+  uint64_t StrSizeLength = encodeULEB128(Str.size(), NullOS);
+  uint64_t Offset = W->OS.tell() + StrSizeLength + Str.size();
+  uint64_t Paddings = offsetToAlignment(Offset, Align(Alignment));
+  Offset += Paddings;
+
+  // LEB128 greater than 5 bytes is invalid
+  assert((StrSizeLength + Paddings) <= 5 && "too long section name to align");
+
+  encodeSLEB128(Str.size(), W->OS, StrSizeLength + Paddings);
+  W->OS << Str;
+
+  assert(W->OS.tell() == Offset && "invalid padding");
+}
+
 void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section,
   StringRef Name) {
   LLVM_DEBUG(dbgs() << "startCustomSection " << Name << "\n");
@@ -371,7 +395,12 @@
   Section.PayloadOffset = W->OS.tell();
 
   // Custom sections in wasm also have a string identifier.
-  writeString(Name);
+  if (Name != "__clangast") {
+writeString(Name);
+  } else {
+// The on-disk hashtable in clangast needs to be aligned by 4 bytes.
+writeStringWithAlignment(Name, 4);
+  }
 
   // The position where the custom section starts.
   Section.ContentsOffset = W->OS.tell();
Index: clang/test/PCH/pch-wasm.c
===
--- /dev/null
+++ clang/test/PCH/pch-wasm.c
@@ -0,0 +1,7 @@
+// REQUIRES: webassembly-registered-target
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown-wasm -emit-pch -fmodule-format=obj %S/pchpch1.h -o - | llvm-objdump --section-headers - | FileCheck %s
+
+// Ensure that clangast section should be emitted in a section for wasm object file
+
+// CHECK: file format wasm
+// CHECK: __clangast   {{[0-9a-f]+}} {{[0-9a-f]+}}
Index: clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
===
--- clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -270,25 +270,42 @@
 assert(Buffer->IsComplete && "serialization did not complete");
 auto &SerializedAST = Buffer->Data;
 auto Size = SerializedAST.size();
-auto Int8Ty = llvm::Type::getInt8Ty(*VMContext);
-auto *Ty = llvm::ArrayType::get(Int8Ty, Size);
-auto *Data = llvm::ConstantDataArray::getString(
-*VMContext, StringRef(SerializedAST.data(), Size),
-/*AddNull=*/false);
-auto *ASTSym = new llvm::GlobalVariable(
-*M, Ty, /*constant*/ true, llvm::GlobalVariable::InternalLinkage, Data,
-"__clang_ast");
-// The on-disk hashtable needs to be aligned.
-ASTSym->setAlignment(llvm::Align(8));
-
-// Mach-O also needs a segment name.
-if (Triple.isOSBinFormatMachO())
-  ASTSym->setSection("__CLANG,__clangast");
-// COFF has an eight character length limit.
-else if (Triple.isOSBinFormatCOFF())
-  ASTSym->setSection("clangast");
-else
-  ASTSym->setSection("__clangast");
+
+if (Triple.isOSBinFormatWasm()) {
+  // Emit __clangast in custom section instead of named data segment
+  // to find it while iterating sections.
+  // This could be avoided if all data segements (the wa

[PATCH] D74531: [WebAssembly] Emit clangast in custom section aligned by 4 bytes

2021-10-18 Thread Sam Clegg via Phabricator via cfe-commits
sbc100 added inline comments.



Comment at: llvm/lib/MC/WasmObjectWriter.cpp:374
+  // Custom sections in wasm also have a string identifier with extra paddings
+  // for alignment for special sections.
+  // TODO: support section alignment at asm and llvm level?

I would leave the first sentence alone which that applies to all custom 
sections.The second part of this sentence (the new part) is maybe not 
needed since you have a comment already on line 378.

Also, how about keeping the simpler form in the default case, and putting this 
new code in a new method. e.g.:

```
if (Name != "__clangast") {
  writeString(Name);
} else {
  writeStringWithAlignment(Name, 4);
}
```

Then the entire new method can be marked with the TODO to remove it if/when we 
change the section mapping.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74531/new/

https://reviews.llvm.org/D74531

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D74531: [WebAssembly] Emit clangast in custom section aligned by 4 bytes

2021-10-17 Thread Yuta Saito via Phabricator via cfe-commits
kateinoigakukun updated this revision to Diff 380283.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74531/new/

https://reviews.llvm.org/D74531

Files:
  clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
  clang/test/PCH/pch-wasm.c
  llvm/lib/MC/WasmObjectWriter.cpp
  llvm/test/MC/WebAssembly/custom-section-alignment.ll

Index: llvm/test/MC/WebAssembly/custom-section-alignment.ll
===
--- /dev/null
+++ llvm/test/MC/WebAssembly/custom-section-alignment.ll
@@ -0,0 +1,10 @@
+; RUN: llc -filetype=obj %s -o - | od -t x1 -v | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+!0 = !{ !"before", !"\de\ad\be\ef" }
+!1 = !{ !"__clangast", !"\fe\ed\fa\ce" }
+!wasm.custom_sections = !{ !0, !1 }
+
+; Ensure that __clangast content is aligned by 4 bytes
+; CHECK: {{(([0-9a-f]{2} ){4})*}}fe ed fa ce
Index: llvm/lib/MC/WasmObjectWriter.cpp
===
--- llvm/lib/MC/WasmObjectWriter.cpp
+++ llvm/lib/MC/WasmObjectWriter.cpp
@@ -370,11 +370,33 @@
   // The position where the section header ends, for measuring its size.
   Section.PayloadOffset = W->OS.tell();
 
-  // Custom sections in wasm also have a string identifier.
-  writeString(Name);
+  // Custom sections in wasm also have a string identifier with extra paddings
+  // for alignment for special sections.
+  // TODO: support section alignment at asm and llvm level?
+  unsigned Alignment = 1;
+
+  // The on-disk hashtable in clangast needs to be aligned by 4 bytes.
+  if (Name == "__clangast")
+Alignment = 4;
+
+  // Calculate the encoded size of name length and add pads based on it and
+  // alignment.
+  raw_null_ostream NullOS;
+  uint64_t NameSizeLength = encodeULEB128(Name.size(), NullOS);
+  uint64_t ContentsOffset =
+  Section.PayloadOffset + NameSizeLength + Name.size();
+  uint64_t Paddings = offsetToAlignment(ContentsOffset, Align(Alignment));
+  ContentsOffset += Paddings;
+
+  // LEB128 greater than 5 bytes is invalid
+  assert((NameSizeLength + Paddings) <= 5 && "too long section name to align");
+
+  encodeSLEB128(Name.size(), W->OS, NameSizeLength + Paddings);
+  W->OS << Name;
 
   // The position where the custom section starts.
   Section.ContentsOffset = W->OS.tell();
+  assert(Section.ContentsOffset == ContentsOffset && "invalid padding");
 }
 
 // Now that the section is complete and we know how big it is, patch up the
Index: clang/test/PCH/pch-wasm.c
===
--- /dev/null
+++ clang/test/PCH/pch-wasm.c
@@ -0,0 +1,7 @@
+// REQUIRES: webassembly-registered-target
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown-wasm -emit-pch -fmodule-format=obj %S/pchpch1.h -o - | llvm-objdump --section-headers - | FileCheck %s
+
+// Ensure that clangast section should be emitted in a section for wasm object file
+
+// CHECK: file format wasm
+// CHECK: __clangast   {{[0-9a-f]+}} {{[0-9a-f]+}}
Index: clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
===
--- clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -270,25 +270,42 @@
 assert(Buffer->IsComplete && "serialization did not complete");
 auto &SerializedAST = Buffer->Data;
 auto Size = SerializedAST.size();
-auto Int8Ty = llvm::Type::getInt8Ty(*VMContext);
-auto *Ty = llvm::ArrayType::get(Int8Ty, Size);
-auto *Data = llvm::ConstantDataArray::getString(
-*VMContext, StringRef(SerializedAST.data(), Size),
-/*AddNull=*/false);
-auto *ASTSym = new llvm::GlobalVariable(
-*M, Ty, /*constant*/ true, llvm::GlobalVariable::InternalLinkage, Data,
-"__clang_ast");
-// The on-disk hashtable needs to be aligned.
-ASTSym->setAlignment(llvm::Align(8));
-
-// Mach-O also needs a segment name.
-if (Triple.isOSBinFormatMachO())
-  ASTSym->setSection("__CLANG,__clangast");
-// COFF has an eight character length limit.
-else if (Triple.isOSBinFormatCOFF())
-  ASTSym->setSection("clangast");
-else
-  ASTSym->setSection("__clangast");
+
+if (Triple.isOSBinFormatWasm()) {
+  // Emit __clangast in custom section instead of named data segment
+  // to find it while iterating sections.
+  // This could be avoided if all data segements (the wasm sense) were
+  // represented as their own sections (in the llvm sense).
+  // TODO: https://github.com/WebAssembly/tool-conventions/issues/138
+  llvm::NamedMDNode *MD =
+  M->getOrInsertNamedMetadata("wasm.custom_sections");
+  llvm::Metadata *Ops[2] = {
+  llvm::MDString::get(*VMContext, "__clangast"),
+  llvm::MDString::get(*VMContext,
+  StringRef(SerializedAST.data(), Size))};
+  auto *NameAndContent = llvm::MDTuple::get(*VMCo

[PATCH] D74531: [WebAssembly] Emit clangast in custom section aligned by 4 bytes

2021-10-17 Thread Yuta Saito via Phabricator via cfe-commits
kateinoigakukun updated this revision to Diff 380282.
kateinoigakukun retitled this revision from " [WebAssembly] Emit PCH 
__clang_ast in custom section" to "[WebAssembly] Emit clangast in custom 
section aligned by 4 bytes".
kateinoigakukun edited the summary of this revision.
kateinoigakukun added a comment.
Herald added subscribers: cfe-commits, ecnelises.
Herald added a project: clang.

I'm sorry to keep you waiting. I updated to reduce hacks in backend to emit 
clangast in custom section and move it to clang. 
And emit paddings for alignment to fit into 5 bytes.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74531/new/

https://reviews.llvm.org/D74531

Files:
  clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
  clang/test/PCH/pch-wasm.c
  llvm/lib/MC/WasmObjectWriter.cpp
  llvm/test/MC/WebAssembly/custom-section-alignment.ll

Index: llvm/test/MC/WebAssembly/custom-section-alignment.ll
===
--- /dev/null
+++ llvm/test/MC/WebAssembly/custom-section-alignment.ll
@@ -0,0 +1,10 @@
+; RUN: llc -filetype=obj %s -o - | od -t x1 -v | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+!0 = !{ !"before", !"\de\ad\be\ef" }
+!1 = !{ !"__clangast", !"\fe\ed\fa\ce" }
+!wasm.custom_sections = !{ !0, !1 }
+
+; Ensure that __clangast content is aligned by 4 bytes
+; CHECK: {{(([0-9a-f]{2} ){4})*}}fe ed fa ce
Index: llvm/lib/MC/WasmObjectWriter.cpp
===
--- llvm/lib/MC/WasmObjectWriter.cpp
+++ llvm/lib/MC/WasmObjectWriter.cpp
@@ -370,11 +370,31 @@
   // The position where the section header ends, for measuring its size.
   Section.PayloadOffset = W->OS.tell();
 
-  // Custom sections in wasm also have a string identifier.
-  writeString(Name);
+  // Custom sections in wasm also have a string identifier with extra paddings for alignment
+  // for special sections.
+  // TODO: support section alignment at asm and llvm level?
+  unsigned Alignment = 1;
+
+  // The on-disk hashtable in clangast needs to be aligned by 4 bytes.
+  if (Name == "__clangast")
+Alignment = 4;
+
+  // Calculate the encoded size of name length and add pads based on it and alignment.
+  raw_null_ostream NullOS;
+  uint64_t NameSizeLength = encodeULEB128(Name.size(), NullOS);
+  uint64_t ContentsOffset = Section.PayloadOffset + NameSizeLength + Name.size();
+  uint64_t Paddings = offsetToAlignment(ContentsOffset, Align(Alignment));
+  ContentsOffset += Paddings;
+
+  // LEB128 greater than 5 bytes is invalid
+  assert((NameSizeLength + Paddings) <= 5 && "too long section name to align");
+
+  encodeSLEB128(Name.size(), W->OS, NameSizeLength + Paddings);
+  W->OS << Name;
 
   // The position where the custom section starts.
   Section.ContentsOffset = W->OS.tell();
+  assert(Section.ContentsOffset == ContentsOffset && "invalid padding");
 }
 
 // Now that the section is complete and we know how big it is, patch up the
Index: clang/test/PCH/pch-wasm.c
===
--- /dev/null
+++ clang/test/PCH/pch-wasm.c
@@ -0,0 +1,7 @@
+// REQUIRES: webassembly-registered-target
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown-wasm -emit-pch -fmodule-format=obj %S/pchpch1.h -o - | llvm-objdump --section-headers - | FileCheck %s
+
+// Ensure that clangast section should be emitted in a section for wasm object file
+
+// CHECK: file format wasm
+// CHECK: __clangast   {{[0-9a-f]+}} {{[0-9a-f]+}}
Index: clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
===
--- clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -270,25 +270,39 @@
 assert(Buffer->IsComplete && "serialization did not complete");
 auto &SerializedAST = Buffer->Data;
 auto Size = SerializedAST.size();
-auto Int8Ty = llvm::Type::getInt8Ty(*VMContext);
-auto *Ty = llvm::ArrayType::get(Int8Ty, Size);
-auto *Data = llvm::ConstantDataArray::getString(
-*VMContext, StringRef(SerializedAST.data(), Size),
-/*AddNull=*/false);
-auto *ASTSym = new llvm::GlobalVariable(
-*M, Ty, /*constant*/ true, llvm::GlobalVariable::InternalLinkage, Data,
-"__clang_ast");
-// The on-disk hashtable needs to be aligned.
-ASTSym->setAlignment(llvm::Align(8));
-
-// Mach-O also needs a segment name.
-if (Triple.isOSBinFormatMachO())
-  ASTSym->setSection("__CLANG,__clangast");
-// COFF has an eight character length limit.
-else if (Triple.isOSBinFormatCOFF())
-  ASTSym->setSection("clangast");
-else
-  ASTSym->setSection("__clangast");
+
+if (Triple.isOSBinFormatWasm()) {
+  // Emit __clangast in custom section instead of named data segment
+  // to find it while iterating sections.
+  // This could be avoided if all data segements (the wasm