[Lldb-commits] [PATCH] D158470: [lldb] Add support for recognizing swift mangled names

2023-08-22 Thread Alex Langford via Phabricator via lldb-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGedc64d49de92: [lldb] Add support for recognizing swift 
mangled names (authored by bulbazord).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158470/new/

https://reviews.llvm.org/D158470

Files:
  lldb/include/lldb/Core/Mangled.h
  lldb/source/Core/Mangled.cpp
  lldb/unittests/Core/MangledTest.cpp


Index: lldb/unittests/Core/MangledTest.cpp
===
--- lldb/unittests/Core/MangledTest.cpp
+++ lldb/unittests/Core/MangledTest.cpp
@@ -89,6 +89,24 @@
   EXPECT_STREQ("", the_demangled.GetCString());
 }
 
+TEST(MangledTest, RecognizeSwiftMangledNames) {
+  llvm::StringRef valid_swift_mangled_names[] = {
+  "_TtC4main7MyClass",   // Mangled objc class name
+  "_TtP4main3Foo_",  // Mangld objc protocol name
+  "$s4main3BarCACycfC",  // Mangled name
+  "_$s4main3BarCACycfC", // Mangled name with leading underscore
+  "$S4main3BarCACycfC",  // Older swift mangled name
+  "_$S4main3BarCACycfC", // Older swift mangled name
+ // with leading underscore
+  // Mangled swift filename
+  "@__swiftmacro_4main16FunVariableNames9OptionSetfMm_.swift",
+  };
+
+  for (llvm::StringRef mangled : valid_swift_mangled_names)
+EXPECT_EQ(Mangled::GetManglingScheme(mangled),
+  Mangled::eManglingSchemeSwift);
+}
+
 TEST(MangledTest, BoolConversionOperator) {
   {
 ConstString MangledName("_ZN1a1b1cIiiiEEvm");
Index: lldb/source/Core/Mangled.cpp
===
--- lldb/source/Core/Mangled.cpp
+++ lldb/source/Core/Mangled.cpp
@@ -58,6 +58,24 @@
   if (name.startswith("___Z"))
 return Mangled::eManglingSchemeItanium;
 
+  // Swift's older style of mangling used "_T" as a mangling prefix. This can
+  // lead to false positives with other symbols that just so happen to start
+  // with "_T". To minimize the chance of that happening, we only return true
+  // for select old-style swift mangled names. The known cases are ObjC classes
+  // and protocols. Classes are either prefixed with "_TtC" or "_TtGC".
+  // Protocols are prefixed with "_TtP".
+  if (name.startswith("_TtC") || name.startswith("_TtGC") ||
+  name.startswith("_TtP"))
+return Mangled::eManglingSchemeSwift;
+
+  // Swift 4.2 used "$S" and "_$S".
+  // Swift 5 and onward uses "$s" and "_$s".
+  // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.
+  if (name.startswith("$S") || name.startswith("_$S") ||
+  name.startswith("$s") || name.startswith("_$s") ||
+  name.startswith("@__swiftmacro_"))
+return Mangled::eManglingSchemeSwift;
+
   return Mangled::eManglingSchemeNone;
 }
 
@@ -228,6 +246,7 @@
 
   case eManglingSchemeRustV0:
   case eManglingSchemeD:
+  case eManglingSchemeSwift:
 // Rich demangling scheme is not supported
 return false;
   }
@@ -265,6 +284,10 @@
   case eManglingSchemeD:
 demangled_name = GetDLangDemangledStr(m_mangled);
 break;
+  case eManglingSchemeSwift:
+// Demangling a swift name requires the swift compiler. This is
+// explicitly unsupported on llvm.org.
+break;
   case eManglingSchemeNone:
 llvm_unreachable("eManglingSchemeNone was handled already");
   }
Index: lldb/include/lldb/Core/Mangled.h
===
--- lldb/include/lldb/Core/Mangled.h
+++ lldb/include/lldb/Core/Mangled.h
@@ -43,7 +43,8 @@
 eManglingSchemeMSVC,
 eManglingSchemeItanium,
 eManglingSchemeRustV0,
-eManglingSchemeD
+eManglingSchemeD,
+eManglingSchemeSwift,
   };
 
   /// Default constructor.


Index: lldb/unittests/Core/MangledTest.cpp
===
--- lldb/unittests/Core/MangledTest.cpp
+++ lldb/unittests/Core/MangledTest.cpp
@@ -89,6 +89,24 @@
   EXPECT_STREQ("", the_demangled.GetCString());
 }
 
+TEST(MangledTest, RecognizeSwiftMangledNames) {
+  llvm::StringRef valid_swift_mangled_names[] = {
+  "_TtC4main7MyClass",   // Mangled objc class name
+  "_TtP4main3Foo_",  // Mangld objc protocol name
+  "$s4main3BarCACycfC",  // Mangled name
+  "_$s4main3BarCACycfC", // Mangled name with leading underscore
+  "$S4main3BarCACycfC",  // Older swift mangled name
+  "_$S4main3BarCACycfC", // Older swift mangled name
+ // with leading underscore
+  // Mangled swift filename
+  "@__swiftmacro_4main16FunVariableNames9OptionSetfMm_.swift",
+  };
+
+  for (llvm::StringRef mangled : valid_swift_mangled_names)
+EXPECT_EQ(Mangled::GetManglingScheme(mangled),
+  Mangled::eManglingSchemeSwift);
+}
+
 TEST(MangledTest, BoolConversionOperator) {
   {
 ConstString MangledName("_ZN1a1b1cIiiiEEvm");
Index: lldb/source/Core/Mangled.cpp

[Lldb-commits] [PATCH] D158470: [lldb] Add support for recognizing swift mangled names

2023-08-22 Thread Alex Langford via Phabricator via lldb-commits
bulbazord marked 4 inline comments as done.
bulbazord added inline comments.



Comment at: lldb/source/Core/Mangled.cpp:61
 
+  // Swift's older style of mangling used "_T" as a mangling prefix. This can
+  // lead to false positives with other symbols that just so happen to start

JDevlieghere wrote:
> aprantl wrote:
> > fdeazeve wrote:
> > > JDevlieghere wrote:
> > > > aprantl wrote:
> > > > > Feel free to completely ignore this, it's a pointless micro 
> > > > > optimization.
> > > > > 
> > > > > I'm curious if it would be more efficient to write this as 
> > > > > 
> > > > > ```
> > > > > switch (name[0]) {
> > > > >   case '?': return Mangled::eManglingSchemeMSVC;
> > > > >   case '_': 
> > > > >  switch(name[1]) {
> > > > > ...
> > > > >  }
> > > > >   case '$': 
> > > > >  switch(name[1]) {
> > > > > ...
> > > > >  }
> > > > > ```
> > > > > or if it would actually be slower than the chain of "vector" 
> > > > > comparisons we have right now?
> > > > I actually started writing a similar comment before discarding it. Even 
> > > > if this code is as hot as I expect it to be, I don't think it would 
> > > > outweigh the complexity and the potential for bugs. I really like how 
> > > > you can glance at the code and see the different mangling schemes and 
> > > > that's the first thing we'd lose. Anyway happy to be proven wrong too. 
> > > Honestly the optimizer is pretty good at doing those, look at the IR: 
> > > https://godbolt.org/z/PY3TeadbM
> > Sounds good, let's leave it!
> > 
> > Just trolling now: should we use some crazy #ifdef magic to flip the order 
> > based on the host platform, such the `?` comes first on windows and `$` 
> > comes first on Darwin?
> Did someone say tablegen? 
As much as I love micro-optimizations, let's do all of that in a follow-up. ;)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158470/new/

https://reviews.llvm.org/D158470

___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [PATCH] D158470: [lldb] Add support for recognizing swift mangled names

2023-08-22 Thread Jonas Devlieghere via Phabricator via lldb-commits
JDevlieghere added inline comments.



Comment at: lldb/source/Core/Mangled.cpp:61
 
+  // Swift's older style of mangling used "_T" as a mangling prefix. This can
+  // lead to false positives with other symbols that just so happen to start

aprantl wrote:
> fdeazeve wrote:
> > JDevlieghere wrote:
> > > aprantl wrote:
> > > > Feel free to completely ignore this, it's a pointless micro 
> > > > optimization.
> > > > 
> > > > I'm curious if it would be more efficient to write this as 
> > > > 
> > > > ```
> > > > switch (name[0]) {
> > > >   case '?': return Mangled::eManglingSchemeMSVC;
> > > >   case '_': 
> > > >  switch(name[1]) {
> > > > ...
> > > >  }
> > > >   case '$': 
> > > >  switch(name[1]) {
> > > > ...
> > > >  }
> > > > ```
> > > > or if it would actually be slower than the chain of "vector" 
> > > > comparisons we have right now?
> > > I actually started writing a similar comment before discarding it. Even 
> > > if this code is as hot as I expect it to be, I don't think it would 
> > > outweigh the complexity and the potential for bugs. I really like how you 
> > > can glance at the code and see the different mangling schemes and that's 
> > > the first thing we'd lose. Anyway happy to be proven wrong too. 
> > Honestly the optimizer is pretty good at doing those, look at the IR: 
> > https://godbolt.org/z/PY3TeadbM
> Sounds good, let's leave it!
> 
> Just trolling now: should we use some crazy #ifdef magic to flip the order 
> based on the host platform, such the `?` comes first on windows and `$` comes 
> first on Darwin?
Did someone say tablegen? 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158470/new/

https://reviews.llvm.org/D158470

___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [PATCH] D158470: [lldb] Add support for recognizing swift mangled names

2023-08-22 Thread Adrian Prantl via Phabricator via lldb-commits
aprantl added inline comments.



Comment at: lldb/source/Core/Mangled.cpp:61
 
+  // Swift's older style of mangling used "_T" as a mangling prefix. This can
+  // lead to false positives with other symbols that just so happen to start

fdeazeve wrote:
> JDevlieghere wrote:
> > aprantl wrote:
> > > Feel free to completely ignore this, it's a pointless micro optimization.
> > > 
> > > I'm curious if it would be more efficient to write this as 
> > > 
> > > ```
> > > switch (name[0]) {
> > >   case '?': return Mangled::eManglingSchemeMSVC;
> > >   case '_': 
> > >  switch(name[1]) {
> > > ...
> > >  }
> > >   case '$': 
> > >  switch(name[1]) {
> > > ...
> > >  }
> > > ```
> > > or if it would actually be slower than the chain of "vector" comparisons 
> > > we have right now?
> > I actually started writing a similar comment before discarding it. Even if 
> > this code is as hot as I expect it to be, I don't think it would outweigh 
> > the complexity and the potential for bugs. I really like how you can glance 
> > at the code and see the different mangling schemes and that's the first 
> > thing we'd lose. Anyway happy to be proven wrong too. 
> Honestly the optimizer is pretty good at doing those, look at the IR: 
> https://godbolt.org/z/PY3TeadbM
Sounds good, let's leave it!

Just trolling now: should we use some crazy #ifdef magic to flip the order 
based on the host platform, such the `?` comes first on windows and `$` comes 
first on Darwin?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158470/new/

https://reviews.llvm.org/D158470

___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [PATCH] D158470: [lldb] Add support for recognizing swift mangled names

2023-08-22 Thread Felipe de Azevedo Piovezan via Phabricator via lldb-commits
fdeazeve added inline comments.



Comment at: lldb/source/Core/Mangled.cpp:61
 
+  // Swift's older style of mangling used "_T" as a mangling prefix. This can
+  // lead to false positives with other symbols that just so happen to start

JDevlieghere wrote:
> aprantl wrote:
> > Feel free to completely ignore this, it's a pointless micro optimization.
> > 
> > I'm curious if it would be more efficient to write this as 
> > 
> > ```
> > switch (name[0]) {
> >   case '?': return Mangled::eManglingSchemeMSVC;
> >   case '_': 
> >  switch(name[1]) {
> > ...
> >  }
> >   case '$': 
> >  switch(name[1]) {
> > ...
> >  }
> > ```
> > or if it would actually be slower than the chain of "vector" comparisons we 
> > have right now?
> I actually started writing a similar comment before discarding it. Even if 
> this code is as hot as I expect it to be, I don't think it would outweigh the 
> complexity and the potential for bugs. I really like how you can glance at 
> the code and see the different mangling schemes and that's the first thing 
> we'd lose. Anyway happy to be proven wrong too. 
Honestly the optimizer is pretty good at doing those, look at the IR: 
https://godbolt.org/z/PY3TeadbM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158470/new/

https://reviews.llvm.org/D158470

___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [PATCH] D158470: [lldb] Add support for recognizing swift mangled names

2023-08-22 Thread Jonas Devlieghere via Phabricator via lldb-commits
JDevlieghere added inline comments.



Comment at: lldb/source/Core/Mangled.cpp:61
 
+  // Swift's older style of mangling used "_T" as a mangling prefix. This can
+  // lead to false positives with other symbols that just so happen to start

aprantl wrote:
> Feel free to completely ignore this, it's a pointless micro optimization.
> 
> I'm curious if it would be more efficient to write this as 
> 
> ```
> switch (name[0]) {
>   case '?': return Mangled::eManglingSchemeMSVC;
>   case '_': 
>  switch(name[1]) {
> ...
>  }
>   case '$': 
>  switch(name[1]) {
> ...
>  }
> ```
> or if it would actually be slower than the chain of "vector" comparisons we 
> have right now?
I actually started writing a similar comment before discarding it. Even if this 
code is as hot as I expect it to be, I don't think it would outweigh the 
complexity and the potential for bugs. I really like how you can glance at the 
code and see the different mangling schemes and that's the first thing we'd 
lose. Anyway happy to be proven wrong too. 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158470/new/

https://reviews.llvm.org/D158470

___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [PATCH] D158470: [lldb] Add support for recognizing swift mangled names

2023-08-22 Thread Adrian Prantl via Phabricator via lldb-commits
aprantl added inline comments.



Comment at: lldb/source/Core/Mangled.cpp:61
 
+  // Swift's older style of mangling used "_T" as a mangling prefix. This can
+  // lead to false positives with other symbols that just so happen to start

Feel free to completely ignore this, it's a pointless micro optimization.

I'm curious if it would be more efficient to write this as 

```
switch (name[0]) {
  case '?': return Mangled::eManglingSchemeMSVC;
  case '_': 
 switch(name[1]) {
...
 }
  case '$': 
 switch(name[1]) {
...
 }
```
or if it would actually be slower than the chain of "vector" comparisons we 
have right now?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158470/new/

https://reviews.llvm.org/D158470

___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [PATCH] D158470: [lldb] Add support for recognizing swift mangled names

2023-08-21 Thread Jonas Devlieghere via Phabricator via lldb-commits
JDevlieghere accepted this revision.
JDevlieghere added a comment.
This revision is now accepted and ready to land.

Given support for other languages like Rust and D, I think this is reasonable. 
LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158470/new/

https://reviews.llvm.org/D158470

___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [PATCH] D158470: [lldb] Add support for recognizing swift mangled names

2023-08-21 Thread Alex Langford via Phabricator via lldb-commits
bulbazord created this revision.
bulbazord added reviewers: JDevlieghere, aprantl, mib, jingham, augusto2112, 
kastiglione, fdeazeve.
Herald added a project: All.
bulbazord requested review of this revision.
Herald added a project: LLDB.
Herald added a subscriber: lldb-commits.

Apple maintains a downstream fork of lldb in order to support swift
debugging. Much of that support is isolated to its own plugins, but some
of it is exposed in more generic code. I would like to take some of
the swift support we have downstream and move it upstream to llvm.org in
an effort to 1) reduce downstream maintenance burden, and 2) work
towards solidifying the process of adding new language suppor to llvm.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D158470

Files:
  lldb/include/lldb/Core/Mangled.h
  lldb/source/Core/Mangled.cpp
  lldb/unittests/Core/MangledTest.cpp


Index: lldb/unittests/Core/MangledTest.cpp
===
--- lldb/unittests/Core/MangledTest.cpp
+++ lldb/unittests/Core/MangledTest.cpp
@@ -89,6 +89,24 @@
   EXPECT_STREQ("", the_demangled.GetCString());
 }
 
+TEST(MangledTest, RecognizeSwiftMangledNames) {
+  llvm::StringRef valid_swift_mangled_names[] = {
+  "_TtC4main7MyClass",   // Mangled objc class name
+  "_TtP4main3Foo_",  // Mangld objc protocol name
+  "$s4main3BarCACycfC",  // Mangled name
+  "_$s4main3BarCACycfC", // Mangled name with leading underscore
+  "$S4main3BarCACycfC",  // Older swift mangled name
+  "_$S4main3BarCACycfC", // Older swift mangled name
+ // with leading underscore
+  // Mangled swift filename
+  "@__swiftmacro_4main16FunVariableNames9OptionSetfMm_.swift",
+  };
+
+  for (llvm::StringRef mangled : valid_swift_mangled_names)
+EXPECT_EQ(Mangled::GetManglingScheme(mangled),
+  Mangled::eManglingSchemeSwift);
+}
+
 TEST(MangledTest, BoolConversionOperator) {
   {
 ConstString MangledName("_ZN1a1b1cIiiiEEvm");
Index: lldb/source/Core/Mangled.cpp
===
--- lldb/source/Core/Mangled.cpp
+++ lldb/source/Core/Mangled.cpp
@@ -58,6 +58,24 @@
   if (name.startswith("___Z"))
 return Mangled::eManglingSchemeItanium;
 
+  // Swift's older style of mangling used "_T" as a mangling prefix. This can
+  // lead to false positives with other symbols that just so happen to start
+  // with "_T". To minimize the chance of that happening, we only return true
+  // for select old-style swift mangled names. The known cases are ObjC classes
+  // and protocols. Classes are either prefixed with "_TtC" or "_TtGC".
+  // Protocols are prefixed with "_TtP".
+  if (name.startswith("_TtC") || name.startswith("_TtGC") ||
+  name.startswith("_TtP"))
+return Mangled::eManglingSchemeSwift;
+
+  // Swift 4.2 used "$S" and "_$S".
+  // Swift 5 and onward uses "$s" and "_$s".
+  // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.
+  if (name.startswith("$S") || name.startswith("_$S") ||
+  name.startswith("$s") || name.startswith("_$s") ||
+  name.startswith("@__swiftmacro_"))
+return Mangled::eManglingSchemeSwift;
+
   return Mangled::eManglingSchemeNone;
 }
 
@@ -228,6 +246,7 @@
 
   case eManglingSchemeRustV0:
   case eManglingSchemeD:
+  case eManglingSchemeSwift:
 // Rich demangling scheme is not supported
 return false;
   }
@@ -265,6 +284,10 @@
   case eManglingSchemeD:
 demangled_name = GetDLangDemangledStr(m_mangled);
 break;
+  case eManglingSchemeSwift:
+// Demangling a swift name requires the swift compiler. This is
+// explicitly unsupported on llvm.org.
+break;
   case eManglingSchemeNone:
 llvm_unreachable("eManglingSchemeNone was handled already");
   }
Index: lldb/include/lldb/Core/Mangled.h
===
--- lldb/include/lldb/Core/Mangled.h
+++ lldb/include/lldb/Core/Mangled.h
@@ -43,7 +43,8 @@
 eManglingSchemeMSVC,
 eManglingSchemeItanium,
 eManglingSchemeRustV0,
-eManglingSchemeD
+eManglingSchemeD,
+eManglingSchemeSwift,
   };
 
   /// Default constructor.


Index: lldb/unittests/Core/MangledTest.cpp
===
--- lldb/unittests/Core/MangledTest.cpp
+++ lldb/unittests/Core/MangledTest.cpp
@@ -89,6 +89,24 @@
   EXPECT_STREQ("", the_demangled.GetCString());
 }
 
+TEST(MangledTest, RecognizeSwiftMangledNames) {
+  llvm::StringRef valid_swift_mangled_names[] = {
+  "_TtC4main7MyClass",   // Mangled objc class name
+  "_TtP4main3Foo_",  // Mangld objc protocol name
+  "$s4main3BarCACycfC",  // Mangled name
+  "_$s4main3BarCACycfC", // Mangled name with leading underscore
+  "$S4main3BarCACycfC",  // Older swift mangled name
+  "_$S4main3BarCACycfC", // Older swift mangled name
+