[PATCH] D112913: Misleading bidirectional detection

2022-01-12 Thread serge via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG35cca45b09b8: Misleading bidirectional detection (authored 
by serge-sans-paille).

Changed prior to commit:
  https://reviews.llvm.org/D112913?vs=398195=399273#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

Files:
  clang-tools-extra/clang-tidy/misc/CMakeLists.txt
  clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
  clang-tools-extra/docs/ReleaseNotes.rst
  clang-tools-extra/docs/clang-tidy/checks/list.rst
  clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
  clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp

Index: clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
===
--- /dev/null
+++ clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
@@ -0,0 +1,21 @@
+.. title:: clang-tidy - misc-misleading-bidirectional
+
+misc-misleading-bidirectional
+=
+
+Warn about unterminated bidirectional unicode sequence, detecting potential attack
+as described in the `Trojan Source `_ attack.
+
+Example:
+
+.. code-block:: c++
+
+#include 
+
+int main() {
+bool isAdmin = false;
+/*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
+std::cout << "You are an admin.\n";
+/* end admins only ‮ { ⁦*/
+return 0;
+}
Index: clang-tools-extra/docs/clang-tidy/checks/list.rst
===
--- clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -212,7 +212,8 @@
`llvmlibc-implementation-in-namespace `_,
`llvmlibc-restrict-system-libc-headers `_, "Yes"
`misc-definitions-in-headers `_, "Yes"
-   `misc-misleading-identifier `_,
+   `misc-misleading-bidirectional `_,
+   `misc-misleading-identifier `_,
`misc-misplaced-const `_,
`misc-new-delete-overloads `_,
`misc-no-recursion `_,
Index: clang-tools-extra/docs/ReleaseNotes.rst
===
--- clang-tools-extra/docs/ReleaseNotes.rst
+++ clang-tools-extra/docs/ReleaseNotes.rst
@@ -127,6 +127,10 @@
   Reports identifiers whose names are too short. Currently checks local
   variables and function parameters only.
 
+- New :doc:`misc-misleading-bidirectional ` check.
+
+  Inspects string literal and comments for unterminated bidirectional Unicode
+  characters.
 
 New check aliases
 ^
Index: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
===
--- /dev/null
+++ clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
@@ -0,0 +1,38 @@
+//===--- MisleadingBidirectionalCheck.h - clang-tidy *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+class MisleadingBidirectionalCheck : public ClangTidyCheck {
+public:
+  MisleadingBidirectionalCheck(StringRef Name, ClangTidyContext *Context);
+  ~MisleadingBidirectionalCheck();
+
+  void registerPPCallbacks(const SourceManager , Preprocessor *PP,
+   Preprocessor *ModuleExpanderPP) override;
+
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult ) override;
+
+private:
+  class MisleadingBidirectionalHandler;
+  std::unique_ptr Handler;
+};
+
+} // namespace misc
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
Index: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
===
--- /dev/null
+++ clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
@@ -0,0 +1,139 @@
+//===--- MisleadingBidirectional.cpp - clang-tidy -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//

[PATCH] D112913: Misleading bidirectional detection

2022-01-11 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay accepted this revision.
MaskRay added inline comments.
This revision is now accepted and ready to land.



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:16
+using namespace clang;
+
+static bool containsMisleadingBidi(StringRef Buffer,

Consider `using clang::tidy::misc::MisleadingBidirectionalCheck` to avoid 
specifying the long name repeatedly.



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:43
+  while (CurPtr < Buffer.end()) {
+++i;
+unsigned char C = *CurPtr;

`i` is unused



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:48
+  // Paragraph separator
+  if (C == 0xA || C == 0xD || (0x1C <= C && C <= 0x1E) || C == 0x85)
+BidiContexts.clear();

You may save the conditions to two booleans to avoid repeated 
`BidiContexts.clear();`



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:66
+
+// Open a PDF context
+if (CodePoint == RLO || CodePoint == RLE || CodePoint == LRO ||

A sentence should end with a period.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2022-01-11 Thread serge via Phabricator via cfe-commits
serge-sans-paille added a comment.

In D112913#3233699 , @upsuper wrote:

> I'd like to clarify that what I think is correct now is the algorithm to 
> detect unclosed explicit formatting scopes in a given string.

Thanks for confirming. This check only detects unterminated bidi sequence 
within comments and string literals. Its scope limits to that aspect.

> I haven't been following very closely with the whole spoofing issue, so I 
> can't say that there is no other ways to construct a spoof that this 
> algorithm is not designed to detect.

Agreed. FYI we already have a check for RTL characters ending identfiers, and a 
pending one for confusable identifiers

> As you have found, `RLM`, and `ALM` can be used to confuse code reader, but 
> they are not much different than a string with other strong RTL characters 
> inside, and I don't quite see how that can be linted without hurting 
> potentially legitimate code. Maybe if the compiler supports treating `LRM` as 
> whitespace (I'm not sure whether Clang does), a lint may be added to ask 
> wrapping any string with outermost strong characters being RTL in the form of 
> `{LRM}"string"{LRM}` so that the RTL characters don't affect outside. Other 
> than that, I don't think there is anyway to lint against such a confusion.

I agree that allowing `LRM` is a good step forward, and that's part of the 
official recommendation, but orthogonal to that review.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2022-01-11 Thread Xidorn Quan via Phabricator via cfe-commits
upsuper added a comment.

I'd like to clarify that what I think is correct now is the algorithm to detect 
unclosed explicit formatting scopes in a given string.

I haven't been following very closely with the whole spoofing issue, so I can't 
say that there is no other ways to construct a spoof that this algorithm is not 
designed to detect.

As you have found, `RLM`, and `ALM` can be used to confuse code reader, but 
they are not much different than a string with other strong RTL characters 
inside, and I don't quite see how that can be linted without hurting 
potentially legitimate code. Maybe if the compiler supports treating `LRM` as 
whitespace (I'm not sure whether Clang does), a lint may be added to ask 
wrapping any string with outermost strong characters being RTL in the form of 
`{LRM}"string"{LRM}` so that the RTL characters don't affect outside. Other 
than that, I don't think there is anyway to lint against such a confusion.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2022-01-11 Thread serge via Phabricator via cfe-commits
serge-sans-paille added a comment.

@MaskRay any blocker on that new version now that it recieved a green light 
from @upsuper?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2022-01-07 Thread serge via Phabricator via cfe-commits
serge-sans-paille updated this revision to Diff 398195.
serge-sans-paille added a comment.

rebased


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

Files:
  clang-tools-extra/clang-tidy/misc/CMakeLists.txt
  clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
  clang-tools-extra/docs/ReleaseNotes.rst
  clang-tools-extra/docs/clang-tidy/checks/list.rst
  clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
  clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp

Index: clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
===
--- /dev/null
+++ clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
@@ -0,0 +1,21 @@
+.. title:: clang-tidy - misc-misleading-bidirectional
+
+misc-misleading-bidirectional
+=
+
+Warn about unterminated bidirectional unicode sequence, detecting potential attack
+as described in the `Trojan Source `_ attack.
+
+Example:
+
+.. code-block:: c++
+
+#include 
+
+int main() {
+bool isAdmin = false;
+/*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
+std::cout << "You are an admin.\n";
+/* end admins only ‮ { ⁦*/
+return 0;
+}
Index: clang-tools-extra/docs/clang-tidy/checks/list.rst
===
--- clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -212,7 +212,8 @@
`llvmlibc-implementation-in-namespace `_,
`llvmlibc-restrict-system-libc-headers `_, "Yes"
`misc-definitions-in-headers `_, "Yes"
-   `misc-misleading-identifier `_,
+   `misc-misleading-bidirectional `_,
+   `misc-misleading-identifier `_,
`misc-misplaced-const `_,
`misc-new-delete-overloads `_,
`misc-no-recursion `_,
Index: clang-tools-extra/docs/ReleaseNotes.rst
===
--- clang-tools-extra/docs/ReleaseNotes.rst
+++ clang-tools-extra/docs/ReleaseNotes.rst
@@ -123,6 +123,10 @@
   Reports identifiers whose names are too short. Currently checks local
   variables and function parameters only.
 
+- New :doc:`misc-misleading-bidirectional ` check.
+
+  Inspects string literal and comments for unterminated bidirectional Unicode
+  characters.
 
 New check aliases
 ^
Index: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
===
--- /dev/null
+++ clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
@@ -0,0 +1,38 @@
+//===--- MisleadingBidirectionalCheck.h - clang-tidy *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+class MisleadingBidirectionalCheck : public ClangTidyCheck {
+public:
+  MisleadingBidirectionalCheck(StringRef Name, ClangTidyContext *Context);
+  ~MisleadingBidirectionalCheck();
+
+  void registerPPCallbacks(const SourceManager , Preprocessor *PP,
+   Preprocessor *ModuleExpanderPP) override;
+
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult ) override;
+
+private:
+  class MisleadingBidirectionalHandler;
+  std::unique_ptr Handler;
+};
+
+} // namespace misc
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
Index: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
===
--- /dev/null
+++ clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
@@ -0,0 +1,142 @@
+//===--- MisleadingBidirectional.cpp - clang-tidy -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "MisleadingBidirectional.h"
+
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/Support/ConvertUTF.h"
+
+using namespace 

[PATCH] D112913: Misleading bidirectional detection

2022-01-06 Thread serge via Phabricator via cfe-commits
serge-sans-paille added a comment.

Note that if there's a consensus on it, I can implement LRM character support.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2022-01-06 Thread serge via Phabricator via cfe-commits
serge-sans-paille added a comment.

@aaron.ballman unfortunately I don't know any of those. If I recall correctly 
we found no software in the RedHat collection actually using those control 
characters :-/
My understanding is that we are inline with the document you mention, except 
the fact that 1. We don't allow LRM character in plain text and 2. we do not 
check if a string / comment sequence ends in RTL state, but that actually 
requires another algorithm :-/


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2022-01-06 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added a comment.

Has this been tested against any large code bases that use bidirectional 
characters to see what the false positive rate is? Also, have you seen the 
latest Unicode guidance on this topic: 
https://unicode.org/L2/L2022/22007-avoiding-spoof.pdf to make sure we're 
following along?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2022-01-06 Thread serge via Phabricator via cfe-commits
serge-sans-paille added a comment.

Thanks @upsuper!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2022-01-06 Thread serge via Phabricator via cfe-commits
serge-sans-paille updated this revision to Diff 397885.
serge-sans-paille added a comment.

rebased on main branch


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

Files:
  clang-tools-extra/clang-tidy/misc/CMakeLists.txt
  clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
  clang-tools-extra/docs/ReleaseNotes.rst
  clang-tools-extra/docs/clang-tidy/checks/list.rst
  clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
  clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp

Index: clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
===
--- /dev/null
+++ clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
@@ -0,0 +1,21 @@
+.. title:: clang-tidy - misc-misleading-bidirectional
+
+misc-misleading-bidirectional
+=
+
+Warn about unterminated bidirectional unicode sequence, detecting potential attack
+as described in the `Trojan Source `_ attack.
+
+Example:
+
+.. code-block:: c++
+
+#include 
+
+int main() {
+bool isAdmin = false;
+/*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
+std::cout << "You are an admin.\n";
+/* end admins only ‮ { ⁦*/
+return 0;
+}
Index: clang-tools-extra/docs/clang-tidy/checks/list.rst
===
--- clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -212,7 +212,8 @@
`llvmlibc-implementation-in-namespace `_,
`llvmlibc-restrict-system-libc-headers `_, "Yes"
`misc-definitions-in-headers `_, "Yes"
-   `misc-misleading-identifier `_,
+   `misc-misleading-bidirectional `_,
+   `misc-misleading-identifier `_,
`misc-misplaced-const `_,
`misc-new-delete-overloads `_,
`misc-no-recursion `_,
Index: clang-tools-extra/docs/ReleaseNotes.rst
===
--- clang-tools-extra/docs/ReleaseNotes.rst
+++ clang-tools-extra/docs/ReleaseNotes.rst
@@ -120,6 +120,10 @@
   Reports identifiers whose names are too short. Currently checks local
   variables and function parameters only.
 
+- New :doc:`misc-misleading-bidirectional ` check.
+
+  Inspects string literal and comments for unterminated bidirectional Unicode
+  characters.
 
 New check aliases
 ^
Index: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
===
--- /dev/null
+++ clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
@@ -0,0 +1,38 @@
+//===--- MisleadingBidirectionalCheck.h - clang-tidy *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+class MisleadingBidirectionalCheck : public ClangTidyCheck {
+public:
+  MisleadingBidirectionalCheck(StringRef Name, ClangTidyContext *Context);
+  ~MisleadingBidirectionalCheck();
+
+  void registerPPCallbacks(const SourceManager , Preprocessor *PP,
+   Preprocessor *ModuleExpanderPP) override;
+
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult ) override;
+
+private:
+  class MisleadingBidirectionalHandler;
+  std::unique_ptr Handler;
+};
+
+} // namespace misc
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
Index: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
===
--- /dev/null
+++ clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
@@ -0,0 +1,142 @@
+//===--- MisleadingBidirectional.cpp - clang-tidy -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "MisleadingBidirectional.h"
+
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/Support/ConvertUTF.h"
+

[PATCH] D112913: Misleading bidirectional detection

2022-01-06 Thread Xidorn Quan via Phabricator via cfe-commits
upsuper added a comment.

I think the core algorithm looks correct now. I'll leave the code review to 
LLVM reviewers. Thanks.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2022-01-06 Thread serge via Phabricator via cfe-commits
serge-sans-paille updated this revision to Diff 397813.
serge-sans-paille added a comment.

Fix some parts of the bidi algorithm, and add extra test cases


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

Files:
  clang-tools-extra/clang-tidy/misc/CMakeLists.txt
  clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
  clang-tools-extra/docs/ReleaseNotes.rst
  clang-tools-extra/docs/clang-tidy/checks/list.rst
  clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
  clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp

Index: clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
===
--- /dev/null
+++ clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
@@ -0,0 +1,21 @@
+.. title:: clang-tidy - misc-misleading-bidirectional
+
+misc-misleading-bidirectional
+=
+
+Warn about unterminated bidirectional unicode sequence, detecting potential attack
+as described in the `Trojan Source `_ attack.
+
+Example:
+
+.. code-block:: c++
+
+#include 
+
+int main() {
+bool isAdmin = false;
+/*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
+std::cout << "You are an admin.\n";
+/* end admins only ‮ { ⁦*/
+return 0;
+}
Index: clang-tools-extra/docs/clang-tidy/checks/list.rst
===
--- clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -213,6 +213,7 @@
`llvmlibc-implementation-in-namespace `_,
`llvmlibc-restrict-system-libc-headers `_, "Yes"
`misc-definitions-in-headers `_, "Yes"
+   `misc-misleading-bidirectional `_,
`misc-misleading-identifier `_,
`misc-misplaced-const `_,
`misc-new-delete-overloads `_,
Index: clang-tools-extra/docs/ReleaseNotes.rst
===
--- clang-tools-extra/docs/ReleaseNotes.rst
+++ clang-tools-extra/docs/ReleaseNotes.rst
@@ -120,6 +120,10 @@
   Reports identifiers whose names are too short. Currently checks local
   variables and function parameters only.
 
+- New :doc:`misc-misleading-bidirectional ` check.
+
+  Inspects string literal and comments for unterminated bidirectional Unicode
+  characters.
 
 New check aliases
 ^
Index: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
===
--- /dev/null
+++ clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
@@ -0,0 +1,38 @@
+//===--- MisleadingBidirectionalCheck.h - clang-tidy *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+class MisleadingBidirectionalCheck : public ClangTidyCheck {
+public:
+  MisleadingBidirectionalCheck(StringRef Name, ClangTidyContext *Context);
+  ~MisleadingBidirectionalCheck();
+
+  void registerPPCallbacks(const SourceManager , Preprocessor *PP,
+   Preprocessor *ModuleExpanderPP) override;
+
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult ) override;
+
+private:
+  class MisleadingBidirectionalHandler;
+  std::unique_ptr Handler;
+};
+
+} // namespace misc
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
Index: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
===
--- /dev/null
+++ clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
@@ -0,0 +1,142 @@
+//===--- MisleadingBidirectional.cpp - clang-tidy -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "MisleadingBidirectional.h"
+
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/Support/ConvertUTF.h"
+
+using namespace clang;
+

[PATCH] D112913: Misleading bidirectional detection

2022-01-05 Thread serge via Phabricator via cfe-commits
serge-sans-paille added a comment.

@upsuper I've not added extra test cases yet, but does it looks it's heading in 
the right direction?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2022-01-05 Thread serge via Phabricator via cfe-commits
serge-sans-paille updated this revision to Diff 397597.
serge-sans-paille added a comment.

Update bidi algorithm


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

Files:
  clang-tools-extra/clang-tidy/misc/CMakeLists.txt
  clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
  clang-tools-extra/docs/ReleaseNotes.rst
  clang-tools-extra/docs/clang-tidy/checks/list.rst
  clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
  clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp

Index: clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp
===
--- /dev/null
+++ clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp
@@ -0,0 +1,31 @@
+// RUN: %check_clang_tidy %s misc-misleading-bidirectional %t
+
+void func(void) {
+  int admin = 0;
+  /*‮ }⁦if(admin)⁩ ⁦ begin*/
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: comment contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+  const char msg[] = "‮⁦if(admin)⁩ ⁦tes";
+  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: string literal contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+}
+
+void all_fine(void) {
+  char valid[] = "some‮valid‬sequence";
+  /* EOL ends bidi‮ sequence
+   * end it's fine to do so.
+   * EOL ends ⁧isolate too
+   */
+}
+
+int invalid_utf_8(void) {
+  bool isAdmin = false;
+
+  // the comment below contains an invalid utf8 character, but should still be
+  // processed.
+
+  // CHECK-MESSAGES: :[[@LINE+1]]:3: warning: comment contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+  /*€‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
+  return 1;
+  /* end admins only ‮ { ⁦*/
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: comment contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+  return 0;
+}
Index: clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
===
--- /dev/null
+++ clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
@@ -0,0 +1,21 @@
+.. title:: clang-tidy - misc-misleading-bidirectional
+
+misc-misleading-bidirectional
+=
+
+Warn about unterminated bidirectional unicode sequence, detecting potential attack
+as described in the `Trojan Source `_ attack.
+
+Example:
+
+.. code-block:: c++
+
+#include 
+
+int main() {
+bool isAdmin = false;
+/*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
+std::cout << "You are an admin.\n";
+/* end admins only ‮ { ⁦*/
+return 0;
+}
Index: clang-tools-extra/docs/clang-tidy/checks/list.rst
===
--- clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -213,6 +213,7 @@
`llvmlibc-implementation-in-namespace `_,
`llvmlibc-restrict-system-libc-headers `_, "Yes"
`misc-definitions-in-headers `_, "Yes"
+   `misc-misleading-bidirectional `_,
`misc-misleading-identifier `_,
`misc-misplaced-const `_,
`misc-new-delete-overloads `_,
Index: clang-tools-extra/docs/ReleaseNotes.rst
===
--- clang-tools-extra/docs/ReleaseNotes.rst
+++ clang-tools-extra/docs/ReleaseNotes.rst
@@ -120,6 +120,10 @@
   Reports identifiers whose names are too short. Currently checks local
   variables and function parameters only.
 
+- New :doc:`misc-misleading-bidirectional ` check.
+
+  Inspects string literal and comments for unterminated bidirectional Unicode
+  characters.
 
 New check aliases
 ^
Index: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
===
--- /dev/null
+++ clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
@@ -0,0 +1,38 @@
+//===--- MisleadingBidirectionalCheck.h - clang-tidy *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+class MisleadingBidirectionalCheck : public ClangTidyCheck {
+public:

[PATCH] D112913: Misleading bidirectional detection

2021-11-24 Thread Xidorn Quan via Phabricator via cfe-commits
upsuper added a comment.

I'm not familiar with LLVM / Clang codebase. I was asked by @MaskRay to help 
review the bidi-related part of this patch.

Generally I believe the algorithm here is too naive that it produces both false 
positives and false negatives. I'm not sure how important those edge cases are 
considered, but anyway... details comments below:




Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:48-49
+  // Line break: https://www.unicode.org/reports/tr14/tr14-32.html
+  if (C == '\n' || C == '\r' || C == '\f' || C == '\v' ||
+  C == 0x85 /*next line*/)
+EmbeddingOverride = Isolate = 0;

UAX 14 is probably not the right document to look here.

According to UAX 9 step [[ https://www.unicode.org/reports/tr9/#L1 | L1 ]], the 
embedding level is reset to the paragraph embedding level when hitting segment 
separator and paragraph separator, which are defined in [[ 
https://www.unicode.org/reports/tr9/#Table_Bidirectional_Character_Types | 
table 4 ]] as type B and S, and in UCD [[ 
https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedBidiClass.txt | 
DerivedBidiClass.txt ]] you can see type B includes U+000A, U+000D, 
U+001C..001E, U+0085, U+2029, and type S includes U+0009, U+000B, and U+001F.

You have U+000C and U+2028 here which are counted as type WS which doesn't 
affect embedding level, so you may be resetting the counter prematurely. And 
you may want to reset the counter for all other characters above.



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:64-72
+if (CodePoint == RLO || CodePoint == RLE || CodePoint == LRO ||
+CodePoint == LRE)
+  EmbeddingOverride += 1;
+else if (CodePoint == PDF)
+  EmbeddingOverride = std::min(EmbeddingOverride - 1, EmbeddingOverride);
+else if (CodePoint == RLI || CodePoint == LRI || CodePoint == FSI)
+  Isolate += 1;

The bidi algorithm is more complicated than having two simple counters. 
Basically, a `PDF` cancels a override / embedding character only when they 
match, and `PDI` cancels all override / embedding between it and its matching 
isolate character.

I was thinking whether the counters would be enough in the sense that we may 
accept some false positive for edge cases but definitely no false negative. But 
I'm convinced it's not the case. As an example, a sequence of `RLO LRI PDF PDI` 
will yield no embedding and no isolate in this counting model, but the `PDF` 
here actually has no effect, as shown in step [[ 
https://www.unicode.org/reports/tr9/#X7 | X7 ]], if it does not match an 
embedding initiator, it is ignored, so this is effectively leaving a dangling 
`RLO`.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2021-11-23 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay added inline comments.



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:49
+  if (C == '\n' || C == '\r' || C == '\f' || C == '\v' ||
+  C == 0x85 /*next line*/)
+EmbeddingOverride = Isolate = 0;

`/*next line=*/0x85` is more common



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:66
+CodePoint == LRE)
+  EmbeddingOverride += 1;
+else if (CodePoint == PDF)





Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:68
+else if (CodePoint == PDF)
+  EmbeddingOverride = std::min(EmbeddingOverride - 1, EmbeddingOverride);
+else if (CodePoint == RLI || CodePoint == LRI || CodePoint == FSI)

More common style is `A = A ? A - 1 : 0;` which avoids unsigned wraparound.

That said, if the state is currently 0, should an attempt to decrease it be 
reported as an error as well?



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:70
+else if (CodePoint == RLI || CodePoint == LRI || CodePoint == FSI)
+  Isolate += 1;
+else if (CodePoint == PDI)

ditto



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:128
+
+void clang::tidy::misc::MisleadingBidirectionalCheck::registerMatchers(
+ast_matchers::MatchFinder *Finder) {

If you `using` MisleadingBidirectionalCheck (or `clang::tidy::misc`), then you 
can use

`void MisleadingBidirectionalCheck::registerMatchers`



Comment at: 
clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp:6
+  /*‮ }⁦if(admin)⁩ ⁦ begin*/
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: comment contains misleading 
bidirectional Unicode characters [misc-misleading-bidirectional]
+  const char msg[] = "‮⁦if(admin)⁩ ⁦tes";

`[[@LINE-1]]` is a deprecated FileCheck feature.

Use `[[#@LINE-1]]`


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2021-11-22 Thread serge via Phabricator via cfe-commits
serge-sans-paille added a comment.

Gentle ping @MaskRay and/or @rsmith


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2021-11-19 Thread serge via Phabricator via cfe-commits
serge-sans-paille updated this revision to Diff 388425.
serge-sans-paille added a comment.

Nits


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

Files:
  clang-tools-extra/clang-tidy/misc/CMakeLists.txt
  clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
  clang-tools-extra/docs/ReleaseNotes.rst
  clang-tools-extra/docs/clang-tidy/checks/list.rst
  clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
  clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp

Index: clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp
===
--- /dev/null
+++ clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp
@@ -0,0 +1,31 @@
+// RUN: %check_clang_tidy %s misc-misleading-bidirectional %t
+
+void func(void) {
+  int admin = 0;
+  /*‮ }⁦if(admin)⁩ ⁦ begin*/
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: comment contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+  const char msg[] = "‮⁦if(admin)⁩ ⁦tes";
+  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: string literal contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+}
+
+void all_fine(void) {
+  char valid[] = "some‮valid‬sequence";
+  /* EOL ends bidi‮ sequence
+   * end it's fine to do so.
+   * EOL ends ⁧isolate too
+   */
+}
+
+int invalid_utf_8(void) {
+  bool isAdmin = false;
+
+  // the comment below contains an invalid utf8 character, but should still be
+  // processed.
+
+  // CHECK-MESSAGES: :[[@LINE+1]]:5: warning: comment contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+  /*€‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
+  return 1;
+  /* end admins only ‮ { ⁦*/
+  // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: comment contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+  return 0;
+}
Index: clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
===
--- /dev/null
+++ clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
@@ -0,0 +1,21 @@
+.. title:: clang-tidy - misc-misleading-bidirectional
+
+misc-misleading-bidirectional
+=
+
+Warn about unterminated bidirectional unicode sequence, detecting potential attack
+as described in the `Trojan Source `_ attack.
+
+Example:
+
+.. code-block:: c++
+
+#include 
+
+int main() {
+bool isAdmin = false;
+/*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
+std::cout << "You are an admin.\n";
+/* end admins only ‮ { ⁦*/
+return 0;
+}
Index: clang-tools-extra/docs/clang-tidy/checks/list.rst
===
--- clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -212,6 +212,7 @@
`llvmlibc-implementation-in-namespace `_,
`llvmlibc-restrict-system-libc-headers `_, "Yes"
`misc-definitions-in-headers `_, "Yes"
+   `misc-misleading-bidirectional `_,
`misc-misleading-identifier `_,
`misc-misplaced-const `_,
`misc-new-delete-overloads `_,
Index: clang-tools-extra/docs/ReleaseNotes.rst
===
--- clang-tools-extra/docs/ReleaseNotes.rst
+++ clang-tools-extra/docs/ReleaseNotes.rst
@@ -111,6 +111,10 @@
   Reports identifiers whose names are too short. Currently checks local
   variables and function parameters only.
 
+- New :doc:`misc-misleading-bidirectional ` check.
+
+  Inspects string literal and comments for unterminated bidirectional Unicode
+  characters.
 
 New check aliases
 ^
Index: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
===
--- /dev/null
+++ clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
@@ -0,0 +1,38 @@
+//===--- MisleadingBidirectionalCheck.h - clang-tidy *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+class MisleadingBidirectionalCheck : public ClangTidyCheck {
+public:
+  

[PATCH] D112913: Misleading bidirectional detection

2021-11-19 Thread serge via Phabricator via cfe-commits
serge-sans-paille added a comment.

Patch rebased on main, all comments addressed. Looks good?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2021-11-19 Thread serge via Phabricator via cfe-commits
serge-sans-paille updated this revision to Diff 388424.
serge-sans-paille added a comment.

Rebase on main branch


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

Files:
  clang-tools-extra/clang-tidy/misc/CMakeLists.txt
  clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
  clang-tools-extra/docs/ReleaseNotes.rst
  clang-tools-extra/docs/clang-tidy/checks/list.rst
  clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
  clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp

Index: clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp
===
--- /dev/null
+++ clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp
@@ -0,0 +1,31 @@
+// RUN: %check_clang_tidy %s misc-misleading-bidirectional %t
+
+void func(void) {
+  int admin = 0;
+  /*‮ }⁦if(admin)⁩ ⁦ begin*/
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: comment contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+  const char msg[] = "‮⁦if(admin)⁩ ⁦tes";
+  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: string literal contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+}
+
+void all_fine(void) {
+  char valid[] = "some‮valid‬sequence";
+  /* EOL ends bidi‮ sequence
+   * end it's fine to do so.
+   * EOL ends ⁧isolate too
+   */
+}
+
+int invalid_utf_8(void) {
+  bool isAdmin = false;
+
+  // the comment below contains an invalid utf8 character, but should still be
+  // processed.
+
+  // CHECK-MESSAGES: :[[@LINE+1]]:5: warning: comment contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+  /*€‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
+  return 1;
+  /* end admins only ‮ { ⁦*/
+  // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: comment contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+  return 0;
+}
Index: clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
===
--- /dev/null
+++ clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
@@ -0,0 +1,21 @@
+.. title:: clang-tidy - misc-misleading-bidirectional
+
+misc-misleading-bidirectional
+=
+
+Warn about unterminated bidirectional unicode sequence, detecting potential attack
+as described in the `Trojan Source `_ attack.
+
+Example:
+
+.. code-block:: c++
+
+#include 
+
+int main() {
+bool isAdmin = false;
+/*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
+std::cout << "You are an admin.\n";
+/* end admins only ‮ { ⁦*/
+return 0;
+}
Index: clang-tools-extra/docs/clang-tidy/checks/list.rst
===
--- clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -212,6 +212,7 @@
`llvmlibc-implementation-in-namespace `_,
`llvmlibc-restrict-system-libc-headers `_, "Yes"
`misc-definitions-in-headers `_, "Yes"
+   `misc-misleading-bidirectional `_,
`misc-misleading-identifier `_,
`misc-misplaced-const `_,
`misc-new-delete-overloads `_,
Index: clang-tools-extra/docs/ReleaseNotes.rst
===
--- clang-tools-extra/docs/ReleaseNotes.rst
+++ clang-tools-extra/docs/ReleaseNotes.rst
@@ -111,6 +111,10 @@
   Reports identifiers whose names are too short. Currently checks local
   variables and function parameters only.
 
+- New :doc:`misc-misleading-bidirectional ` check.
+
+  Inspect string literal and comments for unterminated bidirectional Unicode
+  characters.
 
 New check aliases
 ^
Index: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
===
--- /dev/null
+++ clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
@@ -0,0 +1,38 @@
+//===--- MisleadingBidirectionalCheck.h - clang-tidy *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+class MisleadingBidirectionalCheck : public ClangTidyCheck {
+public:
+ 

[PATCH] D112913: Misleading bidirectional detection

2021-11-05 Thread Carlos Galvez via Phabricator via cfe-commits
carlosgalvezp added inline comments.



Comment at: clang-tools-extra/docs/ReleaseNotes.rst:105
+
+  Inspect string literal and comments for unterminated bidirectional Unicode
+  characters.

Nit: Inspects


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2021-11-03 Thread serge via Phabricator via cfe-commits
serge-sans-paille added inline comments.



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:59
+// If conversion fails, utf-8 is designed so that we can just try next 
char.
+if (Result != llvm::conversionOK) {
+  ++CurPtr;

rsmith wrote:
> Is there a guarantee that `convertUTF8Sequence` doesn't update `CurPtr` on 
> error? I'm concerned we might increment *past* the end in the case where 
> `CurPtr` points to the end, below, which would at least formally be UB.
According to the doc "If the conversion succeeds, this pointer will be updated 
to point to the byte just past the end of the converted sequence". My 
understanding of the implementation confirms that statements, and it's used in 
a similar manner in clang Lexer.



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:60-62
+// If conversion fails, we stop the analysis because we don't know how many
+// characters should be skipped otherwise. That's an obvious way to bypass
+// the check.

rsmith wrote:
> Can we scan forward looking for the next non-continuation byte? (Skip while 
> `c & 0b1100_ == 0b1000_`)
I'm not quite sure. There's a risk we end up starting over from the second part 
of a unicode character, and that could mess up with the decoding. I'll 
investigate how it's done in other libraries.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2021-11-02 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith added inline comments.



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:59
+// If conversion fails, utf-8 is designed so that we can just try next 
char.
+if (Result != llvm::conversionOK) {
+  ++CurPtr;

Is there a guarantee that `convertUTF8Sequence` doesn't update `CurPtr` on 
error? I'm concerned we might increment *past* the end in the case where 
`CurPtr` points to the end, below, which would at least formally be UB.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2021-11-02 Thread serge via Phabricator via cfe-commits
serge-sans-paille updated this revision to Diff 384111.
serge-sans-paille added a comment.

- recover from failed utf8 decoding
- doc and release note updated
- clang-formatting
- more examples / testing


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

Files:
  clang-tools-extra/clang-tidy/misc/CMakeLists.txt
  clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
  clang-tools-extra/docs/ReleaseNotes.rst
  clang-tools-extra/docs/clang-tidy/checks/list.rst
  clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
  clang-tools-extra/test/clang-tidy/check_clang_tidy.py
  clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp

Index: clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp
===
--- /dev/null
+++ clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp
@@ -0,0 +1,31 @@
+// RUN: %check_clang_tidy %s misc-misleading-bidirectional %t
+
+void func(void) {
+  int admin = 0;
+  /*‮ }⁦if(admin)⁩ ⁦ begin*/
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: comment contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+  const char msg[] = "‮⁦if(admin)⁩ ⁦tes";
+  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: string literal contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+}
+
+void all_fine(void) {
+  char valid[] = "some‮valid‬sequence";
+  /* EOL ends bidi‮ sequence
+   * end it's fine to do so.
+   * EOL ends ⁧isolate too
+   */
+}
+
+int invalid_utf_8(void) {
+  bool isAdmin = false;
+
+  // the comment below contains an invalid utf8 character, but should still be
+  // processed.
+
+  // CHECK-MESSAGES: :[[@LINE+1]]:5: warning: comment contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+  /*€‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
+  return 1;
+  /* end admins only ‮ { ⁦*/
+  // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: comment contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+  return 0;
+}
Index: clang-tools-extra/test/clang-tidy/check_clang_tidy.py
===
--- clang-tools-extra/test/clang-tidy/check_clang_tidy.py
+++ clang-tools-extra/test/clang-tidy/check_clang_tidy.py
@@ -34,7 +34,7 @@
 
 
 def write_file(file_name, text):
-  with open(file_name, 'w') as f:
+  with open(file_name, 'w', encoding='utf-8') as f:
 f.write(text)
 f.truncate()
 
@@ -82,7 +82,7 @@
   if resource_dir is not None:
 clang_extra_args.append('-resource-dir=%s' % resource_dir)
 
-  with open(input_file_name, 'r') as input_file:
+  with open(input_file_name, 'r', encoding="utf-8") as input_file:
 input_text = input_file.read()
 
   check_fixes_prefixes = []
Index: clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
===
--- /dev/null
+++ clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
@@ -0,0 +1,21 @@
+.. title:: clang-tidy - misc-misleading-bidirectional
+
+misc-misleading-bidirectional
+=
+
+Warn about unterminated bidirectional unicode sequence, detecting potential attack
+as described in the `Trojan Source `_ attack.
+
+Example:
+
+.. code-block:: c++
+
+#include 
+
+int main() {
+bool isAdmin = false;
+/*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
+std::cout << "You are an admin.\n";
+/* end admins only ‮ { ⁦*/
+return 0;
+}
Index: clang-tools-extra/docs/clang-tidy/checks/list.rst
===
--- clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -211,6 +211,7 @@
`llvmlibc-implementation-in-namespace `_,
`llvmlibc-restrict-system-libc-headers `_, "Yes"
`misc-definitions-in-headers `_, "Yes"
+   `misc-misleading-bidirectional `_,
`misc-misplaced-const `_,
`misc-new-delete-overloads `_,
`misc-no-recursion `_,
Index: clang-tools-extra/docs/ReleaseNotes.rst
===
--- clang-tools-extra/docs/ReleaseNotes.rst
+++ clang-tools-extra/docs/ReleaseNotes.rst
@@ -100,6 +100,11 @@
   Finds cases where code could use ``data()`` rather than the address of the
   element at index 0 in a container.
 
+- New :doc:`misc-misleading-bidirectional ` check.
+
+  Inspect string literal and comments for unterminated bidirectional Unicode
+  characters.
+
 New check aliases
 ^
 
Index: 

[PATCH] D112913: Misleading bidirectional detection

2021-11-01 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay added inline comments.



Comment at: 
clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp:1
+// RUN: %check_clang_tidy %s misc-misleading-bidirectional %t
+

The test misses many interesting cases.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2021-11-01 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay requested changes to this revision.
MaskRay added inline comments.



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:19
+
+static bool ContainsMisleadingBidi(StringRef Buffer, bool 
HonorLineBreaks=true) {
+  const char* CurPtr = Buffer.begin();

`functionName`



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:19
+
+static bool ContainsMisleadingBidi(StringRef Buffer, bool 
HonorLineBreaks=true) {
+  const char* CurPtr = Buffer.begin();

MaskRay wrote:
> `functionName`
clang-format `bool HonorLineBreaks=true`



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:20
+static bool ContainsMisleadingBidi(StringRef Buffer, bool 
HonorLineBreaks=true) {
+  const char* CurPtr = Buffer.begin();
+  unsigned EmbeddingOverride = 0, Isolate = 0;





Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:47
+unsigned char C = *CurPtr;
+if(isASCII(C)) {
+  ++CurPtr;





Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:51
+  if(C == '\n' || C == '\r' || C == '\f' || C == '\v' || C == 0x85 /*next 
line*/) {
+EmbeddingOverride = Isolate = 0;
+  }

no brace



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:98
+private:
+
+  MisleadingBidirectionalCheck 

delete blank line



Comment at: clang-tools-extra/test/clang-tidy/check_clang_tidy.py:85
 
-  with open(input_file_name, 'r') as input_file:
+  with open(input_file_name, 'r', encoding="utf-8") as input_file:
 input_text = input_file.read()

Prefer single quotes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2021-11-01 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay added a comment.

> Context not available.

If you don't use `arc diff 'HEAD^'` to upload a Diff, please use -U9 
https://llvm.org/docs/Phabricator.html#requesting-a-review-via-the-web-interface




Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:15
+
+namespace clang {
+namespace tidy {

See 
https://llvm.org/docs/CodingStandards.html#use-namespace-qualifiers-to-implement-previously-declared-functions


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2021-11-01 Thread Carlos Galvez via Phabricator via cfe-commits
carlosgalvezp added inline comments.



Comment at: clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp:61
+CheckFactories.registerCheck(
+"misc-misleading-bidirectional");
   }

Nit: please keep alphabetical order in the list of jobs.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2021-11-01 Thread Carlos Galvez via Phabricator via cfe-commits
carlosgalvezp requested changes to this revision.
carlosgalvezp added a comment.
This revision now requires changes to proceed.

Nice addition! Please add this check to the documentation (list of available 
checks + individual page with the documentation for this check), plus mention 
in the clang-tidy release notes. The same applies to the other related patches.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2021-11-01 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith accepted this revision.
rsmith added a comment.
This revision is now accepted and ready to land.

Nits and a suggested approach for invalid code sequences that's probably 
important to handle better. Please fix the clang-tidy findings too. Otherwise, 
LGTM.




Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:38
+
+  /* Scan each character while maintaining a count of opened bidi context.
+   * RLO/RLE/LRO/LRE all are closed by PDF while RLI LRI and FSI are closed by

Nit: please use `//`  comments per our normal coding style.



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:49
+  ++CurPtr;
+  // line break: https://www.unicode.org/reports/tr14/tr14-32.html
+  if(C == '\n' || C == '\r' || C == '\f' || C == '\v' || C == 0x85 /*next 
line*/) {





Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:60-62
+// If conversion fails, we stop the analysis because we don't know how many
+// characters should be skipped otherwise. That's an obvious way to bypass
+// the check.

Can we scan forward looking for the next non-continuation byte? (Skip while `c 
& 0b1100_ == 0b1000_`)



Comment at: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp:74
+  Isolate = std::min(Isolate - 1, Isolate);
+// line break: https://www.unicode.org/reports/tr14/tr14-32.html
+else if (CodePoint == LS || CodePoint == PS)




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112913/new/

https://reviews.llvm.org/D112913

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112913: Misleading bidirectional detection

2021-11-01 Thread serge via Phabricator via cfe-commits
serge-sans-paille created this revision.
Herald added subscribers: carlosgalvezp, mgorny.
serge-sans-paille requested review of this revision.
Herald added a project: clang-tools-extra.
Herald added a subscriber: cfe-commits.

This patch implements detection of incomplete bidirectional sequence withing 
comments and string literals within clang-tidy.

It detects the bidi part of https://www.trojansource.codes/trojan-source.pdf


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D112913

Files:
  clang-tools-extra/clang-tidy/misc/CMakeLists.txt
  clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
  clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
  clang-tools-extra/test/clang-tidy/check_clang_tidy.py
  clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp

Index: clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp
===
--- /dev/null
+++ clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp
@@ -0,0 +1,9 @@
+// RUN: %check_clang_tidy %s misc-misleading-bidirectional %t
+
+void func(void) {
+  int admin = 0;
+  /*‮ }⁦if(admin)⁩ ⁦ begin*/
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: comment contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+  const char msg[] = "‮⁦if(admin)⁩ ⁦tes";
+  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: string literal contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+}
Index: clang-tools-extra/test/clang-tidy/check_clang_tidy.py
===
--- clang-tools-extra/test/clang-tidy/check_clang_tidy.py
+++ clang-tools-extra/test/clang-tidy/check_clang_tidy.py
@@ -34,7 +34,7 @@
 
 
 def write_file(file_name, text):
-  with open(file_name, 'w') as f:
+  with open(file_name, 'w', encoding="utf-8") as f:
 f.write(text)
 f.truncate()
 
@@ -82,7 +82,7 @@
   if resource_dir is not None:
 clang_extra_args.append('-resource-dir=%s' % resource_dir)
 
-  with open(input_file_name, 'r') as input_file:
+  with open(input_file_name, 'r', encoding="utf-8") as input_file:
 input_text = input_file.read()
 
   check_fixes_prefixes = []
Index: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
===
--- /dev/null
+++ clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
@@ -0,0 +1,38 @@
+//===--- MisleadingBidirectionalCheck.h - clang-tidy *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+class MisleadingBidirectionalCheck : public ClangTidyCheck {
+public:
+  MisleadingBidirectionalCheck(StringRef Name, ClangTidyContext *Context);
+  ~MisleadingBidirectionalCheck();
+
+  void registerPPCallbacks(const SourceManager , Preprocessor *PP,
+   Preprocessor *ModuleExpanderPP) override;
+
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult ) override;
+
+private:
+  class MisleadingBidirectionalHandler;
+  std::unique_ptr Handler;
+};
+
+} // namespace misc
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
Index: clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
===
--- /dev/null
+++ clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
@@ -0,0 +1,132 @@
+//===--- MisleadingBidirectional.cpp - clang-tidy ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "MisleadingBidirectional.h"
+
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/Support/ConvertUTF.h"
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+static bool ContainsMisleadingBidi(StringRef Buffer, bool HonorLineBreaks=true) {
+  const char* CurPtr = Buffer.begin();
+  unsigned EmbeddingOverride = 0, Isolate = 0;
+  unsigned i = 0;
+
+  enum {
+LS = 0x2028,
+PS =