Author: rsmith Date: Fri Sep 7 12:25:39 2018 New Revision: 341700 URL: http://llvm.org/viewvc/llvm-project?rev=341700&view=rev Log: PR38870: Add warning for zero-width unicode characters appearing in identifiers.
Modified: cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td cfe/trunk/lib/Lex/Lexer.cpp cfe/trunk/test/Lexer/unicode.c Modified: cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td?rev=341700&r1=341699&r2=341700&view=diff ============================================================================== --- cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td (original) +++ cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td Fri Sep 7 12:25:39 2018 @@ -122,6 +122,9 @@ def ext_unicode_whitespace : ExtWarn< def warn_utf8_symbol_homoglyph : Warning< "treating Unicode character <U+%0> as identifier character rather than " "as '%1' symbol">, InGroup<DiagGroup<"unicode-homoglyph">>; +def warn_utf8_symbol_zero_width : Warning< + "identifier contains Unicode character <U+%0> that is invisible in " + "some environments">, InGroup<DiagGroup<"unicode-zero-width">>; def err_hex_escape_no_digits : Error< "\\%0 used with no following hex digits">; Modified: cfe/trunk/lib/Lex/Lexer.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Lexer.cpp?rev=341700&r1=341699&r2=341700&view=diff ============================================================================== --- cfe/trunk/lib/Lex/Lexer.cpp (original) +++ cfe/trunk/lib/Lex/Lexer.cpp Fri Sep 7 12:25:39 2018 @@ -1510,8 +1510,17 @@ static void maybeDiagnoseUTF8Homoglyph(D bool operator<(HomoglyphPair R) const { return Character < R.Character; } }; static constexpr HomoglyphPair SortedHomoglyphs[] = { + {U'\u00ad', 0}, // SOFT HYPHEN {U'\u01c3', '!'}, // LATIN LETTER RETROFLEX CLICK {U'\u037e', ';'}, // GREEK QUESTION MARK + {U'\u200b', 0}, // ZERO WIDTH SPACE + {U'\u200c', 0}, // ZERO WIDTH NON-JOINER + {U'\u200d', 0}, // ZERO WIDTH JOINER + {U'\u2060', 0}, // WORD JOINER + {U'\u2061', 0}, // FUNCTION APPLICATION + {U'\u2062', 0}, // INVISIBLE TIMES + {U'\u2063', 0}, // INVISIBLE SEPARATOR + {U'\u2064', 0}, // INVISIBLE PLUS 
{U'\u2212', '-'}, // MINUS SIGN {U'\u2215', '/'}, // DIVISION SLASH {U'\u2216', '\\'}, // SET MINUS @@ -1521,6 +1530,7 @@ static void maybeDiagnoseUTF8Homoglyph(D {U'\u2236', ':'}, // RATIO {U'\u223c', '~'}, // TILDE OPERATOR {U'\ua789', ':'}, // MODIFIER LETTER COLON + {U'\ufeff', 0}, // ZERO WIDTH NO-BREAK SPACE {U'\uff01', '!'}, // FULLWIDTH EXCLAMATION MARK {U'\uff03', '#'}, // FULLWIDTH NUMBER SIGN {U'\uff04', '$'}, // FULLWIDTH DOLLAR SIGN @@ -1560,9 +1570,14 @@ static void maybeDiagnoseUTF8Homoglyph(D llvm::raw_svector_ostream CharOS(CharBuf); llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4); } - const char LooksLikeStr[] = {Homoglyph->LooksLike, 0}; - Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph) - << Range << CharBuf << LooksLikeStr; + if (Homoglyph->LooksLike) { + const char LooksLikeStr[] = {Homoglyph->LooksLike, 0}; + Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph) + << Range << CharBuf << LooksLikeStr; + } else { + Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width) + << Range << CharBuf; + } } } Modified: cfe/trunk/test/Lexer/unicode.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/unicode.c?rev=341700&r1=341699&r2=341700&view=diff ============================================================================== --- cfe/trunk/test/Lexer/unicode.c (original) +++ cfe/trunk/test/Lexer/unicode.c Fri Sep 7 12:25:39 2018 @@ -38,3 +38,10 @@ int n; = 3; // expected-warning {{tre int *n꞉꞉v = &n;; // expected-warning 2{{treating Unicode character <U+A789> as identifier character rather than as ':' symbol}} // expected-warning@-1 {{treating Unicode character <U+037E> as identifier character rather than as ';' symbol}} int v＝［＝］（auto）｛return～x；｝（）; // expected-warning 12{{treating Unicode character}} + +int ⁠x﻿x‍; +// expected-warning@-1 {{identifier contains Unicode character <U+2060> that is invisible in some environments}} +// expected-warning@-2 {{identifier contains Unicode 
character <U+FEFF> that is invisible in some environments}} +// expected-warning@-3 {{identifier contains Unicode character <U+200D> that is invisible in some environments}} +int foo​bar = 0; // expected-warning {{identifier contains Unicode character <U+200B> that is invisible in some environments}} +int x = foobar; // expected-error {{undeclared identifier}} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits