On Wed, Jan 18, 2012 at 11:35 AM, Seth Cantrell <[email protected]> wrote: > On Jan 18, 2012, at 10:37 AM, Nico Weber <[email protected]> wrote: > >> On Wed, Jan 18, 2012 at 4:27 AM, Seth Cantrell <[email protected]> >> wrote: >>> Author: socantre >>> Date: Wed Jan 18 06:27:10 2012 >>> New Revision: 148392 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=148392&view=rev >>> Log: >>> Add and update tests for character literals >>> >>> Added: >>> cfe/trunk/test/Lexer/char-literal-encoding-error.c >>> cfe/trunk/test/Lexer/char-literal.cpp >>> Modified: >>> cfe/trunk/test/CodeGen/char-literal.c >>> cfe/trunk/test/CodeGen/string-literal-short-wstring.c >>> cfe/trunk/test/Lexer/constants.c >>> cfe/trunk/test/Lexer/utf8-char-literal.cpp >>> cfe/trunk/test/Lexer/wchar.c >>> >>> Modified: cfe/trunk/test/CodeGen/char-literal.c >>> URL: >>> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/char-literal.c?rev=148392&r1=148391&r2=148392&view=diff >>> ============================================================================== >>> --- cfe/trunk/test/CodeGen/char-literal.c (original) >>> +++ cfe/trunk/test/CodeGen/char-literal.c Wed Jan 18 06:27:10 2012 >>> @@ -9,11 +9,26 @@ >>> // CHECK-CPP0X: store i8 97 >>> char a = 'a'; >>> >>> - // Should pick second character. >>> + // Should truncate value (equal to last character). >>> // CHECK-C: store i8 98 >>> // CHECK-CPP0X: store i8 98 >>> char b = 'ab'; >>> >>> + // Should get concatonated characters >> >> typo 'concatonated' > > Thanks. I've thought of some tests I should add. I'll fix this typo > when I add them. 
> >> >>> + // CHECK-C: store i32 24930 >>> + // CHECK-CPP0X: store i32 24930 >>> + int b1 = 'ab'; >>> + >>> + // Should get concatonated characters >>> + // CHECK-C: store i32 808464432 >>> + // CHECK-CPP0X: store i32 808464432 >>> + int b2 = '0000'; >>> + >>> + // Should get truncated value (last four characters concatonated) >>> + // CHECK-C: store i32 1919512167 >>> + // CHECK-CPP0X: store i32 1919512167 >>> + int b3 = 'somesillylongstring'; >>> + >>> // CHECK-C: store i32 97 >>> // CHECK-CPP0X: store i32 97 >>> wchar_t wa = L'a'; >>> @@ -27,26 +42,11 @@ >>> // CHECK-CPP0X: store i16 97 >>> char16_t ua = u'a'; >>> >>> - // Should pick second character. >>> - // CHECK-CPP0X: store i16 98 >>> - char16_t ub = u'ab'; >>> - >>> // CHECK-CPP0X: store i32 97 >>> char32_t Ua = U'a'; >>> >>> - // Should pick second character. >>> - // CHECK-CPP0X: store i32 98 >>> - char32_t Ub = U'ab'; >>> #endif >>> >>> - // Should pick last character and store its lowest byte. >>> - // This does not match gcc, which takes the last character, converts it >>> to >>> - // utf8, and then picks the second-lowest byte of that (they probably >>> store >>> - // the utf8 in uint16_ts internally and take the lower byte of that). >>> - // CHECK-C: store i8 48 >>> - // CHECK-CPP0X: store i8 48 >>> - char c = '\u1120\u0220\U00102030'; >>> - >> >> You're removing these codegen tests but aren't replacing them with >> anything that tests this input. Is this intentional? > > Yes, this input is no longer valid. Unicode character literals > prohibit multiple characters. There are tests for this error in > Lexer/char-literal.cpp. >
Also we're prohibiting characters when their value is too large for the type of character literal. >> >>> // CHECK-C: store i32 61451 >>> // CHECK-CPP0X: store i32 61451 >>> wchar_t wc = L'\uF00B'; >>> @@ -65,13 +65,6 @@ >>> wchar_t wd = L'\U0010F00B'; >>> >>> #if __cplusplus >= 201103L >>> - // Should take lower word of the 4byte UNC sequence. This does not match >>> - // gcc. I don't understand what gcc does (it looks like it converts to >>> utf16, >>> - // then takes the second (!) utf16 word, swaps the lower two nibbles, and >>> - // stores that?). >>> - // CHECK-CPP0X: store i16 -4085 >>> - char16_t ud = u'\U0010F00B'; // has utf16 encoding dbc8 dcb0 >>> - >>> // CHECK-CPP0X: store i32 1110027 >>> char32_t Ud = U'\U0010F00B'; >>> #endif >>> @@ -80,14 +73,4 @@ >>> // CHECK-C: store i32 1110027 >>> // CHECK-CPP0X: store i32 1110027 >>> wchar_t we = L'\u1234\U0010F00B'; >>> - >>> -#if __cplusplus >= 201103L >>> - // Should pick second character. >>> - // CHECK-CPP0X: store i16 -4085 >>> - char16_t ue = u'\u1234\U0010F00B'; >>> - >>> - // Should pick second character. >>> - // CHECK-CPP0X: store i32 1110027 >>> - char32_t Ue = U'\u1234\U0010F00B'; >>> -#endif >>> } >>> >>> Modified: cfe/trunk/test/CodeGen/string-literal-short-wstring.c >>> URL: >>> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/string-literal-short-wstring.c?rev=148392&r1=148391&r2=148392&view=diff >>> ============================================================================== >>> --- cfe/trunk/test/CodeGen/string-literal-short-wstring.c (original) >>> +++ cfe/trunk/test/CodeGen/string-literal-short-wstring.c Wed Jan 18 >>> 06:27:10 2012 >>> @@ -29,15 +29,4 @@ >>> // -4085 == 0xf00b >>> // CHECK: store i16 -4085 >>> wchar_t wc = L'\uF00B'; >>> - >>> - // Should take lower word of the 4byte UNC sequence. This does not match >>> - // gcc. I don't understand what gcc does (it looks like it converts to >>> utf16, >>> - // then takes the second (!) 
utf16 word, swaps the lower two nibbles, and >>> - // stores that?). >>> - // CHECK: store i16 -4085 >>> - wchar_t wd = L'\U0010F00B'; // has utf16 encoding dbc8 dcb0 >>> - >>> - // Should pick second character. (gcc: -9205) >>> - // CHECK: store i16 -4085 >>> - wchar_t we = L'\u1234\U0010F00B'; >>> } >>> >>> Added: cfe/trunk/test/Lexer/char-literal-encoding-error.c >>> URL: >>> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/char-literal-encoding-error.c?rev=148392&view=auto >>> ============================================================================== >>> --- cfe/trunk/test/Lexer/char-literal-encoding-error.c (added) >>> +++ cfe/trunk/test/Lexer/char-literal-encoding-error.c Wed Jan 18 06:27:10 >>> 2012 >>> @@ -0,0 +1,10 @@ >>> +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -x c++ %s >>> + >>> +// This file is encoded using ISO-8859-1 >>> + >>> +int main() { >>> + 'é'; // expected-error {{illegal sequence in character literal}} >>> + u'é'; // expected-error {{illegal sequence in character literal}} >>> + U'é'; // expected-error {{illegal sequence in character literal}} >>> + L'é'; // expected-error {{illegal sequence in character literal}} >>> +} >>> >>> Added: cfe/trunk/test/Lexer/char-literal.cpp >>> URL: >>> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/char-literal.cpp?rev=148392&view=auto >>> ============================================================================== >>> --- cfe/trunk/test/Lexer/char-literal.cpp (added) >>> +++ cfe/trunk/test/Lexer/char-literal.cpp Wed Jan 18 06:27:10 2012 >>> @@ -0,0 +1,24 @@ >>> +// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 >>> -Wfour-char-constants -fsyntax-only -verify %s >>> + >>> +int a = 'ab'; // expected-warning {{multi-character character constant}} >>> +int b = '\xFF\xFF'; // expected-warning {{multi-character character >>> constant}} >>> +int c = 'APPS'; // expected-warning {{multi-character character constant}} >>> + >>> +char d = '⌘'; // expected-error {{character 
too large for enclosing >>> character literal type}} >>> +char e = '\u2318'; // expected-error {{character too large for enclosing >>> character literal type}} >>> + >>> +auto f = '\xE2\x8C\x98'; // expected-warning {{multi-character character >>> constant}} >>> + >>> +char16_t g = u'ab'; // expected-error {{Unicode character literals may not >>> contain multiple characters}} >>> +char16_t h = u'\U0010FFFD'; // expected-error {{character too large for >>> enclosing character literal type}} >>> + >>> +wchar_t i = L'ab'; // expected-warning {{extraneous characters in >>> character constant ignored}} >>> +wchar_t j = L'\U0010FFFD'; >>> + >>> +char32_t k = U'\U0010FFFD'; >>> + >>> +char l = 'Ø'; // expected-error {{character too large for enclosing >>> character literal type}} >>> +char m = '👿'; // expected-error {{character too large for enclosing >>> character literal type}} >>> + >>> +char32_t n = U'ab'; // expected-error {{Unicode character literals may not >>> contain multiple characters}} >>> +char16_t o = '👽'; // expected-error {{character too large for enclosing >>> character literal type}} >>> >>> Modified: cfe/trunk/test/Lexer/constants.c >>> URL: >>> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/constants.c?rev=148392&r1=148391&r2=148392&view=diff >>> ============================================================================== >>> --- cfe/trunk/test/Lexer/constants.c (original) >>> +++ cfe/trunk/test/Lexer/constants.c Wed Jan 18 06:27:10 2012 >>> @@ -66,4 +66,4 @@ >>> // PR7888 >>> double g = 1e100000000; // expected-warning {{too large}} >>> >>> -char h = '\u1234'; // expected-warning {{character unicode escape sequence >>> too long for its type}} >>> +char h = '\u1234'; // expected-error {{character too large for enclosing >>> character literal type}} >>> >>> Modified: cfe/trunk/test/Lexer/utf8-char-literal.cpp >>> URL: >>> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/utf8-char-literal.cpp?rev=148392&r1=148391&r2=148392&view=diff 
>>> ============================================================================== >>> --- cfe/trunk/test/Lexer/utf8-char-literal.cpp (original) >>> +++ cfe/trunk/test/Lexer/utf8-char-literal.cpp Wed Jan 18 06:27:10 2012 >>> @@ -1,4 +1,5 @@ >>> // RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -fsyntax-only >>> -verify %s >>> >>> -int array0[u'ñ' == u'\xf1'? 1 : -1]; >>> -int array1['ñ' != u'\xf1'? 1 : -1]; >>> +int array0[u'ñ' == u'\xf1'? 1 : -1]; >>> +int array1['\xF1' != u'\xf1'? 1 : -1]; >>> +int array1['ñ' != u'\xf1'? 1 : -1]; // expected-error {{character too >>> large for enclosing character literal type}} >>> >>> Modified: cfe/trunk/test/Lexer/wchar.c >>> URL: >>> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/wchar.c?rev=148392&r1=148391&r2=148392&view=diff >>> ============================================================================== >>> --- cfe/trunk/test/Lexer/wchar.c (original) >>> +++ cfe/trunk/test/Lexer/wchar.c Wed Jan 18 06:27:10 2012 >>> @@ -1,9 +1,9 @@ >>> // RUN: %clang_cc1 -fsyntax-only -fshort-wchar -verify %s >>> >>> void f() { >>> - (void)L"\U00010000"; // expected-warning {{character unicode escape >>> sequence too long for its type}} >>> + (void)L"\U00010000"; // unicode escape produces UTF-16 sequence, so no >>> warning >>> >>> - (void)L'\U00010000'; // expected-warning {{character unicode escape >>> sequence too long for its type}} >>> + (void)L'\U00010000'; // expected-error {{character too large for >>> enclosing character literal type}} >>> >>> (void)L'ab'; // expected-warning {{extraneous characters in character >>> constant ignored}} >>> >>> >>> >>> _______________________________________________ >>> cfe-commits mailing list >>> [email protected] >>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits _______________________________________________ cfe-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
