GunChleoc has proposed merging lp:~widelands-dev/widelands/japanese into lp:widelands.
Requested reviews: Widelands Developers (widelands-dev) Related bugs: Bug #1311698 in widelands: "Non-spacing sentences could not be break automatically" https://bugs.launchpad.net/widelands/+bug/1311698 For more details, see: https://code.launchpad.net/~widelands-dev/widelands/japanese/+merge/273419 Implemented line wrapping for Japanese. Some characters must not start a line and others must not end a line, so I group each such character together with its neighboring character into a single vector entry; line breaks are then only allowed between entries. Testing can be done by removing the \n characters from win_conditions/ja.po and looking at the tooltips - rather than a crash or an endless line, we get properly wrapped lines. -- Your team Widelands Developers is requested to review the proposed merge of lp:~widelands-dev/widelands/japanese into lp:widelands.
=== modified file 'src/graphic/text/bidi.cc' --- src/graphic/text/bidi.cc 2015-09-28 06:41:58 +0000 +++ src/graphic/text/bidi.cc 2015-10-05 15:03:22 +0000 @@ -22,7 +22,6 @@ #include <map> #include <string> -#include <unicode/uchar.h> #include <unicode/unistr.h> #include <unicode/utypes.h> @@ -32,6 +31,139 @@ // TODO(GunChleoc): Have a look at the ICU API to see which helper functions can be gained from there. // TODO(GunChleoc): Arabic: Turn this into a proper class +// http://www.w3.org/TR/jlreq/#characters_not_starting_a_line +const std::set<UChar> kCannottStartLineJapanese = { + {0x2019}, // RIGHT SINGLE QUOTATION MARK + {0x201D}, // RIGHT DOUBLE QUOTATION MARK + {0x0029}, // RIGHT PARENTHESIS + {0x3015}, // RIGHT TORTOISE SHELL BRACKET + {0x005D}, // RIGHT SQUARE BRACKET + {0x007D}, // RIGHT CURLY BRACKET + {0x3009}, // RIGHT ANGLE BRACKET + {0x300B}, // RIGHT DOUBLE ANGLE BRACKET + {0x300D}, // RIGHT CORNER BRACKET + {0x300F}, // RIGHT WHITE CORNER BRACKET + {0x3011}, // RIGHT BLACK LENTICULAR BRACKET + {0x2986}, // RIGHT WHITE PARENTHESIS + {0x3019}, // RIGHT WHITE TORTOISE SHELL BRACKET + {0x3017}, // RIGHT WHITE LENTICULAR BRACKET + {0xFF09}, // Fullwidth Right Parenthesis + {0x00BB}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + {0x301F}, // LOW DOUBLE PRIME QUOTATION MARK + {0x2010}, // HYPHEN + {0x301C}, // WAVE DASH + {0x30A0}, // KATAKANA-HIRAGANA DOUBLE HYPHEN + {0x2013}, // EN DASH + {0x0021}, // EXCLAMATION MARK + {0x003F}, // QUESTION MARK + {0x203C}, // DOUBLE EXCLAMATION MARK + {0x2047}, // DOUBLE QUESTION MARK + {0x2048}, // QUESTION EXCLAMATION MARK + {0x2049}, // EXCLAMATION QUESTION MARK + {0x30FB}, // KATAKANA MIDDLE DOT + {0x003A}, // COLON + {0x003B}, // SEMICOLON + {0x3002}, // IDEOGRAPHIC FULL STOP + {0x002E}, // FULL STOP + {0x3001}, // IDEOGRAPHIC COMMA + {0x002C}, // COMMA + {0x30FD}, // KATAKANA ITERATION MARK + {0x30FE}, // KATAKANA VOICED ITERATION MARK + {0x309D}, // HIRAGANA ITERATION MARK + {0x309E}, // HIRAGANA VOICED 
ITERATION MARK + {0x3005}, // IDEOGRAPHIC ITERATION MARK + {0x303B}, // VERTICAL IDEOGRAPHIC ITERATION MARK + {0x30FC}, // KATAKANA-HIRAGANA PROLONGED SOUND MARK + {0x3041}, // HIRAGANA LETTER SMALL A + {0x3043}, // HIRAGANA LETTER SMALL I + {0x3045}, // HIRAGANA LETTER SMALL U + {0x3047}, // HIRAGANA LETTER SMALL E + {0x3049}, // HIRAGANA LETTER SMALL O + {0x30A1}, // KATAKANA LETTER SMALL A + {0x30A3}, // KATAKANA LETTER SMALL I + {0x30A5}, // KATAKANA LETTER SMALL U + {0x30A7}, // KATAKANA LETTER SMALL E + {0x30A9}, // KATAKANA LETTER SMALL O + {0x3063}, // HIRAGANA LETTER SMALL TU + {0x3083}, // HIRAGANA LETTER SMALL YA + {0x3085}, // HIRAGANA LETTER SMALL YU + {0x3087}, // HIRAGANA LETTER SMALL YO + {0x308E}, // HIRAGANA LETTER SMALL WA + {0x3095}, // HIRAGANA LETTER SMALL KA + {0x3096}, // HIRAGANA LETTER SMALL KE + {0x30C3}, // KATAKANA LETTER SMALL TU + {0x30E3}, // KATAKANA LETTER SMALL YA + {0x30E5}, // KATAKANA LETTER SMALL YU + {0x30E7}, // KATAKANA LETTER SMALL YO + {0x30EE}, // KATAKANA LETTER SMALL WA + {0x30F5}, // KATAKANA LETTER SMALL KA + {0x30F6}, // KATAKANA LETTER SMALL KE + {0x31F0}, // KATAKANA LETTER SMALL KU + {0x31F1}, // KATAKANA LETTER SMALL SI + {0x31F2}, // KATAKANA LETTER SMALL SU + {0x31F3}, // KATAKANA LETTER SMALL TO + {0x31F4}, // KATAKANA LETTER SMALL NU + {0x31F5}, // KATAKANA LETTER SMALL HA + {0x31F6}, // KATAKANA LETTER SMALL HI + {0x31F7}, // KATAKANA LETTER SMALL HU + {0x31F8}, // KATAKANA LETTER SMALL HE + {0x31F9}, // KATAKANA LETTER SMALL HO + {0x31FA}, // KATAKANA LETTER SMALL MU + {0x31FB}, // KATAKANA LETTER SMALL RA + {0x31FC}, // KATAKANA LETTER SMALL RI + {0x31FD}, // KATAKANA LETTER SMALL RU + {0x31FE}, // KATAKANA LETTER SMALL RE + {0x31FF}, // KATAKANA LETTER SMALL RO +}; + +// http://www.w3.org/TR/jlreq/#characters_not_ending_a_line +const std::set<UChar> kCannotEndLineJapanese = { + {0x2018}, // LEFT SINGLE QUOTATION MARK + {0x201C}, // LEFT DOUBLE QUOTATION MARK + {0x0028}, // LEFT PARENTHESIS + {0x3014}, // 
LEFT TORTOISE SHELL BRACKET + {0x005B}, // LEFT SQUARE BRACKET + {0x007B}, // LEFT CURLY BRACKET + {0x3008}, // LEFT ANGLE BRACKET + {0x300A}, // LEFT DOUBLE ANGLE BRACKET + {0x300C}, // LEFT CORNER BRACKET + {0x300E}, // LEFT WHITE CORNER BRACKET + {0x3010}, // LEFT BLACK LENTICULAR BRACKET + {0x2985}, // LEFT WHITE PARENTHESIS + {0x3018}, // LEFT WHITE TORTOISE SHELL BRACKET + {0x3016}, // LEFT WHITE LENTICULAR BRACKET + {0xFF08}, // Fullwidth Left Parenthesis + {0x00AB}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + {0x301D}, // REVERSED DOUBLE PRIME QUOTATION MARK +}; + + +// http://unicode.org/faq/blocks_ranges.html +// http://unicode-table.com/en/blocks/ +const std::set<UBlockCode> kCJKCodeBlocks = { + { + UBlockCode::UBLOCK_CJK_COMPATIBILITY, + UBlockCode::UBLOCK_CJK_COMPATIBILITY_FORMS, + UBlockCode::UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, + UBlockCode::UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, + UBlockCode::UBLOCK_CJK_RADICALS_SUPPLEMENT, + UBlockCode::UBLOCK_CJK_STROKES, + UBlockCode::UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION, + UBlockCode::UBLOCK_CJK_UNIFIED_IDEOGRAPHS, + UBlockCode::UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, + UBlockCode::UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, + UBlockCode::UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, + UBlockCode::UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, + UBlockCode::UBLOCK_HIRAGANA, + UBlockCode::UBLOCK_KATAKANA, + }, +}; + +bool is_cjk_character(UChar32 c) { + return kCJKCodeBlocks.count(ublock_getCode(c)) == 1; +} + + // Need to mirror () etc. for LTR languages, so we're sticking them in a map. 
const std::map<UChar, UChar> kSymmetricChars = { {0x0028, 0x0029}, // () @@ -378,7 +510,7 @@ } -// True if a string does not contain Latin characters +// True if a string contains a character from an Arabic code block bool has_arabic_character(const char* input) { bool result = false; const icu::UnicodeString parseme(input); @@ -590,4 +722,46 @@ return result; } +// True if a string contains a character from a CJK code block +bool has_cjk_character(const char* input) { + bool result = false; + const icu::UnicodeString parseme(input); + for (int32_t i = 0; i < parseme.length(); ++i) { + if (is_cjk_character(parseme.char32At(i))) { + result = true; + break; + } + } + return result; +} + +// Split a string of CJK characters into units that can have line breaks between them. +std::vector<std::string> split_cjk_word(const char* input) { + const icu::UnicodeString parseme(input); + std::vector<std::string> result; + for (int i = 0; i < parseme.length(); ++i) { + icu::UnicodeString temp; + UChar c = parseme.charAt(i); + temp += c; + if (i < parseme.length() - 1) { + UChar next = parseme.charAt(i + 1); + if (cannot_end_line(c) || cannot_start_line(next)) { + temp += next; + ++i; + } + } + std::string temp2; + result.push_back(temp.toUTF8String(temp2)); + } + return result; +} + +bool cannot_start_line(const UChar& c) { + return kCannottStartLineJapanese.count(c) == 1; +} + +bool cannot_end_line(const UChar& c) { + return kCannotEndLineJapanese.count(c) == 1; +} + } // namespace UI === modified file 'src/graphic/text/bidi.h' --- src/graphic/text/bidi.h 2015-09-26 09:34:20 +0000 +++ src/graphic/text/bidi.h 2015-10-05 15:03:22 +0000 @@ -23,14 +23,20 @@ #include <string> #include <vector> +#include <unicode/uchar.h> + #include "graphic/text/font_set.h" // BiDi support for RTL languages namespace i18n { std::string make_ligatures(const char* input); std::string line2bidi(const char* input); + std::vector<std::string> split_cjk_word(const char* input); bool 
has_rtl_character(const char* input); bool has_rtl_character(std::vector<std::string> input); + bool has_cjk_character(const char* input); + bool cannot_start_line(const UChar& c); + bool cannot_end_line(const UChar& c); } // namespace UI === modified file 'src/graphic/text/rt_render.cc' --- src/graphic/text/rt_render.cc 2015-09-26 18:04:24 +0000 +++ src/graphic/text/rt_render.cc 2015-10-05 15:03:22 +0000 @@ -754,7 +754,15 @@ } word = ts.till_any_or_end(" \t\n\r"); if (!word.empty()) { - nodes.push_back(new TextNode(font_cache_.get_font(&ns), ns, i18n::make_ligatures(word.c_str()))); + word = i18n::make_ligatures(word.c_str()); + if (i18n::has_cjk_character(word.c_str())) { + std::vector<std::string> units = i18n::split_cjk_word(word.c_str()); + for (const std::string& unit: units) { + nodes.push_back(new TextNode(font_cache_.get_font(&ns), ns, unit)); + } + } else { + nodes.push_back(new TextNode(font_cache_.get_font(&ns), ns, word)); + } } } }
_______________________________________________ Mailing list: https://launchpad.net/~widelands-dev Post to : [email protected] Unsubscribe : https://launchpad.net/~widelands-dev More help : https://help.launchpad.net/ListHelp

