On 7/20/19 5:41 PM, Troy A. Griffitts wrote: > > Michael, I would be interested to hear your logic for internationalizing a > numeric parser. I always assumed it wasn't as simple as providing alternate > symbol codes for 0-9. Think Roman Numerals (SWORD already supports Roman > Numerals). If it was as simple as swapping out, say 47 with an alternate > symbol for '4' and an alternate symbol for '7', it wouldn't be much of a > problem to solve, but I suspect other languages do things like: XLVII. > Digit swapping works for most languages, but not for Ethiopic, Roman, and Coptic. There may be others that I haven't encountered yet. LGPL C# code that I wrote follows for going from Hindu-Arabic (0-9, like you are most used to) to a variety of number systems:
// The following linguistically diverse digit sets might not display correctly unless
// you have appropriate Unicode fonts installed.
//
// The plain digit sets are ten-character strings ordered 0 through 9, so the character
// at index n is the local form of the digit n (ReplaceDigits indexes them this way).
public const string ArabicDigits = "٠١٢٣٤٥٦٧٨٩";
public const string BengaliDigits = "০১২৩৪৫৬৭৮৯";
public const string ChineseSimplifiedDigits = "〇一二三四五六七八九";
public const string ChineseTraditionalDigits = "零壹貳參肆伍陸柒捌玖";
public const string ChineseHuaMaDigits = "〇〡〢〣〤〥〦〧〨〩";

// The Coptic tables have no zero. Each entry is TWO chars: a letter followed by a
// combining mark, so the entry for digit d (1-9) starts at index 2 * (d - 1).
// U+0305 is the single overbar; U+033F is the double overbar.
public const string CopticUnits = "\u2c81\u0305\u2C83\u0305\u2C85\u0305\u2C87\u0305\u2C89\u0305\u2C8B\u0305\u2C8D\u0305\u2C8F\u0305\u2C91\u0305"; // First 9 coptic letters with single overbar
public const string CopticTens = "\u2C93\u0305\u2C95\u0305\u2C97\u0305\u2C99\u0305\u2C9B\u0305\u2C9D\u0305\u2C9F\u0305\u2CA1\u0305\u03E5\u0305"; // 10th thru 18th coptic letters with single overbar
// NOTE(review): this string holds ten letter/overbar pairs, one more than the nine the
// comment describes; CopticNumerals only ever indexes the first nine. Confirm which
// letter is intended for 900.
public const string CopticHundreds = "\u2CA3\u0305\u2CA5\u0305\u2CA7\u0305\u2CA9\u0305\u2CAB\u0305\u2CAD\u0305\u2CAF\u0305\u2CB1\u0305\u2CB3\u0305\u2CB5\u0305"; // 19th thru 27th coptic letters with single overbar
public const string CopticThousands = "\u2c81\u033F\u2C83\u033F\u2C85\u033F\u2C87\u033F\u2C89\u033F\u2C8B\u033F\u2C8D\u033F\u2C8F\u033F\u2C91\u033F"; // Thousands are the same as units, but double overbar.
public const string CopticTenThousands = "\u2C93\u033F\u2C95\u033F\u2C97\u033F\u2C99\u033F\u2C9B\u033F\u2C9D\u033F\u2C9F\u033F\u2CA1\u033F\u03E5\u033F"; // Pattern continues: one bar added per period (10^3)
public const string DevangariDigits = "०१२३४५६७८९";

// Ethiopic has no zero: index 0 of the two tables below is a space placeholder so that
// index n still lines up with digit n. EthiopicNumerals never emits that placeholder.
public const string EthiopicDigits = " ፩፪፫፬፭፮፯፰፱";
public const string EthiopicTens = " ፲፳፴፵፶፷፸፹፺";
public const string EthiopicHundred = "፻";
public const string EthiopicTenThousand = "፼";

public const string GugaratiDigits = "૦૧૨૩૪૫૬૭૮૯";
public const string GurmukhiDigits = "੦੧੨੩੪੫੬੭੮੯";
public const string KannadaDigits = "೦೧೨೩೪೫೬೭೮೯";
public const string KhmerDigits = "០១២៣៤៥៦៧៨៩";
public const string LaoDigits = "໐໑໒໓໔໕໖໗໘໙";
public const string LimbuDigits = "᥆᥇᥈᥉᥊᥋᥌᥍᥎᥏";
public const string MalayalamDigits = "൦൧൨൩൪൫൬൭൮൯";
public const string MongolianDigits = "᠐᠑᠒᠓᠔᠕᠖᠗᠘᠙";
public const string BurmeseDigits = "၀၁၂၃၄၅၆၇၈၉";
public const string OriyaDigits = "୦୧୨୩୪୫୬୭୮୯";
public const string PersianDigits = "۰۱۲۳۴۵۶۷۸۹"; // Same as Urdu digits
public const string TamilDigits = "௦௧௨௩௪௫௬௭௮௯";
public const string TeluguDigits = "౦౧౨౩౪౫౬౭౮౯";
public const string ThaiDigits = "๐๑๒๓๔๕๖๗๘๙";
public const string TibetanDigits = "༠༡༢༣༤༥༦༧༨༩";
public const string UrduDigits = "۰۱۲۳۴۵۶۷۸۹";

// Like the Ethiopic tables, index 0 is a space placeholder because there is no Roman zero.
public const string RomanDigits = " ⅠⅡⅢⅣⅤⅥⅦⅧⅨ";

// The digit set currently selected for localization; empty means "leave 0-9 alone".
protected static string CurrentDigits = String.Empty;

/// <summary>
/// Returns the currently selected digit set, or "0123456789" when no alternate
/// digit set is selected.
/// </summary>
/// <returns>The selected digit string, or the plain digits 0-9.</returns>
public static string NumberSample()
{
    return String.IsNullOrEmpty(CurrentDigits) ? "0123456789" : CurrentDigits;
}

/// <summary>
/// true iff we are changing digits to an alternate writing system
/// </summary>
public static bool LocalizingDigits
{
    get
    {
        // Use the same null-or-empty test as NumberSample so the two members can never
        // disagree (a null CurrentDigits used to report "localizing" here while
        // NumberSample fell back to the default digits).
        return !String.IsNullOrEmpty(CurrentDigits);
    }
}

/// <summary>
/// Set the locale for localizing digits for display in Bibles for verse numbers, etc.
/// </summary>
/// <param name="digitPlace">string with one of the exact names of supported digit sets</param>
/// <returns>the set string if successful, or "Default" otherwise</returns>
/// <remarks>
/// The names are matched exactly (case-sensitive). "Hindu-Arabic", "Default", null and any
/// unrecognized name all switch localization off and return "Default".
/// </remarks>
public static string SetDigitLocale(string digitPlace)
{
    switch (digitPlace)
    {
        case "Arabic":
            CurrentDigits = ArabicDigits;
            break;
        case "Bengali":
            CurrentDigits = BengaliDigits;
            break;
        case "Burmese (Myanmar)":
            CurrentDigits = BurmeseDigits;
            break;
        case "Chinese (Simplified)":
            CurrentDigits = ChineseSimplifiedDigits;
            break;
        case "Chinese (Traditional)":
            CurrentDigits = ChineseTraditionalDigits;
            break;
        case "Chinese (hua ma)":
            CurrentDigits = ChineseHuaMaDigits;
            break;
        case "Coptic":
            CurrentDigits = CopticUnits;
            break;
        case "Devangari":
            CurrentDigits = DevangariDigits;
            break;
        case "Ethiopic (Ge'ez)":
            CurrentDigits = EthiopicDigits;
            break;
        case "Gujarati":
            CurrentDigits = GugaratiDigits;
            break;
        case "Gurmukhi":
            CurrentDigits = GurmukhiDigits;
            break;
        case "Kannada":
            CurrentDigits = KannadaDigits;
            break;
        case "Khmer":
            CurrentDigits = KhmerDigits;
            break;
        case "Lao":
            CurrentDigits = LaoDigits;
            break;
        case "Limbu":
            CurrentDigits = LimbuDigits;
            break;
        case "Malayalam":
            CurrentDigits = MalayalamDigits;
            break;
        case "Mongolian":
            CurrentDigits = MongolianDigits;
            break;
        case "Oriya":
            CurrentDigits = OriyaDigits;
            break;
        case "Roman":
            CurrentDigits = RomanDigits;
            break;
        case "Tamil":
            CurrentDigits = TamilDigits;
            break;
        case "Telugu":
            CurrentDigits = TeluguDigits;
            break;
        case "Thai":
            CurrentDigits = ThaiDigits;
            break;
        case "Tibetan":
            CurrentDigits = TibetanDigits;
            break;
        case "Persian":
        case "Urdu":
            // Persian and Urdu share the same digit characters.
            CurrentDigits = UrduDigits;
            break;
        case "Hindu-Arabic":
        case "Default":
        default:
            CurrentDigits = String.Empty;
            digitPlace = "Default";
            break;
    }
    return digitPlace;
}

/// <summary>
/// Replaces all numbers with appropriate numbers in the current writing system
/// </summary>
/// <param name="s">string that might include numbers</param>
/// <returns>string with numbers localized</returns>
public static string LocalizeDigits(string s)
{
    return ReplaceDigits(s, CurrentDigits);
}

/// <summary>
/// Some writing systems just have exact equivalents for 0 through 9 and the same place values.
/// Those are easy, with a simple digit-for-digit substitution. Others require some logic beyond
/// that.
/// </summary>
/// <param name="s">String that may have digits to localize</param>
/// <param name="newDigits">one of the supported digit strings</param>
/// <returns>
/// <paramref name="s"/> with every ASCII digit 0-9 rewritten in the requested system.
/// <paramref name="s"/> is returned unchanged when it is null or empty, or when
/// <paramref name="newDigits"/> is null or shorter than ten characters.
/// </returns>
public static string ReplaceDigits(string s, string newDigits)
{
    // TODO: implement logic for the different Chinese numeral systems, which require more than simple digit substitution, and which have many dialect and usage options.
    if ((newDigits == null) || (newDigits.Length < 10))
    {
        // Nothing to do; no conversion specified
        return s;
    }

    if (String.IsNullOrEmpty(s))
    {
        // Nothing to convert. Without this guard a null input would throw in the
        // substitution loop (and in the numeral-system helpers) below.
        return s;
    }

    if (newDigits == EthiopicDigits)
    {
        // Gotta count differently
        return EthiopicNumerals(s);
    }

    if (newDigits == RomanDigits)
    {
        // Seriously old school
        return RomanNumerals(s);
    }

    if (newDigits == CopticUnits)
    {
        // Older than Roman Numerals, but not the same as Hieroglyphic numbers or ancient Egyptian numbers
        return CopticNumerals(s);
    }

    // Simple digit substitution with normal place values
    StringBuilder sb = new StringBuilder(s.Length);
    foreach (char c in s)
    {
        if ((c >= '0') && (c <= '9'))
        {
            sb.Append(newDigits[c - '0']);
        }
        else
        {
            sb.Append(c);
        }
    }
    return sb.ToString();
}

/// <summary>
/// If the input ch is a localized digit in the current digit set (CurrentDigits), return a digit in the range '0'-'9',
/// otherwise return the input character.
/// </summary>
/// <param name="ch">Possible localized digit</param>
/// <returns>Standardized digit or input character</returns>
/// <remarks>
/// Only ten-character digit sets are mapped, because only those pair one character with
/// one decimal digit. For sets without a zero (Ethiopic, Roman) index 0 is a space
/// placeholder, so a space is never converted to '0'. With any other set (for example
/// the Coptic letter/overbar table) the input character is returned unchanged.
/// </remarks>
public static char StandardDigit(char ch)
{
    string digits = CurrentDigits;
    if (String.IsNullOrEmpty(digits) || (digits.Length != 10))
    {
        return ch;
    }

    int i = digits.IndexOf(ch);
    if ((i < 0) || (ch == ' '))
    {
        // Not in the set, or the space that stands in for the missing zero.
        return ch;
    }
    return (char)('0' + i);
}

/// <summary>
/// Coptic numbers have no 0, but have different symbols for units, tens, hundreds, thousands, etc.
/// </summary>
/// <param name="s">
/// String that may contain ASCII digits 0-9. Numbers of up to five digits are converted.
/// </param>
/// <returns>
/// String with coptic numerals. Zeros produce no symbol. If any number has a non-zero
/// digit beyond the ten-thousands place, the whole input is returned unchanged.
/// Characters other than ASCII 0-9 (including digits of other scripts) are copied as is.
/// </returns>
public static string CopticNumerals(string s)
{
    if (String.IsNullOrEmpty(s))
    {
        return s;
    }

    // One table per decimal place, lowest first. Each table is a run of two-char entries
    // (letter followed by its combining overbar) for the digits 1 through 9.
    string[] placeTables = { CopticUnits, CopticTens, CopticHundreds, CopticThousands, CopticTenThousands };

    StringBuilder sb = new StringBuilder(s.Length * 2);
    int place = 0;

    // Walk right to left so that the place value of each digit is known when we reach it.
    for (int i = s.Length - 1; i >= 0; i--)
    {
        char c = s[i];

        // Deliberately not Char.IsDigit: that also accepts digits of other scripts,
        // which would index far outside the tables.
        if ((c < '0') || (c > '9'))
        {
            // Non-digit: just copy it, and start counting places afresh.
            place = 0;
            sb.Insert(0, c);
            continue;
        }

        if (c != '0')
        {
            if (place >= placeTables.Length)
            {
                return s; // Give up and fail gracefully for numbers bigger than we designed for.
            }

            // Entry for digit d starts at 2 * (d - 1): letter first, then the overbar.
            int n = 2 * (c - '1');
            sb.Insert(0, placeTables[place].Substring(n, 2));
        }

        // A zero has no symbol of its own; it only moves us to the next place.
        place++;
    }
    return sb.ToString();
}

/// <summary>
/// Writing big numbers without a 0 is different.
/// </summary>
/// <param name="s">String with numbers to localize to Ethiopic</param>
/// <returns>String with numbers localized to Ethiopic Ge'ez.</returns>
/// <remarks>
/// Each run of ASCII digits 0-9 is split into pairs from the right. A pair is written as
/// its tens symbol followed by its units symbol; the second pair is followed by the
/// hundred sign and the third pair by the ten-thousand sign. A pair that is entirely
/// zero contributes nothing, not even its multiplier sign. Runs longer than six digits
/// are left unchanged, and a run consisting only of zeros produces no output.
/// Characters other than ASCII 0-9 (including digits of other scripts) are copied as is.
/// </remarks>
public static string EthiopicNumerals(string s)
{
    if (String.IsNullOrEmpty(s))
    {
        return s;
    }

    StringBuilder sb = new StringBuilder(s.Length * 2);
    int i = 0;
    while (i < s.Length)
    {
        char c = s[i];

        // Deliberately not Char.IsDigit: that also accepts digits of other scripts,
        // which would index far outside the ten-entry tables.
        if ((c < '0') || (c > '9'))
        {
            sb.Append(c);
            i++;
            continue;
        }

        int start = i;
        while ((i < s.Length) && (s[i] >= '0') && (s[i] <= '9'))
        {
            i++;
        }
        AppendEthiopicNumber(sb, s, start, i - start);
    }
    return sb.ToString();
}

// Appends the Ethiopic form of the digit run s[start .. start + length - 1] to sb.
private static void AppendEthiopicNumber(StringBuilder sb, string s, int start, int length)
{
    const int MaxEthiopicDigits = 6; // units/tens, hundreds pair, ten-thousands pair

    if (length > MaxEthiopicDigits)
    {
        // Bigger than we designed for: keep the original digits rather than garble them.
        sb.Append(s, start, length);
        return;
    }

    int last = start + length - 1;
    int pairs = (length + 1) / 2;

    // Most significant pair first, so symbols can simply be appended.
    for (int pair = pairs - 1; pair >= 0; pair--)
    {
        int unitsIndex = last - (2 * pair);
        int tensIndex = unitsIndex - 1;
        int units = s[unitsIndex] - '0';
        int tens = (tensIndex >= start) ? (s[tensIndex] - '0') : 0;

        if ((tens == 0) && (units == 0))
        {
            // Empty pair: writing its multiplier alone would add a value that is not there.
            continue;
        }

        if (tens > 0)
        {
            sb.Append(EthiopicTens[tens]);
        }
        if (units > 0)
        {
            sb.Append(EthiopicDigits[units]);
        }

        if (pair == 1)
        {
            sb.Append(EthiopicHundred);
        }
        else if (pair == 2)
        {
            sb.Append(EthiopicTenThousand);
        }
    }
}

/// <summary>
/// Roman numerals kind of break down after 3,000 (MMM) in terms of common use,
/// which seems to be limited to chapters and years in this decade. This function
/// uses Unicode Roman numerals, but an alternate routine could easily be created
/// that uses plain letters I, V, X, L, C, and M or i, v, x, l, c, and m by replacing
/// or providing choices for the strings in RomanUnits in this method.
/// </summary>
/// <param name="s">string that may have numbers to "Romanize"</param>
/// <returns>string with numbers as Roman numerals</returns>
/// <remarks>
/// Each run of up to four ASCII digits 0-9 is converted; longer runs are left unchanged
/// because the table stops at the thousands place. Zeros produce no symbol. Characters
/// other than ASCII 0-9 (including digits of other scripts) are copied as is.
/// </remarks>
public static string RomanNumerals(string s)
{
    if (String.IsNullOrEmpty(s))
    {
        return s;
    }

    // Row = decimal place (units, tens, hundreds, thousands); column = digit value.
    // In the thousands row a letter followed by the combining overline U+0305 stands
    // for 1000 times the letter (V with overline = 5000, X with overline = 10000).
    string[,] RomanUnits =
    {
        { "", "Ⅰ", "Ⅱ", "Ⅲ", "Ⅳ", "Ⅴ", "Ⅵ", "Ⅶ", "Ⅷ", "Ⅸ" },
        { "", "Ⅹ", "ⅩⅩ", "ⅩⅩⅩ", "ⅩⅬ", "Ⅼ", "ⅬⅩ", "ⅬⅩⅩ", "ⅬⅩⅩⅩ", "ⅩⅭ" },
        { "", "Ⅽ", "ⅭⅭ", "ⅭⅭⅭ", "ⅭⅮ", "Ⅾ", "ⅮⅭ", "ⅮⅭⅭ", "ⅮⅭⅭⅭ", "ⅭⅯ" },
        { "", "Ⅿ", "ⅯⅯ", "ⅯⅯⅯ", "ⅯV̅", "V̅", "V̅Ⅿ", "V̅ⅯⅯ", "V̅ⅯⅯⅯ", "ⅯX̅" }
    };
    int maxDigits = RomanUnits.GetLength(0);

    StringBuilder sb = new StringBuilder(s.Length * 2);
    int i = 0;
    while (i < s.Length)
    {
        char c = s[i];

        // Deliberately not Char.IsDigit: that also accepts digits of other scripts,
        // which would index outside the ten-column table.
        if ((c < '0') || (c > '9'))
        {
            sb.Append(c);
            i++;
            continue;
        }

        int start = i;
        while ((i < s.Length) && (s[i] >= '0') && (s[i] <= '9'))
        {
            i++;
        }
        int length = i - start;

        if (length > maxDigits)
        {
            // Bigger than the table covers: keep the original digits rather than garble them.
            sb.Append(s, start, length);
            continue;
        }

        for (int k = 0; k < length; k++)
        {
            int place = length - 1 - k;
            sb.Append(RomanUnits[place, s[start + k] - '0']);
        }
    }

    // Use the single-character forms of eleven and twelve where they occur.
    sb.Replace("ⅩⅡ", "Ⅻ");
    sb.Replace("ⅩⅠ", "Ⅺ");
    return sb.ToString();
}

> On 7/20/19 1:49 PM, Michael Johnson wrote: >> It is an important question. I have a way to handle it in other formats >> using Haiola. I suspect that proper handling it in SWORD will take a design >> change. In Haiola, the source chapter and verse numbers are always as in >> English. I can select display chapter and verse numbers from many options. >> >> Aloha, >> Michael >> http://mpj.us >> >> >> On Jul 20, 2019, at 10:39, Cyrille <lafricai...@gmail.com >> <mailto:lafricai...@gmail.com>> wrote: >> >>> Hello, >>> No body else have a proposition for this question? Should I open a bug >>> report? >>> Is it not an important question? >>> >>> Best regards, Br Cyrille >>> >>> Le 17/07/2019 à 23:14, David Haslam a écrit : >>>> Several other non-Roman scripts have their own digit characters >>>> corresponding to our 0-9. >>>> >>>> IMHO the possibilities for using non-Roman digits ought to be facilitated >>>> in the back-end. >>>> >>>> Even so, each front-end would then require a new UI control to select >>>> which script should be used to display the chapter and verse numbers. 
>>>> >>>> An alternate idea would be to specify the non-Roman digits as a ten >>>> character UTF-8 string in a module .conf file. >>>> >>>> Adapting the back-end to use this module-specific configuration key might >>>> be much simpler. >>>> >>>> Front-ends would still require adapting for the UI features that require >>>> chapter and verse numbers to be input or displayed or adjusted by >>>> controls. >>>> >>>> Best regards, >>>> >>>> David >>>> >>>> Sent from ProtonMail Mobile >>>> >>>> >>>> On Wed, Jul 17, 2019 at 22:00, Cyrille <lafricai...@gmail.com >>>> <mailto:lafricai...@gmail.com>> wrote: >>>>> Hello, >>>>> I'm still working on a modern NT-Ps-Pr translation in Burmese. My friends >>>>> from Myanmar send me the text. But they don't use the Arabic numerals; they >>>>> have their own numbers. >>>>> It could be very important for them to write in their own numbers (if I >>>>> had to use theirs I would be lost ;) ). >>>>> Is it possible to add this possibility to the frontend, or should this be >>>>> in SWORD directly? >>>>> Do I need to open a new issue on the bug tracker? 
>>>>> >>>>> Example of text, in bold the chapter and verses: >>>>> >>>>> ၃ ၁။ ထိုနေ့ရက်တို့၌ ယောဟန်ဘတ္တိဇံသည် ရောက်လာ၍ ဂျူဒေးယပြည်၊ တောကန္တာရတွင် >>>>> ဟော >>>>> >>>>> *၂။* ပြောသည်မှာ၊- နောင်တရကြလော့၊ အကြောင်းမူကား ကောင်းကင်နိုင်ငံတော်သည် >>>>> ရောက်လုနီးပြီဟူ၍တည်း။- >>>>> >>>>> *၃။* ပရောဖက်အီဇာယဟောထားခဲ့သည့်အတိုင်း၊ ထာ၀ရ >>>>> ဘုရားသခင်ကြွလာတော်မူမည့်လမ်းကိုပြင်ဆင်ကြ လော့၊ ကိုယ်တော်၏ လမ်းများကို >>>>> ဖြောင့်တန်းစေကြ လော့ဟူ၍ တောကန္တာရ၌ ကြွေးကြော်သောသူ၏အသံသည်ကား >>>>> ဤသူပင်ဖြစ်သတည်း။- >>>>> >>>>> *၄။* ယောဟန်သည် ကုလားအုတ်မွေးဖြင့် ရက်လုပ်သောအဝတ်ကိုဝတ်ဆင်ကာ ခါး၌ >>>>> သားရေခါးစည်းကြိုးကိုစည်းထား၏။ သူ၏အစာသည်ကား ကျိုင်းကောင်နှင့် >>>>> တောပျားရည်တို့သာဖြစ်၏။- >>>>> >>>>> *၅။* ထိုအခါ ဂျေရုဆလင်မြို့မှစ၍ ဂျူဒေးယနယ်တစ်နယ် လုံးနှင့် >>>>> ဂျော်ဒန်မြစ်တစ်လျှောက်ရှိဒေသမှ လူအပေါင်း တို့သည် သူ့ထံသို့ လာကြ၏။- >>>>> >>>>> ၆။ ထိုသူတို့သည် မိမိတို့၏ အပြစ်များကို ထုတ်ဖော်ဝန်ခံကြလျက် >>>>> ဂျော်ဒန်မြစ်တွင် သူ့အားဖြင့် ဆေးကြောခြင်းကို ခံယူကြ၏။ >>>>> >>>>> >>>>> >>>> >>>> >>>> >>>> _______________________________________________ >>>> sword-devel mailing list: sword-devel@crosswire.org >>>> http://www.crosswire.org/mailman/listinfo/sword-devel >>>> Instructions to unsubscribe/change your settings at above page >>> >>> _______________________________________________ >>> sword-devel mailing list: sword-devel@crosswire.org >>> <mailto:sword-devel@crosswire.org> >>> http://www.crosswire.org/mailman/listinfo/sword-devel >>> Instructions to unsubscribe/change your settings at above page >> >> _______________________________________________ >> sword-devel mailing list: sword-devel@crosswire.org >> http://www.crosswire.org/mailman/listinfo/sword-devel >> Instructions to unsubscribe/change your settings at above page > > _______________________________________________ > sword-devel mailing list: sword-devel@crosswire.org > http://www.crosswire.org/mailman/listinfo/sword-devel > Instructions to unsubscribe/change your settings at above page -- signature Aloha, */Michael Johnson/** PO BOX 881143 
• PUKALANI HI 96788-1143*• USA mljohnson.org <http://mljohnson.org> • Phone: +1 808-333-6921 • Skype: kahunapule _______________________________________________ sword-devel mailing list: sword-devel@crosswire.org http://www.crosswire.org/mailman/listinfo/sword-devel Instructions to unsubscribe/change your settings at above page