On 7/20/19 5:41 PM, Troy A. Griffitts wrote:
>
> Michael, I would be interested to hear your logic for internationalizing a 
> numeric parser.  I always assumed it wasn't as simple as providing alternate 
> symbol codes for 0-9.  Think Roman Numerals (SWORD already supports Roman 
> Numerals).  If it were as simple as swapping out, say, 47 with an alternate 
> symbol for '4' and an alternate symbol for '7', it wouldn't be much of a 
> problem to solve, but I suspect other languages do things like: XLVII.
>
Digit swapping works for most languages, but not for Ethiopic, Roman, and 
Coptic. There may be others that I haven't encountered yet. LGPL C# code that 
I wrote follows for going from Hindu-Arabic (0-9, like you are most used to) to 
a variety of number systems:

        // The following linguistically diverse digit sets might not display 
correctly unless
        // you have appropriate Unicode fonts installed.
        public const string ArabicDigits = "٠١٢٣٤٥٦٧٨٩";
        public const string BengaliDigits = "০১২৩৪৫৬৭৮৯";
        public const string ChineseSimplifiedDigits = "〇一二三四五六七八九";
        public const string ChineseTraditionalDigits = "零壹貳參肆伍陸柒捌玖";
        public const string ChineseHuaMaDigits = "〇〡〢〣〤〥〦〧〨〩";
        public const string CopticUnits = 
"\u2c81\u0305\u2C83\u0305\u2C85\u0305\u2C87\u0305\u2C89\u0305\u2C8B\u0305\u2C8D\u0305\u2C8F\u0305\u2C91\u0305";
  // First 9 coptic letters with single overbar
        public const string CopticTens = 
"\u2C93\u0305\u2C95\u0305\u2C97\u0305\u2C99\u0305\u2C9B\u0305\u2C9D\u0305\u2C9F\u0305\u2CA1\u0305\u03E5\u0305";
   // 10th thru 18th coptic letters with single overbar
        public const string CopticHundreds = 
"\u2CA3\u0305\u2CA5\u0305\u2CA7\u0305\u2CA9\u0305\u2CAB\u0305\u2CAD\u0305\u2CAF\u0305\u2CB1\u0305\u2CB3\u0305\u2CB5\u0305";
  // 19th thru 27th coptic letters with single overbar
        public const string CopticThousands = 
"\u2c81\u033F\u2C83\u033F\u2C85\u033F\u2C87\u033F\u2C89\u033F\u2C8B\u033F\u2C8D\u033F\u2C8F\u033F\u2C91\u033F";
    //    Thousands are the same as units, but double overbar.
        public const string CopticTenThousands = 
"\u2C93\u033F\u2C95\u033F\u2C97\u033F\u2C99\u033F\u2C9B\u033F\u2C9D\u033F\u2C9F\u033F\u2CA1\u033F\u03E5\u033F";
    // Pattern continues: one bar added per period (10^3)
        public const string DevangariDigits = "०१२३४५६७८९";
        public const string EthiopicDigits = " ፩፪፫፬፭፮፯፰፱";
        public const string EthiopicTens = " ፲፳፴፵፶፷፸፹፺";
        public const string EthiopicHundred = "፻";
        public const string EthiopicTenThousand = "፼";
        public const string GugaratiDigits = "૦૧૨૩૪૫૬૭૮૯";
        public const string GurmukhiDigits = "੦੧੨੩੪੫੬੭੮੯";
        public const string KannadaDigits = "೦೧೨೩೪೫೬೭೮೯";
        public const string KhmerDigits = "០១២៣៤៥៦៧៨៩";
        public const string LaoDigits = "໐໑໒໓໔໕໖໗໘໙";
        public const string LimbuDigits = "᥆᥇᥈᥉᥊᥋᥌᥍᥎᥏";
        public const string MalayalamDigits = "൦൧൨൩൪൫൬൭൮൯";
        public const string MongolianDigits = "᠐᠑᠒᠓᠔᠕᠖᠗᠘᠙";
        public const string BurmeseDigits = "၀၁၂၃၄၅၆၇၈၉";
        public const string OriyaDigits = "୦୧୨୩୪୫୬୭୮୯";
        public const string PersianDigits = "۰۱۲۳۴۵۶۷۸۹";   // Same as Urdu 
digits
        public const string TamilDigits = "௦௧௨௩௪௫௬௭௮௯";
        public const string TeluguDigits = "౦౧౨౩౪౫౬౭౮౯";
        public const string ThaiDigits = "๐๑๒๓๔๕๖๗๘๙";
        public const string TibetanDigits = "༠༡༢༣༤༥༦༧༨༩";
        public const string UrduDigits = "۰۱۲۳۴۵۶۷۸۹";
        public const string RomanDigits = " ⅠⅡⅢⅣⅤⅥⅦⅧⅨ";
        protected static string CurrentDigits = String.Empty;

        public static string NumberSample()
        {
            if (!String.IsNullOrEmpty(CurrentDigits))
                return CurrentDigits;
            else
                return "0123456789";
        }

        /// <summary>
        /// true iff we are changing digits to an alternate writing system
        /// </summary>
        public static bool LocalizingDigits
        {
            get { return CurrentDigits != String.Empty; }
        }

        /// <summary>
        /// Set the locale for localizing digits for display in Bibles for 
verse numbers, etc.
        /// </summary>
        /// <param name="digitPlace">string with one of the exact names of 
supported digit sets</param>
        /// <returns>the set string if successful, or "Default" 
otherwise</returns>
        public static string SetDigitLocale(string digitPlace)
        {
            switch (digitPlace)
            {
                case "Arabic":
                    CurrentDigits = ArabicDigits;
                    break;
                case "Bengali":
                    CurrentDigits = BengaliDigits;
                    break;
                case "Burmese (Myanmar)":
                    CurrentDigits = BurmeseDigits;
                    break;
                case "Chinese (Simplified)":
                    CurrentDigits = ChineseSimplifiedDigits;
                    break;
                case "Chinese (Traditional)":
                    CurrentDigits = ChineseTraditionalDigits;
                    break;
                case "Chinese (hua ma)":
                    CurrentDigits = ChineseHuaMaDigits;
                    break;
                case "Coptic":
                    CurrentDigits = CopticUnits;
                    break;
                case "Devangari":
                    CurrentDigits = DevangariDigits;
                    break;
                case "Ethiopic (Ge'ez)":
                    CurrentDigits = EthiopicDigits;
                    break;
                case "Gujarati":
                    CurrentDigits = GugaratiDigits;
                    break;
                case "Gurmukhi":
                    CurrentDigits = GurmukhiDigits;
                    break;
                case "Kannada":
                    CurrentDigits = KannadaDigits;
                    break;
                case "Khmer":
                    CurrentDigits = KhmerDigits;
                    break;
                case "Lao":
                    CurrentDigits = LaoDigits;
                    break;
                case "Limbu":
                    CurrentDigits = LimbuDigits;
                    break;
                case "Malayalam":
                    CurrentDigits = MalayalamDigits;
                    break;
                case "Mongolian":
                    CurrentDigits = MongolianDigits;
                    break;
                case "Oriya":
                    CurrentDigits = OriyaDigits;
                    break;
                case "Roman":
                    CurrentDigits = RomanDigits;
                    break;
                case "Tamil":
                    CurrentDigits = TamilDigits;
                    break;
                case "Telugu":
                    CurrentDigits = TeluguDigits;
                    break;
                case "Thai":
                    CurrentDigits = ThaiDigits;
                    break;
                case "Tibetan":
                    CurrentDigits = TibetanDigits;
                    break;
                case "Persian":
                case "Urdu":
                    CurrentDigits = UrduDigits;
                    break;
                case "Hindu-Arabic":
                case "Default":
                default:
                    CurrentDigits = String.Empty;
                    digitPlace = "Default";
                    break;
            }
            return digitPlace;
        }

        /// <summary>
        /// Replaces all numbers with appropriate numbers in the current 
writing system
        /// </summary>
        /// <param name="s">string that might include numbers</param>
        /// <returns>string with numbers localized</returns>
        public static string LocalizeDigits(string s)
        {
            return ReplaceDigits(s, CurrentDigits);
        }

        /// <summary>
        /// Some writing systems just have exact equivalents for 0 through 9 
and the same place values.
        /// Those are easy, with a simple digit-for-digit substitution. Others 
require some logic beyond
        /// that.
        /// </summary>
        /// <param name="s">String that may have digits to localize</param>
        /// <param name="newDigits">one of the supported digit strings</param>
        /// <returns></returns>
        public static string ReplaceDigits(string s, string newDigits)
        {   // TODO: implement logic for the different Chinese numeral systems, 
which require more than simple digit substitution, and which have many dialect 
and usage options.
            if ((newDigits == null) || (newDigits.Length < 10))
            {   // Nothing to do; no conversion specified
                return s;
            }
            if (newDigits == EthiopicDigits)
            {   // Gotta count differently
                return EthiopicNumerals(s);
            }
            else if (newDigits == RomanDigits)
            {   // Seriously old school
                return RomanNumerals(s);
            }
            else if (newDigits == CopticUnits)
            {   // Older than Roman Numerals, but not the same as Heirogliphic 
numbers or ancient Egyptian numbers
                return CopticNumerals(s);
            }
            else
            {   // Simple digit substitution with normal place values
                StringBuilder sb = new StringBuilder();
                int n;
                foreach (char c in s)
                {
                    n = ((int)c) - ((int)'0');
                    if ((n >= 0) && (n <= 9))
                    {
                        sb.Append(newDigits[n]);
                    }
                    else
                    {
                        sb.Append(c);
                    }
                }
                return sb.ToString();
            }
        }

        /// <summary>
        /// If the input ch is a localized digit in the string localDigits, 
return a digit in the range '0'-'9',
        /// otherwise return the input character.
        /// </summary>
        /// <param name="ch">Possible localized digit</param>
        /// <param name="localDigits">String of 0-9 in local digits.</param>
        /// <returns>Standardized digit or input character</returns>
        public static char StandardDigit(char ch)
        {
            char result = ch;
            int i = CurrentDigits.IndexOf(ch);
            if (i >= 0)
                result = (char)(i + (int)'0');
            return result;
        }

        /// <summary>
        /// Coptic numbers have no 0, but have different symbols for units, 
tens, hundreds, thousands, etc.
        /// </summary>
        /// <param name="s">Digits to convert less than or equal to 999</param>
        /// <returns>String with coptic numerals</returns>
        public static string CopticNumerals(string s)
        {
            StringBuilder sb = new StringBuilder();
                        int i, n;
            int place = 0;
            for (i = s.Length - 1; i >= 0; i--)
            {
                if (Char.IsDigit(s[i]))
                {
                    if (s[i] == '0')
                    {
                        place++;
                    }
                    else
                    {
                        n = 2 * (((int)s[i]) - ((int)'1'));
                        if (place == 0)
                        {
                            sb.Insert(0, CopticUnits[n + 1]); // Letter
                            sb.Insert(0, CopticUnits[n]);   // Combining overbar
                            place++;
                        }
                        else if (place == 1)
                        {
                            sb.Insert(0, CopticTens[n + 1]);
                            sb.Insert(0, CopticTens[n]);
                            place++;
                        }
                        else if (place == 2)
                        {
                            sb.Insert(0, CopticHundreds[n + 1]);
                            sb.Insert(0, CopticHundreds[n]);
                            place++;
                        }
                        else if (place == 3)
                        {
                            sb.Insert(0, CopticThousands[n + 1]);
                            sb.Insert(0, CopticThousands[n]);
                            place++;
                        }
                        else if (place == 4)
                        {
                            sb.Insert(0, CopticTenThousands[n + 1]);
                            sb.Insert(0, CopticTenThousands[n]);
                            place++;
                        }
                        else if (place >= 5)
                        {
                            return s;   // Give up and fail gracefully for 
numbers bigger than we designed for.
                        }
                    }
                }
                else
                {   // Non-digit: just copy it.
                    place = 0;
                    sb.Insert(0, s[i]);
                }
            }
            return sb.ToString();
        }

        /// <summary>
        /// Writing big numbers without a 0 is different.
        /// </summary>
        /// <param name="s">String with numbers to localize to Ethiopic</param>
        /// <returns>String with numbers localized to Ethiopic Ge'ez.</returns>
        public static string EthiopicNumerals(string s)
        {
            StringBuilder sb = new StringBuilder();
            int i, n;
            int place = 0;
            for (i = s.Length - 1; i >= 0; i--)
            {
                if (Char.IsDigit(s[i]))
                {
                    n = ((int)s[i]) - ((int)'0');
                    if (place == 0)
                    {
                        if (n > 0)
                        {
                            sb.Insert(0, EthiopicDigits[n]);
                        }
                        place++;
                    }
                    else if (place == 1)
                    {
                        if (n > 0)
                        {
                            sb.Insert(0, EthiopicTens[n]);
                        }
                        place++;
                    }
                    else if (place == 2)
                    {
                        sb.Insert(0, EthiopicHundred);
                        if (n > 0)
                        {
                            sb.Insert(0, EthiopicDigits[n]);
                        }
                        place++;
                    }
                    else if (place == 3)
                    {
                        if (n > 0)
                        {
                            sb.Insert(0, EthiopicTens[n]);
                        }
                        place++;
                    }
                    else if (place == 4)
                    {
                        sb.Insert(0, EthiopicTenThousand);
                        if (n > 0)
                        {
                            sb.Insert(0, EthiopicDigits[n]);
                        }
                        place++;
                    }
                    else if (place == 5)
                    {
                        if (n > 0)
                        {
                            sb.Insert(0, EthiopicDigits[n]);
                        }
                        place = 0;
                    }
                }
                else
                {
                    place = 0;
                    sb.Insert(0, s[i]);
                }
            }
            return sb.ToString();
        }

        /// <summary>
        /// Roman numerals kind of break down after 3,000 (MMM) in terms of 
common use,
        /// which seems to be limited to chapters and years in this decade. 
This function
        /// uses Unicode Roman numerals, but an alternate routine could easily 
be created
        /// that uses plain letters I, V, X, L, C, and M or i, v, x, l, c, and 
m by replacing
        /// or providing choices for the strings in RomanUnits in this method.
        /// </summary>
        /// <param name="s">string that may have numbers to "Romanize"</param>
        /// <returns>string with numbers as Roman numerals</returns>
        public static string RomanNumerals(string s)
        {
            string[,] RomanUnits = {{"","Ⅰ","Ⅱ","Ⅲ","Ⅳ","Ⅴ","Ⅵ","Ⅶ","Ⅷ","Ⅸ"},
            {"", "Ⅹ","ⅩⅩ","ⅩⅩⅩ","ⅩⅬ","Ⅼ","ⅬⅩ","ⅬⅩⅩ","ⅬⅩⅩⅩ","ⅩⅭ"},
            {"", "Ⅽ", "ⅭⅭ", "ⅭⅭⅭ", "ⅭⅮ", "Ⅾ", "ⅮⅭ", "ⅮⅭⅭ", "ⅮⅭⅭⅭ", "ⅩⅯ"},
            { "", "Ⅿ", "ⅯⅯ", "ⅯⅯⅯ", "ⅯV̅", "V̅", "V̅Ⅿ", "V̅ⅯⅯ", "V̅ⅯⅯⅯ", 
"ⅯX̅̅"}};
            StringBuilder sb = new StringBuilder();
            int i, n;
            int place = 0;
            for (i = s.Length - 1; i >= 0; i--)
            {
                if (Char.IsDigit(s[i]))
                {
                    n = ((int)s[i]) - ((int)'0');
                    sb.Insert(0, RomanUnits[place, n]);
                    place++;
                    if (place > 3)
                        place = 0;
                }
                else
                {
                    place = 0;
                    sb.Insert(0, s[i]);
                }
            }
            sb.Replace("ⅩⅡ", "Ⅻ");
            sb.Replace("ⅩⅠ", "Ⅺ");
            return sb.ToString();
        }


> On 7/20/19 1:49 PM, Michael Johnson wrote:
>> It is an important question. I have a way to handle it in other formats 
>> using Haiola. I suspect that proper handling it in SWORD will take a design 
>> change. In Haiola, the source chapter and verse numbers are always as in 
>> English. I can select display chapter and verse numbers from many options.
>>
>> Aloha,
>> Michael
>> http://mpj.us
>>
>>
>> On Jul 20, 2019, at 10:39, Cyrille <lafricai...@gmail.com 
>> <mailto:lafricai...@gmail.com>> wrote:
>>
>>> Hello,
>>> Does nobody else have a suggestion for this question? Should I open a bug 
>>> report?
>>> Is it not an important question?
>>>
>>> Best regards, Br Cyrille
>>>
>>> Le 17/07/2019 à 23:14, David Haslam a écrit :
>>>> Several other non-Roman scripts have their own digit characters 
>>>> corresponding to our 0-9.
>>>>
>>>> IMHO the possibilities for using non-Roman digits ought to be facilitated 
>>>> in the back-end.
>>>>
>>>> Even so, each front-end would then require a new UI control to select 
>>>> which script should be used to display the chapter and verse numbers.
>>>>
>>>> An alternate idea would be to specify the non-Roman digits as a ten 
>>>> character UTF-8 string in a module .conf file.
>>>>
>>>> Adapting the back-end to use this module specific configuration key might 
>>>> be much simpler. 
>>>>
>>>> Front-ends would still require adapting for the UI features that require 
>>>> chapter and verse numbers to be input or displayed or adjusted by 
>>>> controls. 
>>>>
>>>> Best regards,
>>>>
>>>> David 
>>>>
>>>> Sent from ProtonMail Mobile
>>>>
>>>>
>>>> On Wed, Jul 17, 2019 at 22:00, Cyrille <lafricai...@gmail.com 
>>>> <mailto:lafricai...@gmail.com>> wrote:
>>>>> Hello,
>>>>> I'm still working on a modern NT-Ps-Pr translation in Burmese. My friends 
>>>>> from Myanmar send me the text. But they don't use Arabic numerals; they 
>>>>> have their own numbers.
>>>>> It could be very important for them to write in their own numbers (if I 
>>>>> had to use theirs I would be lost ;) ).
>>>>> Is it possible to add this possibility to the frontend, or should this be 
>>>>> in sword directly?
>>>>> Do I need to open a new issue on the bug tracker?
>>>>>
>>>>> Example of text, in bold the chapter and verses:
>>>>>
>>>>> ၃ ၁။ ထိုနေ့ရက်တို့၌ ယောဟန်ဘတ္တိဇံသည် ရောက်လာ၍ ဂျူဒေးယပြည်၊ တောကန္တာရတွင် 
>>>>> ဟော
>>>>>
>>>>> *၂။* ပြောသည်မှာ၊- နောင်တရကြလော့၊ အကြောင်းမူကား ကောင်းကင်နိုင်ငံတော်သည် 
>>>>> ရောက်လုနီးပြီဟူ၍တည်း။-
>>>>>
>>>>> *၃။* ပရောဖက်အီဇာယဟောထားခဲ့သည့်အတိုင်း၊ ထာ၀ရ 
>>>>> ဘုရားသခင်ကြွလာတော်မူမည့်လမ်းကိုပြင်ဆင်ကြ လော့၊ ကိုယ်တော်၏ လမ်းများကို 
>>>>> ဖြောင့်တန်းစေကြ လော့ဟူ၍ တောကန္တာရ၌ ကြွေးကြော်သောသူ၏အသံသည်ကား 
>>>>> ဤသူပင်ဖြစ်သတည်း။-
>>>>>
>>>>> *၄။* ယောဟန်သည် ကုလားအုတ်မွေးဖြင့် ရက်လုပ်သောအဝတ်ကိုဝတ်ဆင်ကာ ခါး၌ 
>>>>> သားရေခါးစည်းကြိုးကိုစည်းထား၏။ သူ၏အစာသည်ကား ကျိုင်းကောင်နှင့် 
>>>>> တောပျားရည်တို့သာဖြစ်၏။-
>>>>>
>>>>> *၅။* ထိုအခါ ဂျေရုဆလင်မြို့မှစ၍ ဂျူဒေးယနယ်တစ်နယ် လုံးနှင့် 
>>>>> ဂျော်ဒန်မြစ်တစ်လျှောက်ရှိဒေသမှ လူအပေါင်း တို့သည် သူ့ထံသို့ လာကြ၏။-
>>>>>
>>>>> ၆။ ထိုသူတို့သည် မိမိတို့၏ အပြစ်များကို ထုတ်ဖော်ဝန်ခံကြလျက် 
>>>>> ဂျော်ဒန်မြစ်တွင် သူ့အားဖြင့် ဆေးကြောခြင်းကို ခံယူကြ၏။
>>>>>
>>>>>
>>>>>
>>>>
>>>>
>>>>
>>>> _______________________________________________
>>>> sword-devel mailing list: sword-devel@crosswire.org
>>>> http://www.crosswire.org/mailman/listinfo/sword-devel
>>>> Instructions to unsubscribe/change your settings at above page
>>>
>>> _______________________________________________
>>> sword-devel mailing list: sword-devel@crosswire.org 
>>> <mailto:sword-devel@crosswire.org>
>>> http://www.crosswire.org/mailman/listinfo/sword-devel
>>> Instructions to unsubscribe/change your settings at above page
>>
>> _______________________________________________
>> sword-devel mailing list: sword-devel@crosswire.org
>> http://www.crosswire.org/mailman/listinfo/sword-devel
>> Instructions to unsubscribe/change your settings at above page
>
> _______________________________________________
> sword-devel mailing list: sword-devel@crosswire.org
> http://www.crosswire.org/mailman/listinfo/sword-devel
> Instructions to unsubscribe/change your settings at above page


-- 
signature

Aloha,
*/Michael Johnson/**
PO BOX 881143 • PUKALANI HI 96788-1143*• USA
mljohnson.org <http://mljohnson.org> • Phone: +1 808-333-6921 • Skype: 
kahunapule

_______________________________________________
sword-devel mailing list: sword-devel@crosswire.org
http://www.crosswire.org/mailman/listinfo/sword-devel
Instructions to unsubscribe/change your settings at above page

Reply via email to