From 4460fa7eadc7b0ee9ed35fe84fd18547147e4a49 Mon Sep 17 00:00:00 2001
From: Konstantin Ritt <ritt.ks@gmail.com>
Date: Mon, 24 Dec 2012 16:20:17 +0200
Subject: [PATCH] Implement hb_language_get_default_for_script()

with a fallback to hb_language_get_default() if the language cannot
be determined for a given script.

Use it in hb_buffer_guess_segment_properties() instead of
hb_language_get_default() when the language is not set.
---
 src/hb-buffer.cc |   5 +-
 src/hb-common.cc | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/hb-common.h  |  37 +++++++-------
 3 files changed, 171 insertions(+), 20 deletions(-)

diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc
index 4b644e4..bcf427a 100644
--- a/src/hb-buffer.cc
+++ b/src/hb-buffer.cc
@@ -514,10 +514,9 @@ hb_buffer_t::guess_segment_properties (void)
     props.direction = hb_script_get_horizontal_direction (props.script);
   }
 
-  /* If language is not set, use default language from locale */
+  /* If language is not set, use the default language for the script */
   if (props.language == HB_LANGUAGE_INVALID) {
-    /* TODO get_default_for_script? using $LANGUAGE */
-    props.language = hb_language_get_default ();
+    props.language = hb_language_get_default_for_script (props.script);
   }
 }
 
diff --git a/src/hb-common.cc b/src/hb-common.cc
index 9422555..1046db5 100644
--- a/src/hb-common.cc
+++ b/src/hb-common.cc
@@ -258,6 +258,155 @@ hb_language_get_default (void)
   return default_language;
 }
 
+typedef struct {
+  char language[6];	/* NUL-terminated string given to hb_language_from_string(); longest entries ("zh-tw", "zh-cn") need 5 chars + NUL */
+  hb_script_t script;	/* script for which `language` is the default */
+} ScriptLang;
+
+/* Script -> default-language table; extracted from CLDR 22.1, and adjusted manually.
+ * Scripts with no entry here (including the commented-out ones) make
+ * hb_language_get_default_for_script() fall back to hb_language_get_default(). */
+
+static const ScriptLang default_languages[] = {
+/*{ "",	HB_SCRIPT_COMMON }, */
+  { "ar",	HB_SCRIPT_ARABIC },
+  { "hy",	HB_SCRIPT_ARMENIAN },
+  { "bn",	HB_SCRIPT_BENGALI },
+  { "zh-tw",	HB_SCRIPT_BOPOMOFO },
+  { "iu",	HB_SCRIPT_CANADIAN_ABORIGINAL },
+  { "chr",	HB_SCRIPT_CHEROKEE },
+  { "cop",	HB_SCRIPT_COPTIC },
+  { "ru",	HB_SCRIPT_CYRILLIC },
+  { "hi",	HB_SCRIPT_DEVANAGARI },
+  { "ka",	HB_SCRIPT_GEORGIAN },
+  { "el",	HB_SCRIPT_GREEK },
+  { "gu",	HB_SCRIPT_GUJARATI },
+  { "pa",	HB_SCRIPT_GURMUKHI },
+  { "ko",	HB_SCRIPT_HANGUL },
+  { "zh-cn",	HB_SCRIPT_HAN },
+  { "he",	HB_SCRIPT_HEBREW },
+  { "ja",	HB_SCRIPT_HIRAGANA },
+/*{ "",	HB_SCRIPT_INHERITED }, */
+  { "kn",	HB_SCRIPT_KANNADA },
+  { "ja",	HB_SCRIPT_KATAKANA },
+  { "lo",	HB_SCRIPT_LAO },
+  { "en",	HB_SCRIPT_LATIN },
+  { "ml",	HB_SCRIPT_MALAYALAM },
+  { "mn",	HB_SCRIPT_MONGOLIAN },
+  { "sga",	HB_SCRIPT_OGHAM },
+  { "or",	HB_SCRIPT_ORIYA },
+  { "non",	HB_SCRIPT_RUNIC },
+  { "syr",	HB_SCRIPT_SYRIAC },
+  { "ta",	HB_SCRIPT_TAMIL },
+  { "te",	HB_SCRIPT_TELUGU },
+  { "th",	HB_SCRIPT_THAI },
+  { "ii",	HB_SCRIPT_YI },
+
+  /* Unicode-2.0 additions */
+  { "bo",	HB_SCRIPT_TIBETAN },
+
+  /* Unicode-3.0 additions */
+  { "am",	HB_SCRIPT_ETHIOPIC },
+  { "km",	HB_SCRIPT_KHMER },
+  { "my",	HB_SCRIPT_MYANMAR },
+  { "si",	HB_SCRIPT_SINHALA },
+  { "dv",	HB_SCRIPT_THAANA },
+
+  /* Unicode-3.1 additions */
+  { "en",	HB_SCRIPT_DESERET },
+  { "got",	HB_SCRIPT_GOTHIC },
+  { "ett",	HB_SCRIPT_OLD_ITALIC },
+
+  /* Unicode-3.2 additions */
+  { "bku",	HB_SCRIPT_BUHID },
+  { "hnn",	HB_SCRIPT_HANUNOO },
+  { "fil",	HB_SCRIPT_TAGALOG },
+  { "tbw",	HB_SCRIPT_TAGBANWA },
+
+  /* Unicode-4.0 additions */
+/*{ "",	HB_SCRIPT_BRAILLE }, */
+  { "grc",	HB_SCRIPT_CYPRIOT },
+  { "lif",	HB_SCRIPT_LIMBU },
+  { "grc",	HB_SCRIPT_LINEAR_B },
+  { "so",	HB_SCRIPT_OSMANYA },
+  { "en",	HB_SCRIPT_SHAVIAN },
+  { "tdd",	HB_SCRIPT_TAI_LE },
+  { "uga",	HB_SCRIPT_UGARITIC },
+
+  /* Unicode-4.1 additions */
+  { "bug",	HB_SCRIPT_BUGINESE },
+  { "cu",	HB_SCRIPT_GLAGOLITIC },
+  { "pra",	HB_SCRIPT_KHAROSHTHI },
+  { "khb",	HB_SCRIPT_NEW_TAI_LUE },
+  { "peo",	HB_SCRIPT_OLD_PERSIAN },
+  { "syl",	HB_SCRIPT_SYLOTI_NAGRI },
+  { "shi",	HB_SCRIPT_TIFINAGH },
+
+  /* Unicode-5.0 additions */
+  { "ban",	HB_SCRIPT_BALINESE },
+  { "akk",	HB_SCRIPT_CUNEIFORM },
+  { "man",	HB_SCRIPT_NKO },
+  { "lzh",	HB_SCRIPT_PHAGS_PA },
+  { "phn",	HB_SCRIPT_PHOENICIAN },
+/*{ "",	HB_SCRIPT_UNKNOWN }, */
+
+  /* Unicode-5.1 additions */
+  { "xcr",	HB_SCRIPT_CARIAN },
+  { "cjm",	HB_SCRIPT_CHAM },
+  { "eky",	HB_SCRIPT_KAYAH_LI },
+  { "lep",	HB_SCRIPT_LEPCHA },
+  { "xlc",	HB_SCRIPT_LYCIAN },
+  { "xld",	HB_SCRIPT_LYDIAN },
+  { "sat",	HB_SCRIPT_OL_CHIKI },
+  { "rej",	HB_SCRIPT_REJANG },
+  { "saz",	HB_SCRIPT_SAURASHTRA },
+  { "su",	HB_SCRIPT_SUNDANESE },
+  { "vai",	HB_SCRIPT_VAI },
+
+  /* Unicode-5.2 additions */
+  { "ae",	HB_SCRIPT_AVESTAN },
+  { "bax",	HB_SCRIPT_BAMUM },
+  { "egy",	HB_SCRIPT_EGYPTIAN_HIEROGLYPHS },
+  { "arc",	HB_SCRIPT_IMPERIAL_ARAMAIC },
+  { "pal",	HB_SCRIPT_INSCRIPTIONAL_PAHLAVI },
+  { "xpr",	HB_SCRIPT_INSCRIPTIONAL_PARTHIAN },
+  { "jv",	HB_SCRIPT_JAVANESE },
+  { "bh",	HB_SCRIPT_KAITHI },
+  { "lis",	HB_SCRIPT_LISU },
+  { "mni",	HB_SCRIPT_MEETEI_MAYEK },
+  { "xsa",	HB_SCRIPT_OLD_SOUTH_ARABIAN },
+  { "otk",	HB_SCRIPT_OLD_TURKIC },
+  { "smp",	HB_SCRIPT_SAMARITAN },
+  { "nod",	HB_SCRIPT_TAI_THAM },
+  { "blt",	HB_SCRIPT_TAI_VIET },
+
+  /* Unicode-6.0 additions */
+  { "bbc",	HB_SCRIPT_BATAK },
+  { "pra",	HB_SCRIPT_BRAHMI },
+  { "myz",	HB_SCRIPT_MANDAIC },
+
+  /* Unicode-6.1 additions */
+  { "ccp",	HB_SCRIPT_CHAKMA },
+  { "xmr",	HB_SCRIPT_MEROITIC_CURSIVE },
+  { "xmr",	HB_SCRIPT_MEROITIC_HIEROGLYPHS },
+  { "hmd",	HB_SCRIPT_MIAO },
+  { "sa",	HB_SCRIPT_SHARADA },
+  { "srb",	HB_SCRIPT_SORA_SOMPENG },
+  { "doi",	HB_SCRIPT_TAKRI }
+};
+
+hb_language_t
+hb_language_get_default_for_script (hb_script_t script)
+{
+  unsigned int i;
+
+  for (i = 0; i < ARRAY_LENGTH (default_languages); i++) /* linear scan; first entry matching the script wins */
+    if (default_languages[i].script == script)
+      return hb_language_from_string (default_languages[i].language, -1); /* len=-1: string is NUL-terminated */
+
+  return hb_language_get_default (); /* script has no table entry: fall back to the default language */
+}
+
 
 /* hb_script_t */
 
diff --git a/src/hb-common.h b/src/hb-common.h
index cc221d3..eb29e08 100644
--- a/src/hb-common.h
+++ b/src/hb-common.h
@@ -129,23 +129,6 @@ hb_direction_to_string (hb_direction_t direction);
 #define HB_DIRECTION_REVERSE(dir)	((hb_direction_t) (((unsigned int) (dir)) ^ 1)) /* Direction must be valid */
 
 
-/* hb_language_t */
-
-typedef struct hb_language_impl_t *hb_language_t;
-
-/* len=-1 means str is NUL-terminated */
-hb_language_t
-hb_language_from_string (const char *str, int len);
-
-const char *
-hb_language_to_string (hb_language_t language);
-
-#define HB_LANGUAGE_INVALID ((hb_language_t) NULL)
-
-hb_language_t
-hb_language_get_default (void);
-
-
 /* hb_script_t */
 
 /* http://unicode.org/iso15924/ */
@@ -302,6 +285,26 @@ hb_direction_t
 hb_script_get_horizontal_direction (hb_script_t script);
 
 
+/* hb_language_t */
+
+typedef struct hb_language_impl_t *hb_language_t;
+
+/* len=-1 means str is NUL-terminated */
+hb_language_t
+hb_language_from_string (const char *str, int len);
+
+const char *
+hb_language_to_string (hb_language_t language);
+
+#define HB_LANGUAGE_INVALID ((hb_language_t) NULL)
+
+hb_language_t
+hb_language_get_default (void);
+
+hb_language_t
+hb_language_get_default_for_script (hb_script_t script); /* returns hb_language_get_default() when the script has no default language */
+
+
 /* User data */
 
 typedef struct hb_user_data_key_t {
-- 
1.8.0.msysgit.0

