hyphen/hyphen-2.7.1-2.8.2.patch | 325 -------------------------------- hyphen/hyphen-2.7.1-2.8.3.patch | 399 ++++++++++++++++++++++++++++++++++++++++ hyphen/makefile.mk | 2 3 files changed, 400 insertions(+), 326 deletions(-)
New commits: commit e894342d6bdbe7cf6b3f93c6e66d6988e3fda907 Author: Caolán McNamara <caol...@redhat.com> Date: Mon Oct 24 15:38:01 2011 +0100 fix up hyphen 2.8.2/2.8.3 conflicts diff --git a/hyphen/hyphen-2.7.1-2.8.2.patch b/hyphen/hyphen-2.7.1-2.8.2.patch deleted file mode 100644 index 912fba7..0000000 --- a/hyphen/hyphen-2.7.1-2.8.2.patch +++ /dev/null @@ -1,325 +0,0 @@ ---- misc/build/hyphen-2.7.1/hyphen.c.old 2011-10-07 15:51:25.883686906 +0200 -+++ misc/build/hyphen-2.7.1/hyphen.c 2011-10-07 15:51:59.363686900 +0200 -@@ -242,99 +242,45 @@ - } - #endif - --HyphenDict * --hnj_hyphen_load (const char *fn) --{ -- HyphenDict *dict[2]; -- HashTab *hashtab; -- FILE *f; -- char buf[MAX_CHARS]; -+void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) { -+ int i, j; - char word[MAX_CHARS]; - char pattern[MAX_CHARS]; - char * repl; - signed char replindex; - signed char replcut; -- int state_num = 0, last_state; -- int i, j, k; -+ int state_num = 0; -+ int last_state; - char ch; - int found; -- HashEntry *e; -- int nextlevel = 0; -- -- f = fopen (fn, "r"); -- if (f == NULL) -- return NULL; - --// loading one or two dictionaries (separated by NEXTLEVEL keyword) --for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { -- hashtab = hnj_hash_new (); --#ifdef VERBOSE -- global = hashtab; --#endif -- hnj_hash_insert (hashtab, "", 0); -- dict[k] = hnj_malloc (sizeof(HyphenDict)); -- dict[k]->num_states = 1; -- dict[k]->states = hnj_malloc (sizeof(HyphenState)); -- dict[k]->states[0].match = NULL; -- dict[k]->states[0].repl = NULL; -- dict[k]->states[0].fallback_state = -1; -- dict[k]->states[0].num_trans = 0; -- dict[k]->states[0].trans = NULL; -- dict[k]->nextlevel = NULL; -- dict[k]->lhmin = 0; -- dict[k]->rhmin = 0; -- dict[k]->clhmin = 0; -- dict[k]->crhmin = 0; -- dict[k]->nohyphen = NULL; -- dict[k]->nohyphenl = 0; -- -- /* read in character set info */ -- if (k == 0) { -- for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; -- fgets(dict[k]->cset, sizeof(dict[k]->cset),f); -- for (i=0;i<MAX_NAME;i++) -- if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) -- dict[k]->cset[i] = 0; -- dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0); -- } else { -- strcpy(dict[k]->cset, dict[0]->cset); -- dict[k]->utf8 = dict[0]->utf8; -- } -- -- while (fgets (buf, sizeof(buf), f) != NULL) -- { -- if (buf[0] != '%') -- { -- if (strncmp(buf, "NEXTLEVEL", 9) == 0) { -- nextlevel = 1; -- break; -- } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) { -- dict[k]->lhmin = atoi(buf + 13); -- continue; -+ if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) { -+ dict->lhmin = atoi(buf + 13); -+ return; - } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) { -- dict[k]->rhmin = atoi(buf + 14); -- continue; -+ dict->rhmin = atoi(buf + 14); -+ return; - } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) { -- dict[k]->clhmin = atoi(buf + 21); -- continue; -+ dict->clhmin = atoi(buf + 21); -+ return; - } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) { -- dict[k]->crhmin = atoi(buf + 22); -- continue; -+ dict->crhmin = atoi(buf + 22); -+ return; - } else if (strncmp(buf, "NOHYPHEN", 8) == 0) { - char * space = buf + 8; - while (*space != '\0' && (*space == ' ' || *space == '\t')) space++; -- if (*buf != '\0') dict[k]->nohyphen = hnj_strdup(space); -- if (dict[k]->nohyphen) { -- char * nhe = dict[k]->nohyphen + strlen(dict[k]->nohyphen) - 1; -+ if (*buf != '\0') dict->nohyphen = hnj_strdup(space); -+ if (dict->nohyphen) { -+ char * nhe = dict->nohyphen + strlen(dict->nohyphen) - 1; - *nhe = 0; -- for (nhe = nhe - 1; nhe > dict[k]->nohyphen; nhe--) { -+ for (nhe = nhe - 1; nhe > dict->nohyphen; nhe--) { - if (*nhe == ',') { -- dict[k]->nohyphenl++; -+ dict->nohyphenl++; - *nhe = 0; - } - } - } -- continue; -+ return; - } - j = 0; - pattern[j] = '0'; -@@ -379,7 +325,7 @@ - } else { - if (*word == '.') i++; - /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */ -- if (dict[k]->utf8) { -+ if (dict->utf8) { - int pu = -1; /* unicode character position */ - int ps = -1; /* unicode start position (original replindex) */ - int pc = (*word == '.') ? 1: 0; /* 8-bit character position */ -@@ -403,14 +349,14 @@ - printf ("word %s pattern %s, j = %d repl: %s\n", word, pattern + i, j, repl); - #endif - found = hnj_hash_lookup (hashtab, word); -- state_num = hnj_get_state (dict[k], hashtab, word); -- dict[k]->states[state_num].match = hnj_strdup (pattern + i); -- dict[k]->states[state_num].repl = repl; -- dict[k]->states[state_num].replindex = replindex; -+ state_num = hnj_get_state (dict, hashtab, word); -+ dict->states[state_num].match = hnj_strdup (pattern + i); -+ dict->states[state_num].repl = repl; -+ dict->states[state_num].replindex = replindex; - if (!replcut) { -- dict[k]->states[state_num].replcut = (signed char) strlen(word); -+ dict->states[state_num].replcut = (signed char) strlen(word); - } else { -- dict[k]->states[state_num].replcut = replcut; -+ dict->states[state_num].replcut = replcut; - } - - /* now, put in the prefix transitions */ -@@ -420,11 +366,81 @@ - ch = word[j - 1]; - word[j - 1] = '\0'; - found = hnj_hash_lookup (hashtab, word); -- state_num = hnj_get_state (dict[k], hashtab, word); -- hnj_add_trans (dict[k], state_num, last_state, ch); -+ state_num = hnj_get_state (dict, hashtab, word); -+ hnj_add_trans (dict, state_num, last_state, ch); - } -- } -+} -+ -+HyphenDict * -+hnj_hyphen_load (const char *fn) -+{ -+ HyphenDict *dict[2]; -+ HashTab *hashtab; -+ FILE *f; -+ char buf[MAX_CHARS]; -+ int nextlevel = 0; -+ int i, j, k; -+ HashEntry *e; -+ int state_num = 0; -+ -+ f = fopen (fn, "r"); -+ if (f == NULL) -+ return NULL; -+ -+// loading one or two dictionaries (separated by NEXTLEVEL keyword) -+for (k = 0; k < 2; k++) { -+ hashtab = hnj_hash_new (); -+#ifdef VERBOSE -+ global = hashtab; -+#endif -+ hnj_hash_insert (hashtab, "", 0); -+ dict[k] = hnj_malloc (sizeof(HyphenDict)); -+ dict[k]->num_states = 1; -+ dict[k]->states = hnj_malloc (sizeof(HyphenState)); -+ dict[k]->states[0].match = NULL; -+ dict[k]->states[0].repl = NULL; -+ dict[k]->states[0].fallback_state = -1; -+ dict[k]->states[0].num_trans = 0; -+ dict[k]->states[0].trans = NULL; -+ dict[k]->nextlevel = NULL; -+ dict[k]->lhmin = 0; -+ dict[k]->rhmin = 0; -+ dict[k]->clhmin = 0; -+ dict[k]->crhmin = 0; -+ dict[k]->nohyphen = NULL; -+ dict[k]->nohyphenl = 0; -+ -+ /* read in character set info */ -+ if (k == 0) { -+ for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; -+ fgets(dict[k]->cset, sizeof(dict[k]->cset),f); -+ for (i=0;i<MAX_NAME;i++) -+ if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) -+ dict[k]->cset[i] = 0; -+ dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0); -+ } else { -+ strcpy(dict[k]->cset, dict[0]->cset); -+ dict[k]->utf8 = dict[0]->utf8; -+ } -+ -+ if (k == 0 || nextlevel) { -+ while (fgets (buf, sizeof(buf), f) != NULL) { -+ if (strncmp(buf, "NEXTLEVEL", 9) == 0) { -+ nextlevel = 1; -+ break; -+ } else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab); - } -+ } else if (k == 1) { -+ /* default first level: hyphen and ASCII apostrophe */ -+ if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN -,'\n", dict[k], hashtab); -+ else hnj_hyphen_load_line("NOHYPHEN -,',\xe2\x80\x93,\xe2\x80\x99\n", dict[k], hashtab); -+ hnj_hyphen_load_line("1-1\n", dict[k], hashtab); /* hyphen */ -+ hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */ -+ if (dict[0]->utf8) { -+ hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */ -+ hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */ -+ } -+ } - - /* Could do unioning of matches here (instead of the preprocessor script). - If we did, the pseudocode would look something like this: -@@ -476,7 +492,15 @@ - state_num = 0; - } - fclose(f); -- if (k == 2) dict[0]->nextlevel = dict[1]; -+ if (nextlevel) dict[0]->nextlevel = dict[1]; -+ else { -+ dict[1] -> nextlevel = dict[0]; -+ dict[1]->lhmin = dict[0]->lhmin; -+ dict[1]->rhmin = dict[0]->rhmin; -+ dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 2); -+ dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 2); -+ return dict[1]; -+ } - return dict[0]; - } - -@@ -527,8 +551,13 @@ - j = 0; - prep_word[j++] = '.'; - -- for (i = 0; i < word_size; i++) -+ for (i = 0; i < word_size; i++) { -+ if (word[i] <= '9' && word[i] >= '0') { -+ prep_word[j++] = '.'; -+ } else { - prep_word[j++] = word[i]; -+ } -+ } - - prep_word[j++] = '.'; - prep_word[j] = '\0'; -@@ -670,6 +699,9 @@ - i += hnj_ligature(word[2]); - } - -+ // ignore numbers -+ for (j = 0; word[j] <= '9' && word[j] >= '0'; j++) i--; -+ - for (j = 0; i < lhmin && word[j] != '\0'; i++) do { - // check length of the non-standard part - if (*rep && *pos && *cut && (*rep)[j]) { -@@ -696,9 +728,13 @@ - int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens, - char *** rep, int ** pos, int ** cut, int rhmin) - { -- int i; -- int j = word_size - 2; -- for (i = 1; i < rhmin && j > 0; j--) { -+ int i = 1; -+ int j; -+ -+ // ignore numbers -+ for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--; -+ -+ for (j = word_size - 2; i < rhmin && j > 0; j--) { - // check length of the non-standard part - if (*rep && *pos && *cut && (*rep)[j]) { - char * rh = strchr((*rep)[j], '='); -@@ -756,8 +792,15 @@ - j = 0; - prep_word[j++] = '.'; - -- for (i = 0; i < word_size; i++) -+ for (i = 0; i < word_size; i++) { -+ if (word[i] <= '9' && word[i] >= '0') { -+ prep_word[j++] = '.'; -+ } else { - prep_word[j++] = word[i]; -+ } -+ } -+ -+ - - prep_word[j++] = '.'; - prep_word[j] = '\0'; -@@ -1093,8 +1136,10 @@ - char *hyphword, char *** rep, int ** pos, int ** cut, - int lhmin, int rhmin, int clhmin, int crhmin) - { -- lhmin = (lhmin > 0 ? lhmin : dict->lhmin); -- rhmin = (rhmin > 0 ? rhmin : dict->rhmin); -+ lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin; -+ rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin; -+ clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin; -+ crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin; - hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, - clhmin, crhmin, 1, 1); - hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, diff --git a/hyphen/makefile.mk b/hyphen/makefile.mk index 06cfc58..1cb1a10 100644 --- a/hyphen/makefile.mk +++ b/hyphen/makefile.mk @@ -44,7 +44,7 @@ ADDITIONAL_FILES += makefile.mk PATCH_FILES= \ hyphen-2.7.1.patch \ hyphen-2.7.1-read-charset.patch \ - hyphen-2.7.1-2.8.2.patch + hyphen-2.7.1-2.8.3.patch .IF "$(GUI)"=="UNX" CONFIGURE_DIR=$(BUILD_DIR) commit 161a463b814d18ec74c3829f9ae35f635725356c Author: László Németh <nem...@numbertext.org> Date: Mon Oct 10 16:22:13 2011 +0200 Add fixes of Hyphen 2.8.3 diff --git a/hyphen/hyphen-2.7.1-2.8.3.patch b/hyphen/hyphen-2.7.1-2.8.3.patch new file mode 100644 index 0000000..047ce13 --- /dev/null +++ b/hyphen/hyphen-2.7.1-2.8.3.patch @@ -0,0 +1,399 @@ +--- misc/build/hyphen-2.7.1/hyphen.c.old 2011-10-10 15:58:33.317260138 +0200 ++++ misc/build/hyphen-2.7.1/hyphen.c 2011-10-10 15:58:55.221260136 +0200 +@@ -226,115 +226,61 @@ + } + + #ifdef VERBOSE +-HashTab *global; ++HashTab *global[1]; + + static char * +-get_state_str (int state) ++get_state_str (int state, int level) + { + int i; + HashEntry *e; + + for (i = 0; i < HASH_SIZE; i++) +- for (e = global->entries[i]; e; e = e->next) ++ for (e = global[level]->entries[i]; e; e = e->next) + if (e->val == state) + return e->key; + return NULL; + } + #endif + +-HyphenDict * +-hnj_hyphen_load (const char *fn) +-{ +- HyphenDict *dict[2]; +- HashTab *hashtab; +- FILE *f; +- char buf[MAX_CHARS]; ++void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) { ++ int i, j; + char word[MAX_CHARS]; + char pattern[MAX_CHARS]; + char * repl; + signed char replindex; + signed char replcut; +- int state_num = 0, last_state; +- int i, j, k; ++ int state_num = 0; ++ int last_state; + char ch; + int found; +- HashEntry *e; +- int nextlevel = 0; +- +- f = fopen (fn, "r"); +- if (f == NULL) +- return NULL; +- +-// loading one or two dictionaries (separated by NEXTLEVEL keyword) +-for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { +- hashtab = hnj_hash_new (); +-#ifdef VERBOSE +- global = hashtab; +-#endif +- hnj_hash_insert (hashtab, "", 0); +- dict[k] = hnj_malloc (sizeof(HyphenDict)); +- dict[k]->num_states = 1; +- dict[k]->states = hnj_malloc (sizeof(HyphenState)); +- dict[k]->states[0].match = NULL; +- dict[k]->states[0].repl = NULL; +- dict[k]->states[0].fallback_state = -1; +- dict[k]->states[0].num_trans = 0; +- dict[k]->states[0].trans = NULL; +- dict[k]->nextlevel = NULL; +- dict[k]->lhmin = 0; +- dict[k]->rhmin = 0; +- dict[k]->clhmin = 0; +- dict[k]->crhmin = 0; +- dict[k]->nohyphen = NULL; +- dict[k]->nohyphenl = 0; +- +- /* read in character set info */ +- if (k == 0) { +- for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; +- fgets(dict[k]->cset, sizeof(dict[k]->cset),f); +- for (i=0;i<MAX_NAME;i++) +- if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) +- dict[k]->cset[i] = 0; +- dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0); +- } else { +- strcpy(dict[k]->cset, dict[0]->cset); +- dict[k]->utf8 = dict[0]->utf8; +- } + +- while (fgets (buf, sizeof(buf), f) != NULL) +- { +- if (buf[0] != '%') +- { +- if (strncmp(buf, "NEXTLEVEL", 9) == 0) { +- nextlevel = 1; +- break; +- } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) { +- dict[k]->lhmin = atoi(buf + 13); +- continue; ++ if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) { ++ dict->lhmin = atoi(buf + 13); ++ return; + } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) { +- dict[k]->rhmin = atoi(buf + 14); +- continue; ++ dict->rhmin = atoi(buf + 14); ++ return; + } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) { +- dict[k]->clhmin = atoi(buf + 21); +- continue; ++ dict->clhmin = atoi(buf + 21); ++ return; + } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) { +- dict[k]->crhmin = atoi(buf + 22); +- continue; ++ dict->crhmin = atoi(buf + 22); ++ return; + } else if (strncmp(buf, "NOHYPHEN", 8) == 0) { + char * space = buf + 8; + while (*space != '\0' && (*space == ' ' || *space == '\t')) space++; +- if (*buf != '\0') dict[k]->nohyphen = hnj_strdup(space); +- if (dict[k]->nohyphen) { +- char * nhe = dict[k]->nohyphen + strlen(dict[k]->nohyphen) - 1; ++ if (*buf != '\0') dict->nohyphen = hnj_strdup(space); ++ if (dict->nohyphen) { ++ char * nhe = dict->nohyphen + strlen(dict->nohyphen) - 1; + *nhe = 0; +- for (nhe = nhe - 1; nhe > dict[k]->nohyphen; nhe--) { ++ for (nhe = nhe - 1; nhe > dict->nohyphen; nhe--) { + if (*nhe == ',') { +- dict[k]->nohyphenl++; ++ dict->nohyphenl++; + *nhe = 0; + } + } + } +- continue; ++ return; + } + j = 0; + pattern[j] = '0'; +@@ -379,7 +325,7 @@ + } else { + if (*word == '.') i++; + /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */ +- if (dict[k]->utf8) { ++ if (dict->utf8) { + int pu = -1; /* unicode character position */ + int ps = -1; /* unicode start position (original replindex) */ + int pc = (*word == '.') ? 1: 0; /* 8-bit character position */ +@@ -403,14 +349,14 @@ + printf ("word %s pattern %s, j = %d repl: %s\n", word, pattern + i, j, repl); + #endif + found = hnj_hash_lookup (hashtab, word); +- state_num = hnj_get_state (dict[k], hashtab, word); +- dict[k]->states[state_num].match = hnj_strdup (pattern + i); +- dict[k]->states[state_num].repl = repl; +- dict[k]->states[state_num].replindex = replindex; ++ state_num = hnj_get_state (dict, hashtab, word); ++ dict->states[state_num].match = hnj_strdup (pattern + i); ++ dict->states[state_num].repl = repl; ++ dict->states[state_num].replindex = replindex; + if (!replcut) { +- dict[k]->states[state_num].replcut = (signed char) strlen(word); ++ dict->states[state_num].replcut = (signed char) strlen(word); + } else { +- dict[k]->states[state_num].replcut = replcut; ++ dict->states[state_num].replcut = replcut; + } + + /* now, put in the prefix transitions */ +@@ -420,11 +366,82 @@ + ch = word[j - 1]; + word[j - 1] = '\0'; + found = hnj_hash_lookup (hashtab, word); +- state_num = hnj_get_state (dict[k], hashtab, word); +- hnj_add_trans (dict[k], state_num, last_state, ch); ++ state_num = hnj_get_state (dict, hashtab, word); ++ hnj_add_trans (dict, state_num, last_state, ch); + } +- } ++} ++ ++HyphenDict * ++hnj_hyphen_load (const char *fn) ++{ ++ HyphenDict *dict[2]; ++ HashTab *hashtab; ++ FILE *f; ++ char buf[MAX_CHARS]; ++ int nextlevel = 0; ++ int i, j, k; ++ HashEntry *e; ++ int state_num = 0; ++ ++ f = fopen (fn, "r"); ++ if (f == NULL) ++ return NULL; ++ ++// loading one or two dictionaries (separated by NEXTLEVEL keyword) ++for (k = 0; k < 2; k++) { ++ hashtab = hnj_hash_new (); ++#ifdef VERBOSE ++ global[k] = hashtab; ++#endif ++ hnj_hash_insert (hashtab, "", 0); ++ dict[k] = hnj_malloc (sizeof(HyphenDict)); ++ dict[k]->num_states = 1; ++ dict[k]->states = hnj_malloc (sizeof(HyphenState)); ++ dict[k]->states[0].match = NULL; ++ dict[k]->states[0].repl = NULL; ++ dict[k]->states[0].fallback_state = -1; ++ dict[k]->states[0].num_trans = 0; ++ dict[k]->states[0].trans = NULL; ++ dict[k]->nextlevel = NULL; ++ dict[k]->lhmin = 0; ++ dict[k]->rhmin = 0; ++ dict[k]->clhmin = 0; ++ dict[k]->crhmin = 0; ++ dict[k]->nohyphen = NULL; ++ dict[k]->nohyphenl = 0; ++ ++ /* read in character set info */ ++ if (k == 0) { ++ for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; ++ fgets(dict[k]->cset, sizeof(dict[k]->cset),f); ++ for (i=0;i<MAX_NAME;i++) ++ if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) ++ dict[k]->cset[i] = 0; ++ dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0); ++ } else { ++ strcpy(dict[k]->cset, dict[0]->cset); ++ dict[k]->utf8 = dict[0]->utf8; ++ } ++ ++ if (k == 0 || nextlevel) { ++ while (fgets (buf, sizeof(buf), f) != NULL) { ++ if (strncmp(buf, "NEXTLEVEL", 9) == 0) { ++ nextlevel = 1; ++ break; ++ } else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab); ++ } ++ } else if (k == 1) { ++ /* default first level: hyphen and ASCII apostrophe */ ++ if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN '\n", dict[k], hashtab); ++ else hnj_hyphen_load_line("NOHYPHEN ',\xe2\x80\x93,\xe2\x80\x99\n", dict[k], hashtab); ++ strcpy(buf, "1-1/=,1,1\n"); // buf rewritten by hnj_hyphen_load here ++ hnj_hyphen_load_line(buf, dict[k], hashtab); /* remove hyphen */ ++ hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */ ++ if (dict[0]->utf8) { ++ hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */ ++ hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */ + } ++ } + + /* Could do unioning of matches here (instead of the preprocessor script). + If we did, the pseudocode would look something like this: +@@ -476,7 +493,20 @@ + state_num = 0; + } + fclose(f); +- if (k == 2) dict[0]->nextlevel = dict[1]; ++ if (nextlevel) dict[0]->nextlevel = dict[1]; ++ else { ++ dict[1] -> nextlevel = dict[0]; ++ dict[1]->lhmin = dict[0]->lhmin; ++ dict[1]->rhmin = dict[0]->rhmin; ++ dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 3); ++ dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 3); ++#ifdef VERBOSE ++ HashTab *r = global[0]; ++ global[0] = global[1]; ++ global[1] = r; ++#endif ++ return dict[1]; ++ } + return dict[0]; + } + +@@ -527,8 +557,13 @@ + j = 0; + prep_word[j++] = '.'; + +- for (i = 0; i < word_size; i++) ++ for (i = 0; i < word_size; i++) { ++ if (word[i] <= '9' && word[i] >= '0') { ++ prep_word[j++] = '.'; ++ } else { + prep_word[j++] = word[i]; ++ } ++ } + + prep_word[j++] = '.'; + prep_word[j] = '\0'; +@@ -557,7 +592,7 @@ + + #ifdef VERBOSE + char *state_str; +- state_str = get_state_str (state); ++ state_str = get_state_str (state, 0); + + for (k = 0; k < i - strlen (state_str); k++) + putchar (' '); +@@ -670,6 +705,9 @@ + i += hnj_ligature(word[2]); + } + ++ // ignore numbers ++ for (j = 0; word[j] <= '9' && word[j] >= '0'; j++) i--; ++ + for (j = 0; i < lhmin && word[j] != '\0'; i++) do { + // check length of the non-standard part + if (*rep && *pos && *cut && (*rep)[j]) { +@@ -696,9 +734,13 @@ + int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens, + char *** rep, int ** pos, int ** cut, int rhmin) + { +- int i; +- int j = word_size - 2; +- for (i = 1; i < rhmin && j > 0; j--) { ++ int i = 1; ++ int j; ++ ++ // ignore numbers ++ for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--; ++ ++ for (j = word_size - 2; i < rhmin && j > 0; j--) { + // check length of the non-standard part + if (*rep && *pos && *cut && (*rep)[j]) { + char * rh = strchr((*rep)[j], '='); +@@ -756,8 +798,15 @@ + j = 0; + prep_word[j++] = '.'; + +- for (i = 0; i < word_size; i++) ++ for (i = 0; i < word_size; i++) { ++ if (word[i] <= '9' && word[i] >= '0') { ++ prep_word[j++] = '.'; ++ } else { + prep_word[j++] = word[i]; ++ } ++ } ++ ++ + + prep_word[j++] = '.'; + prep_word[j] = '\0'; +@@ -786,7 +835,7 @@ + + #ifdef VERBOSE + char *state_str; +- state_str = get_state_str (state); ++ state_str = get_state_str (state, 1); + + for (k = 0; k < i - strlen (state_str); k++) + putchar (' '); +@@ -1033,6 +1082,9 @@ + } + } + hyphens[j + 1] = '\0'; ++#ifdef VERBOSE ++ printf ("nums: %s\n", hyphens); ++#endif + return 0; + } + +@@ -1074,8 +1126,8 @@ + for (nhi = 0; nhi <= dict->nohyphenl; nhi++) { + char * nhy = (char *) strstr(word, nh); + while (nhy) { +- hyphens[nhy - word + strlen(nh) - 1] = 0; +- if (nhy - word - 1 >= 0) hyphens[nhy - word - 1] = 0; ++ hyphens[nhy - word + strlen(nh) - 1] = '0'; ++ if (nhy - word - 1 >= 0) hyphens[nhy - word - 1] = '0'; + nhy = (char *) strstr(nhy + 1, nh); + } + nh = nh + strlen(nh) + 1; +@@ -1084,6 +1136,9 @@ + + if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut); + if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut); ++#ifdef VERBOSE ++ printf ("nums: %s\n", hyphens); ++#endif + return 0; + } + +@@ -1093,8 +1148,10 @@ + char *hyphword, char *** rep, int ** pos, int ** cut, + int lhmin, int rhmin, int clhmin, int crhmin) + { +- lhmin = (lhmin > 0 ? lhmin : dict->lhmin); +- rhmin = (rhmin > 0 ? rhmin : dict->rhmin); ++ lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin; ++ rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin; ++ clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin; ++ crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin; + hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, + clhmin, crhmin, 1, 1); + hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
_______________________________________________ Libreoffice-commits mailing list Libreoffice-commits@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits