Thank you for this. Unfortunately qsort and bsearch are C99 functions, and I don't think we can safely assume that they are present. So I am going to put this into R-devel but not 2.6.0. When we have a bit more experience we can consider backporting it to 2.6.1 (I don't want to change the autoconf stuff at this stage).
Brian Ripley On Thu, 13 Sep 2007, Ei-ji Nakama wrote: > For example, the following changes are necessary when i convert a > Japanese hiragana into katakana in chattr. > > R code: >> chartr("\u3041-\u3093","\u30a1-\u30f3","\u3084\u3063\u305f\u30fc") > > --- R-alpha.orig/src/main/character.c 2007-09-05 07:13:27.000000000 +0900 > +++ R-alpha/src/main/character.c 2007-09-13 16:10:21.000000000 +0900 > @@ -2041,6 +2041,16 @@ > return(c); > } > > +typedef struct { wchar_t c_old, c_new; } xtable_t; > +static inline int xtable_comp(const xtable_t *a, const xtable_t *b) > +{ > + return a->c_old - b->c_old; > +} > +static inline int xtable_key_comp(const wchar_t *a, const xtable_t *b) > +{ > + return *a - b->c_old; > +} > + > SEXP attribute_hidden do_chartr(SEXP call, SEXP op, SEXP args, SEXP env) > { > SEXP old, _new, x, y; > @@ -2064,14 +2074,18 @@ > #ifdef SUPPORT_MBCS > if(mbcslocale) { > int j, nb, nc; > - wchar_t xtable[65536 + 1], c_old, c_new, *wc; > + xtable_t *xtable; > + int xtable_cnt; > + wchar_t c_old, c_new, *wc; > const char *xi, *s; > struct wtr_spec *trs_old, **trs_old_ptr; > struct wtr_spec *trs_new, **trs_new_ptr; > - > - for(i = 0; i <= UCHAR_MAX; i++) xtable[i] = i; > + struct wtr_spec *trs_cnt, **trs_cnt_ptr; > > /* Initialize the old and new wtr_spec lists. */ > + trs_cnt = Calloc(1, struct wtr_spec); > + trs_cnt->type = WTR_INIT; > + trs_cnt->next = NULL; > trs_old = Calloc(1, struct wtr_spec); > trs_old->type = WTR_INIT; > trs_old->next = NULL; > @@ -2084,6 +2098,7 @@ > if(nc < 0) error(_("invalid multibyte string 'old'")); > wc = (wchar_t *) R_AllocStringBuffer((nc+1)*sizeof(wchar_t), &cbuff); > mbstowcs(wc, s, nc + 1); > + wtr_build_spec(wc, trs_cnt); /* use count only */ > wtr_build_spec(wc, trs_old); > > s = translateChar(STRING_ELT(_new, 0)); > @@ -2096,38 +2111,54 @@ > /* Initialize the pointers for walking through the old and new > wtr_spec lists and retrieving the next chars from the lists. > */ > + trs_cnt_ptr = Calloc(1, struct wtr_spec *); > + *trs_cnt_ptr = trs_cnt->next; > + for( xtable_cnt = 0 ; wtr_get_next_char_from_spec(trs_cnt_ptr) > ;xtable_cnt++ ); > + Free(trs_cnt_ptr); > + xtable = (xtable_t *)R_alloc(xtable_cnt+1,sizeof(xtable_t)); > + > trs_old_ptr = Calloc(1, struct wtr_spec *); > *trs_old_ptr = trs_old->next; > trs_new_ptr = Calloc(1, struct wtr_spec *); > *trs_new_ptr = trs_new->next; > - for(;;) { > + for(i=0;;i++) { > c_old = wtr_get_next_char_from_spec(trs_old_ptr); > c_new = wtr_get_next_char_from_spec(trs_new_ptr); > if(c_old == '\0') > break; > else if(c_new == '\0') > error(_("'old' is longer than 'new'")); > - else > - xtable[c_old] = c_new; > + else{ > + xtable[i].c_old=c_old; > + xtable[i].c_new=c_new; > + } > } > + > /* Free the memory occupied by the wtr_spec lists. */ > wtr_free_spec(trs_old); > wtr_free_spec(trs_new); > Free(trs_old_ptr); Free(trs_new_ptr); > > + qsort(xtable, xtable_cnt, sizeof(xtable_t), > + (int(*)(const void *, const void *))xtable_comp); > + > n = LENGTH(x); > PROTECT(y = allocVector(STRSXP, n)); > for(i = 0; i < n; i++) { > if (STRING_ELT(x,i) == NA_STRING) > SET_STRING_ELT(y, i, NA_STRING); > else { > + xtable_t *tbl; > xi = translateChar(STRING_ELT(x, i)); > nc = mbstowcs(NULL, xi, 0); > if(nc < 0) > error(_("invalid input multibyte string %d"), i+1); > wc = (wchar_t *) > R_AllocStringBuffer((nc+1)*sizeof(wchar_t), &cbuff); > mbstowcs(wc, xi, nc + 1); > - for(j = 0; j < nc; j++) wc[j] = xtable[wc[j]]; > + for(j = 0; j < nc; j++) > + if (tbl = bsearch(&wc[j], xtable, xtable_cnt, > sizeof(xtable_t), > + (int(*)(const void *, const > void *))xtable_key_comp)) > + wc[j]=tbl->c_new; > nb = wcstombs(NULL, wc, 0); > cbuf = CallocCharBuf(nb); > wcstombs(cbuf, wc, nb + 1); > > -- Brian D. Ripley, [EMAIL PROTECTED] Professor of Applied Statistics, http://www.stats.ox.ac.uk/~ripley/ University of Oxford, Tel: +44 1865 272861 (self) 1 South Parks Road, +44 1865 272866 (PA) Oxford OX1 3TG, UK Fax: +44 1865 272595 ______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel