Author: yamakenz
Date: Tue Jul 10 22:58:08 2007
New Revision: 4693

Modified:
   trunk/doc/COMPATIBILITY
   trunk/scm/prime.scm
   trunk/scm/util.scm
   trunk/test/test-uim-util.scm
   trunk/uim/uim-util.c

Log:
* uim/uim-util.c
  - (uim_strsplit, uim_split_string): Removed
  - (string_contains): New static function
  - (uim_init_util_subrs):
    * Remove definition for string-split
    * Add definition for string-contains
* scm/util.scm
  - (string-split): New procedure. The specification has changed from
    the C version; see doc/COMPATIBILITY. All invocations of
    string-split have been checked by eye and confirmed safe
* scm/prime.scm
  - (prime-engine-send-command): Follow the specification change of
    string-split
  - (prime-util-string-split): Add a comment about string-split
* test/test-uim-util.scm
  - Update the "passed revision" comment
  - Add tests for new string-split
  - Add tests for string-contains
* doc/COMPATIBILITY
  - Add new section "Specification changes of utility procedures"


Modified: trunk/doc/COMPATIBILITY
==============================================================================
--- trunk/doc/COMPATIBILITY     (original)
+++ trunk/doc/COMPATIBILITY     Tue Jul 10 22:58:08 2007
@@ -57,6 +57,23 @@
 
 The changes are described below in most recently updated order.
 ------------------------------------------------------------------------------
+Summary: Specification changes of utility procedures
+Affects: uim developers, IM developers
+Updates: Scheme API
+Version: 1.5.0
+Revision: ac4693
+Date: 2007-07-11
+Modifier: YamaKen
+Related: 
+URL:
+Changes:
+  (changed) string-split
+Description:
+  Now string-split produces empty strings as follows. See
+  test-uim-util.scm for further information.
+    uim 1.4: (string-split "hhh" "h")  ==> ()
+    uim 1.5: (string-split "hhh" "h")  ==> ("" "" "" "")
+------------------------------------------------------------------------------
 Summary: SRFI-1 procedures replacement
 Affects: uim developers, IM developers
 Updates: Scheme API

Modified: trunk/scm/prime.scm
==============================================================================
--- trunk/scm/prime.scm (original)
+++ trunk/scm/prime.scm Tue Jul 10 22:58:08 2007
@@ -743,6 +743,11 @@
 ;; (string-split "\t\t" "\t") => ().
 ;; (prime-util-string-split "\t\t" "\t") => ("" "" "").
 ;; The second argument separator must be a single character string.
+;;
+;; uim 1.5.0 revised the specification of string-split as shown
+;; below. Replace prime-util-string-split with the new string-split
+;; if no other problems remain.  -- YamaKen 2007-07-11
+;;   (string-split "\t\t" "\t") => ("" "" "")
 (define prime-util-string-split
   (lambda (string separator)
     (let ((result (list))
@@ -809,11 +814,11 @@
 ;; problem with unix domain socket.
 (define prime-engine-send-command
   (lambda (arg-list)
-    (cdr 
-     (string-split
-      (prime-send-command
-       (prime-util-string-concat arg-list "\t"))
-      "\n"))))
+    (let* ((result (prime-send-command
+                   (prime-util-string-concat arg-list "\t")))
+          (result-lines (string-split result "\n")))
+      (take! result-lines (- (length result-lines) 1)) ;; drop last "\n"
+      (cdr result-lines)))) ;; drop status line
 
 (define prime-engine-conv-predict
   (lambda (prime-session)

Modified: trunk/scm/util.scm
==============================================================================
--- trunk/scm/util.scm  (original)
+++ trunk/scm/util.scm  Tue Jul 10 22:58:08 2007
@@ -153,6 +153,18 @@
   (lambda (sep str-list)
     (apply string-append (join sep str-list))))
 
+(define string-split
+  (lambda (str sep)
+    (let ((slen (string-length str))
+         (seplen (string-length sep)))
+      (let rec ((start 0))
+       (let ((next (and (<= start slen)
+                        (string-contains str sep start))))
+         (if next
+             (cons (substring str start next)
+                   (rec (+ next seplen)))
+             (list (substring str start slen))))))))
+
 (define string-append-map
   (lambda args
     (apply string-append (apply map args))))

Modified: trunk/test/test-uim-util.scm
==============================================================================
--- trunk/test/test-uim-util.scm        (original)
+++ trunk/test/test-uim-util.scm        Tue Jul 10 22:58:08 2007
@@ -29,7 +29,7 @@
 ;;; SUCH DAMAGE.
 ;;;;
 
-;; These tests are passed at revision 4692 (new repository)
+;; These tests are passed at revision 4693 (new repository)
 
 (use test.unit)
 
@@ -67,7 +67,47 @@
    (assert-true  (uim-bool '(unsetenv "UIM_NONEXISTING_ENV")))
    (assert-false (uim-bool '(getenv "UIM_NONEXISTING_ENV"))))
 
-  ("test string-split"
+  ;; See "Specification changes of utility procedures" of doc/COMPATIBILITY
+;;  ("test string-split (uim 1.4)"
+;;   ;; ordinary split
+;;   (assert-equal '("h" "geh" "ge")
+;;              (uim '(string-split "hogehoge" "o")))
+;;   ;; case sensitive
+;;   (assert-equal '("hogehoge")
+;;              (uim '(string-split "hogehoge" "O")))
+;;   ;; split by sequence
+;;   (assert-equal '("h" "eh" "e")
+;;              (uim '(string-split "hogehoge" "og")))
+;;   ;; split by first character
+;;   (assert-equal '("oge" "oge")
+;;              (uim '(string-split "hogehoge" "h")))
+;;   ;; split by first sequence
+;;   (assert-equal '("ge" "ge")
+;;              (uim '(string-split "hogehoge" "ho")))
+;;   ;; split by last character
+;;   (assert-equal '("hog" "hog")
+;;              (uim '(string-split "hogehoge" "e")))
+;;   ;; split by last sequence
+;;   (assert-equal '("ho" "ho")
+;;              (uim '(string-split "hogehoge" "ge")))
+;;   ;; split by whole string
+;;   (assert-equal ()
+;;              (uim '(string-split "hogehoge" "hogehoge")))
+;;   ;; repeated splitter
+;;   (assert-equal ()
+;;              (uim '(string-split "hhh" "h")))
+;;   ;; split by space
+;;   (assert-equal '("h" "o" "g" "e" "hoge")
+;;              (uim '(string-split " h o g e hoge" " ")))
+;;   ;; split by symbolic character
+;;   (assert-equal '("h" "o" "g" "e" "hoge")
+;;              (uim '(string-split "|h|o|g|e|hoge" "|")))
+;;   ;; split by non existent character
+;;   (assert-equal '("hogehoge")
+;;              (uim '(string-split "hogehoge" "|"))))
+
+  ;; See "Specification changes of utility procedures" of doc/COMPATIBILITY
+  ("test string-split (uim 1.5)"
    ;; ordinary split
    (assert-equal '("h" "geh" "ge")
                 (uim '(string-split "hogehoge" "o")))
@@ -78,25 +118,28 @@
    (assert-equal '("h" "eh" "e")
                 (uim '(string-split "hogehoge" "og")))
    ;; split by first character
-   (assert-equal '("oge" "oge")
+   (assert-equal '("" "oge" "oge")
                 (uim '(string-split "hogehoge" "h")))
    ;; split by first sequence
-   (assert-equal '("ge" "ge")
+   (assert-equal '("" "ge" "ge")
                 (uim '(string-split "hogehoge" "ho")))
    ;; split by last character
-   (assert-equal '("hog" "hog")
+   (assert-equal '("hog" "hog" "")
                 (uim '(string-split "hogehoge" "e")))
    ;; split by last sequence
-   (assert-equal '("ho" "ho")
+   (assert-equal '("ho" "ho" "")
                 (uim '(string-split "hogehoge" "ge")))
    ;; split by whole string
-   (assert-equal ()
+   (assert-equal '("" "")
                 (uim '(string-split "hogehoge" "hogehoge")))
+   ;; repeated splitter
+   (assert-equal '("" "" "" "")
+                (uim '(string-split "hhh" "h")))
    ;; split by space
-   (assert-equal '("h" "o" "g" "e" "hoge")
+   (assert-equal '("" "h" "o" "g" "e" "hoge")
                 (uim '(string-split " h o g e hoge" " ")))
    ;; split by symbolic character
-   (assert-equal '("h" "o" "g" "e" "hoge")
+   (assert-equal '("" "h" "o" "g" "e" "hoge")
                 (uim '(string-split "|h|o|g|e|hoge" "|")))
    ;; split by non existent character
    (assert-equal '("hogehoge")
@@ -124,6 +167,17 @@
 ;   (assert-equal '("c" " " " " "b" " " "a")
 ;               (uim '(string-to-list "a b  c")))
    )
+
+  ("test string-contains"
+   (assert-equal 0 (uim '(string-contains ""         "" 0)))
+   (assert-false   (uim '(string-contains ""         "f" 0)))
+   (assert-equal 0 (uim '(string-contains "foo"      "" 0)))
+   (assert-equal 0 (uim '(string-contains "foo"      "f" 0)))
+   (assert-equal 1 (uim '(string-contains "foo"      "o" 0)))
+   (assert-equal 1 (uim '(string-contains "foo"      "oo" 0)))
+   (assert-false   (uim '(string-contains "foo"      "oof" 0)))
+   (assert-equal 1 (uim '(string-contains "foo"      "o" 1)))
+   (assert-equal 2 (uim '(string-contains "foo"      "o" 2))))
 
   ("test string-prefix?"
    (assert-true  (uim-bool '(string-prefix? ""         "foo_bar")))

Modified: trunk/uim/uim-util.c
==============================================================================
--- trunk/uim/uim-util.c        (original)
+++ trunk/uim/uim-util.c        Tue Jul 10 22:58:08 2007
@@ -179,95 +179,29 @@
   return uim_scm_t();
 }
 
-static char **
-uim_strsplit(const char *splittee, const char *splitter)
-{
-  const char *cur, *tmp;
-  int nr_token = 0;
-  int in_token = 0;
-  char **res;
-  int len;
-  int i;
-
-  if (!splittee || !splitter)
-    return NULL;
-
-
-  /* count the number of token */
-  cur = splittee;
-  while (*cur) {
-    if (strchr(splitter, *cur)) {
-      in_token = 0;
-    } else {
-      if (!in_token) {
-       nr_token ++;
-      }
-      in_token = 1;
-    }
-    cur ++;
-  }
-  /* allocate buffer */
-  res = (char **)malloc(sizeof(char *) * (nr_token + 1) );
-  if (!res) {
-    return NULL;
-  }
-  /**/
-  cur = splittee;
-  for (i = 0; i < nr_token; i++) {
-    /* find current token's start */
-    while (strchr(splitter, *cur)) {
-      cur ++;
-    }
-    /* calc length */
-    len = 0;
-    tmp = cur;
-    while (!strchr(splitter, *tmp)) {
-      len ++;
-      tmp ++;
-    }
-    /* store */
-    res[i] = malloc(sizeof(char) * (len + 1));
-    strlcpy(res[i], cur, len + 1);
-    cur = tmp;
-  }
-  /**/
-  res[nr_token] = NULL;
-
-  return res;
-}
-
+/* Limited version of SRFI-13 string-contains. The number of args is
+ * fixed to 3. */
 static uim_lisp
-uim_split_string(uim_lisp _splittee, uim_lisp _splitter)
+string_contains(uim_lisp s1_, uim_lisp s2_, uim_lisp start1_)
 {
-  const char *splittee = uim_scm_refer_c_str(_splittee);
-  const char *splitter = uim_scm_refer_c_str(_splitter);
-  char **strs;
-  uim_lisp l = uim_scm_null_list();
-  int i;
-  int n_strs;
-
-  if (!uim_scm_stringp(_splittee) || !uim_scm_stringp(_splitter))
-    return uim_scm_f();
-
-  if (splittee == NULL || splitter == NULL)
-    return uim_scm_f();
+  const char *s1, *s2, *found;
+  int start1;
+  size_t s1len;
 
-  strs = uim_strsplit(splittee, splitter);
+  if (!uim_scm_stringp(s1_) || !uim_scm_stringp(s2_))
+    return uim_scm_f();  /* FIXME: uim_scm_error() */
 
-  if (!strs)
-    return uim_scm_f();
+  s1 = uim_scm_refer_c_str(s1_);
+  s2 = uim_scm_refer_c_str(s2_);
+  start1 = uim_scm_c_int(start1_);
+  s1len = strlen(s1);
 
-  if (!*strs)
-    return uim_scm_null_list();
+  if (start1 < 0 || s1len < (size_t)start1)
+    return uim_scm_f();  /* FIXME: uim_scm_error() */
 
-  for (n_strs = 0; strs[n_strs] != '\0'; n_strs++);
+  found = strstr(&s1[start1], s2);
 
-  l = uim_scm_c_strs_into_list(n_strs, (const char *const *)strs);
-  for (i = n_strs - 1; i >= 0; i--) {
-    free(strs[i]);
-  }
-  free(strs);
-  return l;
+  return (found) ? uim_scm_make_int(found - s1) : uim_scm_f();
 }
 
 static uim_lisp
@@ -350,19 +284,22 @@
   uim_scm_init_subr_0("sys-pkglibdir", sys_pkglibdir);
   uim_scm_init_subr_0("sys-datadir", sys_datadir);
   uim_scm_init_subr_0("sys-pkgdatadir", sys_pkgdatadir);
+
   uim_scm_init_subr_1("file-readable?", file_readablep);
   uim_scm_init_subr_1("file-writable?", file_writablep);
   uim_scm_init_subr_1("file-executable?", file_executablep);
   uim_scm_init_subr_1("file-regular?", file_regularp);
   uim_scm_init_subr_1("file-directory?", file_directoryp);
   uim_scm_init_subr_1("file-mtime", file_mtime);
+
   uim_scm_init_subr_0("setugid?", setugidp);
+
   uim_scm_init_subr_1("getenv", c_getenv);
   uim_scm_init_subr_3("setenv", c_setenv);
   uim_scm_init_subr_1("unsetenv", c_unsetenv);
 
-  /* these procedures should be replaced with standard ones of R5RS or SRFIs */
-  uim_scm_init_subr_2("string-split", uim_split_string);
+  /* SRFI-13 */
+  uim_scm_init_subr_3("string-contains", string_contains);
   uim_scm_init_subr_2("string-prefix?", string_prefixp);
   uim_scm_init_subr_2("string-prefix-ci?", string_prefix_cip);
 }

Reply via email to