Attached is a patch to add string comparison ops, along with the
necessary infrastructure in the string code.

The current behaviour is that if the two strings do not have the
same encoding then both are promoted to UTF-32 before comparison
as that should generally preserve information.

Tom

-- 
Tom Hughes ([EMAIL PROTECTED])
http://www.compton.nu/
? t/tom.pasm
Index: basic_opcodes.ops
===================================================================
RCS file: /home/perlcvs/parrot/basic_opcodes.ops,v
retrieving revision 1.36
diff -u -w -r1.36 basic_opcodes.ops
--- basic_opcodes.ops   2001/10/08 14:04:20     1.36
+++ basic_opcodes.ops   2001/10/09 23:46:56
@@ -604,6 +604,90 @@
 AUTO_OP concat_s {
     STRING *s = string_concat(STR_REG(P1), STR_REG(P2), 1);
     STR_REG(P1) = s;
+}
+
+/* EQ Sx, Sy, EQ_BRANCH */
+MANUAL_OP eq_s_ic {
+  if (string_compare(STR_REG(P1), STR_REG(P2)) == 0) {
+    RETURN(INT_CONST(P3));
+  }
+}
+
+/* EQ Sx, CONSTANT, EQ_BRANCH */
+MANUAL_OP eq_sc_ic {
+  if (string_compare(STR_REG(P1), STR_CONST(P2)) == 0) {
+    RETURN(INT_CONST(P3));
+  }
+}
+
+/* NE Sx, Sy, NE_BRANCH */
+MANUAL_OP ne_s_ic {
+  if (string_compare(STR_REG(P1), STR_REG(P2)) != 0) {
+    RETURN(INT_CONST(P3));
+  }
+}
+
+/* NE Sx, CONSTANT, NE_BRANCH */
+MANUAL_OP ne_sc_ic {
+  if (string_compare(STR_REG(P1), STR_CONST(P2)) != 0) {
+    RETURN(INT_CONST(P3));
+  }
+}
+
+/* LT Sx, Sy, LT_BRANCH */
+MANUAL_OP lt_s_ic {
+  if (string_compare(STR_REG(P1), STR_REG(P2)) < 0) {
+    RETURN(INT_CONST(P3));
+  }
+}
+
+/* LT Sx, CONSTANT, LT_BRANCH */
+MANUAL_OP lt_sc_ic {
+  if (string_compare(STR_REG(P1), STR_CONST(P2)) < 0) {
+    RETURN(INT_CONST(P3));
+  }
+}
+
+/* LE Sx, Sy, LE_BRANCH */
+MANUAL_OP le_s_ic {
+  if (string_compare(STR_REG(P1), STR_REG(P2)) <= 0) {
+    RETURN(INT_CONST(P3));
+  }
+}
+
+/* LE Sx, CONSTANT, LE_BRANCH */
+MANUAL_OP le_sc_ic {
+  if (string_compare(STR_REG(P1), STR_CONST(P2)) <= 0) {
+    RETURN(INT_CONST(P3));
+  }
+}
+
+/* GT Sx, Sy, GT_BRANCH */
+MANUAL_OP gt_s_ic {
+  if (string_compare(STR_REG(P1), STR_REG(P2)) > 0) {
+    RETURN(INT_CONST(P3));
+  }
+}
+
+/* GT Sx, CONSTANT, GT_BRANCH */
+MANUAL_OP gt_sc_ic {
+  if (string_compare(STR_REG(P1), STR_CONST(P2)) > 0) {
+    RETURN(INT_CONST(P3));
+  }
+}
+
+/* GE Sx, Sy, GE_BRANCH */
+MANUAL_OP ge_s_ic {
+  if (string_compare(STR_REG(P1), STR_REG(P2)) >= 0) {
+    RETURN(INT_CONST(P3));
+  }
+}
+
+/* GE Sx, CONSTANT, GE_BRANCH */
+MANUAL_OP ge_sc_ic {
+  if (string_compare(STR_REG(P1), STR_CONST(P2)) >= 0) {
+    RETURN(INT_CONST(P3));
+  }
 }

 /* NOOP */
Index: opcode_table
===================================================================
RCS file: /home/perlcvs/parrot/opcode_table,v
retrieving revision 1.24
diff -u -w -r1.24 opcode_table
--- opcode_table        2001/10/08 13:45:21     1.24
+++ opcode_table        2001/10/09 23:46:57
@@ -67,7 +67,7 @@
 substr_s_s_i   4       S S I I
 concat_s       2       S S

-# Comparators (TODO: String comparators)
+# Comparators

 eq_i_ic        3       I I D
 eq_ic_ic       3       I i D
@@ -94,6 +94,19 @@
 gt_nc_ic       3       N n D
 ge_n_ic        3       N N D
 ge_nc_ic       3       N n D
+
+eq_s_ic        3       S S D
+eq_sc_ic       3       S s D
+ne_s_ic        3       S S D
+ne_sc_ic       3       S s D
+lt_s_ic        3       S S D
+lt_sc_ic       3       S s D
+le_s_ic        3       S S D
+le_sc_ic       3       S s D
+gt_s_ic        3       S S D
+gt_sc_ic       3       S s D
+ge_s_ic        3       S S D
+ge_sc_ic       3       S s D

 # Flow control

Index: string.c
===================================================================
RCS file: /home/perlcvs/parrot/string.c,v
retrieving revision 1.12
diff -u -w -r1.12 string.c
--- string.c    2001/10/08 07:49:10     1.12
+++ string.c    2001/10/09 23:46:57
@@ -152,6 +152,23 @@
     return (ENC_VTABLE(s)->chopn)(s, n);
 }

+/*=for api string string_compare
+ * compare two strings.
+ */
+INTVAL
+string_compare(STRING* s1, STRING* s2) {
+    if (s1->encoding != s2->encoding) {
+        if (s1->encoding->which != enc_utf32) {
+            s1 = Parrot_transcode_table[s1->encoding->which][enc_utf32](s1, NULL);
+        }
+        if (s2->encoding->which != enc_utf32) {
+            s2 = Parrot_transcode_table[s2->encoding->which][enc_utf32](s2, NULL);
+        }
+    }
+
+    return (ENC_VTABLE(s1)->compare)(s1, s2);
+}
+
 /*
  * Local variables:
  * c-indentation-style: bsd
Index: strnative.c
===================================================================
RCS file: /home/perlcvs/parrot/strnative.c,v
retrieving revision 1.15
diff -u -w -r1.15 strnative.c
--- strnative.c 2001/10/08 07:49:10     1.15
+++ strnative.c 2001/10/09 23:46:58
@@ -82,6 +82,25 @@
     return dest;
 }

+/*=for api string_native string_native_compare
+   compare two strings
+*/
+static INTVAL
+string_native_compare(STRING* s1, STRING* s2) {
+    INTVAL cmp;
+
+    if (s1->bufused < s2->bufused) {
+        cmp = memcmp(s1->bufstart, s2->bufstart, s1->bufused);
+        if (cmp == 0) cmp == -1;
+    }
+    else {
+        cmp = memcmp(s1->bufstart, s2->bufstart, s2->bufused);
+        if (cmp ==0 && s1->bufused > s2->bufused) cmp = 1;
+    }
+
+    return cmp;
+}
+
 /*=for api string_native string_native_vtable
    return the vtable for the native string
 */
@@ -94,6 +113,7 @@
        string_native_concat,
        string_native_chopn,
        string_native_substr,
+       string_native_compare,
     };
     return sv;
 }
Index: strutf16.c
===================================================================
RCS file: /home/perlcvs/parrot/strutf16.c,v
retrieving revision 1.1
diff -u -w -r1.1 strutf16.c
--- strutf16.c  2001/10/08 07:50:52     1.1
+++ strutf16.c  2001/10/09 23:46:59
@@ -123,6 +123,37 @@
     return dest;
 }

+/*=for api string_utf16 string_utf16_compare
+   compare two strings
+*/
+static INTVAL
+string_utf16_compare(STRING* s1, STRING* s2) {
+    utf16_t *s1start = s1->bufstart;
+    utf16_t *s1end = s1start + s1->bufused / sizeof(utf16_t);
+    utf16_t *s2start = s2->bufstart;
+    utf16_t *s2end = s2start + s2->bufused / sizeof(utf16_t);
+    INTVAL cmp = 0;
+
+    while (cmp == 0 && s1start < s1end && s2start < s2end) {
+        utf32_t c1 = *s1start++;
+        utf32_t c2 = *s2start++;
+
+        if (UNICODE_IS_HIGH_SURROGATE(c1)) {
+            c1 = UNICODE_DECODE_SURROGATE(c1, *s1start++);
+        }
+        if (UNICODE_IS_HIGH_SURROGATE(c2)) {
+            c2 = UNICODE_DECODE_SURROGATE(c2, *s2start++);
+        }
+
+        cmp = c1 - c2;
+    }
+
+    if (cmp == 0 && s1start < s1end) cmp = 1;
+    if (cmp == 0 && s2start < s2end) cmp = -1;
+
+    return cmp;
+}
+
 /*=for api string_utf16 string_utf16_vtable
    return the vtable for the native string
 */
@@ -135,6 +166,7 @@
        string_utf16_concat,
        string_utf16_chopn,
        string_utf16_substr,
+       string_utf16_compare,
     };
     return sv;
 }
Index: strutf32.c
===================================================================
RCS file: /home/perlcvs/parrot/strutf32.c,v
retrieving revision 1.1
diff -u -w -r1.1 strutf32.c
--- strutf32.c  2001/10/08 07:50:52     1.1
+++ strutf32.c  2001/10/09 23:46:59
@@ -11,6 +11,7 @@
  */

 #include "parrot/parrot.h"
+#include "parrot/unicode.h"

 /* Functions for handling strings in UTF-32 format */

@@ -80,6 +81,27 @@
     return dest;
 }

+/*=for api string_utf32 string_utf32_compare
+   compare two strings
+*/
+static INTVAL
+string_utf32_compare(STRING* s1, STRING* s2) {
+    utf32_t *s1start = s1->bufstart;
+    utf32_t *s1end = s1start + s1->strlen;
+    utf32_t *s2start = s2->bufstart;
+    utf32_t *s2end = s2start + s2->strlen;
+    INTVAL cmp = 0;
+
+    while (cmp == 0 && s1start < s1end && s2start < s2end) {
+        cmp = *s1start++ - *s2start++;
+    }
+
+    if (cmp == 0 && s1start < s1end) cmp = 1;
+    if (cmp == 0 && s2start < s2end) cmp = -1;
+
+    return cmp;
+}
+
 /*=for api string_utf32 string_utf32_vtable
    return the vtable for the native string
 */
@@ -92,6 +114,7 @@
        string_utf32_concat,
        string_utf32_chopn,
        string_utf32_substr,
+       string_utf32_compare,
     };
     return sv;
 }
Index: strutf8.c
===================================================================
RCS file: /home/perlcvs/parrot/strutf8.c,v
retrieving revision 1.1
diff -u -w -r1.1 strutf8.c
--- strutf8.c   2001/10/08 07:50:52     1.1
+++ strutf8.c   2001/10/09 23:47:00
@@ -124,6 +124,58 @@
     return dest;
 }

+/*=for api string_utf8 string_utf8_compare
+   compare two strings
+*/
+static INTVAL
+string_utf8_compare(STRING* s1, STRING* s2) {
+    utf8_t *s1start = s1->bufstart;
+    utf8_t *s1end = s1start + s1->bufused;
+    utf8_t *s2start = s2->bufstart;
+    utf8_t *s2end = s2start + s2->bufused;
+    INTVAL cmp = 0;
+
+    while (cmp == 0 && s1start < s1end && s2start < s2end) {
+        utf32_t c1 = *s1start++;
+        utf32_t c2 = *s2start++;
+
+        if (UTF8_IS_START(c1)) {
+            INTVAL len = UTF8SKIP(s1start - 1);
+            INTVAL count;
+
+            c1 &= UTF8_START_MASK(len);
+            for (count = 1; count < len; count++) {
+                if (!UTF8_IS_CONTINUATION(*s1start)) {
+                    INTERNAL_EXCEPTION(MALFORMED_UTF8,
+                                       "Malformed UTF-8 string\n");
+                }
+                c1 = UTF8_ACCUMULATE(c1, *s1start++);
+            }
+        }
+
+        if (UTF8_IS_START(c2)) {
+            INTVAL len = UTF8SKIP(s2start - 1);
+            INTVAL count;
+
+            c2 &= UTF8_START_MASK(len);
+            for (count = 1; count < len; count++) {
+                if (!UTF8_IS_CONTINUATION(*s2start)) {
+                    INTERNAL_EXCEPTION(MALFORMED_UTF8,
+                                       "Malformed UTF-8 string\n");
+                }
+                c2 = UTF8_ACCUMULATE(c1, *s2start++);
+            }
+        }
+
+        cmp = c1 - c2;
+    }
+
+    if (cmp == 0 && s1start < s1end) cmp = 1;
+    if (cmp == 0 && s2start < s2end) cmp = -1;
+
+    return cmp;
+}
+
 /*=for api string_utf8 string_utf8_vtable
    return the vtable for the native string
 */
@@ -136,6 +188,7 @@
        string_utf8_concat,
        string_utf8_chopn,
        string_utf8_substr,
+       string_utf8_compare,
     };
     return sv;
 }
Index: include/parrot/string.h
===================================================================
RCS file: /home/perlcvs/parrot/include/parrot/string.h,v
retrieving revision 1.4
diff -u -w -r1.4 string.h
--- include/parrot/string.h     2001/10/08 07:49:03     1.4
+++ include/parrot/string.h     2001/10/09 23:47:04
@@ -33,6 +33,7 @@
 typedef STRING* (*two_strings_iv_to_string_t)(STRING *, STRING *, INTVAL);
 typedef STRING* (*substr_t)(STRING*, INTVAL, INTVAL, STRING*);
 typedef INTVAL (*iv_to_iv_t)(INTVAL);
+typedef INTVAL (*two_strings_to_iv_t)(STRING*, STRING*);

 struct string_vtable {
     encoding_t which;                   /* What sort of encoding is this? */
@@ -41,6 +42,7 @@
     two_strings_iv_to_string_t concat;  /* Append string b to the end of string a */
     string_iv_to_string_t chopn;        /* Remove n characters from the end of a 
string */
     substr_t substr;                    /* Substring operation */
+    two_strings_to_iv_t compare;        /* Compare operation */
 };

 struct parrot_string {
@@ -67,6 +69,8 @@
 string_chopn(STRING*, INTVAL);
 STRING*
 string_substr(STRING*, INTVAL, INTVAL, STRING**);
+INTVAL
+string_compare(STRING*, STRING*);

 /* Declarations of other functions */
 INTVAL

Reply via email to