[kaffe] CVS kaffe (guilhem): Fixes for RuleBasedCollator.

Kaffe CVS Thu, 13 May 2004 12:52:44 -0700

PatchSet 4727 
Date: 2004/05/13 19:18:19
Author: guilhem
Branch: HEAD
Tag: (none) 
Log:
Fixes for RuleBasedCollator.


        * libraries/javalib/java/text/RuleBasedCollator.java
        (CollationElement, CollationSorter): Made static.
        (last_tertiary_value, SPECIAL_UNKNOWN_SEQ): Introduced special
        collation elements for unknown character this is for sequence
        using resets.
        (mergeRules): Fixed insertion point.
        (buildCollationVector): Initialize last_tertiary_value.
        (compare): Handle special cases of accented characters.
        (getDefaultAccentedElement): New method.
        (getCollationFixed): Fixed key building.

        * libraries/javalib/java/text/CollationElementIterator.java
        (text_indexes): New field.
        (nextBlock, previousBlock): Updated textIndex according to
        text_indexes.
        (setText): Build text_indexes. Better handling of expansion ordering.

Members: 
        ChangeLog:1.2301->1.2302 
        libraries/javalib/java/text/CollationElementIterator.java:1.15->1.16 
        libraries/javalib/java/text/RuleBasedCollator.java:1.20->1.21 

Index: kaffe/ChangeLog
diff -u kaffe/ChangeLog:1.2301 kaffe/ChangeLog:1.2302
--- kaffe/ChangeLog:1.2301      Thu May 13 18:59:22 2004
+++ kaffe/ChangeLog     Thu May 13 19:18:19 2004
@@ -1,3 +1,22 @@
+2004-05-13 Guilhem Lavaux <[EMAIL PROTECTED]>
+
+       * libraries/javalib/java/text/RuleBasedCollator.java
+       (CollationElement, CollationSorter): Made static.
+       (last_tertiary_value, SPECIAL_UNKNOWN_SEQ): Introduced special
+       collation elements for unknown character this is for sequence
+       using resets.
+       (mergeRules): Fixed insertion point.
+       (buildCollationVector): Initialize last_tertiary_value.
+       (compare): Handle special cases of accented characters.
+       (getDefaultAccentedElement): New method.
+       (getCollationFixed): Fixed key building.
+       
+       * libraries/javalib/java/text/CollationElementIterator.java
+       (text_indexes): New field.
+       (nextBlock, previousBlock): Updated textIndex according to
+       text_indexes.
+       (setText): Build text_indexes. Better handling of expansion ordering.
+
 2004-05-13  Dalibor Topic  <[EMAIL PROTECTED]>
 
        *  configure.ac: Allow enabling of jvmpi, xdebugging and 
Index: kaffe/libraries/javalib/java/text/CollationElementIterator.java
diff -u kaffe/libraries/javalib/java/text/CollationElementIterator.java:1.15 kaffe/libraries/javalib/java/text/CollationElementIterator.java:1.16
--- kaffe/libraries/javalib/java/text/CollationElementIterator.java:1.15        Fri Apr 23 17:35:12 2004
+++ kaffe/libraries/javalib/java/text/CollationElementIterator.java     Thu May 13 19:18:21 2004
@@ -92,6 +92,11 @@
   private Object[] text_decomposition;
 
   /**
+   * Array containing the index of the specified block.
+   */
+  private int[] text_indexes;
+
+  /**
    * This method initializes a new instance of <code>CollationElementIterator</code>
    * to iterate over the specified <code>String</code> using the rules in the
    * specified <code>RuleBasedCollator</code>.
@@ -112,9 +117,11 @@
       return null;
     
     RuleBasedCollator.CollationElement e =
-      (RuleBasedCollator.CollationElement) text_decomposition[index++];
+      (RuleBasedCollator.CollationElement) text_decomposition[index];
     
-    textIndex += e.key.length();
+    textIndex = text_indexes[index];
+    
+    index++;
 
     return e;
   }
@@ -128,7 +135,7 @@
     RuleBasedCollator.CollationElement e =
       (RuleBasedCollator.CollationElement) text_decomposition[index];
 
-    textIndex -= e.key.length();
+    textIndex = text_indexes[index];
     
     return e;
   }
@@ -231,7 +238,9 @@
   public void setText(String text)
   {
     int idx = 0;
+    int idx_idx = 1;
     int alreadyExpanded = 0;
+    int idxToMove = 0;
 
     this.text = text;
     this.index = 0;
@@ -239,6 +248,8 @@
     String work_text = text.intern();
 
     Vector v = new Vector();
+    Vector vi = new Vector();
+
     // Build element collection ordered as they come in "text".
     while (idx < work_text.length())
       {
@@ -277,11 +288,36 @@
        
        if (prefix == null)
          {
-           RuleBasedCollator.CollationElement e =
-             collator.getDefaultElement(work_text.charAt (idx));
-           
-           v.add (e);
-           idx++;
+           if (alreadyExpanded > 0)
+             {
+               RuleBasedCollator.CollationElement e =
+                 collator.getDefaultAccentedElement (work_text.charAt (idx));
+               
+               v.add (e);
+               vi.add (new Integer(idx_idx));
+               idx++;
+               alreadyExpanded--;
+               if (alreadyExpanded == 0)
+                 {
+                   idx_idx += idxToMove;
+                   idxToMove = 0; 
+                 }
+               else
+                 idx_idx++;
+             }
+           else
+             {
+               RuleBasedCollator.CollationElement e =
+                 collator.getDefaultElement (work_text.charAt (idx));
+               Integer i_ref = new Integer(idx_idx);
+
+               v.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
+               vi.add (i_ref);
+               v.add (e);
+               vi.add (i_ref);
+               idx_idx++;
+               idx++;
+             }
            continue;
          }
 
@@ -290,18 +326,39 @@
            work_text = prefix.expansion
              + work_text.substring (idx+prefix.key.length());
            idx = 0;
-           alreadyExpanded = prefix.expansion.length();
            v.add (prefix);
+           vi.add (new Integer(idx_idx));
+           if (alreadyExpanded == 0)
+             idxToMove = prefix.key.length();
+           else
+             idxToMove = 0;
+           alreadyExpanded += prefix.expansion.length();
          }
        else
          {
            if (!prefix.ignore)
-             v.add (prefix);
+             {
+               v.add (prefix);
+               vi.add (new Integer(idx_idx));
+             }
            idx += prefix.key.length();
+           if (alreadyExpanded > 0)
+             {
+               alreadyExpanded -= prefix.key.length();
+               if (alreadyExpanded == 0)
+                 {
+                   idx_idx += idxToMove;
+                   idxToMove = 0;
+                 }
+             } else
+               idx_idx += prefix.key.length();
          }
       }
     
     text_decomposition = v.toArray();
+    text_indexes = new int[vi.size()];
+    for (int i = 0; i < vi.size(); i++) 
+      text_indexes[i] = ((Integer)vi.elementAt(i)).intValue();
   }
 
   /**
Index: kaffe/libraries/javalib/java/text/RuleBasedCollator.java
diff -u kaffe/libraries/javalib/java/text/RuleBasedCollator.java:1.20 kaffe/libraries/javalib/java/text/RuleBasedCollator.java:1.21
--- kaffe/libraries/javalib/java/text/RuleBasedCollator.java:1.20       Fri Apr 23 18:38:28 2004
+++ kaffe/libraries/javalib/java/text/RuleBasedCollator.java    Thu May 13 19:18:21 2004
@@ -147,7 +147,7 @@
    * This class describes what rank has a character (or a sequence of characters) 
    * in the lexicographic order. Each element in a rule has a collation element.
    */
-  final class CollationElement
+  final static class CollationElement
   {
     String key;
     int primary;
@@ -189,7 +189,7 @@
   * {@link #mergeRules(int,java.lang.String,java.util.Vector,java.util.Vector)})
    * as a temporary state while merging two sets of instructions.
    */
-  final class CollationSorter
+  final static class CollationSorter
   {
     static final int GREATERP = 0;
     static final int GREATERS = 1;
@@ -230,10 +230,27 @@
   private int last_primary_value;
 
   /**
+   * This is the value of the last secondary sequence of the
+   * primary 0, entered into
+   * <code>ce_table</code>. It is used to compute the
+   * ordering value of an unspecified accented character.
+   */
+  private int last_tertiary_value;
+
+  /**
    * This variable is true if accents need to be sorted
    * in the other direction.
    */
   private boolean inverseAccentComparison;
+
+  /**
+   * This collation element is special to unknown sequence.
+   * The JDK uses it to mark and sort the characters which has
+   * no collation rules.
+   */
+  static final CollationElement SPECIAL_UNKNOWN_SEQ = 
+    new CollationElement("", (short) 32767, (short) 0, (short) 0,
+                        (short) 0, null);
   
   /**
    * This method initializes a new instance of <code>RuleBasedCollator</code>
@@ -356,14 +373,14 @@
          (CollationSorter) main.elementAt(insertion_point-1);
        
        sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
-       
+               
        main.insertElementAt(sorter, insertion_point);
        
        /*
         * This is a new set of rules. Append to the list.
         */
        patch.removeElementAt(0);
-       insertion_point = main.size();
+       insertion_point++;
       }
 
     // Now insert all elements of patch at the insertion point.
@@ -392,7 +409,7 @@
   {
     boolean ignoreChars = (base_offset == 0);
     int operator = -1;
-    StringBuffer sb = new StringBuffer("");
+    StringBuffer sb = new StringBuffer();
     boolean doubleQuote = false;
     boolean eatingChars = false;
     boolean nextIsModifier = false;
@@ -605,6 +622,7 @@
     throws ParseException
   {
     int primary_seq = 0;
+    int last_tertiary_seq = 0;
     short secondary_seq = 0;
     short tertiary_seq = 0;
     short equality_seq = 0;
@@ -652,6 +670,8 @@
            continue element_loop;
          case CollationSorter.GREATERT:
            tertiary_seq++;
+           if (primary_seq == 0)
+             last_tertiary_seq = tertiary_seq;
            equality_seq = 0;
            break;
          case CollationSorter.IGNORE:
@@ -686,6 +706,7 @@
     ce_table = v.toArray();
 
     last_primary_value = primary_seq+1;
+    last_tertiary_value = last_tertiary_seq+1;
   }
 
   /**
@@ -757,6 +778,17 @@
         // Check for primary strength differences
         int prim1 = CollationElementIterator.primaryOrder(ord1); 
         int prim2 = CollationElementIterator.primaryOrder(ord2); 
+       
+       if (prim1 == 0 && getStrength() < TERTIARY)
+         {
+           ct.previousBlock();
+           continue;
+         }
+       else if (prim2 == 0 && getStrength() < TERTIARY)
+         {
+           cs.previousBlock();
+           continue;
+         }
 
         if (prim1 < prim2)
           return -1;
@@ -769,7 +801,7 @@
         int sec1 = CollationElementIterator.secondaryOrder(ord1);
         int sec2 = CollationElementIterator.secondaryOrder(ord2);
 
-        if (sec1 < sec2)
+       if (sec1 < sec2)
           return -1;
         else if (sec1 > sec2)
           return 1;
@@ -833,6 +865,28 @@
   }
 
   /**
+   * This method builds a default collation element for an accented character
+   * without invoking the database created from the rules passed to the constructor.
+   *
+   * @param c Character which needs a collation element.
+   * @return A valid brand new CollationElement instance.
+   */
+  CollationElement getDefaultAccentedElement(char c)
+  {
+    int v;
+
+    // Preliminary support for generic accent sorting inversion (I don't know if all
+    // characters in the range should be sorted backward). This is the place
+    // to fix this if needed.
+    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
+      v = 0x0361 - ((int) c - 0x02B9);
+    else
+      v = (short) c;
+    return new CollationElement("" + c, (short) 0,
+                               (short) 0, (short) (last_tertiary_value + v), (short) 0, null);
+  }
+
+  /**
    * This method returns an instance for <code>CollationElementIterator</code>
    * for the specified <code>String</code> under the collation rules for this
    * object.
@@ -894,11 +948,12 @@
         switch (getStrength())
           {
             case PRIMARY:
-               ord = CollationElementIterator.primaryOrder(ord);
-               break;
-
+             ord = CollationElementIterator.primaryOrder(ord);
+             break;
+             
             case SECONDARY:
-               ord = CollationElementIterator.secondaryOrder(ord);
+             ord = CollationElementIterator.primaryOrder(ord) << 8;
+             ord |= CollationElementIterator.secondaryOrder(ord);
 
             default:
                break;

_______________________________________________
kaffe mailing list
[EMAIL PROTECTED]
http://kaffe.org/cgi-bin/mailman/listinfo/kaffe

[kaffe] CVS kaffe (guilhem): Fixes for RuleBasedCollator.

Reply via email to