Author: orbiter
Date: 2008-01-20 02:22:46 +0100 (Sun, 20 Jan 2008)
New Revision: 4347

Removed:
   trunk/source/de/anomic/kelondro/kelondroSplittedTree.java
Modified:
   trunk/source/dbtest.java
   trunk/source/de/anomic/kelondro/kelondroBytesIntMap.java
   trunk/source/de/anomic/kelondro/kelondroCache.java
   trunk/source/de/anomic/kelondro/kelondroCollectionIndex.java
   trunk/source/de/anomic/kelondro/kelondroEcoTable.java
   trunk/source/de/anomic/kelondro/kelondroFlexTable.java
   trunk/source/de/anomic/kelondro/kelondroIndex.java
   trunk/source/de/anomic/kelondro/kelondroRAMIndex.java
   trunk/source/de/anomic/kelondro/kelondroRowCollection.java
   trunk/source/de/anomic/kelondro/kelondroRowSet.java
   trunk/source/de/anomic/kelondro/kelondroSQLTable.java
   trunk/source/de/anomic/kelondro/kelondroSplitTable.java
   trunk/source/de/anomic/kelondro/kelondroTree.java
Log:
To fix inconsistencies in the collection index, a double-reference reporting
mechanism has been implemented.


Modified: trunk/source/dbtest.java
===================================================================
--- trunk/source/dbtest.java    2008-01-19 12:23:56 UTC (rev 4346)
+++ trunk/source/dbtest.java    2008-01-20 01:22:46 UTC (rev 4347)
@@ -23,7 +23,6 @@
 import de.anomic.kelondro.kelondroRowSet;
 import de.anomic.kelondro.kelondroSQLTable;
 import de.anomic.kelondro.kelondroSplitTable;
-import de.anomic.kelondro.kelondroSplittedTree;
 import de.anomic.kelondro.kelondroTree;
 import de.anomic.server.serverInstantThread;
 import de.anomic.server.serverMemory;
@@ -200,12 +199,6 @@
                 File tablefile = new File(tablename + ".kelondro.db");
                 table = new kelondroCache(new kelondroTree(tablefile, true, 
preload, testRow));
             }
-            if (dbe.equals("kelondroSplittedTree")) {
-                File tablepath = new File(tablename).getParentFile();
-                tablename = new File(tablename).getName();
-                table = new kelondroSplittedTree(tablepath, tablename, 
kelondroBase64Order.enhancedCoder,
-                                preload, 8, testRow, 1, 80);
-            }
             if (dbe.equals("kelondroFlexTable")) {
                 File tablepath = new File(tablename).getParentFile();
                 table = new kelondroFlexTable(tablepath, new 
File(tablename).getName(), preload, testRow, 0, true);
@@ -362,7 +355,6 @@
             
             if (command.equals("list")) {
                 kelondroCloneableIterator<kelondroRow.Entry> i = null;
-                if (table instanceof kelondroSplittedTree) i = 
((kelondroSplittedTree) table).rows(true, null);
                 if (table instanceof kelondroTree) i = ((kelondroTree) 
table).rows(true, null);
                 if (table instanceof kelondroSQLTable) i = ((kelondroSQLTable) 
table).rows(true, null);
                 kelondroRow.Entry row;

Modified: trunk/source/de/anomic/kelondro/kelondroBytesIntMap.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroBytesIntMap.java    2008-01-19 
12:23:56 UTC (rev 4346)
+++ trunk/source/de/anomic/kelondro/kelondroBytesIntMap.java    2008-01-20 
01:22:46 UTC (rev 4347)
@@ -25,6 +25,8 @@
 package de.anomic.kelondro;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
 
 public class kelondroBytesIntMap {
     
@@ -74,6 +76,27 @@
         index.addUnique(newentry);
     }
     
+    public synchronized ArrayList<Integer[]> removeDoubles() throws 
IOException {
+        ArrayList<kelondroRowSet> indexreport = index.removeDoubles();
+        ArrayList<Integer[]> report = new ArrayList<Integer[]>();
+        Iterator<kelondroRowSet> i = indexreport.iterator();
+        kelondroRowSet rowset;
+        Integer[] is;
+        Iterator<kelondroRow.Entry> ei;
+        int c;
+        while (i.hasNext()) {
+            rowset = i.next();
+            is = new Integer[rowset.size()];
+            ei = rowset.rows();
+            c = 0;
+            while (ei.hasNext()) {
+                is[c++] = new Integer((int) ei.next().getColLong(1));
+            }
+            report.add(is);
+        }
+        return report;
+    }
+    
     public synchronized int removei(byte[] key) throws IOException {
         assert (key != null);
         kelondroRow.Entry indexentry = index.remove(key, true); // keeping the 
order will prevent multiple re-sorts

Modified: trunk/source/de/anomic/kelondro/kelondroCache.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroCache.java  2008-01-19 12:23:56 UTC 
(rev 4346)
+++ trunk/source/de/anomic/kelondro/kelondroCache.java  2008-01-20 01:22:46 UTC 
(rev 4347)
@@ -28,6 +28,7 @@
 package de.anomic.kelondro;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -353,6 +354,11 @@
         while (i.hasNext()) addUnique((Entry) i.next());
     }
 
+    public synchronized ArrayList<kelondroRowSet> removeDoubles() throws 
IOException {
+        return index.removeDoubles();
+        // todo: remove reported entries from the cache!!!
+    }
+    
     public synchronized Entry remove(byte[] key, boolean keepOrder) throws 
IOException {
         checkMissSpace();
         

Modified: trunk/source/de/anomic/kelondro/kelondroCollectionIndex.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroCollectionIndex.java        
2008-01-19 12:23:56 UTC (rev 4346)
+++ trunk/source/de/anomic/kelondro/kelondroCollectionIndex.java        
2008-01-20 01:22:46 UTC (rev 4347)
@@ -221,8 +221,7 @@
                     ientry.setCol(idx_col_indexpos,   aentry.index());
                     ientry.setCol(idx_col_lastread,   t);
                     ientry.setCol(idx_col_lastwrote,  t);
-                    //index.addUnique(ientry); // FIXME: this should avoid 
doubles
-                    index.put(ientry);
+                    index.addUnique(ientry); // FIXME: this should avoid 
doubles
                     count++;
                     
                     // write a log
@@ -233,6 +232,35 @@
                 }
             }
         }
+        // care for double entries
+        ArrayList<kelondroRowSet> del = index.removeDoubles();
+        Iterator<kelondroRowSet> j = del.iterator();
+        kelondroRowSet rowset;
+        Iterator<kelondroRow.Entry> rowiter;
+        int partition, maxpartition;
+        kelondroRow.Entry entry, maxentry;
+        int doublecount = 0;
+        while (j.hasNext()) {
+            rowset = j.next();
+            // for each entry in row set choose one which we want to keep
+            rowiter = rowset.rows();
+            maxentry = null;
+            maxpartition = -1;
+            while (rowiter.hasNext()) {
+                entry = rowiter.next();
+                partition = (int) entry.getColLong(idx_col_clusteridx);
+                if (partition > maxpartition) {
+                    maxpartition = partition;
+                    maxentry = entry;
+                }
+            }
+            if (maxentry != null) {
+                // put back a single entry to the index, which is then not 
double to any other entry
+                index.put(maxentry);
+                doublecount++;
+            }
+        }
+        if (doublecount > 0) serverLog.logWarning("STARTUP", "found " + 
doublecount + " RWI entries with references to several collections. All have 
been fixed (zombies still exists).");
     }
     
     private kelondroIndex openIndexFile(File path, String filenameStub, 
kelondroByteOrder indexOrder,

Modified: trunk/source/de/anomic/kelondro/kelondroEcoTable.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroEcoTable.java       2008-01-19 
12:23:56 UTC (rev 4346)
+++ trunk/source/de/anomic/kelondro/kelondroEcoTable.java       2008-01-20 
01:22:46 UTC (rev 4347)
@@ -28,12 +28,14 @@
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
+import java.util.TreeSet;
 
 import de.anomic.kelondro.kelondroRow.Entry;
 import de.anomic.server.serverMemory;
@@ -189,6 +191,34 @@
         assert file.size() == index.size() : "file.size() = " + file.size() + 
", index.size() = " + index.size();
     }
 
+    public synchronized ArrayList<kelondroRowSet> removeDoubles() throws 
IOException {
+        ArrayList<Integer[]> indexreport = index.removeDoubles();
+        ArrayList<kelondroRowSet> report = new ArrayList<kelondroRowSet>();
+        Iterator<Integer[]> i = indexreport.iterator();
+        Integer[] is;
+        kelondroRowSet rows;
+        TreeSet<Integer> d = new TreeSet<Integer>();
+        byte[] b = new byte[rowdef.objectsize];
+        while (i.hasNext()) {
+            is = i.next();
+            rows = new kelondroRowSet(this.rowdef, is.length);
+            for (int j = 0; j < is.length; j++) {
+                d.add(is[j]);
+                file.get(is[j].intValue(), b, 0);
+                rows.addUnique(rowdef.newEntry(b));
+            }
+            report.add(rows);
+        }
+        // finally delete the affected rows, but start with largest id first, 
otherwise we overwrite wrong entries
+        Integer s;
+        while (d.size() > 0) {
+            s = d.last();
+            d.remove(s);
+            this.removeInFile(s.intValue());
+        }
+        return report;
+    }
+    
     public void close() {
         file.close();
         file = null;
@@ -201,7 +231,7 @@
     public String filename() {
         return this.file.filename().toString();
     }
-
+    
     public synchronized Entry get(byte[] key) throws IOException {
         assert file.size() == index.size() : "file.size() = " + file.size() + 
", index.size() = " + index.size();
         assert ((table == null) || (table.size() == index.size()));
@@ -281,6 +311,34 @@
         assert file.size() == index.size() : "file.size() = " + file.size() + 
", index.size() = " + index.size();
     }
 
+    private void removeInFile(int i) throws IOException {
+        assert i >= 0;
+        
+        byte[] p = new byte[rowdef.objectsize];
+        if (table == null) {
+            file.cleanLast(p, 0);
+            file.put(i, p, 0);
+            byte[] k = new byte[rowdef.primaryKeyLength];
+            System.arraycopy(p, 0, k, 0, rowdef.primaryKeyLength);
+            index.puti(k, i);
+        } else {
+            if (i == index.size() - 1) {
+                // special handling if the entry is the last entry in the file
+                table.removeRow(i, false);
+                file.clean(i);
+            } else {
+                // switch values
+                kelondroRow.Entry te = table.removeOne();
+                table.set(i, te);
+
+                file.cleanLast(p, 0);
+                file.put(i, p, 0);
+                kelondroRow.Entry lr = rowdef.newEntry(p);
+                index.puti(lr.getPrimaryKeyBytes(), i);
+            }
+        }
+    }
+    
     public synchronized Entry remove(byte[] key, boolean keepOrder) throws 
IOException {
         assert file.size() == index.size() : "file.size() = " + file.size() + 
", index.size() = " + index.size();
         assert ((table == null) || (table.size() == index.size()));

Modified: trunk/source/de/anomic/kelondro/kelondroFlexTable.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroFlexTable.java      2008-01-19 
12:23:56 UTC (rev 4346)
+++ trunk/source/de/anomic/kelondro/kelondroFlexTable.java      2008-01-20 
01:22:46 UTC (rev 4347)
@@ -33,6 +33,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
+import java.util.TreeSet;
 
 import de.anomic.server.serverMemory;
 import de.anomic.server.logging.serverLog;
@@ -172,9 +173,8 @@
         System.out.flush();
         return ri;
     }
-    
 
-        private kelondroIndex initializeTreeIndex(File indexfile, long 
preloadTime, kelondroByteOrder objectOrder) throws IOException {
+    private kelondroIndex initializeTreeIndex(File indexfile, long 
preloadTime, kelondroByteOrder objectOrder) throws IOException {
                kelondroIndex treeindex = new kelondroCache(new 
kelondroTree(indexfile, true, preloadTime, 
treeIndexRow(rowdef.primaryKeyLength, objectOrder), 2, 80));
                Iterator<kelondroNode> content = super.col[0].contentNodes(-1);
                kelondroNode node;
@@ -315,6 +315,32 @@
                
     }
     
+    public synchronized ArrayList<kelondroRowSet> removeDoubles() throws 
IOException {
+        ArrayList<Integer[]> indexreport = index.removeDoubles();
+        ArrayList<kelondroRowSet> report = new ArrayList<kelondroRowSet>();
+        Iterator<Integer[]> i = indexreport.iterator();
+        Integer[] is;
+        kelondroRowSet rows;
+        TreeSet<Integer> d = new TreeSet<Integer>();
+        while (i.hasNext()) {
+            is = i.next();
+            rows = new kelondroRowSet(this.rowdef, is.length);
+            for (int j = 0; j < is.length; j++) {
+                d.add(is[j]);
+                rows.addUnique(this.get(is[j].intValue()));
+            }
+            report.add(rows);
+        }
+        // finally delete the affected rows, but start with largest id first, 
otherwise we overwrite wrong entries
+        Integer s;
+        while (d.size() > 0) {
+            s = d.last();
+            d.remove(s);
+            this.remove(s.intValue());
+        }
+        return report;
+    }
+    
     public synchronized kelondroRow.Entry remove(byte[] key, boolean 
keepOrder) throws IOException {
         assert keepOrder == false; // the underlying data structure is a file, 
where the order cannot be maintained. Gaps are filled with new values.
         int i = index.removei(key);

Modified: trunk/source/de/anomic/kelondro/kelondroIndex.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroIndex.java  2008-01-19 12:23:56 UTC 
(rev 4346)
+++ trunk/source/de/anomic/kelondro/kelondroIndex.java  2008-01-20 01:22:46 UTC 
(rev 4347)
@@ -51,6 +51,7 @@
 package de.anomic.kelondro;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
 
@@ -67,6 +68,7 @@
     public void putMultiple(List<kelondroRow.Entry> rows) throws IOException; 
// for R/W head path optimization
     public void addUnique(kelondroRow.Entry row) throws IOException; // no 
double-check
     public void addUniqueMultiple(List<kelondroRow.Entry> rows) throws 
IOException; // no double-check
+    public ArrayList<kelondroRowSet> removeDoubles() throws IOException; // 
removes all elements that are double (to be used after all addUnique)
     public kelondroRow.Entry remove(byte[] key, boolean keepOrder) throws 
IOException;
     public kelondroRow.Entry removeOne() throws IOException;
     public kelondroCloneableIterator<byte[]> keys(boolean up, byte[] firstKey) 
throws IOException; // iterates only the key

Modified: trunk/source/de/anomic/kelondro/kelondroRAMIndex.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroRAMIndex.java       2008-01-19 
12:23:56 UTC (rev 4346)
+++ trunk/source/de/anomic/kelondro/kelondroRAMIndex.java       2008-01-20 
01:22:46 UTC (rev 4347)
@@ -24,7 +24,7 @@
 
 package de.anomic.kelondro;
 
-import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Date;
 import java.util.Iterator;
 import java.util.List;
@@ -65,7 +65,7 @@
         }
     }
     
-    public synchronized kelondroRow.Entry get(byte[] key) throws IOException {
+    public synchronized kelondroRow.Entry get(byte[] key) {
         assert (key != null);
         finishInitialization();
         kelondroRow.Entry indexentry = index0.get(key);
@@ -73,14 +73,14 @@
         return index1.get(key);
     }
 
-       public boolean has(byte[] key) throws IOException {
+       public boolean has(byte[] key) {
                assert (key != null);
         finishInitialization();
         if (index0.has(key)) return true;
         return index1.has(key);
        }
     
-    public synchronized kelondroRow.Entry put(kelondroRow.Entry entry) throws 
IOException {
+    public synchronized kelondroRow.Entry put(kelondroRow.Entry entry) {
        assert (entry != null);
        finishInitialization();
         // if the new entry is within the initialization part, just overwrite 
it
@@ -93,18 +93,18 @@
         return index1.put(entry);
     }
     
-       public Entry put(Entry row, Date entryDate) throws IOException {
+       public Entry put(Entry row, Date entryDate) {
                return put(row);
        }
        
-       public void putMultiple(List<Entry> rows) throws IOException {
+       public void putMultiple(List<Entry> rows) {
                Iterator<Entry> i = rows.iterator();
                while (i.hasNext()) {
                        put(i.next());
                }
        }
 
-       public synchronized void addUnique(kelondroRow.Entry entry) throws 
IOException {        
+       public synchronized void addUnique(kelondroRow.Entry entry) {           
        assert (entry != null);
         if (index1 == null) {
             // we are in the initialization phase
@@ -115,14 +115,20 @@
         }
     }
 
-       public void addUniqueMultiple(List<Entry> rows) throws IOException {
+       public void addUniqueMultiple(List<Entry> rows) {
                Iterator<Entry> i = rows.iterator();
                while (i.hasNext()) {
                        addUnique(i.next());
                }
        }
        
-    public synchronized kelondroRow.Entry remove(byte[] key, boolean 
keepOrder) throws IOException {
+       public synchronized ArrayList<kelondroRowSet> removeDoubles() {
+           // finish initialization phase explicitly
+           if (index1 == null) index1 = new kelondroRowSet(rowdef, 0);
+           return index0.removeDoubles();
+       }
+       
+    public synchronized kelondroRow.Entry remove(byte[] key, boolean 
keepOrder) {
         assert keepOrder == true; // if this is false, the index must be 
re-ordered so many times which will cause a major CPU usage
        finishInitialization();
         // if the new entry is within the initialization part, just delete it
@@ -135,7 +141,7 @@
         return index1.remove(key, keepOrder);
     }
 
-    public synchronized kelondroRow.Entry removeOne() throws IOException {
+    public synchronized kelondroRow.Entry removeOne() {
         if ((index1 != null) && (index1.size() != 0)) {
             return index1.removeOne();
         }
@@ -156,7 +162,7 @@
         return index0.size() + index1.size();
     }
     
-    public synchronized kelondroCloneableIterator<byte[]> keys(boolean up, 
byte[] firstKey) throws IOException {
+    public synchronized kelondroCloneableIterator<byte[]> keys(boolean up, 
byte[] firstKey) {
         // returns the key-iterator of the underlying kelondroIndex
         if (index1 == null) {
             // finish initialization phase
@@ -182,7 +188,7 @@
                 true);
     }
 
-    public synchronized kelondroCloneableIterator<kelondroRow.Entry> 
rows(boolean up, byte[] firstKey) throws IOException {
+    public synchronized kelondroCloneableIterator<kelondroRow.Entry> 
rows(boolean up, byte[] firstKey) {
         // returns the row-iterator of the underlying kelondroIndex
         if (index1 == null) {
             // finish initialization phase

Modified: trunk/source/de/anomic/kelondro/kelondroRowCollection.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroRowCollection.java  2008-01-19 
12:23:56 UTC (rev 4346)
+++ trunk/source/de/anomic/kelondro/kelondroRowCollection.java  2008-01-20 
01:22:46 UTC (rev 4347)
@@ -26,6 +26,7 @@
 
 import java.io.File;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Random;
@@ -583,9 +584,9 @@
         try {
             while (i >= 0) {
                 if (compare(i, i + 1) == 0) {
-                    removeRow(i, false);
+                    removeRow(i + 1, false);
                     d++;
-                    if (i < chunkcount - 2) u = false;
+                    if (i + 1 < chunkcount - 1) u = false;
                 }
                 i--;
                 if (System.currentTimeMillis() - t > 10000) {
@@ -599,6 +600,45 @@
         }
     }
     
+    public synchronized ArrayList<kelondroRowSet> removeDoubles() {
+        assert (this.rowdef.objectOrder != null);
+        // removes double-occurrences of chunks
+        // in contrast to uniq() this removes also the remaining, non-double 
entry that had a double-occurrence to the others
+        // all removed chunks are returned in an array
+        this.sort();
+        ArrayList<kelondroRowSet> report = new ArrayList<kelondroRowSet>();
+        if (chunkcount < 2) return report;
+        int i = chunkcount - 2;
+        int d = 0;
+        boolean u = true;
+        kelondroRowSet collection = new kelondroRowSet(this.rowdef, 2);
+        try {
+            while (i >= 0) {
+                if (compare(i, i + 1) == 0) {
+                    collection.addUnique(get(i + 1));
+                    removeRow(i + 1, false);
+                    d++;
+                    if (i + 1 < chunkcount - 1) u = false;
+                } else if (collection.size() > 0) {
+                    // finish collection of double occurrences
+                    collection.addUnique(get(i + 1));
+                    removeRow(i + 1, false);
+                    d++;
+                    if (i + 1 < chunkcount - 1) u = false;
+                    collection.trim(false);
+                    report.add(collection);
+                    collection = new kelondroRowSet(this.rowdef, 2);
+                }
+                i--;
+            }
+        } catch (RuntimeException e) {
+            serverLog.logWarning("kelondroRowCollection", e.getMessage(), e);
+        } finally {
+            if (!u) this.sort();
+        }
+        return report;
+    }
+    
     public synchronized boolean isSorted() {
         assert (this.rowdef.objectOrder != null);
         if (chunkcount <= 1) return true;
@@ -711,7 +751,18 @@
                        kelondroBase64Order.enhancedCoder, 0);
        
        kelondroRowCollection a = new kelondroRowCollection(r, testsize);
+       a.add("AAAAAAAAAAAA".getBytes());
+       a.add("BBBBBBBBBBBB".getBytes());
+       a.add("BBBBBBBBBBBB".getBytes());
+       a.add("BBBBBBBBBBBB".getBytes());
+       a.add("CCCCCCCCCCCC".getBytes());
+       ArrayList<kelondroRowSet> del = a.removeDoubles();
+       System.out.println(del + "rows double");
+       Iterator<kelondroRow.Entry> j = a.rows();
+       while (j.hasNext()) System.out.println(new String(j.next().bytes()));
+       
         System.out.println("kelondroRowCollection test with size = " + 
testsize);
+        a = new kelondroRowCollection(r, testsize);
         long t0 = System.currentTimeMillis();
         random = new Random(0);
         for (int i = 0; i < testsize; i++) a.add(randomHash().getBytes());
@@ -807,40 +858,4 @@
         System.out.println(daysSince2000(System.currentTimeMillis()));
         */
     }
-    
-    /*
-kelondroRowCollection test with size = 10000
-create c   : 134 milliseconds, 74 entries/millisecond
-copy c -> d: 47 milliseconds, 212 entries/millisecond
-sort c (1) : 66 milliseconds, 151 entries/millisecond
-sort d (2) : 23 milliseconds, 434 entries/millisecond
-uniq c     : 3 milliseconds, 3333 entries/millisecond
-uniq d     : 2 milliseconds, 5000 entries/millisecond
-create e   : 528 milliseconds, 18 entries/millisecond
-sort e (2) : 13 milliseconds, 769 entries/millisecond
-uniq e     : 2 milliseconds, 5000 entries/millisecond
-c isSorted = true: 2 milliseconds
-d isSorted = true: 3 milliseconds
-e isSorted = true: 2 milliseconds
-e allfound = true: 85 milliseconds
-e noghosts = true: 75 milliseconds
-Result size: c = 10000, d = 10000, e = 10000
-
-kelondroRowCollection test with size = 100000
-create c   : 589 milliseconds, 169 entries/millisecond
-copy c -> d: 141 milliseconds, 709 entries/millisecond
-sort c (1) : 268 milliseconds, 373 entries/millisecond
-sort d (2) : 187 milliseconds, 534 entries/millisecond
-uniq c     : 13 milliseconds, 7692 entries/millisecond
-uniq d     : 14 milliseconds, 7142 entries/millisecond
-create e   : 22068 milliseconds, 4 entries/millisecond
-sort e (2) : 167 milliseconds, 598 entries/millisecond
-uniq e     : 14 milliseconds, 7142 entries/millisecond
-c isSorted = true: 13 milliseconds
-d isSorted = true: 14 milliseconds
-e isSorted = true: 13 milliseconds
-e allfound = true: 815 milliseconds
-e noghosts = true: 787 milliseconds
-Result size: c = 100000, d = 100000, e = 100000
-     */
 }
\ No newline at end of file

Modified: trunk/source/de/anomic/kelondro/kelondroRowSet.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroRowSet.java 2008-01-19 12:23:56 UTC 
(rev 4346)
+++ trunk/source/de/anomic/kelondro/kelondroRowSet.java 2008-01-20 01:22:46 UTC 
(rev 4347)
@@ -24,7 +24,6 @@
 
 package de.anomic.kelondro;
 
-import java.io.IOException;
 import java.util.Date;
 import java.util.Iterator;
 import java.util.List;
@@ -76,7 +75,7 @@
                this.profile = new kelondroProfile();
        }
    
-    public synchronized boolean has(byte[] key) throws IOException {
+    public synchronized boolean has(byte[] key) {
         return (get(key) != null);
     }
     
@@ -92,7 +91,7 @@
         return entry;
     }
     
-    public synchronized void putMultiple(List<kelondroRow.Entry> rows) throws 
IOException {
+    public synchronized void putMultiple(List<kelondroRow.Entry> rows) {
         Iterator<kelondroRow.Entry> i = rows.iterator();
         while (i.hasNext()) put(i.next());
     }

Modified: trunk/source/de/anomic/kelondro/kelondroSQLTable.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroSQLTable.java       2008-01-19 
12:23:56 UTC (rev 4346)
+++ trunk/source/de/anomic/kelondro/kelondroSQLTable.java       2008-01-20 
01:22:46 UTC (rev 4347)
@@ -32,6 +32,7 @@
 import java.sql.PreparedStatement;
 import java.sql.ResultSet;
 import java.sql.SQLException;
+import java.util.ArrayList;
 import java.util.Date;
 import java.util.Iterator;
 import java.util.List;
@@ -140,6 +141,10 @@
         return (get(key) != null);
     }
     
+    public ArrayList<kelondroRowSet> removeDoubles() {
+        return new ArrayList<kelondroRowSet>();
+    }
+    
     public kelondroRow.Entry get(byte[] key) throws IOException {
         try {
             String sqlQuery = new String

Modified: trunk/source/de/anomic/kelondro/kelondroSplitTable.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroSplitTable.java     2008-01-19 
12:23:56 UTC (rev 4346)
+++ trunk/source/de/anomic/kelondro/kelondroSplitTable.java     2008-01-20 
01:22:46 UTC (rev 4347)
@@ -28,6 +28,7 @@
 
 import java.io.File;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Date;
 import java.util.HashMap;
@@ -292,6 +293,15 @@
         while (i.hasNext()) addUnique(i.next(), entryDate);
     }
     
+    public ArrayList<kelondroRowSet> removeDoubles() throws IOException {
+        Iterator<kelondroIndex> i = tables.values().iterator();
+        ArrayList<kelondroRowSet> report = new ArrayList<kelondroRowSet>();
+        while (i.hasNext()) {
+            report.addAll(i.next().removeDoubles());
+        }
+        return report;
+    }
+    
     public synchronized kelondroRow.Entry remove(byte[] key, boolean 
keepOrder) throws IOException {
         Iterator<kelondroIndex> i = tables.values().iterator();
         kelondroIndex table;

Deleted: trunk/source/de/anomic/kelondro/kelondroSplittedTree.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroSplittedTree.java   2008-01-19 
12:23:56 UTC (rev 4346)
+++ trunk/source/de/anomic/kelondro/kelondroSplittedTree.java   2008-01-20 
01:22:46 UTC (rev 4347)
@@ -1,344 +0,0 @@
-// kelondroSplittedTree.java
-// -------------------------
-// part of The Kelondro Database
-// (C) by Michael Peter Christen; [EMAIL PROTECTED]
-// first published on http://www.anomic.de
-// Frankfurt, Germany, 2006
-// created 07.01.2006
-//
-// $LastChangedDate: 2005-09-22 22:01:26 +0200 (Thu, 22 Sep 2005) $
-// $LastChangedRevision: 774 $
-// $LastChangedBy: orbiter $
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//
-// Using this software in any meaning (reading, learning, copying, compiling,
-// running) means that you agree that the Author(s) is (are) not responsible
-// for cost, loss of data or any harm that may be caused directly or indirectly
-// by usage of this softare or this documentation. The usage of this software
-// is on your own risk. The installation and usage (starting/running) of this
-// software may allow other people or application to access your computer and
-// any attached devices and is highly dependent on the configuration of the
-// software which must be done by the user of the software; the author(s) is
-// (are) also not responsible for proper configuration and usage of the
-// software, even if provoked by documentation provided together with
-// the software.
-//
-// Any changes to this file according to the GPL as documented in the file
-// gpl.txt aside this file in the shipment you received can be done to the
-// lines that follows this copyright notice here, but changes must not be
-// done inside the copyright notive above. A re-distribution must contain
-// the intact and unchanged copyright notice.
-// Contributions and changes to the program code must be marked as such.
-
-package de.anomic.kelondro;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.Iterator;
-import java.util.List;
-
-public class kelondroSplittedTree implements kelondroIndex {
-
-    private kelondroTree[] ktfs;
-    private kelondroByteOrder order;
-    private int ff;
-    private String filename;
-    
-    private static File dbFile(File path, String filenameStub, int forkfactor, 
int columns, int number) {
-        String ns = Integer.toHexString(number).toUpperCase();
-        while (ns.length() < 2) ns = "0" + ns;
-        String ff = Integer.toHexString(forkfactor).toUpperCase();
-        while (ff.length() < 2) ff = "0" + ff;
-        String co = Integer.toHexString(columns).toUpperCase();
-        while (co.length() < 2) co = "0" + co;
-        return new File(path, filenameStub + "." + ff + "." + co + "." + ns + 
".ktc");
-    }
-    
-    public kelondroSplittedTree(File pathToFiles, String filenameStub, kelondroByteOrder objectOrder,
-                            long preloadTime,
-                            int forkfactor, kelondroRow rowdef, int txtProps, int txtPropsWidth) {
-        try {
-            this.filename = new File(pathToFiles, filenameStub).getCanonicalPath();
-        } catch (IOException e) {
-            this.filename = null;
-        }
-        ktfs = new kelondroTree[forkfactor];
-        File f;
-        for (int i = 0; i < forkfactor; i++) {
-            f = dbFile(pathToFiles, filenameStub, forkfactor, rowdef.columns(), i);
-            ktfs[i] = kelondroTree.open(f, true, preloadTime / forkfactor, rowdef, txtProps, txtPropsWidth);
-        }
-        this.order = objectOrder;
-        ff = forkfactor;
-    }
-    
-    public void reset() throws IOException {
-       for (int i = 0; i < ktfs.length; i++) {
-            ktfs[i].reset();
-        }
-    }
-    
-    public void close() {
-        for (int i = 0; i < ktfs.length; i++) ktfs[i].close();
-    }
-    
-    public int size() {
-        return ktfs[0].size();
-    }
-    
-    public kelondroRow row() {
-        return ktfs[0].row();
-    }
-    
-    private int partition(byte[] key) {
-        // return number of db file where this key should be managed
-        return (int) order.partition(key, ff);
-    }
-    
-    public boolean has(byte[] key) throws IOException {
-        throw new UnsupportedOperationException("has should not be used with kelondroSplittedTree.");
-    }
-    
-    public kelondroRow.Entry get(byte[] key) throws IOException {
-        return ktfs[partition(key)].get(key);
-    }
-
-       @SuppressWarnings("unchecked")
-    public synchronized void putMultiple(List<kelondroRow.Entry> rows) throws IOException {
-        Iterator<kelondroRow.Entry> i = rows.iterator();
-        kelondroRow.Entry row;
-        ArrayList<kelondroRow.Entry>[] parts = new ArrayList[ktfs.length];
-        for (int j = 0; j < ktfs.length; j++) parts[j] = new ArrayList<kelondroRow.Entry>();
-        while (i.hasNext()) {
-            row = i.next();
-            parts[partition(row.getColBytes(0))].add(row);
-        }
-        for (int j = 0; j < ktfs.length; j++) ktfs[j].putMultiple(parts[j]);
-    }
-    
-    public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException {
-        return put(row);
-    }
-    
-    public synchronized void addUnique(kelondroRow.Entry row) throws IOException {
-        throw new UnsupportedOperationException();
-    }
-    
-    public synchronized void addUnique(kelondroRow.Entry row, Date entryDate) {
-        throw new UnsupportedOperationException();
-    }
-    
-    public synchronized void addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
-        throw new UnsupportedOperationException();
-    }
-    
-    public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException {
-        return ktfs[partition(row.getColBytes(0))].put(row);
-    }
-
-    public kelondroRow.Entry remove(byte[] key, boolean keepOrder) throws IOException {
-        return ktfs[partition(key)].remove(key, keepOrder);
-    }
-    
-    public kelondroRow.Entry removeOne() throws IOException {
-        // removes one entry from the partition with the most entries
-        int maxc = -1, maxi = 0;
-        for (int i = 0; i < ktfs.length; i++) {
-            if (ktfs[i].size() > maxc) {
-                maxc = ktfs[i].size();
-                maxi = i;
-            }
-        }
-        if (maxc > 0) {
-            return ktfs[maxi].removeOne();
-        } else {
-            return null;
-        }
-    }
-    
-    public kelondroCloneableIterator<kelondroRow.Entry> rows(boolean up, byte[] firstKey) throws IOException {
-        return new rowIterator(up, firstKey);
-    }
-    
-    public class rowIterator implements kelondroCloneableIterator<kelondroRow.Entry> {
-
-        int c = 0;
-        Iterator<kelondroRow.Entry> ktfsI;
-        boolean up;
-        
-        public rowIterator(boolean up, byte[] firstKey) throws IOException {
-            this.up = up;
-            c = (up) ? 0 : (ff - 1);
-            if (firstKey != null) throw new UnsupportedOperationException("ktfsIterator does not work with a start key");
-            ktfsI = ktfs[c].rows(up, firstKey); // FIXME: this works only correct with firstKey == null
-        }
-        
-               public rowIterator clone(Object secondKey) {
-            try {
-                return new rowIterator(up, (byte[]) secondKey);
-            } catch (IOException e) {
-                return null;
-            }
-        }
-        
-        public boolean hasNext() {
-            return ((ktfsI.hasNext()) ||
-                    ((up) && (c < ff)) ||
-                    ((!(up)) && (c > 0)));
-        }
-
-        public kelondroRow.Entry next() {
-            if (ktfsI.hasNext()) return ktfsI.next();
-            if (up) {
-                if (c < (ff - 1)) {
-                    c++;
-                    try {
-                        ktfsI = ktfs[c].rows(true, null);
-                    } catch (IOException e) {
-                        return null;
-                    }
-                    return ktfsI.next();
-                } else {
-                    return null;
-                }
-            } else {
-                if (c > 0) {
-                    c--;
-                    try {
-                        ktfsI = ktfs[c].rows(false, null);
-                    } catch (IOException e) {
-                        return null;
-                    }
-                    return ktfsI.next();
-                } else {
-                    return null;
-                }
-            }
-        }
-
-        public void remove() {
-            ktfsI.remove();
-        }
-        
-    }
-
-       public kelondroCloneableIterator<byte[]> keys(boolean up, byte[] firstKey) throws IOException {
-               return new keyIterator(up, firstKey);
-       }
-       
-    public class keyIterator implements kelondroCloneableIterator<byte[]> {
-
-        int c = 0;
-        Iterator<byte[]> ktfsI;
-        boolean up;
-        
-        public keyIterator(boolean up, byte[] firstKey) throws IOException {
-            this.up = up;
-            c = (up) ? 0 : (ff - 1);
-            if (firstKey != null) throw new UnsupportedOperationException("ktfsIterator does not work with a start key");
-            ktfsI = ktfs[c].keys(up, firstKey); // FIXME: this works only correct with firstKey == null
-        }
-        
-               public keyIterator clone(Object secondKey) {
-            try {
-                return new keyIterator(up, (byte[]) secondKey);
-            } catch (IOException e) {
-                return null;
-            }
-        }
-        
-        public boolean hasNext() {
-            return ((ktfsI.hasNext()) ||
-                    ((up) && (c < ff)) ||
-                    ((!(up)) && (c > 0)));
-        }
-
-        public byte[] next() {
-            if (ktfsI.hasNext()) return ktfsI.next();
-            if (up) {
-                if (c < (ff - 1)) {
-                    c++;
-                    try {
-                        ktfsI = ktfs[c].keys(true, null);
-                    } catch (IOException e) {
-                        return null;
-                    }
-                    return ktfsI.next();
-                } else {
-                    return null;
-                }
-            } else {
-                if (c > 0) {
-                    c--;
-                    try {
-                        ktfsI = ktfs[c].keys(false, null);
-                    } catch (IOException e) {
-                        return null;
-                    }
-                    return ktfsI.next();
-                } else {
-                    return null;
-                }
-            }
-        }
-
-        public void remove() {
-            ktfsI.remove();
-        }
-        
-    }
-    
-    public kelondroByteOrder order() {
-        return this.order;
-    }
-
-    public int primarykey() {
-        return 0;
-    }
-
-    public kelondroProfile profile() {
-        kelondroProfile[] profiles = new kelondroProfile[ktfs.length];
-        for (int i = 0; i < ktfs.length; i++) profiles[i] = ktfs[i].profile();
-        return kelondroProfile.consolidate(profiles);
-    }
-    
-    public final int cacheObjectChunkSize() {
-        // dummy method
-        return -1;
-    }
-    
-    public long[] cacheObjectStatus() {
-        // dummy method
-        return null;
-    }
-    
-    public final int cacheNodeChunkSize() {
-        // returns the size that the node cache uses for a single entry
-        return -1;
-    }
-    
-    public final int[] cacheNodeStatus() {
-        // a collection of different node cache status values
-        return new int[]{0,0,0,0,0,0,0,0,0,0};
-    }
-
-    public String filename() {
-        return this.filename;
-    }
-
-}

Modified: trunk/source/de/anomic/kelondro/kelondroTree.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroTree.java   2008-01-19 12:23:56 UTC (rev 4346)
+++ trunk/source/de/anomic/kelondro/kelondroTree.java   2008-01-20 01:22:46 UTC (rev 4347)
@@ -50,6 +50,7 @@
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.RandomAccessFile;
+import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.Date;
 import java.util.HashSet;
@@ -186,7 +187,12 @@
         }
         return result;
     }
-
+    
+    public ArrayList<kelondroRowSet> removeDoubles() {
+        // this data structure cannot have doubles; return empty array
+        return new ArrayList<kelondroRowSet>();
+    }
+    
     public class Search {
 
         // a search object combines the results of a search in the tree, which are

_______________________________________________
YaCy-svn mailing list
[email protected]
https://lists.berlios.de/mailman/listinfo/yacy-svn

Antwort per Email an