Author: orbiter
Date: 2008-01-23 12:13:39 +0100 (Wed, 23 Jan 2008)
New Revision: 4375
Modified:
trunk/source/de/anomic/kelondro/kelondroEcoTable.java
trunk/source/de/anomic/kelondro/kelondroRowSet.java
trunk/source/de/anomic/plasma/plasmaSnippetCache.java
Log:
- fix deletion of doubles (duplicate entries) in eco tables: delete positions are now collected first and removed top-down
- changed the moment at which row sets are re-sorted: now after adding a new entry, no longer during a get
- added some asserts in snippet cache for debugging
Modified: trunk/source/de/anomic/kelondro/kelondroEcoTable.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroEcoTable.java 2008-01-23
00:10:15 UTC (rev 4374)
+++ trunk/source/de/anomic/kelondro/kelondroEcoTable.java 2008-01-23
11:13:39 UTC (rev 4375)
@@ -145,13 +145,22 @@
index.addi(key, ds[0].intValue());
}
// then remove the other doubles by removing them from the
table, but do a re-indexing while doing that
+ // first aggregate all the delete positions because the
elements from the top positions must be removed first
i = doubles.iterator();
+ TreeSet<Integer> delpos = new TreeSet<Integer>();
while (i.hasNext()) {
ds = i.next();
for (int j = 1; j < ds.length; j++) {
- removeInFile(ds[j].intValue());
+ delpos.add(ds[j]);
}
}
+ // now remove the entries in a sorted way (top-down)
+ Integer top;
+ while (delpos.size() > 0) {
+ top = delpos.last();
+ delpos.remove(top);
+ removeInFile(top.intValue());
+ }
}
} catch (FileNotFoundException e) {
// should never happen
Modified: trunk/source/de/anomic/kelondro/kelondroRowSet.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroRowSet.java 2008-01-23 00:10:15 UTC
(rev 4374)
+++ trunk/source/de/anomic/kelondro/kelondroRowSet.java 2008-01-23 11:13:39 UTC
(rev 4375)
@@ -110,6 +110,10 @@
index = find(entry.bytes(), (rowdef.primaryKeyIndex < 0) ? 0
:super.rowdef.colstart[rowdef.primaryKeyIndex], super.rowdef.primaryKeyLength);
if (index < 0) {
super.addUnique(entry);
+ // when reaching a specific amount of un-sorted entries, re-sort
all
+ if ((this.chunkcount - this.sortBound) > collectionReSortLimit) {
+ sort();
+ }
} else {
oldentry = get(index);
set(index, entry);
@@ -140,10 +144,6 @@
if (rowdef.objectOrder == null) return iterativeSearch(a, astart,
alength, 0, this.chunkcount);
- // check if a re-sorting makes sense
- if ((this.chunkcount - this.sortBound) > collectionReSortLimit) {
- sort();
- }
if ((this.rowdef.objectOrder != null) && (this.rowdef.objectOrder
instanceof kelondroBase64Order) && (this.sortBound > 4000)) {
// first try to find in sorted area
final byte[] compiledPivot = compilePivot(a, astart, alength);
Modified: trunk/source/de/anomic/plasma/plasmaSnippetCache.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSnippetCache.java 2008-01-23
00:10:15 UTC (rev 4374)
+++ trunk/source/de/anomic/plasma/plasmaSnippetCache.java 2008-01-23
11:13:39 UTC (rev 4375)
@@ -867,6 +867,10 @@
log.logInfo("error: '" + snippet.getError() + "', remove url = " +
snippet.getUrl().toNormalform(false, true) + ", cause: " + snippet.getError());
plasmaSwitchboard.getSwitchboard().wordIndex.loadedURL.remove(urlHash);
plasmaSearchEvent event = plasmaSearchEvent.getEvent(eventID);
+ assert plasmaSwitchboard.getSwitchboard() != null;
+ assert plasmaSwitchboard.getSwitchboard().wordIndex != null;
+ assert event != null;
+ assert event.getQuery() != null;
plasmaSwitchboard.getSwitchboard().wordIndex.removeEntryMultiple(event.getQuery().queryHashes,
urlHash);
event.remove(urlHash);
}
_______________________________________________
YaCy-svn mailing list
[email protected]
https://lists.berlios.de/mailman/listinfo/yacy-svn