Author: orbiter
Date: 2008-01-30 01:15:43 +0100 (Wed, 30 Jan 2008)
New Revision: 4419
Modified:
trunk/source/de/anomic/index/indexRWIEntryOrder.java
trunk/source/de/anomic/index/indexRWIVarEntry.java
trunk/source/de/anomic/index/indexURLEntry.java
trunk/source/de/anomic/plasma/plasmaCrawlLURL.java
trunk/source/de/anomic/plasma/plasmaDHTChunk.java
trunk/source/de/anomic/plasma/plasmaSearchEvent.java
trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
trunk/source/de/anomic/plasma/plasmaSnippetCache.java
trunk/source/de/anomic/plasma/plasmaWordIndex.java
Log:
fixed bug in remote search
Modified: trunk/source/de/anomic/index/indexRWIEntryOrder.java
===================================================================
--- trunk/source/de/anomic/index/indexRWIEntryOrder.java 2008-01-29 23:07:59 UTC (rev 4418)
+++ trunk/source/de/anomic/index/indexRWIEntryOrder.java 2008-01-30 00:15:43 UTC (rev 4419)
@@ -115,6 +115,10 @@
return cardinal(new indexRWIVarEntry(new indexRWIRowEntry(key)));
}
+ public long cardinal(indexRWIRowEntry t) {
+ return cardinal(new indexRWIVarEntry(t));
+ }
+
public long cardinal(indexRWIVarEntry t) {
//return Long.MAX_VALUE - preRanking(ranking, iEntry, this.entryMin, this.entryMax, this.searchWords);
// the normalizedEntry must be a normalized indexEntry
Modified: trunk/source/de/anomic/index/indexRWIVarEntry.java
===================================================================
--- trunk/source/de/anomic/index/indexRWIVarEntry.java 2008-01-29 23:07:59 UTC (rev 4418)
+++ trunk/source/de/anomic/index/indexRWIVarEntry.java 2008-01-30 00:15:43 UTC (rev 4419)
@@ -89,12 +89,12 @@
}
public boolean isNewer(indexRWIEntry other) {
- // TODO Auto-generated method stub
+ assert false; // should not be used
return false;
}
public boolean isOlder(indexRWIEntry other) {
- // TODO Auto-generated method stub
+ assert false; // should not be used
return false;
}
@@ -131,12 +131,12 @@
}
public Entry toKelondroEntry() {
- // TODO Auto-generated method stub
+ assert false; // should not be used
return null;
}
public String toPropertyForm() {
- // TODO Auto-generated method stub
+ assert false; // should not be used
return null;
}
Modified: trunk/source/de/anomic/index/indexURLEntry.java
===================================================================
--- trunk/source/de/anomic/index/indexURLEntry.java 2008-01-29 23:07:59 UTC (rev 4418)
+++ trunk/source/de/anomic/index/indexURLEntry.java 2008-01-30 00:15:43 UTC (rev 4419)
@@ -115,7 +115,7 @@
private kelondroRow.Entry entry;
private String snippet;
- private indexRWIEntry word; // this is only used if the url is transported via remote search requests
+ private indexRWIRowEntry word; // this is only used if the url is transported via remote search requests
private long ranking; // during generation of a search result this value is set
public indexURLEntry(
@@ -185,7 +185,7 @@
return s.toString().getBytes();
}
- public indexURLEntry(kelondroRow.Entry entry, indexRWIEntry searchedWord,
long ranking) {
+ public indexURLEntry(kelondroRow.Entry entry, indexRWIRowEntry
searchedWord, long ranking) {
this.entry = entry;
this.snippet = null;
this.word = searchedWord;
@@ -287,7 +287,7 @@
// serverLog.logFailure("plasmaLURL.corePropList",
e.getMessage());
// if (moddate == null)
serverLog.logFailure("plasmaLURL.corePropList", "moddate=null");
// if (loaddate == null)
serverLog.logFailure("plasmaLURL.corePropList", "loaddate=null");
- // e.printStackTrace();
+ e.printStackTrace();
return null;
}
}
@@ -391,7 +391,7 @@
return snippet;
}
- public indexRWIEntry word() {
+ public indexRWIRowEntry word() {
return word;
}
Modified: trunk/source/de/anomic/plasma/plasmaCrawlLURL.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaCrawlLURL.java 2008-01-29 23:07:59 UTC (rev 4418)
+++ trunk/source/de/anomic/plasma/plasmaCrawlLURL.java 2008-01-30 00:15:43 UTC (rev 4419)
@@ -66,7 +66,7 @@
import de.anomic.data.htmlTools;
import de.anomic.http.httpc;
import de.anomic.http.httpc.response;
-import de.anomic.index.indexRWIEntry;
+import de.anomic.index.indexRWIRowEntry;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCache;
@@ -153,7 +153,7 @@
return 0;
}
- public synchronized indexURLEntry load(String urlHash, indexRWIEntry
searchedWord, long ranking) {
+ public synchronized indexURLEntry load(String urlHash, indexRWIRowEntry
searchedWord, long ranking) {
// generates an plasmaLURLEntry using the url hash
// to speed up the access, the url-hashes are buffered
// in the hash cache.
Modified: trunk/source/de/anomic/plasma/plasmaDHTChunk.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaDHTChunk.java 2008-01-29 23:07:59 UTC (rev 4418)
+++ trunk/source/de/anomic/plasma/plasmaDHTChunk.java 2008-01-30 00:15:43 UTC (rev 4419)
@@ -213,7 +213,7 @@
final Iterator<indexContainer> indexContainerIterator =
wordIndex.indexContainerSet(hash, ram, true, maxcount).iterator();
indexContainer container;
Iterator<indexRWIRowEntry> urlIter;
- indexRWIEntry iEntry;
+ indexRWIRowEntry iEntry;
indexURLEntry lurl;
int refcount = 0;
int wholesize;
@@ -243,7 +243,7 @@
// CPU & IO reduce
// try { Thread.sleep(50); } catch
(InterruptedException e) { }
- iEntry = (indexRWIEntry) urlIter.next();
+ iEntry = urlIter.next();
if ((iEntry == null) || (iEntry.urlHash() == null)) {
urlIter.remove();
continue;
@@ -263,7 +263,7 @@
// remove all remaining; we have enough
while (urlIter.hasNext()) {
- iEntry = (indexRWIEntry) urlIter.next();
+ iEntry = urlIter.next();
urlIter.remove();
}
Modified: trunk/source/de/anomic/plasma/plasmaSearchEvent.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchEvent.java 2008-01-29 23:07:59 UTC (rev 4418)
+++ trunk/source/de/anomic/plasma/plasmaSearchEvent.java 2008-01-30 00:15:43 UTC (rev 4419)
@@ -347,7 +347,7 @@
if (query.contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) {
// attach text snippet
startTime = System.currentTimeMillis();
- plasmaSnippetCache.TextSnippet snippet =
plasmaSnippetCache.retrieveTextSnippet(comp.url(), snippetFetchWordHashes,
(snippetFetchMode == 2), ((query.constraint != null) &&
(query.constraint.get(plasmaCondenser.flag_cat_indexof))), 180, 3000,
(snippetFetchMode == 2) ? Integer.MAX_VALUE : 100000);
+ plasmaSnippetCache.TextSnippet snippet =
plasmaSnippetCache.retrieveTextSnippet(comp, snippetFetchWordHashes,
(snippetFetchMode == 2), ((query.constraint != null) &&
(query.constraint.get(plasmaCondenser.flag_cat_indexof))), 180, 3000,
(snippetFetchMode == 2) ? Integer.MAX_VALUE : 100000);
long snippetComputationTime = System.currentTimeMillis() -
startTime;
serverLog.logInfo("SEARCH_EVENT", "text snippet load time for " +
comp.url() + ": " + snippetComputationTime + ", " + ((snippet.getErrorCode() <
11) ? "snippet found" : ("no snippet found (" + snippet.getError() + ")")));
Modified: trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java 2008-01-29 23:07:59 UTC (rev 4418)
+++ trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java 2008-01-30 00:15:43 UTC (rev 4419)
@@ -40,7 +40,6 @@
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRWIEntryOrder;
import de.anomic.index.indexRWIRowEntry;
-import de.anomic.index.indexRWIVarEntry;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBinSearch;
import de.anomic.kelondro.kelondroMScoreCluster;
@@ -53,8 +52,8 @@
public static kelondroBinSearch[] ybrTables = null; // block-rank tables
private static boolean useYBR = true;
- private TreeMap<Object, indexRWIEntry> sortedRWIEntries; // key = ranking
(Long); value = indexRWIEntry; if sortorder < 2 then key is instance of String
- private HashMap<String, TreeMap<Object, indexRWIEntry>> doubleDomCache; //
key = domhash (6 bytes); value = TreeMap like sortedRWIEntries
+ private TreeMap<Object, indexRWIRowEntry> sortedRWIEntries; // key =
ranking (Long); value = indexRWIEntry; if sortorder < 2 then key is instance of
String
+ private HashMap<String, TreeMap<Object, indexRWIRowEntry>> doubleDomCache;
// key = domhash (6 bytes); value = TreeMap like sortedRWIEntries
private HashMap<String, String> handover; // key = urlhash, value =
urlstring; used for double-check of urls that had been handed over to search
process
private plasmaSearchQuery query;
private int sortorder;
@@ -74,8 +73,8 @@
// attention: if minEntries is too high, this method will not
terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking
this.localSearchContainerMaps = null;
- this.sortedRWIEntries = new TreeMap<Object, indexRWIEntry>();
- this.doubleDomCache = new HashMap<String, TreeMap<Object,
indexRWIEntry>>();
+ this.sortedRWIEntries = new TreeMap<Object, indexRWIRowEntry>();
+ this.doubleDomCache = new HashMap<String, TreeMap<Object,
indexRWIRowEntry>>();
this.handover = new HashMap<String, String>();
this.filteredCount = 0;
this.order = null;
@@ -124,11 +123,11 @@
final Iterator<indexRWIRowEntry> en = index.entries();
// generate a new map where the urls are sorted (not by hash but by
the url text)
- indexRWIEntry ientry;
+ indexRWIRowEntry ientry;
indexURLEntry uentry;
String u;
loop: while (en.hasNext()) {
- ientry = (indexRWIEntry) en.next();
+ ientry = en.next();
// check constraints
if (!testFlags(ientry)) continue loop;
@@ -181,12 +180,12 @@
// normalize entries and get ranking
timer = System.currentTimeMillis();
Iterator<indexRWIRowEntry> i = index.entries();
- indexRWIVarEntry iEntry, l;
+ indexRWIRowEntry iEntry, l;
long biggestEntry = 0;
//long s0 = System.currentTimeMillis();
Long r;
while (i.hasNext()) {
- iEntry = new indexRWIVarEntry(i.next());
+ iEntry = i.next();
if (iEntry.urlHash().length() != index.row().primaryKeyLength)
continue;
// increase flag counts
@@ -216,11 +215,11 @@
continue;
} else {
if (urlhashes.containsKey(iEntry.urlHash())) continue;
- l = (indexRWIVarEntry) sortedRWIEntries.remove((Long)
sortedRWIEntries.lastKey());
+ l = sortedRWIEntries.remove((Long)
sortedRWIEntries.lastKey());
urlhashes.remove(l.urlHash());
while (sortedRWIEntries.containsKey(r)) r = new
Long(r.longValue() + 1);
sortedRWIEntries.put(r, iEntry);
- biggestEntry = order.cardinal((indexRWIVarEntry)
sortedRWIEntries.get(sortedRWIEntries.lastKey()));
+ biggestEntry =
order.cardinal(sortedRWIEntries.get(sortedRWIEntries.lastKey()));
}
}
@@ -267,18 +266,18 @@
private synchronized Object[] /*{Object, indexRWIEntry}*/ bestRWI(boolean
skipDoubleDom) {
// returns from the current RWI list the best entry and removed this
entry from the list
Object bestEntry;
- TreeMap<Object, indexRWIEntry> m;
- indexRWIEntry rwi;
+ TreeMap<Object, indexRWIRowEntry> m;
+ indexRWIRowEntry rwi;
while (sortedRWIEntries.size() > 0) {
bestEntry = sortedRWIEntries.firstKey();
- rwi = (indexRWIEntry) sortedRWIEntries.remove(bestEntry);
+ rwi = sortedRWIEntries.remove(bestEntry);
if (!skipDoubleDom) return new Object[]{bestEntry, rwi};
// check doubledom
String domhash = rwi.urlHash().substring(6);
- m = (TreeMap<Object, indexRWIEntry>)
this.doubleDomCache.get(domhash);
+ m = this.doubleDomCache.get(domhash);
if (m == null) {
// first appearance of dom
- m = new TreeMap<Object, indexRWIEntry>();
+ m = new TreeMap<Object, indexRWIRowEntry>();
this.doubleDomCache.put(domhash, m);
return new Object[]{bestEntry, rwi};
}
@@ -287,20 +286,20 @@
}
// no more entries in sorted RWI entries. Now take Elements from the
doubleDomCache
// find best entry from all caches
- Iterator<TreeMap<Object, indexRWIEntry>> i =
this.doubleDomCache.values().iterator();
+ Iterator<TreeMap<Object, indexRWIRowEntry>> i =
this.doubleDomCache.values().iterator();
bestEntry = null;
Object o;
- indexRWIEntry bestrwi = null;
+ indexRWIRowEntry bestrwi = null;
while (i.hasNext()) {
m = i.next();
if (m.size() == 0) continue;
if (bestEntry == null) {
bestEntry = m.firstKey();
- bestrwi = (indexRWIEntry) m.remove(bestEntry);
+ bestrwi = m.remove(bestEntry);
continue;
}
o = m.firstKey();
- rwi = (indexRWIEntry) m.remove(o);
+ rwi = m.remove(o);
if (o instanceof Long) {
if (((Long) o).longValue() < ((Long) bestEntry).longValue()) {
bestEntry = o;
@@ -326,7 +325,7 @@
while ((sortedRWIEntries.size() > 0) || (size() > 0)) {
Object[] obrwi = bestRWI(skipDoubleDom);
Object bestEntry = obrwi[0];
- indexRWIEntry ientry = (indexRWIEntry) obrwi[1];
+ indexRWIRowEntry ientry = (indexRWIRowEntry) obrwi[1];
long ranking = (bestEntry instanceof Long) ? ((Long)
bestEntry).longValue() : 0;
indexURLEntry u = wordIndex.loadedURL.load(ientry.urlHash(),
ientry, ranking);
if (u != null) {
@@ -342,7 +341,7 @@
public synchronized int size() {
//assert sortedRWIEntries.size() == urlhashes.size() :
"sortedRWIEntries.size() = " + sortedRWIEntries.size() + ", urlhashes.size() =
" + urlhashes.size();
int c = sortedRWIEntries.size();
- Iterator<TreeMap<Object, indexRWIEntry>> i =
this.doubleDomCache.values().iterator();
+ Iterator<TreeMap<Object, indexRWIRowEntry>> i =
this.doubleDomCache.values().iterator();
while (i.hasNext()) c += i.next().size();
return c;
}
Modified: trunk/source/de/anomic/plasma/plasmaSnippetCache.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSnippetCache.java 2008-01-29 23:07:59 UTC (rev 4418)
+++ trunk/source/de/anomic/plasma/plasmaSnippetCache.java 2008-01-30 00:15:43 UTC (rev 4419)
@@ -59,6 +59,7 @@
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
+import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.plasma.cache.IResourceInfo;
@@ -246,9 +247,9 @@
}
@SuppressWarnings("unchecked")
- public static TextSnippet retrieveTextSnippet(yacyURL url, Set<String>
queryhashes, boolean fetchOnline, boolean pre, int snippetMaxLength, int
timeout, int maxDocLen) {
+ public static TextSnippet retrieveTextSnippet(indexURLEntry.Components
comp, Set<String> queryhashes, boolean fetchOnline, boolean pre, int
snippetMaxLength, int timeout, int maxDocLen) {
// heise = "0OQUNU3JSs05"
-
+ yacyURL url = comp.url();
if (queryhashes.size() == 0) {
//System.out.println("found no queryhashes for URL retrieve " +
url);
return new TextSnippet(url, null, ERROR_NO_HASH_GIVEN,
queryhashes, "no query hashes given");
@@ -258,8 +259,8 @@
int source = SOURCE_CACHE;
String wordhashes = yacySearch.set2string(queryhashes);
String line = retrieveFromCache(wordhashes, url.hash());
- if (line != null) {
- //System.out.println("found snippet for URL " + url + " in cache:
" + line);
+ if (line != null) {
+ // found the snippet
return new TextSnippet(url, line, source, null, null,
faviconCache.get(url.hash()));
}
@@ -279,7 +280,11 @@
if ((resContentLength > maxDocLen) && (!fetchOnline)) {
// content may be too large to be parsed here. To be fast,
we omit calculation of snippet here
return new TextSnippet(url, null, ERROR_SOURCE_LOADING,
queryhashes, "resource available, but too large: " + resContentLength + "
bytes");
- }
+ }/*
+ } else if (url.) {
+ // try to create the snippet from information given in the url
itself
+ */
+
} else if (fetchOnline) {
// if not found try to download it
@@ -342,7 +347,7 @@
if (sentences == null) return new TextSnippet(url, null,
ERROR_PARSER_NO_LINES, queryhashes, "parser returned no sentences",resFavicon);
Object[] tsr = computeTextSnippet(sentences, queryhashes,
snippetMaxLength);
String textline = (tsr == null) ? null : (String) tsr[0];
- Set<String> remainingHashes = (tsr == null) ? queryhashes : (Set)
tsr[1];
+ Set<String> remainingHashes = (tsr == null) ? queryhashes :
(Set<String>) tsr[1];
// compute snippet from media
String audioline = computeMediaSnippet(document.getAudiolinks(),
queryhashes);
Modified: trunk/source/de/anomic/plasma/plasmaWordIndex.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaWordIndex.java 2008-01-29 23:07:59 UTC (rev 4418)
+++ trunk/source/de/anomic/plasma/plasmaWordIndex.java 2008-01-30 00:15:43 UTC (rev 4419)
@@ -598,7 +598,7 @@
public void run() {
serverLog.logInfo("INDEXCLEANER", "IndexCleaner-Thread started");
indexContainer container = null;
- indexRWIEntry entry = null;
+ indexRWIRowEntry entry = null;
yacyURL url = null;
HashSet<String> urlHashs = new HashSet<String>();
Iterator<indexContainer> indexContainerIterator =
indexContainerSet(startHash, false, false, 100).iterator();
@@ -609,7 +609,7 @@
wordHashNow = container.getWordHash();
while (containerIterator.hasNext() && run) {
waiter();
- entry = (indexRWIEntry) containerIterator.next();
+ entry = containerIterator.next();
// System.out.println("Wordhash: "+wordHash+" UrlHash:
// "+entry.getUrlHash());
indexURLEntry ue = lurl.load(entry.urlHash(), entry, 0);
_______________________________________________
YaCy-svn mailing list
[email protected]
https://lists.berlios.de/mailman/listinfo/yacy-svn