Author: j16sdiz
Date: 2008-12-29 01:19:19 +0000 (Mon, 29 Dec 2008)
New Revision: 24813
Modified:
trunk/plugins/XMLSpider/XMLSpider.java
Log:
cache all TermPosition, store them only when page finish
Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java 2008-12-29 01:19:03 UTC (rev
24812)
+++ trunk/plugins/XMLSpider/XMLSpider.java 2008-12-29 01:19:19 UTC (rev
24813)
@@ -423,18 +423,6 @@
FreenetURI uri = state.getURI();
try {
- // Page may be refetched if added manually
- // Delete existing TermPosition
- Query query = db.query();
- query.constrain(TermPosition.class);
- query.descend("pageId").constrain(page.id);
- @SuppressWarnings("unchecked")
- ObjectSet<TermPosition> set = query.execute();
- for (TermPosition tp : set) {
- assert tp.pageId == page.id;
- db.delete(tp);
- }
-
ClientMetadata cm = result.getMetadata();
Bucket data = result.asBucket();
String mimeType = cm.getMIMEType();
@@ -723,7 +711,7 @@
if (word.length() < 3)
return;
Term term = getTermByWord(word, true);
- TermPosition termPos = getTermPosition(term, true);
+ TermPosition termPos = getTermPosition(term);
synchronized (termPos) {
int[] newPositions = new
int[termPos.positions.length + 1];
@@ -734,57 +722,37 @@
}
}
- @SuppressWarnings("serial")
- protected Map<Term, TermPosition> termPosCache = new
LinkedHashMap<Term, TermPosition>() {
- protected boolean removeEldestEntry(Map.Entry<Term,
TermPosition> eldest) {
- if (size() < 1024) return false;
-
- db.store(eldest.getValue());
- return true;
- }
- };
+ protected Map<Term, TermPosition> termPosCache = new
HashMap<Term, TermPosition>();
public void store() {
+ // Delete existing TermPosition
+ Query query = db.query();
+ query.constrain(TermPosition.class);
+ query.descend("pageId").constrain(page.id);
+ @SuppressWarnings("unchecked")
+ ObjectSet<TermPosition> set = query.execute();
+ for (TermPosition tp : set) {
+ assert tp.pageId == page.id;
+ db.delete(tp);
+ }
+
for (TermPosition tp : termPosCache.values())
db.store(tp);
termPosCache.clear();
}
- protected TermPosition getTermPosition(Term term, boolean
create) {
- synchronized (term) {
- TermPosition cachedTermPos =
termPosCache.get(term);
- if (cachedTermPos != null)
- return cachedTermPos;
+ protected TermPosition getTermPosition(Term term) {
+ TermPosition cachedTermPos = termPosCache.get(term);
+ if (cachedTermPos != null)
+ return cachedTermPos;
- synchronized (page) {
- Query query = db.query();
- query.constrain(TermPosition.class);
+ cachedTermPos = new TermPosition();
+ cachedTermPos.word = term.word;
+ cachedTermPos.pageId = page.id;
+ cachedTermPos.positions = new int[0];
-
query.descend("word").constrain(term.word);
-
query.descend("pageId").constrain(page.id);
- @SuppressWarnings("unchecked")
- ObjectSet<TermPosition> set =
query.execute();
-
- if (set.hasNext()) {
- cachedTermPos = set.next();
- assert
term.word.equals(cachedTermPos.word);
- assert cachedTermPos.pageId ==
page.id;
- termPosCache.put(term,
cachedTermPos);
- return cachedTermPos;
- } else if (create) {
- cachedTermPos = new
TermPosition();
- cachedTermPos.word = term.word;
- cachedTermPos.pageId = page.id;
- cachedTermPos.positions = new
int[0];
-
- termPosCache.put(term,
cachedTermPos);
- db.store(cachedTermPos);
- return cachedTermPos;
- } else {
- return null;
- }
- }
- }
+ termPosCache.put(term, cachedTermPos);
+ return cachedTermPos;
}
}
_______________________________________________
cvs mailing list
[email protected]
http://emu.freenetproject.org/cgi-bin/mailman/listinfo/cvs