Author: orbiter
Date: 2008-02-07 23:16:36 +0100 (Thu, 07 Feb 2008)
New Revision: 4459
Modified:
trunk/htroot/BlogComments.java
trunk/htroot/CrawlProfileEditor_p.java
trunk/htroot/Settings_p.java
trunk/htroot/TestApplet.java
trunk/htroot/User.java
trunk/htroot/ViewImage.java
trunk/htroot/Wiki.java
trunk/htroot/www/welcome.java
trunk/htroot/yacy/hello.java
trunk/htroot/yacy/search.java
trunk/htroot/yacy/transfer.java
trunk/htroot/yacysearch.java
trunk/source/de/anomic/data/userDB.java
trunk/source/de/anomic/http/httpSSI.java
trunk/source/de/anomic/http/httpd.java
trunk/source/de/anomic/http/httpdFileHandler.java
trunk/source/de/anomic/index/indexRWIEntry.java
trunk/source/de/anomic/index/indexRWIEntryOrder.java
trunk/source/de/anomic/index/indexRWIRowEntry.java
trunk/source/de/anomic/index/indexRWIVarEntry.java
trunk/source/de/anomic/index/indexURLEntry.java
trunk/source/de/anomic/kelondro/kelondroSplitTable.java
trunk/source/de/anomic/plasma/plasmaCrawlLURL.java
trunk/source/de/anomic/plasma/plasmaCrawlZURL.java
trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
trunk/source/de/anomic/plasma/plasmaSnippetCache.java
trunk/source/de/anomic/plasma/plasmaSwitchboard.java
trunk/source/de/anomic/plasma/plasmaWordIndex.java
Log:
- faster search: using different data structures that avoid multiple
calculations
- no more table copy for error-eco table
- optional table copy for lurl-entries
- more abstractions (less single constant strings)
- better logging (using host names instead of ips)
Modified: trunk/htroot/BlogComments.java
===================================================================
--- trunk/htroot/BlogComments.java 2008-02-07 20:35:24 UTC (rev 4458)
+++ trunk/htroot/BlogComments.java 2008-02-07 22:16:36 UTC (rev 4459)
@@ -102,7 +102,7 @@
}
String pagename = post.get("page", "blog_default");
- String ip = post.get("CLIENTIP", "127.0.0.1");
+ String ip = post.get(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1");
String StrAuthor = post.get("author", "anonymous");
Modified: trunk/htroot/CrawlProfileEditor_p.java
===================================================================
--- trunk/htroot/CrawlProfileEditor_p.java 2008-02-07 20:35:24 UTC (rev
4458)
+++ trunk/htroot/CrawlProfileEditor_p.java 2008-02-07 22:16:36 UTC (rev
4459)
@@ -105,9 +105,9 @@
while (it.hasNext()) {
selentry = (entry)it.next();
if (selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_PROXY)
||
-
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_REMOTE) ||
+
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_REMOTE) /*||
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_TEXT) ||
-
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_MEDIA))
+
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_MEDIA)*/)
continue;
prop.put("profiles_" + count + "_name", selentry.name());
prop.put("profiles_" + count + "_handle", selentry.handle());
Modified: trunk/htroot/Settings_p.java
===================================================================
--- trunk/htroot/Settings_p.java 2008-02-07 20:35:24 UTC (rev 4458)
+++ trunk/htroot/Settings_p.java 2008-02-07 22:16:36 UTC (rev 4459)
@@ -212,7 +212,7 @@
}
// clientIP
- prop.putHTML("clientIP", (String) header.get("CLIENTIP", "<unknown>"),
true); // read an artificial header addendum
+ prop.putHTML("clientIP", (String)
header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "<unknown>"), true); // read an
artificial header addendum
/*
* seed upload settings
Modified: trunk/htroot/TestApplet.java
===================================================================
--- trunk/htroot/TestApplet.java 2008-02-07 20:35:24 UTC (rev 4458)
+++ trunk/htroot/TestApplet.java 2008-02-07 22:16:36 UTC (rev 4459)
@@ -50,7 +50,7 @@
//File templatefile=filehandler.getOverlayedFile((String)post.get("url"));
File classfile =
httpdFileHandler.getOverlayedClass((String)post.get("url"));
httpHeader header2=new httpHeader();
- header2.put("CLIENTIP", "127.0.0.1");
+ header2.put(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1");
header2.put("PATH", post.get("url"));
serverObjects tp=null;
try {
Modified: trunk/htroot/User.java
===================================================================
--- trunk/htroot/User.java 2008-02-07 20:35:24 UTC (rev 4458)
+++ trunk/htroot/User.java 2008-02-07 22:16:36 UTC (rev 4459)
@@ -79,7 +79,7 @@
prop.put("logged-in_identified-by", "2");
//try via ip
if(entry == null){
- entry=sb.userDB.ipAuth(((String)header.get("CLIENTIP",
"xxxxxx")));
+
entry=sb.userDB.ipAuth(((String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP,
"xxxxxx")));
if(entry != null){
prop.put("logged-in_identified-by", "0");
}
@@ -108,7 +108,7 @@
//identified via form-login
//TODO: this does not work for a static admin, yet.
}else if(post != null && post.containsKey("username") &&
post.containsKey("password")){
- //entry=sb.userDB.passwordAuth((String)post.get("username"),
(String)post.get("password"), (String)header.get("CLIENTIP", "xxxxxx"));
+ //entry=sb.userDB.passwordAuth((String)post.get("username"),
(String)post.get("password"),
(String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "xxxxxx"));
String username=(String)post.get("username");
String password=(String)post.get("password");
@@ -163,7 +163,7 @@
if(post!=null && post.containsKey("logout")){
prop.put("logged-in", "0");
if(entry != null){
- entry.logout(((String)header.get("CLIENTIP", "xxxxxx")),
userDB.getLoginToken(header.getHeaderCookies())); //todo: logout cookie
+
entry.logout(((String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP,
"xxxxxx")), userDB.getLoginToken(header.getHeaderCookies())); //todo: logout
cookie
}else{
sb.userDB.adminLogout(userDB.getLoginToken(header.getHeaderCookies()));
}
Modified: trunk/htroot/ViewImage.java
===================================================================
--- trunk/htroot/ViewImage.java 2008-02-07 20:35:24 UTC (rev 4458)
+++ trunk/htroot/ViewImage.java 2008-02-07 22:16:36 UTC (rev 4459)
@@ -72,7 +72,7 @@
String urlString = post.get("url", "");
String urlLicense = post.get("code", "");
- boolean auth = ((String) header.get("CLIENTIP",
"")).equals("localhost") || sb.verifyAuthentication(header, true); // handle
access rights
+ boolean auth = ((String)
header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")).equals("localhost") ||
sb.verifyAuthentication(header, true); // handle access rights
yacyURL url = null;
if ((urlString.length() > 0) && (auth)) try {
Modified: trunk/htroot/Wiki.java
===================================================================
--- trunk/htroot/Wiki.java 2008-02-07 20:35:24 UTC (rev 4458)
+++ trunk/htroot/Wiki.java 2008-02-07 22:16:36 UTC (rev 4459)
@@ -88,7 +88,7 @@
String access = switchboard.getConfig("WikiAccess", "admin");
String pagename = post.get("page", "start");
- String ip = post.get("CLIENTIP", "127.0.0.1");
+ String ip = post.get(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1");
String author = post.get("author", "anonymous");
if (author.equals("anonymous")) {
author = wikiBoard.guessAuthor(ip);
Modified: trunk/htroot/www/welcome.java
===================================================================
--- trunk/htroot/www/welcome.java 2008-02-07 20:35:24 UTC (rev 4458)
+++ trunk/htroot/www/welcome.java 2008-02-07 22:16:36 UTC (rev 4459)
@@ -78,7 +78,7 @@
prop.put("hostip", "Unknown Host Exception");
}
prop.put("port", serverCore.getPortNr(env.getConfig("port","8080")));
- prop.put("clientip", (String) header.get("CLIENTIP", ""));
+ prop.put("clientip", (String)
header.get(httpHeader.CONNECTION_PROP_CLIENTIP, ""));
final String peertype = (yacyCore.seedDB.mySeed() == null) ?
yacySeed.PEERTYPE_JUNIOR : yacyCore.seedDB.mySeed().get(yacySeed.PEERTYPE,
yacySeed.PEERTYPE_VIRGIN);
final boolean senior = (peertype.equals(yacySeed.PEERTYPE_SENIOR)) ||
(peertype.equals(yacySeed.PEERTYPE_PRINCIPAL));
Modified: trunk/htroot/yacy/hello.java
===================================================================
--- trunk/htroot/yacy/hello.java 2008-02-07 20:35:24 UTC (rev 4458)
+++ trunk/htroot/yacy/hello.java 2008-02-07 22:16:36 UTC (rev 4459)
@@ -103,7 +103,7 @@
// if ((properTest != null) && (!
properTest.substring(0,1).equals("IP"))) { return null; }
// we easily know the caller's IP:
- final String clientip = (String) header.get("CLIENTIP", "<unknown>");
// read an artificial header addendum
+ final String clientip = (String)
header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "<unknown>"); // read an
artificial header addendum
InetAddress ias = serverDomains.dnsResolve(clientip);
if (ias == null) {
prop.put("message", "cannot resolve your IP from your reported
location " + clientip);
Modified: trunk/htroot/yacy/search.java
===================================================================
--- trunk/htroot/yacy/search.java 2008-02-07 20:35:24 UTC (rev 4458)
+++ trunk/htroot/yacy/search.java 2008-02-07 22:16:36 UTC (rev 4459)
@@ -282,7 +282,7 @@
// prepare search statistics
Long trackerHandle = new Long(System.currentTimeMillis());
HashMap<String, Object> searchProfile =
theQuery.resultProfile(joincount, System.currentTimeMillis() - timestamp,
urlRetrievalAllTime, snippetComputationAllTime);
- String client = (String) header.get("CLIENTIP");
+ String client = (String)
header.get(httpHeader.CONNECTION_PROP_CLIENTIP);
searchProfile.put("host", client);
yacySeed remotepeer =
yacyCore.seedDB.lookupByIP(natLib.getInetAddress(client), true, false, false);
searchProfile.put("peername", (remotepeer == null) ? "unknown" :
remotepeer.getName());
Modified: trunk/htroot/yacy/transfer.java
===================================================================
--- trunk/htroot/yacy/transfer.java 2008-02-07 20:35:24 UTC (rev 4458)
+++ trunk/htroot/yacy/transfer.java 2008-02-07 22:16:36 UTC (rev 4459)
@@ -89,14 +89,14 @@
final yacySeed opeer = yacyCore.seedDB.get(ohash);
if (opeer == null) {
// reject unknown peers: this does not appear fair, but anonymous
senders are dangerous
- sb.getLog().logFine("RankingTransmission: rejected unknown peer '"
+ ohash + "', current IP " + header.get("CLIENTIP", "unknown"));
+ sb.getLog().logFine("RankingTransmission: rejected unknown peer '"
+ ohash + "', current IP " + header.get(httpHeader.CONNECTION_PROP_CLIENTIP,
"unknown"));
return prop;
}
opeer.setLastSeenUTC();
if (filename.indexOf("..") >= 0) {
// reject paths that contain '..' because they are dangerous
- sb.getLog().logFine("RankingTransmission: rejected wrong path '" +
filename + "' from peer " + opeer.getName() + "/" + opeer.getPublicAddress()+
", current IP " + header.get("CLIENTIP", "unknown"));
+ sb.getLog().logFine("RankingTransmission: rejected wrong path '" +
filename + "' from peer " + opeer.getName() + "/" + opeer.getPublicAddress()+
", current IP " + header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "unknown"));
return prop;
}
Modified: trunk/htroot/yacysearch.java
===================================================================
--- trunk/htroot/yacysearch.java 2008-02-07 20:35:24 UTC (rev 4458)
+++ trunk/htroot/yacysearch.java 2008-02-07 22:16:36 UTC (rev 4459)
@@ -257,7 +257,7 @@
constraint,
true);
- String client = (String) header.get("CLIENTIP"); // the search
client who initiated the search
+ String client = (String)
header.get(httpHeader.CONNECTION_PROP_CLIENTIP); // the search client who
initiated the search
// tell all threads to do nothing for a specific time
sb.intermissionAllThreads(10000);
Modified: trunk/source/de/anomic/data/userDB.java
===================================================================
--- trunk/source/de/anomic/data/userDB.java 2008-02-07 20:35:24 UTC (rev
4458)
+++ trunk/source/de/anomic/data/userDB.java 2008-02-07 22:16:36 UTC (rev
4459)
@@ -155,7 +155,7 @@
return null;
}
public Entry getUser(httpHeader header){
- return getUser((String) header.get(httpHeader.AUTHORIZATION),
(String)header.get("CLIENTIP"), header.getHeaderCookies());
+ return getUser((String) header.get(httpHeader.AUTHORIZATION),
(String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP),
header.getHeaderCookies());
}
public Entry getUser(String auth, String ip, String cookies){
Entry entry=null;
Modified: trunk/source/de/anomic/http/httpSSI.java
===================================================================
--- trunk/source/de/anomic/http/httpSSI.java 2008-02-07 20:35:24 UTC (rev
4458)
+++ trunk/source/de/anomic/http/httpSSI.java 2008-02-07 22:16:36 UTC (rev
4459)
@@ -85,7 +85,7 @@
conProp.setProperty(httpHeader.CONNECTION_PROP_PATH, path);
conProp.setProperty(httpHeader.CONNECTION_PROP_ARGS, args);
conProp.setProperty(httpHeader.CONNECTION_PROP_HTTP_VER,
httpHeader.HTTP_VERSION_0_9);
- conProp.setProperty("CLIENTIP", "127.0.0.1");
+ conProp.setProperty(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1");
header.put(httpHeader.AUTHORIZATION, authorization);
httpdFileHandler.doGet(conProp, header, out);
}
Modified: trunk/source/de/anomic/http/httpd.java
===================================================================
--- trunk/source/de/anomic/http/httpd.java 2008-02-07 20:35:24 UTC (rev
4458)
+++ trunk/source/de/anomic/http/httpd.java 2008-02-07 22:16:36 UTC (rev
4459)
@@ -193,7 +193,7 @@
public void initSession(serverCore.Session newsession) throws IOException {
this.session = newsession;
this.userAddress = session.userAddress; // client InetAddress
- this.clientIP = this.userAddress.getHostAddress();
+ this.clientIP = this.userAddress.getHostName();
if (this.userAddress.isAnyLocalAddress()) this.clientIP = "localhost";
if (this.clientIP.equals("0:0:0:0:0:0:0:1")) this.clientIP =
"localhost";
if (this.clientIP.equals("127.0.0.1")) this.clientIP = "localhost";
@@ -1147,7 +1147,7 @@
// tp.put("host", serverCore.publicIP().getHostAddress());
// tp.put("port", switchboard.getConfig("port", "8080"));
- String clientIP =
conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP,"127.0.0.1");
+ String clientIP =
conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1");
// check if ip is local ip address
InetAddress hostAddress = serverDomains.dnsResolve(clientIP);
Modified: trunk/source/de/anomic/http/httpdFileHandler.java
===================================================================
--- trunk/source/de/anomic/http/httpdFileHandler.java 2008-02-07 20:35:24 UTC
(rev 4458)
+++ trunk/source/de/anomic/http/httpdFileHandler.java 2008-02-07 22:16:36 UTC
(rev 4459)
@@ -303,13 +303,13 @@
if ((path.substring(0,(pos==-1)?path.length():pos)).endsWith("_p")
&& (adminAccountBase64MD5.length() != 0)) {
//authentication required
//userDB
- if(sb.userDB.hasAdminRight(authorization,
conProp.getProperty("CLIENTIP"), requestHeader.getHeaderCookies())){
+ if(sb.userDB.hasAdminRight(authorization,
conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP),
requestHeader.getHeaderCookies())){
//Authentication successful. remove brute-force flag
- serverCore.bfHost.remove(conProp.getProperty("CLIENTIP"));
+
serverCore.bfHost.remove(conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
//static
}else if(authorization != null &&
httpd.staticAdminAuthenticated(authorization.trim().substring(6),
switchboard)==4){
//Authentication successful. remove brute-force flag
- serverCore.bfHost.remove(conProp.getProperty("CLIENTIP"));
+
serverCore.bfHost.remove(conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
//no auth
}else if (authorization == null) {
// no authorization given in response. Ask for that
@@ -323,7 +323,7 @@
return;
} else {
// a wrong authentication was given or the userDB user
does not have admin access. Ask again
- String clientIP = conProp.getProperty("CLIENTIP",
"unknown-host");
+ String clientIP =
conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP, "unknown-host");
serverLog.logInfo("HTTPD", "Wrong log-in for account
'admin' in http file handler for path '" + path + "' from host '" + clientIP +
"'");
Integer attempts = (Integer)
serverCore.bfHost.get(clientIP);
if (attempts == null)
@@ -473,7 +473,7 @@
// call an image-servlet to produce an on-the-fly - generated
image
Object img = null;
try {
- requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP,
conProp.getProperty("CLIENTIP"));
+ requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP,
conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
requestHeader.put(httpHeader.CONNECTION_PROP_PATH, path);
// in case that there are no args given, args = null or
empty hashmap
img = invokeServlet(targetClass, requestHeader, args);
@@ -527,7 +527,7 @@
}
} else if ((targetClass != null) && (path.endsWith(".stream"))) {
// call rewrite-class
- requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP,
conProp.getProperty("CLIENTIP"));
+ requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP,
conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
requestHeader.put(httpHeader.CONNECTION_PROP_PATH, path);
//requestHeader.put(httpHeader.CONNECTION_PROP_INPUTSTREAM,
body);
//requestHeader.put(httpHeader.CONNECTION_PROP_OUTPUTSTREAM,
out);
@@ -570,7 +570,7 @@
} else {
// CGI-class: call the class to create a property for
rewriting
try {
-
requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP,
conProp.getProperty("CLIENTIP"));
+
requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP,
conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
requestHeader.put(httpHeader.CONNECTION_PROP_PATH,
path);
// in case that there are no args given, args =
null or empty hashmap
Object tmp = invokeServlet(targetClass,
requestHeader, args);
@@ -586,7 +586,7 @@
if
(tp.containsKey(servletProperties.ACTION_AUTHENTICATE)) {
// handle brute-force protection
if (authorization != null) {
- String clientIP =
conProp.getProperty("CLIENTIP", "unknown-host");
+ String clientIP =
conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP, "unknown-host");
serverLog.logInfo("HTTPD", "dynamic log-in
for account 'admin' in http file handler for path '" + path + "' from host '" +
clientIP + "'");
Integer attempts = (Integer)
serverCore.bfHost.get(clientIP);
if (attempts == null)
Modified: trunk/source/de/anomic/index/indexRWIEntry.java
===================================================================
--- trunk/source/de/anomic/index/indexRWIEntry.java 2008-02-07 20:35:24 UTC
(rev 4458)
+++ trunk/source/de/anomic/index/indexRWIEntry.java 2008-02-07 22:16:36 UTC
(rev 4459)
@@ -47,8 +47,6 @@
public String urlHash();
- public int quality();
-
public int virtualAge();
public long lastModified();
Modified: trunk/source/de/anomic/index/indexRWIEntryOrder.java
===================================================================
--- trunk/source/de/anomic/index/indexRWIEntryOrder.java 2008-02-07
20:35:24 UTC (rev 4458)
+++ trunk/source/de/anomic/index/indexRWIEntryOrder.java 2008-02-07
22:16:36 UTC (rev 4459)
@@ -26,6 +26,7 @@
package de.anomic.index;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
@@ -55,12 +56,13 @@
this.maxdomcount = 0;
}
- public void normalizeWith(indexContainer container) {
+ public ArrayList<indexRWIVarEntry> normalizeWith(indexContainer container)
{
// normalize ranking: find minimum and maxiumum of separate ranking
criteria
assert (container != null);
-
+ ArrayList<indexRWIVarEntry> result = null;
+
//long s0 = System.currentTimeMillis();
- if ((processors > 1) && (container.size() > 10000)) {
+ if ((processors > 1) && (container.size() > 600)) {
// run minmax with two threads
int middle = container.size() / 2;
minmaxfinder mmf0 = new minmaxfinder(container, 0, middle);
@@ -83,6 +85,8 @@
entry = di.next();
this.doms.addScore(entry.getKey(), ((Integer)
entry.getValue()).intValue());
}
+ result = mmf0.decodedEntries;
+ result.addAll(mmf1.decodedContainer());
//long s1= System.currentTimeMillis(), sc = Math.max(1, s1 - s0);
//System.out.println("***DEBUG*** indexRWIEntry.Order
(2-THREADED): " + sc + " milliseconds for " + container.size() + " entries, " +
(container.size() / sc) + " entries/millisecond");
} else if (container.size() > 0) {
@@ -97,10 +101,12 @@
entry = di.next();
this.doms.addScore(entry.getKey(), ((Integer)
entry.getValue()).intValue());
}
+ result = mmf.decodedContainer();
//long s1= System.currentTimeMillis(), sc = Math.max(1, s1 - s0);
//System.out.println("***DEBUG*** indexRWIEntry.Order (ONETHREAD):
" + sc + " milliseconds for " + container.size() + " entries, " +
(container.size() / sc) + " entries/millisecond");
}
if (this.doms.size() > 0) this.maxdomcount = this.doms.getMaxScore();
+ return result;
}
public kelondroOrder<indexRWIVarEntry> clone() {
@@ -179,6 +185,7 @@
private int start, end;
private HashMap<String, Integer> doms;
private Integer int1;
+ ArrayList<indexRWIVarEntry> decodedEntries;
public minmaxfinder(indexContainer container, int start /*including*/,
int end /*excluding*/) {
this.container = container;
@@ -186,18 +193,20 @@
this.end = end;
this.doms = new HashMap<String, Integer>();
this.int1 = new Integer(1);
+ this.decodedEntries = new ArrayList<indexRWIVarEntry>();
}
public void run() {
// find min/max to obtain limits for normalization
this.entryMin = null;
this.entryMax = null;
- indexRWIRowEntry iEntry;
+ indexRWIVarEntry iEntry;
int p = this.start;
String dom;
Integer count;
while (p < this.end) {
- iEntry = new indexRWIRowEntry(container.get(p++));
+ iEntry = new indexRWIVarEntry(new
indexRWIRowEntry(container.get(p++)));
+ this.decodedEntries.add(iEntry);
// find min/max
if (this.entryMin == null) this.entryMin = new
indexRWIVarEntry(iEntry); else indexRWIVarEntry.min(this.entryMin, iEntry);
if (this.entryMax == null) this.entryMax = new
indexRWIVarEntry(iEntry); else indexRWIVarEntry.max(this.entryMax, iEntry);
@@ -212,6 +221,10 @@
}
}
+ public ArrayList<indexRWIVarEntry> decodedContainer() {
+ return this.decodedEntries;
+ }
+
public HashMap<String, Integer> domcount() {
return this.doms;
}
Modified: trunk/source/de/anomic/index/indexRWIRowEntry.java
===================================================================
--- trunk/source/de/anomic/index/indexRWIRowEntry.java 2008-02-07 20:35:24 UTC
(rev 4458)
+++ trunk/source/de/anomic/index/indexRWIRowEntry.java 2008-02-07 22:16:36 UTC
(rev 4459)
@@ -88,6 +88,8 @@
private static final int col_worddistance = 18; // i 1 initial zero; may
be used as reserve: is filled during search
private static final int col_reserve = 19; // k 1 reserve
+ public double termFrequency;
+
private kelondroRow.Entry entry;
public indexRWIRowEntry(String urlHash,
@@ -101,14 +103,14 @@
int posinphrase, // position of word in its phrase
int posofphrase, // number of the phrase where word appears
int worddistance, // word distance; this is 0 by default,
and set to the difference of posintext from two indexes if these are combined
(simultanous search). If stored, this shows that the result was obtained by
remote search
- int sizeOfPage, // # of bytes of the page TODO: not needed
any more
long lastmodified, // last-modified time of the document
where word appears
long updatetime, // update time; this is needed to compute
a TTL for the word, so it can be removed easily if the TTL is short
String language, // (guessed) language of document
char doctype, // type of document
int outlinksSame, // outlinks to same domain
int outlinksOther, // outlinks to other domain
- kelondroBitfield flags // attributes to the url and to the word
according the url
+ kelondroBitfield flags, // attributes to the url and to the word
according the url
+ double termFrequency
) {
assert (urlHash.length() == 12) : "urlhash = " + urlHash;
@@ -136,6 +138,7 @@
this.entry.setCol(col_posofphrase, posofphrase);
this.entry.setCol(col_worddistance, worddistance);
this.entry.setCol(col_reserve, 0);
+ this.termFrequency = termFrequency;
}
public indexRWIRowEntry(String urlHash, String code) {
@@ -183,10 +186,6 @@
return this.entry.getColString(col_urlhash, null);
}
- public int quality() {
- return 0; // not used any more
- }
-
public int virtualAge() {
return (int) this.entry.getColLong(col_lastModified); // this is the
time in MicoDateDays format
}
@@ -256,7 +255,8 @@
}
public double termFrequency() {
- return (((double) this.hitcount()) / ((double) (this.wordsintext() +
this.wordsintitle() + 1)));
+ if (this.termFrequency == 0.0) this.termFrequency = (((double)
this.hitcount()) / ((double) (this.wordsintext() + this.wordsintitle() + 1)));
+ return this.termFrequency;
}
public String toString() {
@@ -288,18 +288,12 @@
public boolean isNewer(indexRWIEntry other) {
if (other == null) return true;
if (this.lastModified() > other.lastModified()) return true;
- if (this.lastModified() == other.lastModified()) {
- if (this.quality() > other.quality()) return true;
- }
return false;
}
public boolean isOlder(indexRWIEntry other) {
if (other == null) return false;
if (this.lastModified() < other.lastModified()) return true;
- if (this.lastModified() == other.lastModified()) {
- if (this.quality() < other.quality()) return true;
- }
return false;
}
Modified: trunk/source/de/anomic/index/indexRWIVarEntry.java
===================================================================
--- trunk/source/de/anomic/index/indexRWIVarEntry.java 2008-02-07 20:35:24 UTC
(rev 4458)
+++ trunk/source/de/anomic/index/indexRWIVarEntry.java 2008-02-07 22:16:36 UTC
(rev 4459)
@@ -37,7 +37,7 @@
public char type;
public int hitcount, llocal, lother, phrasesintext, posintext,
posinphrase, posofphrase,
- quality, urlcomps, urllength, virtualAge,
+ urlcomps, urllength, virtualAge,
worddistance, wordsintext, wordsintitle;
public double termFrequency;
@@ -55,7 +55,6 @@
this.posintext = e.posintext();
this.posinphrase = e.posinphrase();
this.posofphrase = e.posofphrase();
- this.quality = e.quality();
this.urlcomps = e.urlcomps();
this.urllength = e.urllength();
this.virtualAge = e.virtualAge();
@@ -133,9 +132,29 @@
public int posofphrase() {
return posofphrase;
}
-
- public int quality() {
- return quality;
+
+ private indexRWIRowEntry toRowEntry() {
+ return new indexRWIRowEntry(
+ urlHash,
+ urllength, // byte-length of complete URL
+ urlcomps, // number of path components
+ wordsintitle, // length of description/length (longer are
better?)
+ hitcount, // how often appears this word in the text
+ wordsintext, // total number of words
+ phrasesintext, // total number of phrases
+ posintext, // position of word in all words
+ posinphrase, // position of word in its phrase
+ posofphrase, // number of the phrase where word appears
+ worddistance, // word distance
+ lastModified, // last-modified time of the document where
word appears
+ System.currentTimeMillis(), // update time;
+ language, // (guessed) language of document
+ type, // type of document
+ llocal, // outlinks to same domain
+ lother, // outlinks to other domain
+ flags, // attributes to the url and to the word
according the url
+ termFrequency
+ );
}
public Entry toKelondroEntry() {
@@ -144,8 +163,7 @@
}
public String toPropertyForm() {
- assert false; // should not be used
- return null;
+ return toRowEntry().toPropertyForm();
}
public String urlHash() {
@@ -177,7 +195,8 @@
}
public double termFrequency() {
- return termFrequency;
+ if (this.termFrequency == 0.0) this.termFrequency = (((double)
this.hitcount()) / ((double) (this.wordsintext() + this.wordsintitle() + 1)));
+ return this.termFrequency;
}
public static final void min(indexRWIVarEntry t, indexRWIEntry other) {
@@ -187,7 +206,6 @@
if (t.hitcount() > (v = other.hitcount())) t.hitcount = v;
if (t.llocal() > (v = other.llocal())) t.llocal = v;
if (t.lother() > (v = other.lother())) t.lother = v;
- if (t.quality() > (v = other.quality())) t.quality = v;
if (t.virtualAge() > (v = other.virtualAge())) t.virtualAge = v;
if (t.wordsintext() > (v = other.wordsintext())) t.wordsintext = v;
if (t.phrasesintext() > (v = other.phrasesintext())) t.phrasesintext =
v;
@@ -210,7 +228,6 @@
if (t.hitcount() < (v = other.hitcount())) t.hitcount = v;
if (t.llocal() < (v = other.llocal())) t.llocal = v;
if (t.lother() < (v = other.lother())) t.lother = v;
- if (t.quality() < (v = other.quality())) t.quality = v;
if (t.virtualAge() < (v = other.virtualAge())) t.virtualAge = v;
if (t.wordsintext() < (v = other.wordsintext())) t.wordsintext = v;
if (t.phrasesintext() < (v = other.phrasesintext())) t.phrasesintext =
v;
Modified: trunk/source/de/anomic/index/indexURLEntry.java
===================================================================
--- trunk/source/de/anomic/index/indexURLEntry.java 2008-02-07 20:35:24 UTC
(rev 4458)
+++ trunk/source/de/anomic/index/indexURLEntry.java 2008-02-07 22:16:36 UTC
(rev 4459)
@@ -115,7 +115,7 @@
private kelondroRow.Entry entry;
private String snippet;
- private indexRWIRowEntry word; // this is only used if the url is
transported via remote search requests
+ private indexRWIEntry word; // this is only used if the url is transported
via remote search requests
private long ranking; // during generation of a search result this value
is set
public indexURLEntry(
@@ -185,7 +185,7 @@
return s.toString().getBytes();
}
- public indexURLEntry(kelondroRow.Entry entry, indexRWIRowEntry
searchedWord, long ranking) {
+ public indexURLEntry(kelondroRow.Entry entry, indexRWIEntry searchedWord,
long ranking) {
this.entry = entry;
this.snippet = null;
this.word = searchedWord;
@@ -391,7 +391,7 @@
return snippet;
}
- public indexRWIRowEntry word() {
+ public indexRWIEntry word() {
return word;
}
Modified: trunk/source/de/anomic/kelondro/kelondroSplitTable.java
===================================================================
--- trunk/source/de/anomic/kelondro/kelondroSplitTable.java 2008-02-07
20:35:24 UTC (rev 4458)
+++ trunk/source/de/anomic/kelondro/kelondroSplitTable.java 2008-02-07
22:16:36 UTC (rev 4459)
@@ -119,7 +119,7 @@
// this is a kelonodroFlex table
table = new kelondroCache(new kelondroFlexTable(path, maxf,
preloadTime, rowdef, 0, resetOnFail));
} else {
- table = new kelondroEcoTable(f, rowdef,
kelondroEcoTable.tailCacheDenyUsage, EcoFSBufferSize, 0);
+ table = new kelondroEcoTable(f, rowdef,
kelondroEcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0);
}
tables.put(date, table);
}
Modified: trunk/source/de/anomic/plasma/plasmaCrawlLURL.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaCrawlLURL.java 2008-02-07 20:35:24 UTC
(rev 4458)
+++ trunk/source/de/anomic/plasma/plasmaCrawlLURL.java 2008-02-07 22:16:36 UTC
(rev 4459)
@@ -66,7 +66,7 @@
import de.anomic.data.htmlTools;
import de.anomic.http.httpc;
import de.anomic.http.httpc.response;
-import de.anomic.index.indexRWIRowEntry;
+import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCache;
@@ -153,7 +153,7 @@
return 0;
}
- public synchronized indexURLEntry load(String urlHash, indexRWIRowEntry
searchedWord, long ranking) {
+ public synchronized indexURLEntry load(String urlHash, indexRWIEntry
searchedWord, long ranking) {
// generates an plasmaLURLEntry using the url hash
// to speed up the access, the url-hashes are buffered
// in the hash cache.
Modified: trunk/source/de/anomic/plasma/plasmaCrawlZURL.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaCrawlZURL.java 2008-02-07 20:35:24 UTC
(rev 4458)
+++ trunk/source/de/anomic/plasma/plasmaCrawlZURL.java 2008-02-07 22:16:36 UTC
(rev 4459)
@@ -69,7 +69,7 @@
if (f.isDirectory()) kelondroFlexTable.delete(cachePath,
tablename); else f.delete();
}
}
- urlIndex = new kelondroEcoTable(f, rowdef,
kelondroEcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0);
+ urlIndex = new kelondroEcoTable(f, rowdef,
kelondroEcoTable.tailCacheDenyUsage, EcoFSBufferSize, 0);
//urlIndex = new kelondroFlexTable(cachePath, tablename, -1, rowdef,
0, true);
}
Modified: trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
2008-02-07 20:35:24 UTC (rev 4458)
+++ trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java
2008-02-07 22:16:36 UTC (rev 4459)
@@ -28,6 +28,7 @@
import java.io.File;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
@@ -40,6 +41,7 @@
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRWIEntryOrder;
import de.anomic.index.indexRWIRowEntry;
+import de.anomic.index.indexRWIVarEntry;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBinSearch;
import de.anomic.kelondro.kelondroMScoreCluster;
@@ -52,8 +54,8 @@
public static kelondroBinSearch[] ybrTables = null; // block-rank tables
private static boolean useYBR = true;
- private TreeMap<Object, indexRWIRowEntry> sortedRWIEntries; // key = ranking (Long); value = indexRWIEntry; if sortorder < 2 then key is instance of String
- private HashMap<String, TreeMap<Object, indexRWIRowEntry>> doubleDomCache; // key = domhash (6 bytes); value = TreeMap like sortedRWIEntries
+ private TreeMap<Object, indexRWIVarEntry> sortedRWIEntries; // key = ranking (Long); value = indexRWIEntry; if sortorder < 2 then key is instance of String
+ private HashMap<String, TreeMap<Object, indexRWIVarEntry>> doubleDomCache; // key = domhash (6 bytes); value = TreeMap like sortedRWIEntries
private HashMap<String, String> handover; // key = urlhash, value = urlstring; used for double-check of urls that had been handed over to search process
private plasmaSearchQuery query;
private int sortorder;
@@ -72,8 +74,8 @@
// attention: if minEntries is too high, this method will not terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking
this.localSearchContainerMaps = null;
- this.sortedRWIEntries = new TreeMap<Object, indexRWIRowEntry>();
- this.doubleDomCache = new HashMap<String, TreeMap<Object, indexRWIRowEntry>>();
+ this.sortedRWIEntries = new TreeMap<Object, indexRWIVarEntry>();
+ this.doubleDomCache = new HashMap<String, TreeMap<Object, indexRWIVarEntry>>();
this.handover = new HashMap<String, String>();
this.order = null;
this.query = query;
@@ -132,11 +134,11 @@
this.remote_indexCount += index.size();
}
- indexRWIRowEntry ientry;
+ indexRWIVarEntry ientry;
indexURLEntry uentry;
String u;
loop: while (en.hasNext()) {
- ientry = en.next();
+ ientry = new indexRWIVarEntry(en.next());
// check constraints
if (!testFlags(ientry)) continue loop;
@@ -183,13 +185,13 @@
if (this.order == null) {
this.order = new indexRWIEntryOrder(query.ranking);
}
- this.order.normalizeWith(index);
+ ArrayList<indexRWIVarEntry> decodedEntries = this.order.normalizeWith(index);
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.NORMALIZING, index.size(), System.currentTimeMillis() - timer));
// normalize entries and get ranking
timer = System.currentTimeMillis();
- Iterator<indexRWIRowEntry> i = index.entries();
- indexRWIRowEntry iEntry, l;
+ Iterator<indexRWIVarEntry> i = decodedEntries.iterator();
+ indexRWIVarEntry iEntry, l;
long biggestEntry = 0;
//long s0 = System.currentTimeMillis();
Long r;
@@ -272,8 +274,8 @@
private synchronized Object[] /*{Object, indexRWIEntry}*/ bestRWI(boolean skipDoubleDom) {
// returns from the current RWI list the best entry and removed this entry from the list
Object bestEntry;
- TreeMap<Object, indexRWIRowEntry> m;
- indexRWIRowEntry rwi;
+ TreeMap<Object, indexRWIVarEntry> m;
+ indexRWIVarEntry rwi;
while (sortedRWIEntries.size() > 0) {
bestEntry = sortedRWIEntries.firstKey();
rwi = sortedRWIEntries.remove(bestEntry);
@@ -283,7 +285,7 @@
m = this.doubleDomCache.get(domhash);
if (m == null) {
// first appearance of dom
- m = new TreeMap<Object, indexRWIRowEntry>();
+ m = new TreeMap<Object, indexRWIVarEntry>();
this.doubleDomCache.put(domhash, m);
return new Object[]{bestEntry, rwi};
}
@@ -292,10 +294,10 @@
}
// no more entries in sorted RWI entries. Now take Elements from the doubleDomCache
// find best entry from all caches
- Iterator<TreeMap<Object, indexRWIRowEntry>> i = this.doubleDomCache.values().iterator();
+ Iterator<TreeMap<Object, indexRWIVarEntry>> i = this.doubleDomCache.values().iterator();
bestEntry = null;
Object o;
- indexRWIRowEntry bestrwi = null;
+ indexRWIVarEntry bestrwi = null;
while (i.hasNext()) {
m = i.next();
if (m.size() == 0) continue;
@@ -331,7 +333,7 @@
while ((sortedRWIEntries.size() > 0) || (size() > 0)) {
Object[] obrwi = bestRWI(skipDoubleDom);
Object bestEntry = obrwi[0];
- indexRWIRowEntry ientry = (indexRWIRowEntry) obrwi[1];
+ indexRWIVarEntry ientry = (indexRWIVarEntry) obrwi[1];
long ranking = (bestEntry instanceof Long) ? ((Long) bestEntry).longValue() : 0;
indexURLEntry u = wordIndex.loadedURL.load(ientry.urlHash(), ientry, ranking);
if (u != null) {
@@ -347,7 +349,7 @@
public synchronized int size() {
//assert sortedRWIEntries.size() == urlhashes.size() : "sortedRWIEntries.size() = " + sortedRWIEntries.size() + ", urlhashes.size() = " + urlhashes.size();
int c = sortedRWIEntries.size();
- Iterator<TreeMap<Object, indexRWIRowEntry>> i = this.doubleDomCache.values().iterator();
+ Iterator<TreeMap<Object, indexRWIVarEntry>> i = this.doubleDomCache.values().iterator();
while (i.hasNext()) c += i.next().size();
return c;
}
Modified: trunk/source/de/anomic/plasma/plasmaSnippetCache.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSnippetCache.java 2008-02-07 20:35:24 UTC (rev 4458)
+++ trunk/source/de/anomic/plasma/plasmaSnippetCache.java 2008-02-07 22:16:36 UTC (rev 4459)
@@ -414,7 +414,7 @@
resInfo = entry.getDocumentInfo();
// read resource body (if it is there)
- byte []resourceArray = entry.cacheArray();
+ byte[] resourceArray = entry.cacheArray();
if (resourceArray != null) {
resContent = new ByteArrayInputStream(resourceArray);
resContentLength = resourceArray.length;
Modified: trunk/source/de/anomic/plasma/plasmaSwitchboard.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaSwitchboard.java 2008-02-07 20:35:24 UTC (rev 4458)
+++ trunk/source/de/anomic/plasma/plasmaSwitchboard.java 2008-02-07 22:16:36 UTC (rev 4459)
@@ -906,7 +906,7 @@
} catch (MalformedURLException e) {
}
} else {
- File networkUnitDefinitionFile = new File(rootPath, networkUnitDefinition);
+ File networkUnitDefinitionFile = (networkUnitDefinition.startsWith("/")) ? new File(networkUnitDefinition) : new File(rootPath, networkUnitDefinition);
if (networkUnitDefinitionFile.exists()) {
initProps = serverFileUtils.loadHashMap(networkUnitDefinitionFile);
this.setConfig(initProps);
@@ -2348,14 +2348,14 @@
wordStat.posInPhrase,
wordStat.numOfPhrase,
0,
- newEntry.size(),
docDate.getTime(),
System.currentTimeMillis(),
language,
doctype,
ioLinks[0].intValue(),
ioLinks[1].intValue(),
- condenser.RESULT_FLAGS
+ condenser.RESULT_FLAGS,
+ 0.0
);
indexContainer wordIdxContainer = plasmaWordIndex.emptyContainer(wordHash, 1);
wordIdxContainer.add(wordIdxEntry);
@@ -2573,10 +2573,10 @@
if (authorization.length() > 256) return 0;
// authorization by encoded password, only for localhost access
- if ((((String) header.get("CLIENTIP", "")).equals("localhost")) && (adminAccountBase64MD5.equals(authorization))) return 3; // soft-authenticated for localhost
+ if ((((String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")).equals("localhost")) && (adminAccountBase64MD5.equals(authorization))) return 3; // soft-authenticated for localhost
// authorization by hit in userDB
- if (userDB.hasAdminRight((String) header.get(httpHeader.AUTHORIZATION, "xxxxxx"), ((String) header.get("CLIENTIP", "")), header.getHeaderCookies())) return 4; //return, because 4=max
+ if (userDB.hasAdminRight((String) header.get(httpHeader.AUTHORIZATION, "xxxxxx"), ((String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")), header.getHeaderCookies())) return 4; //return, because 4=max
// authorization with admin keyword in configuration
return httpd.staticAdminAuthenticated(authorization, this);
Modified: trunk/source/de/anomic/plasma/plasmaWordIndex.java
===================================================================
--- trunk/source/de/anomic/plasma/plasmaWordIndex.java 2008-02-07 20:35:24 UTC (rev 4458)
+++ trunk/source/de/anomic/plasma/plasmaWordIndex.java 2008-02-07 22:16:36 UTC (rev 4459)
@@ -314,13 +314,13 @@
wprop.posInPhrase,
wprop.numOfPhrase,
0,
- size,
urlModified.getTime(),
System.currentTimeMillis(),
language,
doctype,
outlinksSame, outlinksOther,
- wprop.flags);
+ wprop.flags,
+ 0.0);
addEntry(plasmaCondenser.word2hash(word), ientry, System.currentTimeMillis(), false);
wordCount++;
}
_______________________________________________
YaCy-svn mailing list
[email protected]
https://lists.berlios.de/mailman/listinfo/yacy-svn