Author: orbiter Date: 2008-02-07 23:16:36 +0100 (Thu, 07 Feb 2008) New Revision: 4459
Modified: trunk/htroot/BlogComments.java trunk/htroot/CrawlProfileEditor_p.java trunk/htroot/Settings_p.java trunk/htroot/TestApplet.java trunk/htroot/User.java trunk/htroot/ViewImage.java trunk/htroot/Wiki.java trunk/htroot/www/welcome.java trunk/htroot/yacy/hello.java trunk/htroot/yacy/search.java trunk/htroot/yacy/transfer.java trunk/htroot/yacysearch.java trunk/source/de/anomic/data/userDB.java trunk/source/de/anomic/http/httpSSI.java trunk/source/de/anomic/http/httpd.java trunk/source/de/anomic/http/httpdFileHandler.java trunk/source/de/anomic/index/indexRWIEntry.java trunk/source/de/anomic/index/indexRWIEntryOrder.java trunk/source/de/anomic/index/indexRWIRowEntry.java trunk/source/de/anomic/index/indexRWIVarEntry.java trunk/source/de/anomic/index/indexURLEntry.java trunk/source/de/anomic/kelondro/kelondroSplitTable.java trunk/source/de/anomic/plasma/plasmaCrawlLURL.java trunk/source/de/anomic/plasma/plasmaCrawlZURL.java trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java trunk/source/de/anomic/plasma/plasmaSnippetCache.java trunk/source/de/anomic/plasma/plasmaSwitchboard.java trunk/source/de/anomic/plasma/plasmaWordIndex.java Log: - faster search: using different data structures that avoid multiple calculations - no more table copy for error-eco table - optional table copy for lurl-entries - more abstractions (less single constant strings) - better logging (using host names instead of IPs) Modified: trunk/htroot/BlogComments.java =================================================================== --- trunk/htroot/BlogComments.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/htroot/BlogComments.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -102,7 +102,7 @@ } String pagename = post.get("page", "blog_default"); - String ip = post.get("CLIENTIP", "127.0.0.1"); + String ip = post.get(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1"); String StrAuthor = post.get("author", "anonymous"); Modified: trunk/htroot/CrawlProfileEditor_p.java 
=================================================================== --- trunk/htroot/CrawlProfileEditor_p.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/htroot/CrawlProfileEditor_p.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -105,9 +105,9 @@ while (it.hasNext()) { selentry = (entry)it.next(); if (selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_PROXY) || - selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_REMOTE) || + selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_REMOTE) /*|| selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_TEXT) || - selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_MEDIA)) + selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_MEDIA)*/) continue; prop.put("profiles_" + count + "_name", selentry.name()); prop.put("profiles_" + count + "_handle", selentry.handle()); Modified: trunk/htroot/Settings_p.java =================================================================== --- trunk/htroot/Settings_p.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/htroot/Settings_p.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -212,7 +212,7 @@ } // clientIP - prop.putHTML("clientIP", (String) header.get("CLIENTIP", "<unknown>"), true); // read an artificial header addendum + prop.putHTML("clientIP", (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "<unknown>"), true); // read an artificial header addendum /* * seed upload settings Modified: trunk/htroot/TestApplet.java =================================================================== --- trunk/htroot/TestApplet.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/htroot/TestApplet.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -50,7 +50,7 @@ //File templatefile=filehandler.getOverlayedFile((String)post.get("url")); File classfile = httpdFileHandler.getOverlayedClass((String)post.get("url")); httpHeader header2=new httpHeader(); - header2.put("CLIENTIP", "127.0.0.1"); + header2.put(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1"); header2.put("PATH", 
post.get("url")); serverObjects tp=null; try { Modified: trunk/htroot/User.java =================================================================== --- trunk/htroot/User.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/htroot/User.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -79,7 +79,7 @@ prop.put("logged-in_identified-by", "2"); //try via ip if(entry == null){ - entry=sb.userDB.ipAuth(((String)header.get("CLIENTIP", "xxxxxx"))); + entry=sb.userDB.ipAuth(((String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "xxxxxx"))); if(entry != null){ prop.put("logged-in_identified-by", "0"); } @@ -108,7 +108,7 @@ //identified via form-login //TODO: this does not work for a static admin, yet. }else if(post != null && post.containsKey("username") && post.containsKey("password")){ - //entry=sb.userDB.passwordAuth((String)post.get("username"), (String)post.get("password"), (String)header.get("CLIENTIP", "xxxxxx")); + //entry=sb.userDB.passwordAuth((String)post.get("username"), (String)post.get("password"), (String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "xxxxxx")); String username=(String)post.get("username"); String password=(String)post.get("password"); @@ -163,7 +163,7 @@ if(post!=null && post.containsKey("logout")){ prop.put("logged-in", "0"); if(entry != null){ - entry.logout(((String)header.get("CLIENTIP", "xxxxxx")), userDB.getLoginToken(header.getHeaderCookies())); //todo: logout cookie + entry.logout(((String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "xxxxxx")), userDB.getLoginToken(header.getHeaderCookies())); //todo: logout cookie }else{ sb.userDB.adminLogout(userDB.getLoginToken(header.getHeaderCookies())); } Modified: trunk/htroot/ViewImage.java =================================================================== --- trunk/htroot/ViewImage.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/htroot/ViewImage.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -72,7 +72,7 @@ String urlString = post.get("url", ""); String urlLicense = post.get("code", ""); - boolean 
auth = ((String) header.get("CLIENTIP", "")).equals("localhost") || sb.verifyAuthentication(header, true); // handle access rights + boolean auth = ((String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")).equals("localhost") || sb.verifyAuthentication(header, true); // handle access rights yacyURL url = null; if ((urlString.length() > 0) && (auth)) try { Modified: trunk/htroot/Wiki.java =================================================================== --- trunk/htroot/Wiki.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/htroot/Wiki.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -88,7 +88,7 @@ String access = switchboard.getConfig("WikiAccess", "admin"); String pagename = post.get("page", "start"); - String ip = post.get("CLIENTIP", "127.0.0.1"); + String ip = post.get(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1"); String author = post.get("author", "anonymous"); if (author.equals("anonymous")) { author = wikiBoard.guessAuthor(ip); Modified: trunk/htroot/www/welcome.java =================================================================== --- trunk/htroot/www/welcome.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/htroot/www/welcome.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -78,7 +78,7 @@ prop.put("hostip", "Unknown Host Exception"); } prop.put("port", serverCore.getPortNr(env.getConfig("port","8080"))); - prop.put("clientip", (String) header.get("CLIENTIP", "")); + prop.put("clientip", (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")); final String peertype = (yacyCore.seedDB.mySeed() == null) ? 
yacySeed.PEERTYPE_JUNIOR : yacyCore.seedDB.mySeed().get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_VIRGIN); final boolean senior = (peertype.equals(yacySeed.PEERTYPE_SENIOR)) || (peertype.equals(yacySeed.PEERTYPE_PRINCIPAL)); Modified: trunk/htroot/yacy/hello.java =================================================================== --- trunk/htroot/yacy/hello.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/htroot/yacy/hello.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -103,7 +103,7 @@ // if ((properTest != null) && (! properTest.substring(0,1).equals("IP"))) { return null; } // we easily know the caller's IP: - final String clientip = (String) header.get("CLIENTIP", "<unknown>"); // read an artificial header addendum + final String clientip = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "<unknown>"); // read an artificial header addendum InetAddress ias = serverDomains.dnsResolve(clientip); if (ias == null) { prop.put("message", "cannot resolve your IP from your reported location " + clientip); Modified: trunk/htroot/yacy/search.java =================================================================== --- trunk/htroot/yacy/search.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/htroot/yacy/search.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -282,7 +282,7 @@ // prepare search statistics Long trackerHandle = new Long(System.currentTimeMillis()); HashMap<String, Object> searchProfile = theQuery.resultProfile(joincount, System.currentTimeMillis() - timestamp, urlRetrievalAllTime, snippetComputationAllTime); - String client = (String) header.get("CLIENTIP"); + String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP); searchProfile.put("host", client); yacySeed remotepeer = yacyCore.seedDB.lookupByIP(natLib.getInetAddress(client), true, false, false); searchProfile.put("peername", (remotepeer == null) ? 
"unknown" : remotepeer.getName()); Modified: trunk/htroot/yacy/transfer.java =================================================================== --- trunk/htroot/yacy/transfer.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/htroot/yacy/transfer.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -89,14 +89,14 @@ final yacySeed opeer = yacyCore.seedDB.get(ohash); if (opeer == null) { // reject unknown peers: this does not appear fair, but anonymous senders are dangerous - sb.getLog().logFine("RankingTransmission: rejected unknown peer '" + ohash + "', current IP " + header.get("CLIENTIP", "unknown")); + sb.getLog().logFine("RankingTransmission: rejected unknown peer '" + ohash + "', current IP " + header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "unknown")); return prop; } opeer.setLastSeenUTC(); if (filename.indexOf("..") >= 0) { // reject paths that contain '..' because they are dangerous - sb.getLog().logFine("RankingTransmission: rejected wrong path '" + filename + "' from peer " + opeer.getName() + "/" + opeer.getPublicAddress()+ ", current IP " + header.get("CLIENTIP", "unknown")); + sb.getLog().logFine("RankingTransmission: rejected wrong path '" + filename + "' from peer " + opeer.getName() + "/" + opeer.getPublicAddress()+ ", current IP " + header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "unknown")); return prop; } Modified: trunk/htroot/yacysearch.java =================================================================== --- trunk/htroot/yacysearch.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/htroot/yacysearch.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -257,7 +257,7 @@ constraint, true); - String client = (String) header.get("CLIENTIP"); // the search client who initiated the search + String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP); // the search client who initiated the search // tell all threads to do nothing for a specific time sb.intermissionAllThreads(10000); Modified: trunk/source/de/anomic/data/userDB.java 
=================================================================== --- trunk/source/de/anomic/data/userDB.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/source/de/anomic/data/userDB.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -155,7 +155,7 @@ return null; } public Entry getUser(httpHeader header){ - return getUser((String) header.get(httpHeader.AUTHORIZATION), (String)header.get("CLIENTIP"), header.getHeaderCookies()); + return getUser((String) header.get(httpHeader.AUTHORIZATION), (String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP), header.getHeaderCookies()); } public Entry getUser(String auth, String ip, String cookies){ Entry entry=null; Modified: trunk/source/de/anomic/http/httpSSI.java =================================================================== --- trunk/source/de/anomic/http/httpSSI.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/source/de/anomic/http/httpSSI.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -85,7 +85,7 @@ conProp.setProperty(httpHeader.CONNECTION_PROP_PATH, path); conProp.setProperty(httpHeader.CONNECTION_PROP_ARGS, args); conProp.setProperty(httpHeader.CONNECTION_PROP_HTTP_VER, httpHeader.HTTP_VERSION_0_9); - conProp.setProperty("CLIENTIP", "127.0.0.1"); + conProp.setProperty(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1"); header.put(httpHeader.AUTHORIZATION, authorization); httpdFileHandler.doGet(conProp, header, out); } Modified: trunk/source/de/anomic/http/httpd.java =================================================================== --- trunk/source/de/anomic/http/httpd.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/source/de/anomic/http/httpd.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -193,7 +193,7 @@ public void initSession(serverCore.Session newsession) throws IOException { this.session = newsession; this.userAddress = session.userAddress; // client InetAddress - this.clientIP = this.userAddress.getHostAddress(); + this.clientIP = this.userAddress.getHostName(); if (this.userAddress.isAnyLocalAddress()) this.clientIP 
= "localhost"; if (this.clientIP.equals("0:0:0:0:0:0:0:1")) this.clientIP = "localhost"; if (this.clientIP.equals("127.0.0.1")) this.clientIP = "localhost"; @@ -1147,7 +1147,7 @@ // tp.put("host", serverCore.publicIP().getHostAddress()); // tp.put("port", switchboard.getConfig("port", "8080")); - String clientIP = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP,"127.0.0.1"); + String clientIP = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1"); // check if ip is local ip address InetAddress hostAddress = serverDomains.dnsResolve(clientIP); Modified: trunk/source/de/anomic/http/httpdFileHandler.java =================================================================== --- trunk/source/de/anomic/http/httpdFileHandler.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/source/de/anomic/http/httpdFileHandler.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -303,13 +303,13 @@ if ((path.substring(0,(pos==-1)?path.length():pos)).endsWith("_p") && (adminAccountBase64MD5.length() != 0)) { //authentication required //userDB - if(sb.userDB.hasAdminRight(authorization, conProp.getProperty("CLIENTIP"), requestHeader.getHeaderCookies())){ + if(sb.userDB.hasAdminRight(authorization, conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP), requestHeader.getHeaderCookies())){ //Authentication successful. remove brute-force flag - serverCore.bfHost.remove(conProp.getProperty("CLIENTIP")); + serverCore.bfHost.remove(conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP)); //static }else if(authorization != null && httpd.staticAdminAuthenticated(authorization.trim().substring(6), switchboard)==4){ //Authentication successful. remove brute-force flag - serverCore.bfHost.remove(conProp.getProperty("CLIENTIP")); + serverCore.bfHost.remove(conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP)); //no auth }else if (authorization == null) { // no authorization given in response. 
Ask for that @@ -323,7 +323,7 @@ return; } else { // a wrong authentication was given or the userDB user does not have admin access. Ask again - String clientIP = conProp.getProperty("CLIENTIP", "unknown-host"); + String clientIP = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP, "unknown-host"); serverLog.logInfo("HTTPD", "Wrong log-in for account 'admin' in http file handler for path '" + path + "' from host '" + clientIP + "'"); Integer attempts = (Integer) serverCore.bfHost.get(clientIP); if (attempts == null) @@ -473,7 +473,7 @@ // call an image-servlet to produce an on-the-fly - generated image Object img = null; try { - requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty("CLIENTIP")); + requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP)); requestHeader.put(httpHeader.CONNECTION_PROP_PATH, path); // in case that there are no args given, args = null or empty hashmap img = invokeServlet(targetClass, requestHeader, args); @@ -527,7 +527,7 @@ } } else if ((targetClass != null) && (path.endsWith(".stream"))) { // call rewrite-class - requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty("CLIENTIP")); + requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP)); requestHeader.put(httpHeader.CONNECTION_PROP_PATH, path); //requestHeader.put(httpHeader.CONNECTION_PROP_INPUTSTREAM, body); //requestHeader.put(httpHeader.CONNECTION_PROP_OUTPUTSTREAM, out); @@ -570,7 +570,7 @@ } else { // CGI-class: call the class to create a property for rewriting try { - requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty("CLIENTIP")); + requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP)); requestHeader.put(httpHeader.CONNECTION_PROP_PATH, path); // in case that there are no args given, args = null or empty hashmap Object tmp = 
invokeServlet(targetClass, requestHeader, args); @@ -586,7 +586,7 @@ if (tp.containsKey(servletProperties.ACTION_AUTHENTICATE)) { // handle brute-force protection if (authorization != null) { - String clientIP = conProp.getProperty("CLIENTIP", "unknown-host"); + String clientIP = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP, "unknown-host"); serverLog.logInfo("HTTPD", "dynamic log-in for account 'admin' in http file handler for path '" + path + "' from host '" + clientIP + "'"); Integer attempts = (Integer) serverCore.bfHost.get(clientIP); if (attempts == null) Modified: trunk/source/de/anomic/index/indexRWIEntry.java =================================================================== --- trunk/source/de/anomic/index/indexRWIEntry.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/source/de/anomic/index/indexRWIEntry.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -47,8 +47,6 @@ public String urlHash(); - public int quality(); - public int virtualAge(); public long lastModified(); Modified: trunk/source/de/anomic/index/indexRWIEntryOrder.java =================================================================== --- trunk/source/de/anomic/index/indexRWIEntryOrder.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/source/de/anomic/index/indexRWIEntryOrder.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -26,6 +26,7 @@ package de.anomic.index; +import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.Map; @@ -55,12 +56,13 @@ this.maxdomcount = 0; } - public void normalizeWith(indexContainer container) { + public ArrayList<indexRWIVarEntry> normalizeWith(indexContainer container) { // normalize ranking: find minimum and maxiumum of separate ranking criteria assert (container != null); - + ArrayList<indexRWIVarEntry> result = null; + //long s0 = System.currentTimeMillis(); - if ((processors > 1) && (container.size() > 10000)) { + if ((processors > 1) && (container.size() > 600)) { // run minmax with two threads int middle = 
container.size() / 2; minmaxfinder mmf0 = new minmaxfinder(container, 0, middle); @@ -83,6 +85,8 @@ entry = di.next(); this.doms.addScore(entry.getKey(), ((Integer) entry.getValue()).intValue()); } + result = mmf0.decodedEntries; + result.addAll(mmf1.decodedContainer()); //long s1= System.currentTimeMillis(), sc = Math.max(1, s1 - s0); //System.out.println("***DEBUG*** indexRWIEntry.Order (2-THREADED): " + sc + " milliseconds for " + container.size() + " entries, " + (container.size() / sc) + " entries/millisecond"); } else if (container.size() > 0) { @@ -97,10 +101,12 @@ entry = di.next(); this.doms.addScore(entry.getKey(), ((Integer) entry.getValue()).intValue()); } + result = mmf.decodedContainer(); //long s1= System.currentTimeMillis(), sc = Math.max(1, s1 - s0); //System.out.println("***DEBUG*** indexRWIEntry.Order (ONETHREAD): " + sc + " milliseconds for " + container.size() + " entries, " + (container.size() / sc) + " entries/millisecond"); } if (this.doms.size() > 0) this.maxdomcount = this.doms.getMaxScore(); + return result; } public kelondroOrder<indexRWIVarEntry> clone() { @@ -179,6 +185,7 @@ private int start, end; private HashMap<String, Integer> doms; private Integer int1; + ArrayList<indexRWIVarEntry> decodedEntries; public minmaxfinder(indexContainer container, int start /*including*/, int end /*excluding*/) { this.container = container; @@ -186,18 +193,20 @@ this.end = end; this.doms = new HashMap<String, Integer>(); this.int1 = new Integer(1); + this.decodedEntries = new ArrayList<indexRWIVarEntry>(); } public void run() { // find min/max to obtain limits for normalization this.entryMin = null; this.entryMax = null; - indexRWIRowEntry iEntry; + indexRWIVarEntry iEntry; int p = this.start; String dom; Integer count; while (p < this.end) { - iEntry = new indexRWIRowEntry(container.get(p++)); + iEntry = new indexRWIVarEntry(new indexRWIRowEntry(container.get(p++))); + this.decodedEntries.add(iEntry); // find min/max if (this.entryMin == null) 
this.entryMin = new indexRWIVarEntry(iEntry); else indexRWIVarEntry.min(this.entryMin, iEntry); if (this.entryMax == null) this.entryMax = new indexRWIVarEntry(iEntry); else indexRWIVarEntry.max(this.entryMax, iEntry); @@ -212,6 +221,10 @@ } } + public ArrayList<indexRWIVarEntry> decodedContainer() { + return this.decodedEntries; + } + public HashMap<String, Integer> domcount() { return this.doms; } Modified: trunk/source/de/anomic/index/indexRWIRowEntry.java =================================================================== --- trunk/source/de/anomic/index/indexRWIRowEntry.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/source/de/anomic/index/indexRWIRowEntry.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -88,6 +88,8 @@ private static final int col_worddistance = 18; // i 1 initial zero; may be used as reserve: is filled during search private static final int col_reserve = 19; // k 1 reserve + public double termFrequency; + private kelondroRow.Entry entry; public indexRWIRowEntry(String urlHash, @@ -101,14 +103,14 @@ int posinphrase, // position of word in its phrase int posofphrase, // number of the phrase where word appears int worddistance, // word distance; this is 0 by default, and set to the difference of posintext from two indexes if these are combined (simultanous search). 
If stored, this shows that the result was obtained by remote search - int sizeOfPage, // # of bytes of the page TODO: not needed any more long lastmodified, // last-modified time of the document where word appears long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short String language, // (guessed) language of document char doctype, // type of document int outlinksSame, // outlinks to same domain int outlinksOther, // outlinks to other domain - kelondroBitfield flags // attributes to the url and to the word according the url + kelondroBitfield flags, // attributes to the url and to the word according the url + double termFrequency ) { assert (urlHash.length() == 12) : "urlhash = " + urlHash; @@ -136,6 +138,7 @@ this.entry.setCol(col_posofphrase, posofphrase); this.entry.setCol(col_worddistance, worddistance); this.entry.setCol(col_reserve, 0); + this.termFrequency = termFrequency; } public indexRWIRowEntry(String urlHash, String code) { @@ -183,10 +186,6 @@ return this.entry.getColString(col_urlhash, null); } - public int quality() { - return 0; // not used any more - } - public int virtualAge() { return (int) this.entry.getColLong(col_lastModified); // this is the time in MicoDateDays format } @@ -256,7 +255,8 @@ } public double termFrequency() { - return (((double) this.hitcount()) / ((double) (this.wordsintext() + this.wordsintitle() + 1))); + if (this.termFrequency == 0.0) this.termFrequency = (((double) this.hitcount()) / ((double) (this.wordsintext() + this.wordsintitle() + 1))); + return this.termFrequency; } public String toString() { @@ -288,18 +288,12 @@ public boolean isNewer(indexRWIEntry other) { if (other == null) return true; if (this.lastModified() > other.lastModified()) return true; - if (this.lastModified() == other.lastModified()) { - if (this.quality() > other.quality()) return true; - } return false; } public boolean isOlder(indexRWIEntry other) { if (other == null) return 
false; if (this.lastModified() < other.lastModified()) return true; - if (this.lastModified() == other.lastModified()) { - if (this.quality() < other.quality()) return true; - } return false; } Modified: trunk/source/de/anomic/index/indexRWIVarEntry.java =================================================================== --- trunk/source/de/anomic/index/indexRWIVarEntry.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/source/de/anomic/index/indexRWIVarEntry.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -37,7 +37,7 @@ public char type; public int hitcount, llocal, lother, phrasesintext, posintext, posinphrase, posofphrase, - quality, urlcomps, urllength, virtualAge, + urlcomps, urllength, virtualAge, worddistance, wordsintext, wordsintitle; public double termFrequency; @@ -55,7 +55,6 @@ this.posintext = e.posintext(); this.posinphrase = e.posinphrase(); this.posofphrase = e.posofphrase(); - this.quality = e.quality(); this.urlcomps = e.urlcomps(); this.urllength = e.urllength(); this.virtualAge = e.virtualAge(); @@ -133,9 +132,29 @@ public int posofphrase() { return posofphrase; } - - public int quality() { - return quality; + + private indexRWIRowEntry toRowEntry() { + return new indexRWIRowEntry( + urlHash, + urllength, // byte-length of complete URL + urlcomps, // number of path components + wordsintitle, // length of description/length (longer are better?) 
+ hitcount, // how often appears this word in the text + wordsintext, // total number of words + phrasesintext, // total number of phrases + posintext, // position of word in all words + posinphrase, // position of word in its phrase + posofphrase, // number of the phrase where word appears + worddistance, // word distance + lastModified, // last-modified time of the document where word appears + System.currentTimeMillis(), // update time; + language, // (guessed) language of document + type, // type of document + llocal, // outlinks to same domain + lother, // outlinks to other domain + flags, // attributes to the url and to the word according the url + termFrequency + ); } public Entry toKelondroEntry() { @@ -144,8 +163,7 @@ } public String toPropertyForm() { - assert false; // should not be used - return null; + return toRowEntry().toPropertyForm(); } public String urlHash() { @@ -177,7 +195,8 @@ } public double termFrequency() { - return termFrequency; + if (this.termFrequency == 0.0) this.termFrequency = (((double) this.hitcount()) / ((double) (this.wordsintext() + this.wordsintitle() + 1))); + return this.termFrequency; } public static final void min(indexRWIVarEntry t, indexRWIEntry other) { @@ -187,7 +206,6 @@ if (t.hitcount() > (v = other.hitcount())) t.hitcount = v; if (t.llocal() > (v = other.llocal())) t.llocal = v; if (t.lother() > (v = other.lother())) t.lother = v; - if (t.quality() > (v = other.quality())) t.quality = v; if (t.virtualAge() > (v = other.virtualAge())) t.virtualAge = v; if (t.wordsintext() > (v = other.wordsintext())) t.wordsintext = v; if (t.phrasesintext() > (v = other.phrasesintext())) t.phrasesintext = v; @@ -210,7 +228,6 @@ if (t.hitcount() < (v = other.hitcount())) t.hitcount = v; if (t.llocal() < (v = other.llocal())) t.llocal = v; if (t.lother() < (v = other.lother())) t.lother = v; - if (t.quality() < (v = other.quality())) t.quality = v; if (t.virtualAge() < (v = other.virtualAge())) t.virtualAge = v; if (t.wordsintext() < 
(v = other.wordsintext())) t.wordsintext = v; if (t.phrasesintext() < (v = other.phrasesintext())) t.phrasesintext = v; Modified: trunk/source/de/anomic/index/indexURLEntry.java =================================================================== --- trunk/source/de/anomic/index/indexURLEntry.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/source/de/anomic/index/indexURLEntry.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -115,7 +115,7 @@ private kelondroRow.Entry entry; private String snippet; - private indexRWIRowEntry word; // this is only used if the url is transported via remote search requests + private indexRWIEntry word; // this is only used if the url is transported via remote search requests private long ranking; // during generation of a search result this value is set public indexURLEntry( @@ -185,7 +185,7 @@ return s.toString().getBytes(); } - public indexURLEntry(kelondroRow.Entry entry, indexRWIRowEntry searchedWord, long ranking) { + public indexURLEntry(kelondroRow.Entry entry, indexRWIEntry searchedWord, long ranking) { this.entry = entry; this.snippet = null; this.word = searchedWord; @@ -391,7 +391,7 @@ return snippet; } - public indexRWIRowEntry word() { + public indexRWIEntry word() { return word; } Modified: trunk/source/de/anomic/kelondro/kelondroSplitTable.java =================================================================== --- trunk/source/de/anomic/kelondro/kelondroSplitTable.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/source/de/anomic/kelondro/kelondroSplitTable.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -119,7 +119,7 @@ // this is a kelonodroFlex table table = new kelondroCache(new kelondroFlexTable(path, maxf, preloadTime, rowdef, 0, resetOnFail)); } else { - table = new kelondroEcoTable(f, rowdef, kelondroEcoTable.tailCacheDenyUsage, EcoFSBufferSize, 0); + table = new kelondroEcoTable(f, rowdef, kelondroEcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0); } tables.put(date, table); } Modified: 
trunk/source/de/anomic/plasma/plasmaCrawlLURL.java =================================================================== --- trunk/source/de/anomic/plasma/plasmaCrawlLURL.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/source/de/anomic/plasma/plasmaCrawlLURL.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -66,7 +66,7 @@ import de.anomic.data.htmlTools; import de.anomic.http.httpc; import de.anomic.http.httpc.response; -import de.anomic.index.indexRWIRowEntry; +import de.anomic.index.indexRWIEntry; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroCache; @@ -153,7 +153,7 @@ return 0; } - public synchronized indexURLEntry load(String urlHash, indexRWIRowEntry searchedWord, long ranking) { + public synchronized indexURLEntry load(String urlHash, indexRWIEntry searchedWord, long ranking) { // generates an plasmaLURLEntry using the url hash // to speed up the access, the url-hashes are buffered // in the hash cache. Modified: trunk/source/de/anomic/plasma/plasmaCrawlZURL.java =================================================================== --- trunk/source/de/anomic/plasma/plasmaCrawlZURL.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/source/de/anomic/plasma/plasmaCrawlZURL.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -69,7 +69,7 @@ if (f.isDirectory()) kelondroFlexTable.delete(cachePath, tablename); else f.delete(); } } - urlIndex = new kelondroEcoTable(f, rowdef, kelondroEcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0); + urlIndex = new kelondroEcoTable(f, rowdef, kelondroEcoTable.tailCacheDenyUsage, EcoFSBufferSize, 0); //urlIndex = new kelondroFlexTable(cachePath, tablename, -1, rowdef, 0, true); } Modified: trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java =================================================================== --- trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/source/de/anomic/plasma/plasmaSearchRankingProcess.java 
2008-02-07 22:16:36 UTC (rev 4459) @@ -28,6 +28,7 @@ import java.io.File; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.Map; @@ -40,6 +41,7 @@ import de.anomic.index.indexRWIEntry; import de.anomic.index.indexRWIEntryOrder; import de.anomic.index.indexRWIRowEntry; +import de.anomic.index.indexRWIVarEntry; import de.anomic.index.indexURLEntry; import de.anomic.kelondro.kelondroBinSearch; import de.anomic.kelondro.kelondroMScoreCluster; @@ -52,8 +54,8 @@ public static kelondroBinSearch[] ybrTables = null; // block-rank tables private static boolean useYBR = true; - private TreeMap<Object, indexRWIRowEntry> sortedRWIEntries; // key = ranking (Long); value = indexRWIEntry; if sortorder < 2 then key is instance of String - private HashMap<String, TreeMap<Object, indexRWIRowEntry>> doubleDomCache; // key = domhash (6 bytes); value = TreeMap like sortedRWIEntries + private TreeMap<Object, indexRWIVarEntry> sortedRWIEntries; // key = ranking (Long); value = indexRWIEntry; if sortorder < 2 then key is instance of String + private HashMap<String, TreeMap<Object, indexRWIVarEntry>> doubleDomCache; // key = domhash (6 bytes); value = TreeMap like sortedRWIEntries private HashMap<String, String> handover; // key = urlhash, value = urlstring; used for double-check of urls that had been handed over to search process private plasmaSearchQuery query; private int sortorder; @@ -72,8 +74,8 @@ // attention: if minEntries is too high, this method will not terminate within the maxTime // sortorder: 0 = hash, 1 = url, 2 = ranking this.localSearchContainerMaps = null; - this.sortedRWIEntries = new TreeMap<Object, indexRWIRowEntry>(); - this.doubleDomCache = new HashMap<String, TreeMap<Object, indexRWIRowEntry>>(); + this.sortedRWIEntries = new TreeMap<Object, indexRWIVarEntry>(); + this.doubleDomCache = new HashMap<String, TreeMap<Object, indexRWIVarEntry>>(); this.handover = new HashMap<String, String>(); 
this.order = null; this.query = query; @@ -132,11 +134,11 @@ this.remote_indexCount += index.size(); } - indexRWIRowEntry ientry; + indexRWIVarEntry ientry; indexURLEntry uentry; String u; loop: while (en.hasNext()) { - ientry = en.next(); + ientry = new indexRWIVarEntry(en.next()); // check constraints if (!testFlags(ientry)) continue loop; @@ -183,13 +185,13 @@ if (this.order == null) { this.order = new indexRWIEntryOrder(query.ranking); } - this.order.normalizeWith(index); + ArrayList<indexRWIVarEntry> decodedEntries = this.order.normalizeWith(index); serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.NORMALIZING, index.size(), System.currentTimeMillis() - timer)); // normalize entries and get ranking timer = System.currentTimeMillis(); - Iterator<indexRWIRowEntry> i = index.entries(); - indexRWIRowEntry iEntry, l; + Iterator<indexRWIVarEntry> i = decodedEntries.iterator(); + indexRWIVarEntry iEntry, l; long biggestEntry = 0; //long s0 = System.currentTimeMillis(); Long r; @@ -272,8 +274,8 @@ private synchronized Object[] /*{Object, indexRWIEntry}*/ bestRWI(boolean skipDoubleDom) { // returns from the current RWI list the best entry and removed this entry from the list Object bestEntry; - TreeMap<Object, indexRWIRowEntry> m; - indexRWIRowEntry rwi; + TreeMap<Object, indexRWIVarEntry> m; + indexRWIVarEntry rwi; while (sortedRWIEntries.size() > 0) { bestEntry = sortedRWIEntries.firstKey(); rwi = sortedRWIEntries.remove(bestEntry); @@ -283,7 +285,7 @@ m = this.doubleDomCache.get(domhash); if (m == null) { // first appearance of dom - m = new TreeMap<Object, indexRWIRowEntry>(); + m = new TreeMap<Object, indexRWIVarEntry>(); this.doubleDomCache.put(domhash, m); return new Object[]{bestEntry, rwi}; } @@ -292,10 +294,10 @@ } // no more entries in sorted RWI entries. 
Now take Elements from the doubleDomCache // find best entry from all caches - Iterator<TreeMap<Object, indexRWIRowEntry>> i = this.doubleDomCache.values().iterator(); + Iterator<TreeMap<Object, indexRWIVarEntry>> i = this.doubleDomCache.values().iterator(); bestEntry = null; Object o; - indexRWIRowEntry bestrwi = null; + indexRWIVarEntry bestrwi = null; while (i.hasNext()) { m = i.next(); if (m.size() == 0) continue; @@ -331,7 +333,7 @@ while ((sortedRWIEntries.size() > 0) || (size() > 0)) { Object[] obrwi = bestRWI(skipDoubleDom); Object bestEntry = obrwi[0]; - indexRWIRowEntry ientry = (indexRWIRowEntry) obrwi[1]; + indexRWIVarEntry ientry = (indexRWIVarEntry) obrwi[1]; long ranking = (bestEntry instanceof Long) ? ((Long) bestEntry).longValue() : 0; indexURLEntry u = wordIndex.loadedURL.load(ientry.urlHash(), ientry, ranking); if (u != null) { @@ -347,7 +349,7 @@ public synchronized int size() { //assert sortedRWIEntries.size() == urlhashes.size() : "sortedRWIEntries.size() = " + sortedRWIEntries.size() + ", urlhashes.size() = " + urlhashes.size(); int c = sortedRWIEntries.size(); - Iterator<TreeMap<Object, indexRWIRowEntry>> i = this.doubleDomCache.values().iterator(); + Iterator<TreeMap<Object, indexRWIVarEntry>> i = this.doubleDomCache.values().iterator(); while (i.hasNext()) c += i.next().size(); return c; } Modified: trunk/source/de/anomic/plasma/plasmaSnippetCache.java =================================================================== --- trunk/source/de/anomic/plasma/plasmaSnippetCache.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/source/de/anomic/plasma/plasmaSnippetCache.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -414,7 +414,7 @@ resInfo = entry.getDocumentInfo(); // read resource body (if it is there) - byte []resourceArray = entry.cacheArray(); + byte[] resourceArray = entry.cacheArray(); if (resourceArray != null) { resContent = new ByteArrayInputStream(resourceArray); resContentLength = resourceArray.length; Modified: 
trunk/source/de/anomic/plasma/plasmaSwitchboard.java =================================================================== --- trunk/source/de/anomic/plasma/plasmaSwitchboard.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/source/de/anomic/plasma/plasmaSwitchboard.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -906,7 +906,7 @@ } catch (MalformedURLException e) { } } else { - File networkUnitDefinitionFile = new File(rootPath, networkUnitDefinition); + File networkUnitDefinitionFile = (networkUnitDefinition.startsWith("/")) ? new File(networkUnitDefinition) : new File(rootPath, networkUnitDefinition); if (networkUnitDefinitionFile.exists()) { initProps = serverFileUtils.loadHashMap(networkUnitDefinitionFile); this.setConfig(initProps); @@ -2348,14 +2348,14 @@ wordStat.posInPhrase, wordStat.numOfPhrase, 0, - newEntry.size(), docDate.getTime(), System.currentTimeMillis(), language, doctype, ioLinks[0].intValue(), ioLinks[1].intValue(), - condenser.RESULT_FLAGS + condenser.RESULT_FLAGS, + 0.0 ); indexContainer wordIdxContainer = plasmaWordIndex.emptyContainer(wordHash, 1); wordIdxContainer.add(wordIdxEntry); @@ -2573,10 +2573,10 @@ if (authorization.length() > 256) return 0; // authorization by encoded password, only for localhost access - if ((((String) header.get("CLIENTIP", "")).equals("localhost")) && (adminAccountBase64MD5.equals(authorization))) return 3; // soft-authenticated for localhost + if ((((String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")).equals("localhost")) && (adminAccountBase64MD5.equals(authorization))) return 3; // soft-authenticated for localhost // authorization by hit in userDB - if (userDB.hasAdminRight((String) header.get(httpHeader.AUTHORIZATION, "xxxxxx"), ((String) header.get("CLIENTIP", "")), header.getHeaderCookies())) return 4; //return, because 4=max + if (userDB.hasAdminRight((String) header.get(httpHeader.AUTHORIZATION, "xxxxxx"), ((String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")), header.getHeaderCookies())) 
return 4; //return, because 4=max // authorization with admin keyword in configuration return httpd.staticAdminAuthenticated(authorization, this); Modified: trunk/source/de/anomic/plasma/plasmaWordIndex.java =================================================================== --- trunk/source/de/anomic/plasma/plasmaWordIndex.java 2008-02-07 20:35:24 UTC (rev 4458) +++ trunk/source/de/anomic/plasma/plasmaWordIndex.java 2008-02-07 22:16:36 UTC (rev 4459) @@ -314,13 +314,13 @@ wprop.posInPhrase, wprop.numOfPhrase, 0, - size, urlModified.getTime(), System.currentTimeMillis(), language, doctype, outlinksSame, outlinksOther, - wprop.flags); + wprop.flags, + 0.0); addEntry(plasmaCondenser.word2hash(word), ientry, System.currentTimeMillis(), false); wordCount++; } _______________________________________________ YaCy-svn mailing list YaCy-svn@lists.berlios.de https://lists.berlios.de/mailman/listinfo/yacy-svn