http://www.mediawiki.org/wiki/Special:Code/MediaWiki/65461

Revision: 65461
Author:   daniel
Date:     2010-04-23 11:32:56 +0000 (Fri, 23 Apr 2010)

Log Message:
-----------
idManager options / dependency update

Modified Paths:
--------------
    trunk/WikiWord/WikiWordBuilder/.classpath
    
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/NameMaps.java
    
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/util/IdManagerBenchmark.java
    
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/PageTitleFilter.java
    
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/TitleSetFilter.java
    
trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/store/builder/NameStoreBenchmark.java
    trunk/WikiWord/WikiWordBuilder/tweaks.properties.sample

Added Paths:
-----------
    trunk/WikiWord/WikiWordBuilder/contrib/
    trunk/WikiWord/WikiWordBuilder/contrib/README
    trunk/WikiWord/WikiWordBuilder/contrib/install-all
    trunk/WikiWord/WikiWordBuilder/contrib/patricia-trie-0.1.pom

Removed Paths:
-------------
    trunk/WikiWord/WikiWordBuilder/contrib/jzlib-1.0.7-LICENSE.txt
    trunk/WikiWord/WikiWordBuilder/contrib/jzlib-1.0.7.jar
    trunk/WikiWord/WikiWordBuilder/contrib/jzlib-1.0.7.url
    trunk/WikiWord/WikiWordBuilder/lib/

Modified: trunk/WikiWord/WikiWordBuilder/.classpath
===================================================================
--- trunk/WikiWord/WikiWordBuilder/.classpath   2010-04-23 01:16:46 UTC (rev 
65460)
+++ trunk/WikiWord/WikiWordBuilder/.classpath   2010-04-23 11:32:56 UTC (rev 
65461)
@@ -9,12 +9,7 @@
        <classpathentry kind="con" 
path="org.eclipse.jdt.junit.JUNIT_CONTAINER/3.8.1"/>
        <classpathentry combineaccessrules="false" kind="src" 
path="/BrightByteDB"/>
        <classpathentry kind="var" 
path="M2_REPO/mysql/mysql-connector-java/3.1.11/mysql-connector-java-3.1.11.jar"/>
-       <classpathentry kind="lib" path="lib/patricia-trie-0.1.jar" 
sourcepath="/home/daniel/src/patricia-trie-0.1/patricia-trie-0.1.jar">
-               <attributes>
-                       <attribute name="javadoc_location" 
value="file:/home/daniel/src/patricia-trie-0.1/api/"/>
-               </attributes>
-       </classpathentry>
-       <classpathentry kind="var" 
path="M2_REPO/org/javolution/javolution/5.2.6/javolution-5.2.6.jar"/>
-       <classpathentry kind="lib" path="lib/jzlib-1.0.7.jar" 
sourcepath="/home/daniel/src/jzlib-1.0.7"/>
+       <classpathentry kind="var" 
path="M2_REPO/trove/trove/3.0.0a3/trove-3.0.0a3.jar"/>
+       <classpathentry kind="var" 
path="M2_REPO/kapsi/patricia-trie/0.1/patricia-trie-0.1.jar"/>
        <classpathentry kind="output" path="bin"/>
 </classpath>


Property changes on: trunk/WikiWord/WikiWordBuilder/contrib
___________________________________________________________________
Added: svn:mergeinfo
   + 

Added: trunk/WikiWord/WikiWordBuilder/contrib/README
===================================================================
--- trunk/WikiWord/WikiWordBuilder/contrib/README                               
(rev 0)
+++ trunk/WikiWord/WikiWordBuilder/contrib/README       2010-04-23 11:32:56 UTC 
(rev 65461)
@@ -0,0 +1,7 @@
+This directory contains libraries that are not readily available from 
+a well known Maven repository. For each jar file, there should be
+a corresponding pom file as well as a license text.
+
+Before using Apache Maven, please install these libraries into your local
+maven repository. You can use the script "install-all" to do this. Maven
+will automatically download any other libraries required for building.
\ No newline at end of file

Added: trunk/WikiWord/WikiWordBuilder/contrib/install-all
===================================================================
--- trunk/WikiWord/WikiWordBuilder/contrib/install-all                          
(rev 0)
+++ trunk/WikiWord/WikiWordBuilder/contrib/install-all  2010-04-23 11:32:56 UTC 
(rev 65461)
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+for pom in *.pom; do
+       jar="${pom%%.pom}.jar"
+       mvn install:install-file -Dfile="$jar" -DpomFile="$pom"
+done


Property changes on: trunk/WikiWord/WikiWordBuilder/contrib/install-all
___________________________________________________________________
Added: svn:executable
   + *

Deleted: trunk/WikiWord/WikiWordBuilder/contrib/jzlib-1.0.7-LICENSE.txt
===================================================================
--- trunk/WikiWord/WikiWordBuilder/lib/jzlib-1.0.7-LICENSE.txt  2010-04-04 
21:32:42 UTC (rev 64604)
+++ trunk/WikiWord/WikiWordBuilder/contrib/jzlib-1.0.7-LICENSE.txt      
2010-04-23 11:32:56 UTC (rev 65461)
@@ -1,29 +0,0 @@
-JZlib 0.0.* were released under the GNU LGPL license.  Later, we have switched 
-over to a BSD-style license. 
-
-------------------------------------------------------------------------------
-Copyright (c) 2000,2001,2002,2003 ymnk, JCraft,Inc. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-  1. Redistributions of source code must retain the above copyright notice,
-     this list of conditions and the following disclaimer.
-
-  2. Redistributions in binary form must reproduce the above copyright 
-     notice, this list of conditions and the following disclaimer in 
-     the documentation and/or other materials provided with the distribution.
-
-  3. The names of the authors may not be used to endorse or promote products
-     derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
-INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
-FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JCRAFT,
-INC. OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
-OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
-EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Deleted: trunk/WikiWord/WikiWordBuilder/contrib/jzlib-1.0.7.jar
===================================================================
(Binary files differ)

Deleted: trunk/WikiWord/WikiWordBuilder/contrib/jzlib-1.0.7.url
===================================================================
--- trunk/WikiWord/WikiWordBuilder/lib/jzlib-1.0.7.url  2010-04-04 21:32:42 UTC 
(rev 64604)
+++ trunk/WikiWord/WikiWordBuilder/contrib/jzlib-1.0.7.url      2010-04-23 
11:32:56 UTC (rev 65461)
@@ -1 +0,0 @@
-http://www.jcraft.com/jzlib/

Added: trunk/WikiWord/WikiWordBuilder/contrib/patricia-trie-0.1.pom
===================================================================
--- trunk/WikiWord/WikiWordBuilder/contrib/patricia-trie-0.1.pom                
                (rev 0)
+++ trunk/WikiWord/WikiWordBuilder/contrib/patricia-trie-0.1.pom        
2010-04-23 11:32:56 UTC (rev 65461)
@@ -0,0 +1,9 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" 
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd">
+                                                                               
                                                                                
+       <modelVersion>4.0.0</modelVersion>
+       <groupId>kapsi</groupId>
+       <artifactId>patricia-trie</artifactId>
+       <version>0.1</version>
+</project>

Modified: 
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/NameMaps.java
===================================================================
--- 
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/NameMaps.java
   2010-04-23 01:16:46 UTC (rev 65460)
+++ 
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/NameMaps.java
   2010-04-23 11:32:56 UTC (rev 65461)
@@ -20,6 +20,7 @@
 import de.brightbyte.data.Functor;
 import de.brightbyte.data.KeyDigestingValueStore;
 import de.brightbyte.data.KeyValueStore;
+import de.brightbyte.data.LongIntLookup;
 import de.brightbyte.data.MapLookup;
 import de.brightbyte.data.XorFold32;
 import de.brightbyte.data.XorFold64;
@@ -42,8 +43,8 @@
                        return new HashMap<String, V>();
                }*/
 
-               public static <V>KeyValueStore<String, V> newStore(String 
storeParams, String lang) {
-                       KeyValueStore<String, V> store = null;
+               public static KeyValueStore<String, Integer> newStore(String 
storeParams, String lang) {
+                       KeyValueStore<String, Integer> store = null;
                        
                        String[] tt = storeParams.split("[,;|+/ &]+");
                        
@@ -51,7 +52,7 @@
                        params.addAll(Arrays.asList(tt));
                        
                        if (params.contains("none") || params.contains("null")) 
store = null;
-                       else if (params.contains("string")) store = new 
MapLookup<String, V>(new HashMap<String, V>());
+                       else if (params.contains("string")) store = new 
MapLookup<String, Integer>(new HashMap<String, Integer>());
                        else if (params.contains("utf8") || 
params.contains("utf16")) {
                                //initial digest turns string into UTF-8 bytes
                                Functor<byte[], String> digest;
@@ -72,16 +73,19 @@
                                        throw new RuntimeException(e);
                                }
                                
-                               if (params.contains("fold64") || 
params.contains("fold32")) { //fold into Long
-                                       Functor<? extends Number, byte[]> fold;
+                               if (params.contains("fold64")) { //fold into 
Long
+                                       Functor<Long, byte[]> fold;
+                                       fold = XorFold64.instance;
                                        
-                                       if (params.contains("fold32")) fold = 
XorFold32.instance;
-                                       else fold = XorFold64.instance;
-                                       
-                                       Functor<Number, String> convert = new 
Functor.Composite<Number, byte[], String>(digest, fold);
+                                       Functor<Long, String> convert = new 
Functor.Composite<Long, byte[], String>(digest, fold);
 
-                                       MapLookup<Number, V> numStore = new 
MapLookup<Number, V>(new HashMap<Number, V>());
-                                       store = new 
KeyDigestingValueStore<String, Number, V>(numStore, convert);
+                                       if (params.contains("primitive")) {
+                                               LongIntLookup<Long> numStore = 
new LongIntLookup<Long>();
+                                               store = new 
KeyDigestingValueStore<String, Long, Integer>(numStore, convert);
+                                       } else {
+                                               MapLookup<Long, Integer> 
numStore = new MapLookup<Long, Integer>(new HashMap<Long, Integer>());
+                                               store = new 
KeyDigestingValueStore<String, Long, Integer>(numStore, convert);
+                                       }
                                } else { //keep bytes, wrap in ByteArray
                                                if (params.contains("wrap8")) 
digest = new Functor.Composite<byte[], byte[], String>(digest, new XorWrap(8));
                                                else if 
(params.contains("wrap6")) digest = new Functor.Composite<byte[], byte[], 
String>(digest, new XorWrap(6));
@@ -92,8 +96,8 @@
                                                Functor<ByteString, String> 
convert = new Functor.Composite<ByteString, byte[], String>(digest, 
ByteString.wrap);
                        
                                                //set up the store
-                                               MapLookup<ByteString, V> 
byteStore = new MapLookup<ByteString, V>(new HashMap<ByteString, V>());
-                                               store = new 
KeyDigestingValueStore<String, ByteString, V>(byteStore, convert);
+                                               MapLookup<ByteString, Integer> 
byteStore = new MapLookup<ByteString, Integer>(new HashMap<ByteString, 
Integer>());
+                                               store = new 
KeyDigestingValueStore<String, ByteString, Integer>(byteStore, convert);
                                }
                        }  else {
                                throw new IllegalArgumentException("bad store 
spec: "+storeParams+"; expected 'none' or 'string' or 'utf8' or 'utf16' as part 
of the type spec");

Modified: 
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/util/IdManagerBenchmark.java
===================================================================
--- 
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/util/IdManagerBenchmark.java
    2010-04-23 01:16:46 UTC (rev 65460)
+++ 
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/util/IdManagerBenchmark.java
    2010-04-23 11:32:56 UTC (rev 65461)
@@ -10,10 +10,7 @@
 import java.util.HashMap;
 import java.util.Map;
 
-import javolution.util.FastMap;
-
 import org.ardverk.collection.PatriciaTrie;
-import org.ardverk.collection.StringKeyAnalyzer;
 
 import de.brightbyte.audit.DebugUtil;
 import de.brightbyte.data.Codec;
@@ -68,8 +65,8 @@
                        Map<String, Integer> map;
                        
                        if (mode.equals("hash")) map = new HashMap<String, 
Integer>();
-                       else if (mode.equals("fast")) map = new FastMap<String, 
Integer>();
-                       else if (mode.equals("trie")) map = new 
PatriciaTrie<String, Integer>(StringKeyAnalyzer.INSTANCE);
+                       //else if (mode.equals("fast")) map = new 
FastMap<String, Integer>();
+                       //else if (mode.equals("trie")) map = new 
PatriciaTrie<String, Integer>(StringKeyAnalyzer.INSTANCE);
                        else if (mode.equals("rtrie")) map = new 
PatriciaTrie<String, Integer>(ReverseStringKeyAnalyzer.INSTANCE);
                        else if (mode.equals("terse")) map = new 
TerseIdMap<String>(String.class, NaturalComparator.<String>instance());
                        else throw new IllegalArgumentException("unknown mode: 
"+mode);
@@ -82,8 +79,8 @@
                        CharsetCodec converter = new CharsetCodec(enc);
                        
                        if (mode.equals("hash")) map = new HashMap<byte[], 
Integer>();
-                       else if (mode.equals("fast")) map = new FastMap<byte[], 
Integer>();
-                       else if (mode.equals("trie")) map = new 
PatriciaTrie<byte[], Integer>(ByteArrayKeyAnalyzer.INSTANCE);
+                       //else if (mode.equals("fast")) map = new 
FastMap<byte[], Integer>();
+                       //else if (mode.equals("trie")) map = new 
PatriciaTrie<byte[], Integer>(ByteArrayKeyAnalyzer.INSTANCE);
                        else if (mode.equals("rtrie")) throw new 
IllegalArgumentException("Reverte Trie is not yet supported for byte arrays");
                        else if (mode.equals("terse")) map = new 
TerseIdMap<byte[]>(byte[].class, ArrayComparator.BYTES);
                        else throw new IllegalArgumentException("unknown mode: 
"+mode);

Modified: 
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/PageTitleFilter.java
===================================================================
--- 
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/PageTitleFilter.java
  2010-04-23 01:16:46 UTC (rev 65460)
+++ 
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/PageTitleFilter.java
  2010-04-23 11:32:56 UTC (rev 65461)
@@ -4,10 +4,10 @@
 import de.brightbyte.wikiword.analyzer.WikiPage;
 
 public class PageTitleFilter implements WikiPageFilter {
-       protected Filter<CharSequence> filter;
+       protected Filter<String> filter;
        private String name;
        
-       public PageTitleFilter(String name, Filter<CharSequence> filter) {
+       public PageTitleFilter(String name, Filter<String> filter) {
                if (filter==null) throw new NullPointerException();
                this.filter = filter;
                this.name = name;
@@ -15,7 +15,7 @@
 
        public boolean matches(WikiPage page) {
                CharSequence t = page.getResourceName();
-               return filter.matches(t);
+               return filter.matches(t.toString());
        }
 
        public String getName() {

Modified: 
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/TitleSetFilter.java
===================================================================
--- 
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/TitleSetFilter.java
   2010-04-23 01:16:46 UTC (rev 65460)
+++ 
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/TitleSetFilter.java
   2010-04-23 11:32:56 UTC (rev 65461)
@@ -4,15 +4,12 @@
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.Map;
-import java.util.Set;
 
 import de.brightbyte.data.KeyValueLookup;
 import de.brightbyte.data.KeyValueStore;
 import de.brightbyte.data.Lookup;
 import de.brightbyte.data.cursor.DataCursor;
 import de.brightbyte.data.filter.LookupFilter;
-import de.brightbyte.data.filter.StaticSetFilter;
 import de.brightbyte.io.LineCursor;
 import de.brightbyte.util.PersistenceException;
 import de.brightbyte.wikiword.builder.NameMaps;
@@ -21,7 +18,7 @@
        protected final static Integer ONE = new Integer(1);
        
        protected static Lookup<String, Integer> slurpCursor(DataCursor<String> 
titleCursor) throws PersistenceException {
-               KeyValueStore<String, Integer> store = 
NameMaps.<Integer>newStore("string", "en"); //XXX: language...
+               KeyValueStore<String, Integer> store = 
NameMaps.newStore("string", "en"); //XXX: language...
                
                String s;
                while ((s = titleCursor.next()) != null) {
@@ -50,7 +47,7 @@
 
        @SuppressWarnings("unchecked")
        public TitleSetFilter(String name, Lookup<String, Integer> titles) {
-               super(name, new LookupFilter<CharSequence, Integer>(titles, 
ONE));
+               super(name, new LookupFilter<String, Integer>(titles, ONE));
        }
 
        public TitleSetFilter(File titleFile, String enc) throws 
PersistenceException {

Modified: 
trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/store/builder/NameStoreBenchmark.java
===================================================================
--- 
trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/store/builder/NameStoreBenchmark.java
   2010-04-23 01:16:46 UTC (rev 65460)
+++ 
trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/store/builder/NameStoreBenchmark.java
   2010-04-23 11:32:56 UTC (rev 65461)
@@ -1,87 +1,26 @@
 package de.brightbyte.wikiword.store.builder;
 
 import java.io.BufferedReader;
-import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.security.NoSuchAlgorithmException;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Set;
 
-import de.brightbyte.data.BlockDigest;
-import de.brightbyte.data.ByteString;
-import de.brightbyte.data.Codec;
-import de.brightbyte.data.Functor;
-import de.brightbyte.data.KeyDigestingValueStore;
 import de.brightbyte.data.KeyValueStore;
-import de.brightbyte.data.MapLookup;
-import de.brightbyte.data.XorFold32;
-import de.brightbyte.data.XorFold64;
-import de.brightbyte.data.XorWrap;
-import de.brightbyte.io.HuffmanDataCodec;
-import de.brightbyte.text.CharsetCodec;
 import de.brightbyte.util.PersistenceException;
+import de.brightbyte.wikiword.builder.NameMaps;
 
 public class NameStoreBenchmark {
        public static void main(String[] args) throws IOException, 
PersistenceException, NoSuchAlgorithmException, InterruptedException {
-               String type = args[0];
+               String params = args[0];
                int limit = Integer.parseInt(args[1]);
                
-               KeyValueStore<String, Integer> store = null;
+               KeyValueStore<String, Integer> store = 
NameMaps.newStore(params, "en");
                
-               String[] tt = type.split("[,;|+/]");
-               
-               Set<String> params = new HashSet<String>();
-               params.addAll(Arrays.asList(tt));
-               
-               if (params.contains("none") || params.contains("null")) store = 
null;
-               else if (params.contains("string")) store = new 
MapLookup<String, Integer>(new HashMap<String, Integer>());
-               else if (params.contains("utf8") || params.contains("utf16")) {
-                       //initial digest turns string into UTF-8 bytes
-                       Functor<byte[], String> digest;
-                       
-                       if (params.contains("utf8")) digest = new 
Codec.Encoder<String, byte[]>(new CharsetCodec("UTF-8"));
-                       else digest = new Codec.Encoder<String, byte[]>(new 
CharsetCodec("UTF-16"));
-                       
-                       //apply md5 digest or huffman compression
-                       if (params.contains("md5")) digest = new 
Functor.Composite<byte[], byte[], String>(digest, new BlockDigest("MD5"));
-                       else if (params.contains("sha1")) digest = new 
Functor.Composite<byte[], byte[], String>(digest, new BlockDigest("SHA-1"));
-                       else if (params.contains("huff") || 
params.contains("huffman")) digest = new Functor.Composite<byte[], byte[], 
String>(digest, getHuffmanEncoder(args[3]));
-                       
-                       if (params.contains("fold64") || 
params.contains("fold32")) { //fold into Long
-                               Functor<? extends Number, byte[]> fold;
-                               
-                               if (params.contains("fold32")) fold = 
XorFold32.instance;
-                               else fold = XorFold64.instance;
-                               
-                               Functor<Number, String> convert = new 
Functor.Composite<Number, byte[], String>(digest, fold);
-
-                               MapLookup<Number, Integer> numStore = new 
MapLookup<Number, Integer>(new HashMap<Number, Integer>());
-                               store = new KeyDigestingValueStore<String, 
Number, Integer>(numStore, convert);
-                       } else { //keep bytes, wrap in ByteArray
-                                       if (params.contains("wrap8")) digest = 
new Functor.Composite<byte[], byte[], String>(digest, new XorWrap(8));
-                                       else if (params.contains("wrap6")) 
digest = new Functor.Composite<byte[], byte[], String>(digest, new XorWrap(6));
-                                       else if (params.contains("wrap4")) 
digest = new Functor.Composite<byte[], byte[], String>(digest, new XorWrap(4));
-                                       else if (params.contains("wrap4")) 
digest = new Functor.Composite<byte[], byte[], String>(digest, new XorWrap(4));
-                                       
-                                       //create converter that includes 
wrapping the byte array in a ByteString
-                                       Functor<ByteString, String> convert = 
new Functor.Composite<ByteString, byte[], String>(digest, ByteString.wrap);
-               
-                                       //set up the store
-                                       MapLookup<ByteString, Integer> 
byteStore = new MapLookup<ByteString, Integer>(new HashMap<ByteString, 
Integer>());
-                                       store = new 
KeyDigestingValueStore<String, ByteString, Integer>(byteStore, convert);
-                       }
-               }  else {
-                       throw new IllegalArgumentException("bad store type: 
"+type+"; expected 'none' or 'string' or 'utf8' as part of the type spec");
-               }
-               
                BufferedReader in = new BufferedReader(new 
InputStreamReader(new FileInputStream(args[2]), "UTF-8"));
                
                Runtime.getRuntime().gc();
-               Thread.currentThread().sleep(1000);
+               Thread.sleep(1000);
                long baseline = Runtime.getRuntime().totalMemory() - 
Runtime.getRuntime().freeMemory();
 
                long start = System.nanoTime();
@@ -101,17 +40,11 @@
                System.out.format("Processed %d entries in %01.3f sec\n", c, 
t/1000000000.0);
                
                Runtime.getRuntime().gc();
-               Thread.currentThread().sleep(1000);
+               Thread.sleep(1000);
                long m = Runtime.getRuntime().totalMemory() - 
Runtime.getRuntime().freeMemory();
                
                System.out.format("Memoray used: %01.2f MB\n", (m - 
baseline)/(1024.0*1024.0));
                
                if (store!=null) store.close();
        }
-
-       private static Functor<byte[], byte[]> getHuffmanEncoder(String 
dictFile) throws IOException {
-               HuffmanDataCodec codec = new HuffmanDataCodec();
-               codec.buildDictionary(new File(dictFile), 0);
-               return new Codec.Encoder<byte[], byte[]>(codec);
-       }
 }

Modified: trunk/WikiWord/WikiWordBuilder/tweaks.properties.sample
===================================================================
--- trunk/WikiWord/WikiWordBuilder/tweaks.properties.sample     2010-04-23 
01:16:46 UTC (rev 65460)
+++ trunk/WikiWord/WikiWordBuilder/tweaks.properties.sample     2010-04-23 
11:32:56 UTC (rev 65461)
@@ -86,9 +86,10 @@
 #idStoreParameters:
 # basic: string (default), utf8, or utf16
 # for utf8 and utf16: md5, sha1, or huffman (or nothing)
-# for utf8 and utf16: wrap8, fold64
-# "utf16+md5+fold64" uses about half as much memory as "string"
-#dbstore.idManager.idStoreParameters="utf16+md5+fold64"
+# for utf8 and utf16: wrap8 (wrap to 8 bytes), fold64 (wrap to single long 
value)
+# for fold64: primitive (use gnu trove primitive hash)
+# "utf16+md5+fold64+primitive" uses about one third of the memory used by 
"string"
+#dbstore.idManager.idStoreParameters="utf16+md5+fold64+primitive"
 
 ### CycleFinder #####################################
 dbstore.CycleFinder.levelWarningThreshold=32



_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to