http://www.mediawiki.org/wiki/Special:Code/MediaWiki/65461
Revision: 65461
Author: daniel
Date: 2010-04-23 11:32:56 +0000 (Fri, 23 Apr 2010)
Log Message:
-----------
idManager options / dependency update
Modified Paths:
--------------
trunk/WikiWord/WikiWordBuilder/.classpath
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/NameMaps.java
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/util/IdManagerBenchmark.java
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/PageTitleFilter.java
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/TitleSetFilter.java
trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/store/builder/NameStoreBenchmark.java
trunk/WikiWord/WikiWordBuilder/tweaks.properties.sample
Added Paths:
-----------
trunk/WikiWord/WikiWordBuilder/contrib/
trunk/WikiWord/WikiWordBuilder/contrib/README
trunk/WikiWord/WikiWordBuilder/contrib/install-all
trunk/WikiWord/WikiWordBuilder/contrib/patricia-trie-0.1.pom
Removed Paths:
-------------
trunk/WikiWord/WikiWordBuilder/contrib/jzlib-1.0.7-LICENSE.txt
trunk/WikiWord/WikiWordBuilder/contrib/jzlib-1.0.7.jar
trunk/WikiWord/WikiWordBuilder/contrib/jzlib-1.0.7.url
trunk/WikiWord/WikiWordBuilder/lib/
Modified: trunk/WikiWord/WikiWordBuilder/.classpath
===================================================================
--- trunk/WikiWord/WikiWordBuilder/.classpath 2010-04-23 01:16:46 UTC (rev 65460)
+++ trunk/WikiWord/WikiWordBuilder/.classpath 2010-04-23 11:32:56 UTC (rev 65461)
@@ -9,12 +9,7 @@
<classpathentry kind="con"
path="org.eclipse.jdt.junit.JUNIT_CONTAINER/3.8.1"/>
<classpathentry combineaccessrules="false" kind="src"
path="/BrightByteDB"/>
<classpathentry kind="var"
path="M2_REPO/mysql/mysql-connector-java/3.1.11/mysql-connector-java-3.1.11.jar"/>
- <classpathentry kind="lib" path="lib/patricia-trie-0.1.jar"
sourcepath="/home/daniel/src/patricia-trie-0.1/patricia-trie-0.1.jar">
- <attributes>
- <attribute name="javadoc_location"
value="file:/home/daniel/src/patricia-trie-0.1/api/"/>
- </attributes>
- </classpathentry>
- <classpathentry kind="var"
path="M2_REPO/org/javolution/javolution/5.2.6/javolution-5.2.6.jar"/>
- <classpathentry kind="lib" path="lib/jzlib-1.0.7.jar"
sourcepath="/home/daniel/src/jzlib-1.0.7"/>
+ <classpathentry kind="var"
path="M2_REPO/trove/trove/3.0.0a3/trove-3.0.0a3.jar"/>
+ <classpathentry kind="var"
path="M2_REPO/kapsi/patricia-trie/0.1/patricia-trie-0.1.jar"/>
<classpathentry kind="output" path="bin"/>
</classpath>
Property changes on: trunk/WikiWord/WikiWordBuilder/contrib
___________________________________________________________________
Added: svn:mergeinfo
+
Added: trunk/WikiWord/WikiWordBuilder/contrib/README
===================================================================
--- trunk/WikiWord/WikiWordBuilder/contrib/README (rev 0)
+++ trunk/WikiWord/WikiWordBuilder/contrib/README 2010-04-23 11:32:56 UTC (rev 65461)
@@ -0,0 +1,7 @@
+This directory contains libraries that are not readily available from
+a well known Maven repository. For each jar file, there should be
+a corresponding pom file as well as a license text.
+
+Before using Apache Maven, please install these libraries into your local
+maven repository. You can use the script "install-all" to do this. Maven
+will automatically download any other libraries required for building.
\ No newline at end of file
Added: trunk/WikiWord/WikiWordBuilder/contrib/install-all
===================================================================
--- trunk/WikiWord/WikiWordBuilder/contrib/install-all
(rev 0)
+++ trunk/WikiWord/WikiWordBuilder/contrib/install-all 2010-04-23 11:32:56 UTC
(rev 65461)
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+for pom in *.pom; do
+ jar="${pom%%.pom}.jar"
+ mvn install:install-file -Dfile="$jar" -DpomFile="$pom"
+done
Property changes on: trunk/WikiWord/WikiWordBuilder/contrib/install-all
___________________________________________________________________
Added: svn:executable
+ *
Deleted: trunk/WikiWord/WikiWordBuilder/contrib/jzlib-1.0.7-LICENSE.txt
===================================================================
--- trunk/WikiWord/WikiWordBuilder/lib/jzlib-1.0.7-LICENSE.txt 2010-04-04
21:32:42 UTC (rev 64604)
+++ trunk/WikiWord/WikiWordBuilder/contrib/jzlib-1.0.7-LICENSE.txt
2010-04-23 11:32:56 UTC (rev 65461)
@@ -1,29 +0,0 @@
-JZlib 0.0.* were released under the GNU LGPL license. Later, we have switched
-over to a BSD-style license.
-
-------------------------------------------------------------------------------
-Copyright (c) 2000,2001,2002,2003 ymnk, JCraft,Inc. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the distribution.
-
- 3. The names of the authors may not be used to endorse or promote products
- derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
-INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
-FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JCRAFT,
-INC. OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
-OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
-EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Deleted: trunk/WikiWord/WikiWordBuilder/contrib/jzlib-1.0.7.jar
===================================================================
(Binary files differ)
Deleted: trunk/WikiWord/WikiWordBuilder/contrib/jzlib-1.0.7.url
===================================================================
--- trunk/WikiWord/WikiWordBuilder/lib/jzlib-1.0.7.url 2010-04-04 21:32:42 UTC
(rev 64604)
+++ trunk/WikiWord/WikiWordBuilder/contrib/jzlib-1.0.7.url 2010-04-23
11:32:56 UTC (rev 65461)
@@ -1 +0,0 @@
-http://www.jcraft.com/jzlib/
Added: trunk/WikiWord/WikiWordBuilder/contrib/patricia-trie-0.1.pom
===================================================================
--- trunk/WikiWord/WikiWordBuilder/contrib/patricia-trie-0.1.pom
(rev 0)
+++ trunk/WikiWord/WikiWordBuilder/contrib/patricia-trie-0.1.pom
2010-04-23 11:32:56 UTC (rev 65461)
@@ -0,0 +1,9 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>kapsi</groupId>
+ <artifactId>patricia-trie</artifactId>
+ <version>0.1</version>
+</project>
Modified:
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/NameMaps.java
===================================================================
---
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/NameMaps.java
2010-04-23 01:16:46 UTC (rev 65460)
+++
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/NameMaps.java
2010-04-23 11:32:56 UTC (rev 65461)
@@ -20,6 +20,7 @@
import de.brightbyte.data.Functor;
import de.brightbyte.data.KeyDigestingValueStore;
import de.brightbyte.data.KeyValueStore;
+import de.brightbyte.data.LongIntLookup;
import de.brightbyte.data.MapLookup;
import de.brightbyte.data.XorFold32;
import de.brightbyte.data.XorFold64;
@@ -42,8 +43,8 @@
return new HashMap<String, V>();
}*/
- public static <V>KeyValueStore<String, V> newStore(String
storeParams, String lang) {
- KeyValueStore<String, V> store = null;
+ public static KeyValueStore<String, Integer> newStore(String
storeParams, String lang) {
+ KeyValueStore<String, Integer> store = null;
String[] tt = storeParams.split("[,;|+/ &]+");
@@ -51,7 +52,7 @@
params.addAll(Arrays.asList(tt));
if (params.contains("none") || params.contains("null"))
store = null;
- else if (params.contains("string")) store = new
MapLookup<String, V>(new HashMap<String, V>());
+ else if (params.contains("string")) store = new
MapLookup<String, Integer>(new HashMap<String, Integer>());
else if (params.contains("utf8") ||
params.contains("utf16")) {
//initial digest turns string into UTF-8 bytes
Functor<byte[], String> digest;
@@ -72,16 +73,19 @@
throw new RuntimeException(e);
}
- if (params.contains("fold64") ||
params.contains("fold32")) { //fold into Long
- Functor<? extends Number, byte[]> fold;
+ if (params.contains("fold64")) { //fold into
Long
+ Functor<Long, byte[]> fold;
+ fold = XorFold64.instance;
- if (params.contains("fold32")) fold =
XorFold32.instance;
- else fold = XorFold64.instance;
-
- Functor<Number, String> convert = new
Functor.Composite<Number, byte[], String>(digest, fold);
+ Functor<Long, String> convert = new
Functor.Composite<Long, byte[], String>(digest, fold);
- MapLookup<Number, V> numStore = new
MapLookup<Number, V>(new HashMap<Number, V>());
- store = new
KeyDigestingValueStore<String, Number, V>(numStore, convert);
+ if (params.contains("primitive")) {
+ LongIntLookup<Long> numStore =
new LongIntLookup<Long>();
+ store = new
KeyDigestingValueStore<String, Long, Integer>(numStore, convert);
+ } else {
+ MapLookup<Long, Integer>
numStore = new MapLookup<Long, Integer>(new HashMap<Long, Integer>());
+ store = new
KeyDigestingValueStore<String, Long, Integer>(numStore, convert);
+ }
} else { //keep bytes, wrap in ByteArray
if (params.contains("wrap8"))
digest = new Functor.Composite<byte[], byte[], String>(digest, new XorWrap(8));
else if
(params.contains("wrap6")) digest = new Functor.Composite<byte[], byte[],
String>(digest, new XorWrap(6));
@@ -92,8 +96,8 @@
Functor<ByteString, String>
convert = new Functor.Composite<ByteString, byte[], String>(digest,
ByteString.wrap);
//set up the store
- MapLookup<ByteString, V>
byteStore = new MapLookup<ByteString, V>(new HashMap<ByteString, V>());
- store = new
KeyDigestingValueStore<String, ByteString, V>(byteStore, convert);
+ MapLookup<ByteString, Integer>
byteStore = new MapLookup<ByteString, Integer>(new HashMap<ByteString,
Integer>());
+ store = new
KeyDigestingValueStore<String, ByteString, Integer>(byteStore, convert);
}
} else {
throw new IllegalArgumentException("bad store
spec: "+storeParams+"; expected 'none' or 'string' or 'utf8' or 'utf16' as part
of the type spec");
Modified:
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/util/IdManagerBenchmark.java
===================================================================
---
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/util/IdManagerBenchmark.java
2010-04-23 01:16:46 UTC (rev 65460)
+++
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/util/IdManagerBenchmark.java
2010-04-23 11:32:56 UTC (rev 65461)
@@ -10,10 +10,7 @@
import java.util.HashMap;
import java.util.Map;
-import javolution.util.FastMap;
-
import org.ardverk.collection.PatriciaTrie;
-import org.ardverk.collection.StringKeyAnalyzer;
import de.brightbyte.audit.DebugUtil;
import de.brightbyte.data.Codec;
@@ -68,8 +65,8 @@
Map<String, Integer> map;
if (mode.equals("hash")) map = new HashMap<String,
Integer>();
- else if (mode.equals("fast")) map = new FastMap<String,
Integer>();
- else if (mode.equals("trie")) map = new
PatriciaTrie<String, Integer>(StringKeyAnalyzer.INSTANCE);
+ //else if (mode.equals("fast")) map = new
FastMap<String, Integer>();
+ //else if (mode.equals("trie")) map = new
PatriciaTrie<String, Integer>(StringKeyAnalyzer.INSTANCE);
else if (mode.equals("rtrie")) map = new
PatriciaTrie<String, Integer>(ReverseStringKeyAnalyzer.INSTANCE);
else if (mode.equals("terse")) map = new
TerseIdMap<String>(String.class, NaturalComparator.<String>instance());
else throw new IllegalArgumentException("unknown mode:
"+mode);
@@ -82,8 +79,8 @@
CharsetCodec converter = new CharsetCodec(enc);
if (mode.equals("hash")) map = new HashMap<byte[],
Integer>();
- else if (mode.equals("fast")) map = new FastMap<byte[],
Integer>();
- else if (mode.equals("trie")) map = new
PatriciaTrie<byte[], Integer>(ByteArrayKeyAnalyzer.INSTANCE);
+ //else if (mode.equals("fast")) map = new
FastMap<byte[], Integer>();
+ //else if (mode.equals("trie")) map = new
PatriciaTrie<byte[], Integer>(ByteArrayKeyAnalyzer.INSTANCE);
else if (mode.equals("rtrie")) throw new
IllegalArgumentException("Reverte Trie is not yet supported for byte arrays");
else if (mode.equals("terse")) map = new
TerseIdMap<byte[]>(byte[].class, ArrayComparator.BYTES);
else throw new IllegalArgumentException("unknown mode:
"+mode);
Modified:
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/PageTitleFilter.java
===================================================================
---
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/PageTitleFilter.java
2010-04-23 01:16:46 UTC (rev 65460)
+++
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/PageTitleFilter.java
2010-04-23 11:32:56 UTC (rev 65461)
@@ -4,10 +4,10 @@
import de.brightbyte.wikiword.analyzer.WikiPage;
public class PageTitleFilter implements WikiPageFilter {
- protected Filter<CharSequence> filter;
+ protected Filter<String> filter;
private String name;
- public PageTitleFilter(String name, Filter<CharSequence> filter) {
+ public PageTitleFilter(String name, Filter<String> filter) {
if (filter==null) throw new NullPointerException();
this.filter = filter;
this.name = name;
@@ -15,7 +15,7 @@
public boolean matches(WikiPage page) {
CharSequence t = page.getResourceName();
- return filter.matches(t);
+ return filter.matches(t.toString());
}
public String getName() {
Modified:
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/TitleSetFilter.java
===================================================================
---
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/TitleSetFilter.java
2010-04-23 01:16:46 UTC (rev 65460)
+++
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/TitleSetFilter.java
2010-04-23 11:32:56 UTC (rev 65461)
@@ -4,15 +4,12 @@
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.util.Map;
-import java.util.Set;
import de.brightbyte.data.KeyValueLookup;
import de.brightbyte.data.KeyValueStore;
import de.brightbyte.data.Lookup;
import de.brightbyte.data.cursor.DataCursor;
import de.brightbyte.data.filter.LookupFilter;
-import de.brightbyte.data.filter.StaticSetFilter;
import de.brightbyte.io.LineCursor;
import de.brightbyte.util.PersistenceException;
import de.brightbyte.wikiword.builder.NameMaps;
@@ -21,7 +18,7 @@
protected final static Integer ONE = new Integer(1);
protected static Lookup<String, Integer> slurpCursor(DataCursor<String>
titleCursor) throws PersistenceException {
- KeyValueStore<String, Integer> store =
NameMaps.<Integer>newStore("string", "en"); //XXX: language...
+ KeyValueStore<String, Integer> store =
NameMaps.newStore("string", "en"); //XXX: language...
String s;
while ((s = titleCursor.next()) != null) {
@@ -50,7 +47,7 @@
@SuppressWarnings("unchecked")
public TitleSetFilter(String name, Lookup<String, Integer> titles) {
- super(name, new LookupFilter<CharSequence, Integer>(titles,
ONE));
+ super(name, new LookupFilter<String, Integer>(titles, ONE));
}
public TitleSetFilter(File titleFile, String enc) throws
PersistenceException {
Modified:
trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/store/builder/NameStoreBenchmark.java
===================================================================
---
trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/store/builder/NameStoreBenchmark.java
2010-04-23 01:16:46 UTC (rev 65460)
+++
trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/store/builder/NameStoreBenchmark.java
2010-04-23 11:32:56 UTC (rev 65461)
@@ -1,87 +1,26 @@
package de.brightbyte.wikiword.store.builder;
import java.io.BufferedReader;
-import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.security.NoSuchAlgorithmException;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Set;
-import de.brightbyte.data.BlockDigest;
-import de.brightbyte.data.ByteString;
-import de.brightbyte.data.Codec;
-import de.brightbyte.data.Functor;
-import de.brightbyte.data.KeyDigestingValueStore;
import de.brightbyte.data.KeyValueStore;
-import de.brightbyte.data.MapLookup;
-import de.brightbyte.data.XorFold32;
-import de.brightbyte.data.XorFold64;
-import de.brightbyte.data.XorWrap;
-import de.brightbyte.io.HuffmanDataCodec;
-import de.brightbyte.text.CharsetCodec;
import de.brightbyte.util.PersistenceException;
+import de.brightbyte.wikiword.builder.NameMaps;
public class NameStoreBenchmark {
public static void main(String[] args) throws IOException,
PersistenceException, NoSuchAlgorithmException, InterruptedException {
- String type = args[0];
+ String params = args[0];
int limit = Integer.parseInt(args[1]);
- KeyValueStore<String, Integer> store = null;
+ KeyValueStore<String, Integer> store =
NameMaps.newStore(params, "en");
- String[] tt = type.split("[,;|+/]");
-
- Set<String> params = new HashSet<String>();
- params.addAll(Arrays.asList(tt));
-
- if (params.contains("none") || params.contains("null")) store =
null;
- else if (params.contains("string")) store = new
MapLookup<String, Integer>(new HashMap<String, Integer>());
- else if (params.contains("utf8") || params.contains("utf16")) {
- //initial digest turns string into UTF-8 bytes
- Functor<byte[], String> digest;
-
- if (params.contains("utf8")) digest = new
Codec.Encoder<String, byte[]>(new CharsetCodec("UTF-8"));
- else digest = new Codec.Encoder<String, byte[]>(new
CharsetCodec("UTF-16"));
-
- //apply md5 digest or huffman compression
- if (params.contains("md5")) digest = new
Functor.Composite<byte[], byte[], String>(digest, new BlockDigest("MD5"));
- else if (params.contains("sha1")) digest = new
Functor.Composite<byte[], byte[], String>(digest, new BlockDigest("SHA-1"));
- else if (params.contains("huff") ||
params.contains("huffman")) digest = new Functor.Composite<byte[], byte[],
String>(digest, getHuffmanEncoder(args[3]));
-
- if (params.contains("fold64") ||
params.contains("fold32")) { //fold into Long
- Functor<? extends Number, byte[]> fold;
-
- if (params.contains("fold32")) fold =
XorFold32.instance;
- else fold = XorFold64.instance;
-
- Functor<Number, String> convert = new
Functor.Composite<Number, byte[], String>(digest, fold);
-
- MapLookup<Number, Integer> numStore = new
MapLookup<Number, Integer>(new HashMap<Number, Integer>());
- store = new KeyDigestingValueStore<String,
Number, Integer>(numStore, convert);
- } else { //keep bytes, wrap in ByteArray
- if (params.contains("wrap8")) digest =
new Functor.Composite<byte[], byte[], String>(digest, new XorWrap(8));
- else if (params.contains("wrap6"))
digest = new Functor.Composite<byte[], byte[], String>(digest, new XorWrap(6));
- else if (params.contains("wrap4"))
digest = new Functor.Composite<byte[], byte[], String>(digest, new XorWrap(4));
- else if (params.contains("wrap4"))
digest = new Functor.Composite<byte[], byte[], String>(digest, new XorWrap(4));
-
- //create converter that includes
wrapping the byte array in a ByteString
- Functor<ByteString, String> convert =
new Functor.Composite<ByteString, byte[], String>(digest, ByteString.wrap);
-
- //set up the store
- MapLookup<ByteString, Integer>
byteStore = new MapLookup<ByteString, Integer>(new HashMap<ByteString,
Integer>());
- store = new
KeyDigestingValueStore<String, ByteString, Integer>(byteStore, convert);
- }
- } else {
- throw new IllegalArgumentException("bad store type:
"+type+"; expected 'none' or 'string' or 'utf8' as part of the type spec");
- }
-
BufferedReader in = new BufferedReader(new
InputStreamReader(new FileInputStream(args[2]), "UTF-8"));
Runtime.getRuntime().gc();
- Thread.currentThread().sleep(1000);
+ Thread.sleep(1000);
long baseline = Runtime.getRuntime().totalMemory() -
Runtime.getRuntime().freeMemory();
long start = System.nanoTime();
@@ -101,17 +40,11 @@
System.out.format("Processed %d entries in %01.3f sec\n", c,
t/1000000000.0);
Runtime.getRuntime().gc();
- Thread.currentThread().sleep(1000);
+ Thread.sleep(1000);
long m = Runtime.getRuntime().totalMemory() -
Runtime.getRuntime().freeMemory();
System.out.format("Memoray used: %01.2f MB\n", (m -
baseline)/(1024.0*1024.0));
if (store!=null) store.close();
}
-
- private static Functor<byte[], byte[]> getHuffmanEncoder(String
dictFile) throws IOException {
- HuffmanDataCodec codec = new HuffmanDataCodec();
- codec.buildDictionary(new File(dictFile), 0);
- return new Codec.Encoder<byte[], byte[]>(codec);
- }
}
Modified: trunk/WikiWord/WikiWordBuilder/tweaks.properties.sample
===================================================================
--- trunk/WikiWord/WikiWordBuilder/tweaks.properties.sample 2010-04-23 01:16:46 UTC (rev 65460)
+++ trunk/WikiWord/WikiWordBuilder/tweaks.properties.sample 2010-04-23 11:32:56 UTC (rev 65461)
@@ -86,9 +86,10 @@
#idStoreParameters:
# basic: string (default), utf8, or utf16
# for utf8 and utf16: md5, sha1, or huffman (or nothing)
-# for utf8 and utf16: wrap8, fold64
-# "utf16+md5+fold64" uses about half as much memory as "string"
-#dbstore.idManager.idStoreParameters="utf16+md5+fold64"
+# for utf8 and utf16: wrap8 (wrap to 8 bytes), fold64 (wrap to single long value)
+# for fold64: primitive (use gnu trove primitive hash)
+# "utf16+md5+fold64+primitive" uses about one third of the memory used by "string"
+#dbstore.idManager.idStoreParameters="utf16+md5+fold64+primitive"
### CycleFinder #####################################
dbstore.CycleFinder.levelWarningThreshold=32
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs