Author: rwesten
Date: Thu Oct 17 13:31:18 2013
New Revision: 1533075
URL: http://svn.apache.org/r1533075
Log:
STANBOL-1143: Applied the patch provided by A. Soroka. The Urify utility -
formerly well hidden within the genericrdf indexing tool - now has an entry in
the README.md AND also supports help; This also includes import organization of
the indexing.core module, removal of unused members and .close() is now called
on the BnD tool in the OsgiConfigurationUtil; increased log level of FST
configurations to info
Modified:
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/Urify.java
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConfig.java
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConstants.java
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/destination/OsgiConfigurationUtil.java
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/FinishedEntityDaemon.java
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerConstants.java
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerImpl.java
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathPostProcessor.java
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathSourceProcessor.java
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/EntityIneratorToScoreProviderAdapter.java
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
stanbol/trunk/entityhub/indexing/destination/solryard/src/main/java/org/apache/stanbol/entityhub/indexing/destination/solryard/fst/FstConfig.java
stanbol/trunk/entityhub/indexing/genericrdf/README.md
Modified:
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/Urify.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/Urify.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/Urify.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/Urify.java
Thu Oct 17 13:31:18 2013
@@ -16,6 +16,9 @@
*/
package org.apache.stanbol.entityhub.indexing;
+import static java.lang.System.exit;
+import static java.lang.System.out;
+
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
@@ -77,12 +80,14 @@ public class Urify implements Runnable{
private static Logger log = LoggerFactory.getLogger(Urify.class);
private static final Options options;
+
static {
options = new Options();
options.addOption("h", "help", false, "display this help and exit");
options.addOption("p","prefix",true,
"The URI prefix used for wrapping the bNode Id");
options.addOption("e","encoding",true, "the char encodinf (default:
UTF-8)");
+ options.addOption("o","outputFilePrefix",true, "The prefix to add to
output files, defaults to \"uf_\"");
}
/**
* @param args
@@ -92,39 +97,53 @@ public class Urify implements Runnable{
CommandLineParser parser = new PosixParser();
CommandLine line = parser.parse(options, args);
args = line.getArgs();
+ if (line.hasOption('h')) {
+ out.println("Processes RDF files to translate blank
nodes into prefixed URI nodes.");
+ out.println("-h/--help: Print this help and exit.");
+ out.println("-p/--prefix: Required: The prefix to add
to blank nodes to make them URIs.");
+ out.println("-e/--encoding: The text encoding to expect
in the RDF, defaults to UTF-8.");
+ out.println("-o/--outputFilePrefix: The prefix to add
to output files, defaults to \"uf_\".");
+ exit(0);
+ }
if(!line.hasOption('p')){
log.error("Missing parameter 'prefix' ('p)!");
- System.exit(1);
+ exit(1);
}
String prefix = "<"+line.getOptionValue('p');
- log.info("prfix: {} ",line.getOptionValue('p'));
+ log.info("Using prefix: {} ",line.getOptionValue('p'));
Charset charset;
if(line.hasOption('e')){
charset = Charset.forName(line.getOptionValue('e'));
if(charset == null){
log.error("Unsupported encoding
'{}'!",line.getOptionValue('e'));
- System.exit(1);
+ exit(1);
}
} else {
charset = Charset.forName("UTF-8");
}
+
log.info("charset: {} ",charset.name());
- Urify urify = new Urify(Arrays.asList(args), charset, prefix);
- urify.run(); //TODO: this could support processing multiple files in
parallel
+ Urify urify = new Urify(Arrays.asList(args), charset, prefix,
+ line.hasOption('o') ? line.getOptionValue('o')
: "uf_");
+ urify.run(); //TODO: this could support processing multiple
files in parallel
}
private final Charset charset;
private final String prefix;
+ private final String outputFilePrefix;
protected long start = System.currentTimeMillis();
protected long uf_count = 0;
private List<String> resources;
- public Urify(List<String> resources, Charset charset, String prefix)
throws IOException {
- this.charset = charset;
- this.prefix = prefix;
- this.resources = Collections.synchronizedList(new
ArrayList<String>(resources));
- }
+ public Urify(List<String> resources, Charset charset, String prefix,
+ final String outputFilePrefix) throws IOException {
+ this.charset = charset;
+ this.prefix = prefix;
+ this.outputFilePrefix = outputFilePrefix;
+ this.resources = Collections.synchronizedList(new
ArrayList<String>(
+ resources));
+ }
public void run() {
String source;
@@ -148,7 +167,7 @@ public class Urify implements Runnable{
if(source.isFile()){
String path = FilenameUtils.getFullPathNoEndSeparator(resource);
String name = FilenameUtils.getName(resource);
- File target = new File(path,"uf_"+name);
+ File target = new File(path, outputFilePrefix + name);
int i=0;
while(target.exists()){
i++;
Modified:
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConfig.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConfig.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConfig.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConfig.java
Thu Oct 17 13:31:18 2013
@@ -77,9 +77,7 @@ public class IndexingConfig {
private static final String SOURCE_FOLDER = "resources";
private static final String SOURCE_PATH =
DEFAULT_ROOT_PATH+File.separatorChar+SOURCE_FOLDER;
private static final String DESTINATION_FOLDER = "destination";
- private static final String DESTINATION_PATH =
DEFAULT_ROOT_PATH+File.separatorChar+DESTINATION_FOLDER;
private static final String DISTRIBUTION_FOLDER = "dist";
- private static final String DISTRIBUTION_PATH =
DEFAULT_ROOT_PATH+File.separatorChar+DISTRIBUTION_FOLDER;
private static final String INDEXING_PROPERTIES = "indexing.properties";
private static final String CONFIG_PARAM = "config";
public static final String KEY_INDEXING_CONFIG = "indexingConfig";
Modified:
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConstants.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConstants.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConstants.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/config/IndexingConstants.java
Thu Oct 17 13:31:18 2013
@@ -16,7 +16,6 @@
*/
package org.apache.stanbol.entityhub.indexing.core.config;
-import java.io.File;
/**
* Constants defines/used for Indexing.
Modified:
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/destination/OsgiConfigurationUtil.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/destination/OsgiConfigurationUtil.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/destination/OsgiConfigurationUtil.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/destination/OsgiConfigurationUtil.java
Thu Oct 17 13:31:18 2013
@@ -256,6 +256,7 @@ public final class OsgiConfigurationUtil
builder.addClasspath(new
File(config.getDestinationFolder(),CONFIG_ROOT));
} catch (IOException e) {
log.warn("Unable to build OSGI Bundle for Indexed Referenced Site
"+config.getName(),e);
+ builder.close();
return;
}
Jar jar;
@@ -263,8 +264,10 @@ public final class OsgiConfigurationUtil
jar = builder.build();
} catch (Exception e) {
log.warn("Unable to build OSGI Bundle for Indexed Referenced Site
"+config.getName(),e);
- return;
- }
+ return;
+ } finally {
+ builder.close();
+ }
try {
jar.write(new File(config.getDistributionFolder(),
CONFIG_PACKAGE+config.getName()+"-1.0.0.jar"));
Modified:
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/FinishedEntityDaemon.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/FinishedEntityDaemon.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/FinishedEntityDaemon.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/FinishedEntityDaemon.java
Thu Oct 17 13:31:18 2013
@@ -21,9 +21,7 @@ import static org.apache.stanbol.entityh
import static
org.apache.stanbol.entityhub.indexing.core.impl.IndexerConstants.SOURCE_STARTED;
import static
org.apache.stanbol.entityhub.indexing.core.impl.IndexerConstants.STORE_DURATION;
-import java.io.BufferedReader;
import java.io.BufferedWriter;
-import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
Modified:
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerConstants.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerConstants.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerConstants.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerConstants.java
Thu Oct 17 13:31:18 2013
@@ -81,8 +81,8 @@ public interface IndexerConstants {
*/
//ignore the Type safety because the item is of
//INDEXING_COMPLETED_QUEUE_ITEM is anyway null
- @SuppressWarnings("unchecked")
- QueueItem INDEXING_COMPLETED_QUEUE_ITEM = new QueueItem(null);
+ @SuppressWarnings("rawtypes")
+ QueueItem INDEXING_COMPLETED_QUEUE_ITEM = new QueueItem<Void>(null);
/**
* The sequence number for {@link IndexingDaemon}s that read from the
Modified:
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerImpl.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerImpl.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerImpl.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/impl/IndexerImpl.java
Thu Oct 17 13:31:18 2013
@@ -56,7 +56,6 @@ import org.apache.stanbol.entityhub.inde
import org.apache.stanbol.entityhub.indexing.core.Indexer;
import org.apache.stanbol.entityhub.indexing.core.IndexingComponent;
import org.apache.stanbol.entityhub.indexing.core.IndexingDestination;
-import
org.apache.stanbol.entityhub.indexing.core.destination.OsgiConfigurationUtil;
import org.apache.stanbol.entityhub.indexing.core.event.IndexingEvent;
import org.apache.stanbol.entityhub.indexing.core.event.IndexingListener;
import
org.apache.stanbol.entityhub.indexing.core.impl.IndexingDaemon.IndexingDaemonEventObject;
@@ -382,7 +381,7 @@ public class IndexerImpl implements Inde
if(state == State.POSTPROCESSING){ //if state > INITIALISED
throw new IllegalStateException("Unable to skip post
processing if postprocessing is already in progress!");
}
- if(state.ordinal() >= state.POSTPROCESSED.ordinal()){
+ if(state.ordinal() >= State.POSTPROCESSED.ordinal()){
return; //already post processed
}
setState(State.POSTPROCESSED);
@@ -564,7 +563,7 @@ public class IndexerImpl implements Inde
if(state == State.INDEXING){
throw new IllegalStateException("Unable to skip indexing if
indexing is already in progress!");
}
- if(state.ordinal() >= state.INDEXED.ordinal()){ //if state >
INDEXING
+ if(state.ordinal() >= State.INDEXED.ordinal()){ //if state >
INDEXING
return; //already in INDEXED state
}
setState(State.INDEXED);
Modified:
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java
Thu Oct 17 13:31:18 2013
@@ -124,7 +124,8 @@ public class FieldValueFilter implements
/**
* @param value
*/
- private void parseFilterConfig(Object value) {
+ @SuppressWarnings("unchecked")
+ private void parseFilterConfig(Object value) {
Collection<String> values;
if(value instanceof String){
values = Arrays.asList(value.toString().split(";"));
Modified:
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathPostProcessor.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathPostProcessor.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathPostProcessor.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathPostProcessor.java
Thu Oct 17 13:31:18 2013
@@ -22,7 +22,6 @@ import org.apache.stanbol.entityhub.inde
import org.apache.stanbol.entityhub.indexing.core.IndexingDestination;
import org.apache.stanbol.entityhub.ldpath.EntityhubLDPath;
import org.apache.stanbol.entityhub.ldpath.backend.YardBackend;
-import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.apache.stanbol.entityhub.servicesapi.yard.Yard;
import at.newmedialab.ldpath.api.backend.RDFBackend;
Modified:
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathSourceProcessor.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathSourceProcessor.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathSourceProcessor.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/LdpathSourceProcessor.java
Thu Oct 17 13:31:18 2013
@@ -21,23 +21,18 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
-import java.math.BigDecimal;
-import java.math.BigInteger;
import java.net.URI;
import java.nio.charset.Charset;
import java.util.Collection;
-import java.util.Date;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
-import java.util.concurrent.ThreadPoolExecutor;
import org.apache.commons.io.IOUtils;
import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
import org.apache.stanbol.entityhub.indexing.core.EntityProcessor;
import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
import
org.apache.stanbol.entityhub.ldpath.EntityhubLDPath.EntityhubConfiguration;
-import org.apache.stanbol.entityhub.ldpath.backend.SingleRepresentationBackend;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
import org.slf4j.Logger;
Modified:
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/EntityIneratorToScoreProviderAdapter.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/EntityIneratorToScoreProviderAdapter.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/EntityIneratorToScoreProviderAdapter.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/EntityIneratorToScoreProviderAdapter.java
Thu Oct 17 13:31:18 2013
@@ -20,8 +20,8 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.stanbol.entityhub.indexing.core.EntityIterator;
-import org.apache.stanbol.entityhub.indexing.core.EntityScoreProvider;
import org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore;
+import org.apache.stanbol.entityhub.indexing.core.EntityScoreProvider;
import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
Modified:
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/source/ResourceLoader.java
Thu Oct 17 13:31:18 2013
@@ -28,9 +28,9 @@ import java.util.EnumSet;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
-import java.util.Map.Entry;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipFile;
Modified:
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ConfigTest.java
Thu Oct 17 13:31:18 2013
@@ -16,6 +16,10 @@
*/
package org.apache.stanbol.entityhub.indexing.core;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
@@ -29,9 +33,6 @@ import org.apache.stanbol.entityhub.inde
import org.apache.stanbol.entityhub.indexing.core.normaliser.RangeNormaliser;
import org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser;
import
org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator;
-
-import static org.junit.Assert.*;
-
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
Modified:
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/IndexerTest.java
Thu Oct 17 13:31:18 2013
@@ -16,6 +16,11 @@
*/
package org.apache.stanbol.entityhub.indexing.core;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -25,8 +30,8 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
-import java.util.Set;
import java.util.Map.Entry;
+import java.util.Set;
import org.apache.commons.io.FilenameUtils;
import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
@@ -37,13 +42,11 @@ import org.apache.stanbol.entityhub.serv
import org.apache.stanbol.entityhub.servicesapi.model.Text;
import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
-import org.junit.After;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import static org.junit.Assert.*;
public class IndexerTest {
Modified:
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/ResourceLoaderTest.java
Thu Oct 17 13:31:18 2013
@@ -16,6 +16,11 @@
*/
package org.apache.stanbol.entityhub.indexing.core;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
@@ -32,7 +37,6 @@ import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import static org.junit.Assert.*;
public class ResourceLoaderTest {
/**
Modified:
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
Thu Oct 17 13:31:18 2013
@@ -17,7 +17,6 @@
package org.apache.stanbol.entityhub.indexing.core.processor;
import java.io.IOException;
-import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
Modified:
stanbol/trunk/entityhub/indexing/destination/solryard/src/main/java/org/apache/stanbol/entityhub/indexing/destination/solryard/fst/FstConfig.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/destination/solryard/src/main/java/org/apache/stanbol/entityhub/indexing/destination/solryard/fst/FstConfig.java?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/destination/solryard/src/main/java/org/apache/stanbol/entityhub/indexing/destination/solryard/fst/FstConfig.java
(original)
+++
stanbol/trunk/entityhub/indexing/destination/solryard/src/main/java/org/apache/stanbol/entityhub/indexing/destination/solryard/fst/FstConfig.java
Thu Oct 17 13:31:18 2013
@@ -117,7 +117,7 @@ public class FstConfig {
CorpusCreationInfo fstInfo = new
CorpusCreationInfo(language,
fieldInfo.name, storeFieldName,
fieldType, fstFile);
- log.debug(" ... init {} ", fstInfo);
+ log.info(" ... init {} ", fstInfo);
addCorpus(fstInfo);
}
} else {
Modified: stanbol/trunk/entityhub/indexing/genericrdf/README.md
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/genericrdf/README.md?rev=1533075&r1=1533074&r2=1533075&view=diff
==============================================================================
--- stanbol/trunk/entityhub/indexing/genericrdf/README.md (original)
+++ stanbol/trunk/entityhub/indexing/genericrdf/README.md Thu Oct 17 13:31:18
2013
@@ -229,6 +229,20 @@ the Bundle described above. To install t
{name} denotes to the value you configured for the "name" property within the
"indexing.properties" file.
+### A note about blank nodes
+If your input data sets contain large numbers of blank nodes, you may find that
+you have problems running out of heap space during indexing. This is because
Jena
+(like many semantic stores) keeps a store of blank nodes in core memory while
+importing. Keeping in mind that EntityHub does not support the use of blank
nodes,
+there is a means of indexing such data sets nonetheless. You can convert them
to
+named nodes and then index. There is a convenient tool packaged with Stanbol
for
+this purpose, called "Urify" (org.apache.stanbol.entityhub.indexing.Urify).
+It is available in the runnable JAR file built by this indexer. To use it, put
that
+JAR on your classpath, and you can execute Urify, giving it a list of files to
process.
+Use the "-h" or "--help" flag to see options for Urify:
-
+ java -Xmx1024m -cp org.apache.stanbol.entityhub.indexing.genericrdf-*.jar \
+ org.apache.stanbol.entityhub.indexing.Urify --help
+
+
\ No newline at end of file