Author: rwesten
Date: Wed Jun  5 07:23:15 2013
New Revision: 1489728

URL: http://svn.apache.org/r1489728
Log:
The Topic Engine now uses the ManagedSolrServer to init SolrCores. The same is 
true for Trainingset. Note that this means that SolrCore configurations are now 
loaded via the DataFileProvider infrastructure. This allows users to load 
custom models and/or pre-trained models copied to the 'stanbol/datafiles' 
folder - STANBOL-1087; updates default schemas used by the TopicEngine and 
Trainingset to Solr 4 - STANBOL-1086; Added a default SolrCore configuration 
for the Topic Engine that supports n-grams - STANBOL-1089; removed all 
configuration properties used to configure the name of Solr fields from the 
Felix Webconsole dialog. Added constants for the default values. Configuration 
of those properties is still supported by parsing OSGI configuration files - 
STANBOL-1090

Added:
    
stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/shingle-topic-model/
    
stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/shingle-topic-model/conf/
    
stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/shingle-topic-model/conf/schema.xml
    
stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/shingle-topic-model/conf/solrconfig.xml
Modified:
    stanbol/trunk/enhancement-engines/topic/engine/pom.xml
    
stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
    
stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/topic/ConfiguredSolrCoreTracker.java
    
stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/topic/training/SolrTrainingSet.java
    
stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/OSGI-INF/metatype/metatype.properties
    
stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/default-topic-model/conf/schema.xml
    
stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/default-topic-model/conf/solrconfig.xml
    
stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/default-topic-trainingset/conf/solrconfig.xml
    
stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java

Modified: stanbol/trunk/enhancement-engines/topic/engine/pom.xml
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/topic/engine/pom.xml?rev=1489728&r1=1489727&r2=1489728&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/topic/engine/pom.xml (original)
+++ stanbol/trunk/enhancement-engines/topic/engine/pom.xml Wed Jun  5 07:23:15 
2013
@@ -74,7 +74,7 @@
             <Private-Package>
               org.apache.stanbol.enhancer.engine.topic
             </Private-Package>
-            <Install-Path>install-config</Install-Path>
+            <!--  Install-Path>install-config</Install-Path -->
             <Data-Files>data-files</Data-Files>
             <Data-Files-Priority>-100</Data-Files-Priority>
           </instructions>

Modified: 
stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1489728&r1=1489727&r2=1489728&view=diff
==============================================================================
--- 
stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
 (original)
+++ 
stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
 Wed Jun  5 07:23:15 2013
@@ -25,6 +25,7 @@ import java.util.Collections;
 import java.util.Date;
 import java.util.Dictionary;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Hashtable;
 import java.util.Iterator;
 import java.util.LinkedHashSet;
@@ -142,70 +143,118 @@ import org.slf4j.LoggerFactory;
 @Component(metatype = true, immediate = true, configurationFactory = true, 
policy = ConfigurationPolicy.REQUIRE)
 @Service
 @Properties(value = {
-                     @Property(name = EnhancementEngine.PROPERTY_NAME),
-                     @Property(name = TopicClassificationEngine.ORDER, 
intValue = 100),
-                     @Property(name = TopicClassificationEngine.SOLR_CORE),
-                     @Property(name = TopicClassificationEngine.LANGUAGES),
-                     @Property(name = 
TopicClassificationEngine.SIMILARTITY_FIELD, value = "classifier_features"),
-                     @Property(name = 
TopicClassificationEngine.CONCEPT_URI_FIELD, value = "concept"),
-                     @Property(name = 
TopicClassificationEngine.PRIMARY_TOPIC_URI_FIELD, value = "primary_topic"),
-                     @Property(name = TopicClassificationEngine.BROADER_FIELD, 
value = "broader"),
-                     @Property(name = 
TopicClassificationEngine.MODEL_UPDATE_DATE_FIELD, value = "last_update_dt"),
-                     @Property(name = 
TopicClassificationEngine.PRECISION_FIELD, value = "precision"),
-                     @Property(name = TopicClassificationEngine.RECALL_FIELD, 
value = "recall"),
-                     @Property(name = 
TopicClassificationEngine.ENTRY_ID_FIELD, value = "entry_id"),
-                     @Property(name = 
TopicClassificationEngine.MODEL_ENTRY_ID_FIELD, value = "model_entry_id"),
-                     @Property(name = 
TopicClassificationEngine.ENTRY_TYPE_FIELD, value = "entry_type"),
-                     @Property(name = 
TopicClassificationEngine.MODEL_EVALUATION_DATE_FIELD, value = 
"last_evaluation_dt"),
-                     @Property(name = 
TopicClassificationEngine.FALSE_NEGATIVES_FIELD, value = "false_negatives"),
-                     @Property(name = 
TopicClassificationEngine.FALSE_POSITIVES_FIELD, value = "false_positives"),
-                     @Property(name = 
TopicClassificationEngine.POSITIVE_SUPPORT_FIELD, value = "positive_support"),
-                     @Property(name = 
TopicClassificationEngine.NEGATIVE_SUPPORT_FIELD, value = "negative_support"),
-                     @Property(name = 
TopicClassificationEngine.TRAINING_SET_ID),
-                     @Property(name = Constants.SERVICE_RANKING, intValue = 
0)})
+        @Property(name = EnhancementEngine.PROPERTY_NAME),
+        @Property(name = TopicClassificationEngine.SOLR_CORE),
+        @Property(name = TopicClassificationEngine.SOLR_CORE_CONFIG,
+            value = TopicClassificationEngine.DEFAULT_SOLR_CORE_CONFIG),
+        @Property(name = TopicClassificationEngine.LANGUAGES),
+// These properties can still be set via a configuration file, but as most 
users
+// will not use them, they are excluded from the configuration form.
+//        @Property(name = TopicClassificationEngine.SIMILARTITY_FIELD, value 
= TopicClassificationEngine.DEFAULT_SIMILARTITY_FIELD),
+//        @Property(name = TopicClassificationEngine.CONCEPT_URI_FIELD, value 
= TopicClassificationEngine.DEFAULT_CONCEPT_URI_FIELD),
+//        @Property(name = TopicClassificationEngine.PRIMARY_TOPIC_URI_FIELD, 
value = TopicClassificationEngine.DEFAULT_PRIMARY_TOPIC_URI_FIELD),
+//        @Property(name = TopicClassificationEngine.BROADER_FIELD, value = 
TopicClassificationEngine.DEFAULT_BROADER_FIELD),
+//        @Property(name = TopicClassificationEngine.MODEL_UPDATE_DATE_FIELD, 
value = TopicClassificationEngine.DEFAULT_MODEL_UPDATE_DATE_FIELD),
+//        @Property(name = TopicClassificationEngine.PRECISION_FIELD, value = 
TopicClassificationEngine.DEFAULT_PRECISION_FIELD),
+//        @Property(name = TopicClassificationEngine.RECALL_FIELD, value = 
TopicClassificationEngine.DEFAULT_RECALL_FIELD),
+//        @Property(name = TopicClassificationEngine.ENTRY_ID_FIELD, value = 
TopicClassificationEngine.DEFAULT_ENTRY_ID_FIELD),
+//        @Property(name = TopicClassificationEngine.MODEL_ENTRY_ID_FIELD, 
value = TopicClassificationEngine.DEFAULT_MODEL_ENTRY_ID_FIELD),
+//        @Property(name = TopicClassificationEngine.ENTRY_TYPE_FIELD, value = 
TopicClassificationEngine.DEFAULT_ENTRY_TYPE_FIELD),
+//        @Property(name = 
TopicClassificationEngine.MODEL_EVALUATION_DATE_FIELD, value = 
TopicClassificationEngine.DEFAULT_MODEL_EVALUATION_DATE_FIELD),
+//        @Property(name = TopicClassificationEngine.FALSE_NEGATIVES_FIELD, 
value = TopicClassificationEngine.DEFAULT_FALSE_NEGATIVES_FIELD),
+//        @Property(name = TopicClassificationEngine.FALSE_POSITIVES_FIELD, 
value = TopicClassificationEngine.DEFAULT_FALSE_POSITIVES_FIELD),
+//        @Property(name = TopicClassificationEngine.POSITIVE_SUPPORT_FIELD, 
value = TopicClassificationEngine.DEFAULT_POSITIVE_SUPPORT_FIELD),
+//        @Property(name = TopicClassificationEngine.NEGATIVE_SUPPORT_FIELD, 
value = TopicClassificationEngine.DEFAULT_NEGATIVE_SUPPORT_FIELD),
+//        @Property(name = TopicClassificationEngine.ORDER, intValue = 
TopicClassificationEngine.DEFAULT_ENGINE_ORDER),
+        @Property(name = TopicClassificationEngine.TRAINING_SET_ID),
+        @Property(name = Constants.SERVICE_RANKING, intValue = 0)})
 public class TopicClassificationEngine extends ConfiguredSolrCoreTracker 
implements EnhancementEngine,
         ServiceProperties, TopicClassifier {
 
+    public static final String DEFAULT_SOLR_CORE_CONFIG = 
"default-topic-model.solrindex.zip";
+
     public static final String MODEL_ENTRY = "model";
 
     public static final String METADATA_ENTRY = "metadata";
-
+    /**
+     * The reference to the SolrCore used by the TopicClassificationEngine.
+     * The default is the engine name with the suffix '-model'. This also 
supports
+     * the {server-name}:{index-name} syntax.
+     */
     public static final String SOLR_CORE = 
"org.apache.stanbol.enhancer.engine.topic.solrCore";
+    /**
+     * The name of the Solr Index archive (default: 
"default-topic-model.solrindex.zip").
+     * The file is loaded by using the DataFileProvider infrastructure. The 
archive may
+     * also include a pre-trained model.
+     */
+    public static final String SOLR_CORE_CONFIG = 
"org.apache.stanbol.enhancer.engine.topic.solrCoreConfig";
 
     public static final String LANGUAGES = 
"org.apache.stanbol.enhancer.engine.topic.languages";
 
     public static final String ORDER = 
"org.apache.stanbol.enhancer.engine.topic.order";
+    
+    public static final Integer DEFAULT_ENGINE_ORDER = 
ServiceProperties.ORDERING_CONTENT_EXTRACTION;
 
     public static final String ENTRY_ID_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.entryIdField";
+    
+    public static final String DEFAULT_ENTRY_ID_FIELD = "entry_id";
 
     public static final String ENTRY_TYPE_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.entryTypeField";
+    
+    public static final String DEFAULT_ENTRY_TYPE_FIELD = "entry_type";
 
     public static final String SIMILARTITY_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.similarityField";
+    
+    public static final String DEFAULT_SIMILARTITY_FIELD = 
"classifier_features";
 
     public static final String CONCEPT_URI_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.conceptUriField";
+    
+    public static final String DEFAULT_CONCEPT_URI_FIELD = "concept";
 
     public static final String BROADER_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.broaderField";
+    
+    public static final String DEFAULT_BROADER_FIELD = "broader";
 
     public static final String PRIMARY_TOPIC_URI_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.primaryTopicField";
+    
+    public static final String DEFAULT_PRIMARY_TOPIC_URI_FIELD = 
"primary_topic";
 
     public static final String MODEL_UPDATE_DATE_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.modelUpdateDateField";
 
+    public static final String DEFAULT_MODEL_UPDATE_DATE_FIELD = 
"last_update_dt";
+    
     public static final String MODEL_EVALUATION_DATE_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.modelEvaluationDateField";
+    
+    public static final String DEFAULT_MODEL_EVALUATION_DATE_FIELD = 
"last_evaluation_dt";
 
     public static final String MODEL_ENTRY_ID_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.modelEntryIdField";
+    
+    public static final String DEFAULT_MODEL_ENTRY_ID_FIELD = "model_entry_id";
 
     public static final String PRECISION_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.precisionField";
+    
+    public static final String DEFAULT_PRECISION_FIELD = "precision";
 
     public static final String RECALL_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.recallField";
+    
+    public static final String DEFAULT_RECALL_FIELD = "recall";
 
     public static final String FALSE_POSITIVES_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.falsePositivesField";
+    
+    public static final String DEFAULT_FALSE_POSITIVES_FIELD = 
"false_positives";
 
     public static final String FALSE_NEGATIVES_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.falseNegativesField";
+    
+    public static final String DEFAULT_FALSE_NEGATIVES_FIELD = 
"false_negatives";
 
     public static final String POSITIVE_SUPPORT_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.positiveSupportField";
 
+    public static final String DEFAULT_POSITIVE_SUPPORT_FIELD = 
"positive_support";
+    
     public static final String NEGATIVE_SUPPORT_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.negativeSupportField";
 
+    public static final String DEFAULT_NEGATIVE_SUPPORT_FIELD = 
"negative_support";
+    
     public static final String TRAINING_SET_ID = 
"org.apache.stanbol.enhancer.engine.topic.trainingSetId";
 
     private static final Logger log = 
LoggerFactory.getLogger(TopicClassificationEngine.class);
@@ -250,6 +299,7 @@ public class TopicClassificationEngine e
     protected String engineName;
 
     protected List<String> acceptedLanguages;
+    private Set<String> acceptedLanguageSet;
 
     protected Integer order = ORDERING_EXTRACTION_ENHANCEMENT;
 
@@ -337,7 +387,6 @@ public class TopicClassificationEngine e
     protected void activate(ComponentContext context, 
Dictionary<String,Object> config) throws ConfigurationException,
                                                                                
        InvalidSyntaxException {
         this.context = context;
-        indexArchiveName = "default-topic-model";
         configure(config);
 
         // if training set is not null, track it
@@ -376,36 +425,52 @@ public class TopicClassificationEngine e
 
     public void configure(Dictionary<String,Object> config) throws 
ConfigurationException {
         engineName = getRequiredStringParam(config, 
EnhancementEngine.PROPERTY_NAME);
-        entryIdField = getRequiredStringParam(config, ENTRY_ID_FIELD);
-        modelEntryIdField = getRequiredStringParam(config, 
MODEL_ENTRY_ID_FIELD);
-        conceptUriField = getRequiredStringParam(config, CONCEPT_URI_FIELD);
-        entryTypeField = getRequiredStringParam(config, ENTRY_TYPE_FIELD);
-        similarityField = getRequiredStringParam(config, SIMILARTITY_FIELD);
+        entryIdField = getRequiredStringParam(config, ENTRY_ID_FIELD, 
DEFAULT_ENTRY_ID_FIELD);
+        modelEntryIdField = getRequiredStringParam(config, 
MODEL_ENTRY_ID_FIELD, DEFAULT_MODEL_ENTRY_ID_FIELD);
+        conceptUriField = getRequiredStringParam(config, CONCEPT_URI_FIELD, 
DEFAULT_CONCEPT_URI_FIELD);
+        entryTypeField = getRequiredStringParam(config, ENTRY_TYPE_FIELD, 
DEFAULT_ENTRY_TYPE_FIELD);
+        similarityField = getRequiredStringParam(config, SIMILARTITY_FIELD, 
DEFAULT_SIMILARTITY_FIELD);
         acceptedLanguages = getStringListParan(config, LANGUAGES);
-        precisionField = getRequiredStringParam(config, PRECISION_FIELD);
-        recallField = getRequiredStringParam(config, RECALL_FIELD);
-        modelUpdateDateField = getRequiredStringParam(config, 
MODEL_UPDATE_DATE_FIELD);
-        modelEvaluationDateField = getRequiredStringParam(config, 
MODEL_EVALUATION_DATE_FIELD);
-        falsePositivesField = getRequiredStringParam(config, 
FALSE_POSITIVES_FIELD);
-        falseNegativesField = getRequiredStringParam(config, 
FALSE_NEGATIVES_FIELD);
-        positiveSupportField = getRequiredStringParam(config, 
POSITIVE_SUPPORT_FIELD);
-        negativeSupportField = getRequiredStringParam(config, 
NEGATIVE_SUPPORT_FIELD);
-        configureSolrCore(config, SOLR_CORE, engineName + "-model");
+        acceptedLanguageSet = new HashSet<String>(acceptedLanguages);
+        precisionField = getRequiredStringParam(config, PRECISION_FIELD, 
DEFAULT_PRECISION_FIELD);
+        recallField = getRequiredStringParam(config, RECALL_FIELD, 
DEFAULT_RECALL_FIELD);
+        modelUpdateDateField = getRequiredStringParam(config, 
MODEL_UPDATE_DATE_FIELD, DEFAULT_MODEL_UPDATE_DATE_FIELD);
+        modelEvaluationDateField = getRequiredStringParam(config, 
MODEL_EVALUATION_DATE_FIELD, DEFAULT_MODEL_EVALUATION_DATE_FIELD);
+        falsePositivesField = getRequiredStringParam(config, 
FALSE_POSITIVES_FIELD, DEFAULT_FALSE_POSITIVES_FIELD);
+        falseNegativesField = getRequiredStringParam(config, 
FALSE_NEGATIVES_FIELD, DEFAULT_FALSE_NEGATIVES_FIELD);
+        positiveSupportField = getRequiredStringParam(config, 
POSITIVE_SUPPORT_FIELD, DEFAULT_POSITIVE_SUPPORT_FIELD);
+        negativeSupportField = getRequiredStringParam(config, 
NEGATIVE_SUPPORT_FIELD, DEFAULT_NEGATIVE_SUPPORT_FIELD);
+        configureSolrCore(config, SOLR_CORE, engineName + 
"-model",SOLR_CORE_CONFIG);
 
         // optional fields, can be null
-        broaderField = (String) config.get(BROADER_FIELD);
-        primaryTopicUriField = (String) config.get(PRIMARY_TOPIC_URI_FIELD);
+        broaderField = getRequiredStringParam(config, BROADER_FIELD, 
DEFAULT_BROADER_FIELD);
+        primaryTopicUriField = getRequiredStringParam(config, 
PRIMARY_TOPIC_URI_FIELD, DEFAULT_PRIMARY_TOPIC_URI_FIELD);
         trainingSetId = (String) config.get(TRAINING_SET_ID);
         Object orderParamValue = config.get(ORDER);
-        if (orderParamValue != null) {
-            order = (Integer) orderParamValue;
+        if (orderParamValue instanceof Number) {
+            order = ((Number) orderParamValue).intValue();
+        } else if(orderParamValue != null){
+            try {
+                Integer.parseInt(orderParamValue.toString());
+            }catch (NumberFormatException e) {
+                throw new ConfigurationException(ORDER, "The configured 
EnhancementEngine "
+                    + "order MUST BE an Intever value!",e);
+            }
+        } else {
+            order = DEFAULT_ENGINE_ORDER;
         }
     }
 
     @Override
     public int canEnhance(ContentItem ci) throws EngineException {
         if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES) != null && 
getActiveSolrServer() != null) {
-            return ENHANCE_SYNCHRONOUS;
+            String language = EnhancementEngineHelper.getLanguage(ci);
+            if(acceptedLanguageSet.isEmpty() || 
acceptedLanguageSet.contains(language) ||
+                    acceptedLanguageSet.contains("")){
+                return ENHANCE_SYNCHRONOUS;
+            } else {
+                return CANNOT_ENHANCE;
+            }
         } else {
             return CANNOT_ENHANCE;
         }
@@ -421,6 +486,12 @@ public class TopicClassificationEngine e
                             + "') -> this indicates that canEnhance was"
                             + "NOT called and indicates a bug in the used 
EnhancementJobManager!");
         }
+        String language = EnhancementEngineHelper.getLanguage(ci);
+        if(!(acceptedLanguageSet.isEmpty() || 
acceptedLanguageSet.contains(language) ||
+                acceptedLanguageSet.contains(""))){
+            throw new IllegalStateException("The language '"+language+"' of 
the ContentItem is not configured as "
+                +" active for this Engine (active: "+acceptedLanguageSet+").");
+        }
         String text;
         try {
             text = ContentItemHelper.getText(contentPart.getValue());
@@ -551,7 +622,7 @@ public class TopicClassificationEngine e
         List<TopicSuggestion> suggestedTopics = new 
ArrayList<TopicSuggestion>(MAX_SUGGESTIONS * 3);
         SolrServer solrServer = getActiveSolrServer();
         SolrQuery query = new SolrQuery();
-        query.setQueryType("/" + MoreLikeThisParams.MLT);
+        query.setRequestHandler("/" + MoreLikeThisParams.MLT);
         query.setFilterQueries(entryTypeField + ":" + MODEL_ENTRY);
         query.set(MoreLikeThisParams.MATCH_INCLUDE, false);
         query.set(MoreLikeThisParams.MIN_DOC_FREQ, 1);
@@ -823,7 +894,15 @@ public class TopicClassificationEngine e
             return trainingSet;
         }
         if (trainingSetTracker != null) {
-            return (TrainingSet) trainingSetTracker.getService();
+            TrainingSet trainingsSet = (TrainingSet) 
trainingSetTracker.getService();
+            if(trainingsSet == null){
+                for(int i=0; i < 5 && trainingsSet == null; i++){
+                    try {
+                        trainingsSet = (TrainingSet) 
trainingSetTracker.waitForService(1000);
+                    } catch (InterruptedException e) {/*ignore*/}
+                }
+            }
+            return trainingsSet;
         }
         return null;
     }
@@ -1023,13 +1102,14 @@ public class TopicClassificationEngine e
         cvFoldCount = foldCount;
     }
 
-    protected Dictionary<String,Object> getCanonicalConfiguration(Object 
server) {
+    protected Dictionary<String,Object> getCanonicalConfiguration(Object 
server, Object coreConfig) {
         Hashtable<String,Object> config = new Hashtable<String,Object>();
         config.put(EnhancementEngine.PROPERTY_NAME, engineName + 
"-evaluation");
         config.put(TopicClassificationEngine.ENTRY_ID_FIELD, "entry_id");
         config.put(TopicClassificationEngine.ENTRY_TYPE_FIELD, "entry_type");
         config.put(TopicClassificationEngine.MODEL_ENTRY_ID_FIELD, 
"model_entry_id");
         config.put(TopicClassificationEngine.SOLR_CORE, server);
+        config.put(TopicClassificationEngine.SOLR_CORE_CONFIG, coreConfig);
         config.put(TopicClassificationEngine.CONCEPT_URI_FIELD, "concept");
         config.put(TopicClassificationEngine.PRIMARY_TOPIC_URI_FIELD, 
"primary_topic");
         config.put(TopicClassificationEngine.SIMILARTITY_FIELD, 
"classifier_features");
@@ -1085,12 +1165,8 @@ public class TopicClassificationEngine e
         return updatedTopics;
     }
 
-    protected int performCVFold(int cvFoldIndex,
-                                int cvFoldCount,
-                                int cvIterations,
-                                boolean incremental) throws 
ConfigurationException,
-                                                    TrainingSetException,
-                                                    ClassifierException {
+    protected int performCVFold(int cvFoldIndex, int cvFoldCount, int 
cvIterations, boolean incremental)
+            throws ConfigurationException, TrainingSetException, 
ClassifierException {
 
         cvIterations = cvIterations <= 0 ? cvFoldCount : cvFoldCount;
         log.info(String.format("Performing evaluation %d-fold CV iteration 
%d/%d on classifier %s",
@@ -1102,7 +1178,9 @@ public class TopicClassificationEngine e
                 // OSGi setup: the evaluation server will be generated 
automatically using the
                 // managedSolrServer
                 classifier.bindManagedSolrServer(managedSolrServer);
-                classifier.activate(context, 
getCanonicalConfiguration(engineName + "-evaluation"));
+                classifier.activate(context, getCanonicalConfiguration(
+                    engineName + "-evaluation", //TODO: maybe we should use 
the SolrCoreName instead
+                    solrCoreConfig));
             } else {
                 if(__evaluationServer == null){
                     __evaluationServerDir = new 
File(embeddedSolrServerDir,engineName + "-evaluation");
@@ -1112,7 +1190,7 @@ public class TopicClassificationEngine e
                     __evaluationServer = 
EmbeddedSolrHelper.makeEmbeddedSolrServer(__evaluationServerDir,
                         "evaluationclassifierserver", "default-topic-model", 
"default-topic-model");
                 }
-                
classifier.configure(getCanonicalConfiguration(__evaluationServer));
+                
classifier.configure(getCanonicalConfiguration(__evaluationServer,solrCoreConfig));
             }
         } catch (Exception e) {
             throw new ClassifierException(e);

Modified: 
stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/topic/ConfiguredSolrCoreTracker.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/topic/ConfiguredSolrCoreTracker.java?rev=1489728&r1=1489727&r2=1489728&view=diff
==============================================================================
--- 
stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/topic/ConfiguredSolrCoreTracker.java
 (original)
+++ 
stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/topic/ConfiguredSolrCoreTracker.java
 Wed Jun  5 07:23:15 2013
@@ -17,20 +17,22 @@
 package org.apache.stanbol.enhancer.topic;
 
 import java.io.IOException;
-import java.net.URL;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Dictionary;
 import java.util.List;
 
-import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
 import org.apache.solr.client.solrj.SolrServer;
 import org.apache.stanbol.commons.solr.IndexReference;
 import org.apache.stanbol.commons.solr.RegisteredSolrServerTracker;
 import org.apache.stanbol.commons.solr.managed.IndexMetadata;
+import org.apache.stanbol.commons.solr.managed.ManagedIndexState;
 import org.apache.stanbol.commons.solr.managed.ManagedSolrServer;
+import org.apache.stanbol.enhancer.engine.topic.TopicClassificationEngine;
 import org.osgi.service.cm.ConfigurationException;
 import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.xml.sax.SAXException;
 
 /**
@@ -38,6 +40,8 @@ import org.xml.sax.SAXException;
  */
 public abstract class ConfiguredSolrCoreTracker {
 
+    protected final Logger log = LoggerFactory.getLogger(getClass());
+    
     protected ManagedSolrServer managedSolrServer;
 
     protected String solrCoreId;
@@ -49,7 +53,9 @@ public abstract class ConfiguredSolrCore
 
     protected ComponentContext context;
 
-    protected String indexArchiveName;
+    protected String solrCoreConfig;
+
+    //protected String indexArchiveName;
 
     abstract public void configure(Dictionary<String,Object> config) throws 
ConfigurationException;
 
@@ -93,7 +99,22 @@ public abstract class ConfiguredSolrCore
      *         tracker.
      */
     public SolrServer getActiveSolrServer() {
-        SolrServer result = solrServer != null ? solrServer : 
indexTracker.getService();
+        SolrServer result;
+        if(solrServer != null){
+            result = solrServer;
+        } else {
+            result = indexTracker.getService();
+            if(result == null){
+                //try to wait for the server (mainly because the evaluation
+                //server is created on demand and will need some time to be
+                //initialised).
+                for(int i = 0; i < 5 && result == null; i++){
+                    try {
+                        result = (SolrServer) 
indexTracker.waitForService(1000);
+                    } catch (InterruptedException e) {/* ignore */ }
+                }
+            }
+        }
         if (result == null) {
             if (solrCoreId != null) {
                 throw new RuntimeException("No Solr Core registered with id: " 
+ solrCoreId);
@@ -105,27 +126,32 @@ public abstract class ConfiguredSolrCore
     }
 
     protected void configureSolrCore(Dictionary<String,Object> config,
-                                     String solrCoreProperty,
-                                     String defaultCoreId) throws 
ConfigurationException {
+            String solrCoreProperty, String defaultCoreId,
+            String solrCoreConfigProperty) 
+                    throws ConfigurationException {
         Object solrCoreInfo = config.get(solrCoreProperty);
         if (solrCoreInfo instanceof SolrServer) {
             // Bind a fixed Solr server client instead of doing dynamic OSGi 
lookup using the service tracker.
             // This can be useful both for unit-testing .
             solrServer = (SolrServer) config.get(solrCoreProperty);
+            solrCoreConfig = 
TopicClassificationEngine.DEFAULT_SOLR_CORE_CONFIG;
         } else {
-            if (solrCoreInfo != null && 
!solrCoreInfo.toString().trim().isEmpty()) {
-                this.solrCoreId = solrCoreInfo.toString();
-            } else {
-                this.solrCoreId = defaultCoreId;
-            }
             if (context == null) {
                 throw new ConfigurationException(solrCoreProperty,
                         solrCoreProperty + " should be a SolrServer instance 
for using"
                                 + " the engine without any OSGi context. Got: 
" + solrCoreId);
             }
+            if (solrCoreInfo != null && 
!solrCoreInfo.toString().trim().isEmpty()) {
+                this.solrCoreId = solrCoreInfo.toString().trim();
+            } else {
+                this.solrCoreId = defaultCoreId;
+            }
+            solrCoreConfig = getRequiredStringParam(config, 
solrCoreConfigProperty, 
+                this.solrCoreId + ".solrindex.zip");
             try {
                 IndexReference indexReference = 
IndexReference.parse(solrCoreId);
-                indexReference = checkInitSolrIndex(indexReference);
+                //String configName = getRequiredStringParam(config, 
SOLR_CONFIG, defaultValue)
+                indexReference = checkInitSolrIndex(indexReference, 
solrCoreConfig);
                 // track the solr core OSGi updates
                 indexTracker = new 
RegisteredSolrServerTracker(context.getBundleContext(), indexReference);
                 indexTracker.open();
@@ -134,30 +160,61 @@ public abstract class ConfiguredSolrCore
             }
         }
     }
-
-    protected IndexReference checkInitSolrIndex(IndexReference indexReference) 
throws IOException,
-                                                                              
ConfigurationException,
-                                                                              
SAXException {
+    /**
+     * Checks if the SolrIndex is available and if not it tries to initialise 
it
+     * @param indexReference the SolrCore reference
+     * @param solrCoreConfig the name of the SolrIndex configuration 
({name}.solrindex.zip)
+     * @return
+     * @throws IOException
+     * @throws ConfigurationException
+     * @throws SAXException
+     */
+    protected IndexReference checkInitSolrIndex(IndexReference indexReference, 
String solrCoreConfig) 
+            throws IOException, ConfigurationException, SAXException {
         // if the solr core is managed, check that the index is properly 
activated
         if (managedSolrServer != null && 
indexReference.checkServer(managedSolrServer.getServerName())
-            && context != null) {
+            && context != null && solrCoreConfig != null) {
+            log.info(" > check/init index {} on ManagedSolrServer {}", 
indexReference, managedSolrServer.getServerName());
             String indexName = indexReference.getIndex();
-            IndexMetadata indexMetadata = 
managedSolrServer.getIndexMetadata(indexName);
-            if (indexMetadata == null) {
-                // TODO: debug the DataFileProvider init race conditions 
instead
-                // indexMetadata = 
managedSolrServer.createSolrIndex(indexName, indexArchiveName, null);
-                URL archiveUrl = context.getBundleContext().getBundle()
-                        .getEntry("/data-files/" + indexArchiveName + 
".solrindex.zip");
-                if (archiveUrl == null) {
-                    throw new ConfigurationException(solrCoreId, "Could not 
find index archive for "
-                                                                 + 
indexArchiveName);
+            final IndexMetadata indexMetadata;
+            ManagedIndexState indexState = 
managedSolrServer.getIndexState(indexName);
+            if(indexState == null){
+                if(solrCoreConfig.indexOf(".solrindex.") < 0){ //if the suffix 
is missing
+                    solrCoreConfig = solrCoreConfig + ".solrindex.zip"; 
//append it
+                }
+                log.info("Create SolrCore {} (config: {}) on ManagedSolrServer 
{} ...",
+                    new 
Object[]{indexName,solrCoreConfig,managedSolrServer.getServerName()});
+                indexMetadata = managedSolrServer.createSolrIndex(indexName, 
+                    solrCoreConfig, null);
+                if(indexMetadata != null)
+                log.info("  ... created {}", 
indexMetadata.getIndexReference());
+            } else {
+                indexMetadata = managedSolrServer.getIndexMetadata(indexName);
+                if(indexState != ManagedIndexState.ACTIVE){
+                    log.info("  ... activate {}", 
indexMetadata.getIndexReference());
+                    managedSolrServer.activateIndex(indexName);
+                } else {
+                    log.info("  ... index {} already active", 
indexMetadata.getIndexReference());
                 }
-                ZipArchiveInputStream zis = new 
ZipArchiveInputStream(archiveUrl.openStream());
-                indexMetadata = managedSolrServer.updateIndex(indexName, zis, 
indexArchiveName);
-            }
-            if (!indexMetadata.isActive()) {
-                managedSolrServer.activateIndex(indexName);
             }
+//            IndexMetadata indexMetadata = 
managedSolrServer.getIndexMetadata(indexName);
+//            if (indexMetadata == null) {
+//                // TODO: debug the DataFileProvider init race conditions 
instead
+//                // indexMetadata = 
managedSolrServer.createSolrIndex(indexName, indexArchiveName, null);
+//                
dfp.getInputStream(context.getBundleContext().getBundle().getSymbolicName(), 
+//                    indexArchiveName + ".solrindex.zip", null);
+//                URL archiveUrl = context.getBundleContext().getBundle()
+//                        .getEntry("/data-files/" + indexArchiveName + 
".solrindex.zip");
+//                if (archiveUrl == null) {
+//                    throw new ConfigurationException(solrCoreId, "Could not 
find index archive for "
+//                                                                 + 
indexArchiveName);
+//                }
+//                ZipArchiveInputStream zis = new 
ZipArchiveInputStream(archiveUrl.openStream());
+//                indexMetadata = managedSolrServer.updateIndex(indexName, 
zis, indexArchiveName);
+//            }
+//            if (!indexMetadata.isActive()) {
+//                managedSolrServer.activateIndex(indexName);
+//            }
             indexReference = indexMetadata.getIndexReference();
         }
         return indexReference;

Modified: 
stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/topic/training/SolrTrainingSet.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/topic/training/SolrTrainingSet.java?rev=1489728&r1=1489727&r2=1489728&view=diff
==============================================================================
--- 
stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/topic/training/SolrTrainingSet.java
 (original)
+++ 
stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/topic/training/SolrTrainingSet.java
 Wed Jun  5 07:23:15 2013
@@ -43,6 +43,7 @@ import org.apache.solr.client.solrj.util
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.stanbol.commons.solr.managed.ManagedSolrServer;
+import 
org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider;
 import org.apache.stanbol.enhancer.topic.Batch;
 import org.apache.stanbol.enhancer.topic.ConfiguredSolrCoreTracker;
 import org.apache.stanbol.enhancer.topic.UTCTimeStamper;
@@ -59,24 +60,38 @@ import org.slf4j.LoggerFactory;
 @Component(metatype = true, immediate = true, configurationFactory = true, 
policy = ConfigurationPolicy.REQUIRE)
 @Service
 @Properties(value = {@Property(name = SolrTrainingSet.TRAINING_SET_NAME),
-                     @Property(name = SolrTrainingSet.SOLR_CORE),
-                     @Property(name = SolrTrainingSet.EXAMPLE_ID_FIELD, value 
= "id"),
-                     @Property(name = SolrTrainingSet.EXAMPLE_TEXT_FIELD, 
value = "text"),
-                     @Property(name = SolrTrainingSet.TOPICS_URI_FIELD, value 
= "topics"),
-                     @Property(name = SolrTrainingSet.MODIFICATION_DATE_FIELD, 
value = "modification_dt")})
+        @Property(name = SolrTrainingSet.SOLR_CORE),
+        @Property(name = SolrTrainingSet.SOLR_CORE_CONFIG, value = 
SolrTrainingSet.DEFAULT_SOLR_CORE_CONFIG)
+//        @Property(name = SolrTrainingSet.EXAMPLE_ID_FIELD, value = 
SolrTrainingSet.DEFAULT_EXAMPLE_ID_FIELD),
+//        @Property(name = SolrTrainingSet.EXAMPLE_TEXT_FIELD, value = 
SolrTrainingSet.DEFAULT_EXAMPLE_TEXT_FIELD),
+//        @Property(name = SolrTrainingSet.TOPICS_URI_FIELD, value = 
SolrTrainingSet.DEFAULT_TOPICS_URI_FIELD),
+//        @Property(name = SolrTrainingSet.MODIFICATION_DATE_FIELD, value = 
SolrTrainingSet.DEFAULT_MODIFICATION_DATE_FIELD)
+})
 public class SolrTrainingSet extends ConfiguredSolrCoreTracker implements 
TrainingSet {
 
     public static final String TRAINING_SET_NAME = 
"org.apache.stanbol.enhancer.topic.trainingset.id";
 
     public static final String SOLR_CORE = 
"org.apache.stanbol.enhancer.engine.topic.solrCore";
 
+    public static final String SOLR_CORE_CONFIG = 
"org.apache.stanbol.enhancer.engine.topic.solrCoreConfig";
+    
+    public static final String DEFAULT_SOLR_CORE_CONFIG = 
"default-topic-trainingset.solrindex.zip";
+
     public static final String TOPICS_URI_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.topicsUriField";
+    
+    public static final String DEFAULT_TOPICS_URI_FIELD = "topics";
 
     public static final String EXAMPLE_ID_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.exampleIdField";
+    
+    public static final String DEFAULT_EXAMPLE_ID_FIELD = "id";
 
     public static final String EXAMPLE_TEXT_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.exampleTextField";
+    
+    public static final String DEFAULT_EXAMPLE_TEXT_FIELD = "text";
 
     public static final String MODIFICATION_DATE_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.modificiationDateField";
+    
+    public static final String DEFAULT_MODIFICATION_DATE_FIELD = 
"modification_dt";
 
     @SuppressWarnings("unused")
     private static final Logger log = 
LoggerFactory.getLogger(SolrTrainingSet.class);
@@ -96,14 +111,13 @@ public class SolrTrainingSet extends Con
 
     @Reference(cardinality = ReferenceCardinality.OPTIONAL_UNARY, bind = 
"bindManagedSolrServer", unbind = "unbindManagedSolrServer", strategy = 
ReferenceStrategy.EVENT, policy = ReferencePolicy.DYNAMIC)
     protected ManagedSolrServer managedSolrServer;
-
+    
     public String getName() {
         return trainingSetId;
     }
 
     @Activate
     protected void activate(ComponentContext context) throws 
ConfigurationException, InvalidSyntaxException {
-        indexArchiveName = "default-topic-trainingset";
         @SuppressWarnings("unchecked")
         Dictionary<String,Object> config = context.getProperties();
         this.context = context;
@@ -120,11 +134,11 @@ public class SolrTrainingSet extends Con
     @Override
     public void configure(Dictionary<String,Object> config) throws 
ConfigurationException {
         trainingSetId = getRequiredStringParam(config, TRAINING_SET_NAME);
-        exampleIdField = getRequiredStringParam(config, EXAMPLE_ID_FIELD);
-        exampleTextField = getRequiredStringParam(config, EXAMPLE_TEXT_FIELD);
-        topicUrisField = getRequiredStringParam(config, TOPICS_URI_FIELD);
-        modificationDateField = getRequiredStringParam(config, 
MODIFICATION_DATE_FIELD);
-        configureSolrCore(config, SOLR_CORE, trainingSetId);
+        exampleIdField = getRequiredStringParam(config, EXAMPLE_ID_FIELD, 
DEFAULT_EXAMPLE_ID_FIELD);
+        exampleTextField = getRequiredStringParam(config, EXAMPLE_TEXT_FIELD, 
DEFAULT_EXAMPLE_TEXT_FIELD);
+        topicUrisField = getRequiredStringParam(config, TOPICS_URI_FIELD, 
DEFAULT_TOPICS_URI_FIELD);
+        modificationDateField = getRequiredStringParam(config, 
MODIFICATION_DATE_FIELD, DEFAULT_MODIFICATION_DATE_FIELD);
+        configureSolrCore(config, SOLR_CORE, trainingSetId, SOLR_CORE_CONFIG);
     }
 
     public static ConfiguredSolrCoreTracker 
fromParameters(Dictionary<String,Object> config) throws ConfigurationException {

Modified: 
stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1489728&r1=1489727&r2=1489728&view=diff
==============================================================================
--- 
stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/OSGI-INF/metatype/metatype.properties
 (original)
+++ 
stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/OSGI-INF/metatype/metatype.properties
 Wed Jun  5 07:23:15 2013
@@ -42,9 +42,27 @@ same value can be executed in parallel.
 
 #org.apache.stanbol.enhancer.engine.topic.languages
 org.apache.stanbol.enhancer.engine.topic.languages.name=Languages
+org.apache.stanbol.enhancer.engine.topic.languages.description=The list of \
+supported languages (default: all)
 
 #org.apache.stanbol.enhancer.engine.topic.solrCore
 org.apache.stanbol.enhancer.engine.topic.solrCore.name=Solr Core
+org.apache.stanbol.enhancer.engine.topic.solrCore.description=The name of the \
+Solr Core (default: '{engine-name}-model'). This also supports the \
+'{server-name}:{core-name}' syntax.
+
+#org.apache.stanbol.enhancer.engine.topic.solrCoreConfig
+org.apache.stanbol.enhancer.engine.topic.solrCoreConfig.name=Solr Core Config
+org.apache.stanbol.enhancer.engine.topic.solrCoreConfig.description=Specifies \
+the Solr Core Configuration used by the Topic Classification instance. \
+The file with this name is loaded via the DataFileProvider. It may also \
+contain a pre-trained model.
+
+#org.apache.stanbol.enhancer.engine.topic.trainingSetId
+org.apache.stanbol.enhancer.engine.topic.trainingSetId.name=Training Set
+org.apache.stanbol.enhancer.engine.topic.trainingSetId.description=The name of 
\
+the Training Set used for this Topic Classification engine. If not specified 
the \
+model of this Engine will be read-only.
 
 #org.apache.stanbol.enhancer.engine.topic.entryIdField
 org.apache.stanbol.enhancer.engine.topic.entryIdField.name=ID Field
@@ -93,3 +111,32 @@ org.apache.stanbol.enhancer.engine.topic
 
 #org.apache.stanbol.enhancer.engine.topic.negativeSupportField
 org.apache.stanbol.enhancer.engine.topic.negativeSupportField.name=Negative 
Support Field
+
+#org.apache.stanbol.enhancer.engine.topic.conceptUriField
+# NOTE: values in .properties files must not be quoted; quotes would become
+# part of the displayed label.
+org.apache.stanbol.enhancer.engine.topic.conceptUriField.name=Concept URI Field
+
+#org.apache.stanbol.enhancer.engine.topic.primaryTopicField
+org.apache.stanbol.enhancer.engine.topic.primaryTopicField.name=Primary Topic Field
+
+# Configuration Properties for the Solr Training Set
+# org.apache.stanbol.enhancer.topic.training.SolrTrainingSet
+org.apache.stanbol.enhancer.topic.training.SolrTrainingSet.name=Apache Stanbol 
\
+Enhancer: Solr based Topic Classifier TrainingSet
+org.apache.stanbol.enhancer.topic.training.SolrTrainingSet.description=Solr \
+based implementation of a TrainingSet for Topic Classifiers
+
+
+org.apache.stanbol.enhancer.topic.trainingset.id.name=Training Set Name
+# Labels for the Solr field name properties of the SolrTrainingSet. Each key
+# must match the corresponding property name constant of SolrTrainingSet
+# (EXAMPLE_ID_FIELD, EXAMPLE_TEXT_FIELD, TOPICS_URI_FIELD and
+# MODIFICATION_DATE_FIELD). Note that the 'modificiationDateField' spelling
+# is the actual property name used by the code.
+org.apache.stanbol.enhancer.engine.topic.exampleIdField.name=Document ID Field
+org.apache.stanbol.enhancer.engine.topic.exampleIdField.description=The Solr \
+field name used to store the ID of the training document
+org.apache.stanbol.enhancer.engine.topic.exampleTextField.name=Document Text Field
+org.apache.stanbol.enhancer.engine.topic.exampleTextField.description=The Solr \
+field name used to store the text of the training document
+org.apache.stanbol.enhancer.engine.topic.topicsUriField.name=Topic URI Field
+org.apache.stanbol.enhancer.engine.topic.topicsUriField.description=The Solr \
+field name used to store the URIs of Concepts the training document is \
+assigned to
+org.apache.stanbol.enhancer.engine.topic.modificiationDateField.name=Modification Date Field
+org.apache.stanbol.enhancer.engine.topic.modificiationDateField.description=The Solr \
+field name used to store the last change to the training document
+
+
+

Modified: 
stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/default-topic-model/conf/schema.xml
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/default-topic-model/conf/schema.xml?rev=1489728&r1=1489727&r2=1489728&view=diff
==============================================================================
--- 
stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/default-topic-model/conf/schema.xml
 (original)
+++ 
stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/default-topic-model/conf/schema.xml
 Wed Jun  5 07:23:15 2013
@@ -9,7 +9,7 @@
   License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
   OF ANY KIND, either express or implied. See the License for the specific 
   language governing permissions and limitations under the License. -->
-<schema name="example" version="1.3">
+<schema name="default-topic-model" version="1.3">
   <types>
     <fieldType name="uuid" class="solr.UUIDField" indexed="true" />
 


Reply via email to