Author: sfermigier
Date: Sat Mar 5 22:10:17 2011
New Revision: 1078371
URL: http://svn.apache.org/viewvc?rev=1078371&view=rev
Log:
Format and simplify code a bit.
Modified:
incubator/stanbol/trunk/enhancer/autotagging/src/main/java/org/apache/stanbol/autotagging/Autotagger.java
incubator/stanbol/trunk/enhancer/autotagging/src/main/java/org/apache/stanbol/autotagging/jena/ModelIndexer.java
incubator/stanbol/trunk/enhancer/autotagging/src/main/java/org/apache/stanbol/autotagging/jena/ModelResampler.java
incubator/stanbol/trunk/enhancer/autotagging/src/test/java/org/apache/stanbol/autotagging/AutotaggingTest.java
incubator/stanbol/trunk/enhancer/autotagging/src/test/java/org/apache/stanbol/autotagging/ModelResamplerTest.java
incubator/stanbol/trunk/enhancer/clerezza/org.apache.stanbol.enhancer.clerezza/src/main/java/org/apache/stanbol/enhancer/clerezza/ClerezzaStore.java
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/AutotaggingServlet.java
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/ConfiguredAutotaggerProvider.java
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/EnhancementRDFUtils.java
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/EntityMentionEnhancementEngine.java
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/RelatedTopicEnhancementEngine.java
incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
incubator/stanbol/trunk/enhancer/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/StatelessEngineTest.java
incubator/stanbol/trunk/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/JCRContentItem.java
Modified:
incubator/stanbol/trunk/enhancer/autotagging/src/main/java/org/apache/stanbol/autotagging/Autotagger.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/autotagging/src/main/java/org/apache/stanbol/autotagging/Autotagger.java?rev=1078371&r1=1078370&r2=1078371&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/autotagging/src/main/java/org/apache/stanbol/autotagging/Autotagger.java
(original)
+++
incubator/stanbol/trunk/enhancer/autotagging/src/main/java/org/apache/stanbol/autotagging/Autotagger.java
Sat Mar 5 22:10:17 2011
@@ -43,7 +43,7 @@ import org.slf4j.LoggerFactory;
*/
public class Autotagger {
- private final Logger log = LoggerFactory.getLogger(getClass());
+ private static final Logger log =
LoggerFactory.getLogger(Autotagger.class);
public final String typeFieldName =
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
Modified:
incubator/stanbol/trunk/enhancer/autotagging/src/main/java/org/apache/stanbol/autotagging/jena/ModelIndexer.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/autotagging/src/main/java/org/apache/stanbol/autotagging/jena/ModelIndexer.java?rev=1078371&r1=1078370&r2=1078371&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/autotagging/src/main/java/org/apache/stanbol/autotagging/jena/ModelIndexer.java
(original)
+++
incubator/stanbol/trunk/enhancer/autotagging/src/main/java/org/apache/stanbol/autotagging/jena/ModelIndexer.java
Sat Mar 5 22:10:17 2011
@@ -42,14 +42,12 @@ import com.hp.hpl.jena.tdb.TDBFactory;
*/
public class ModelIndexer implements Closeable {
- private static final Logger log =
LoggerFactory.getLogger(ModelIndexer.class);
-
public static final String URI_FIELD = "uri";
-
public static final String DEFAULT_DBPEDIA_SAMPLE =
"dbpedia/dbpedia-sample-10000.nt";
-
public static final String POPULARITY_SCORE_PROPERTY =
"http://www.iksproject.eu/ns/popularity-score";
+ private static final Logger log =
LoggerFactory.getLogger(ModelIndexer.class);
+
private final IndexWriter iwriter;
private final Model model;
Modified:
incubator/stanbol/trunk/enhancer/autotagging/src/main/java/org/apache/stanbol/autotagging/jena/ModelResampler.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/autotagging/src/main/java/org/apache/stanbol/autotagging/jena/ModelResampler.java?rev=1078371&r1=1078370&r2=1078371&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/autotagging/src/main/java/org/apache/stanbol/autotagging/jena/ModelResampler.java
(original)
+++
incubator/stanbol/trunk/enhancer/autotagging/src/main/java/org/apache/stanbol/autotagging/jena/ModelResampler.java
Sat Mar 5 22:10:17 2011
@@ -46,7 +46,7 @@ import com.hp.hpl.jena.tdb.TDBFactory;
*/
public class ModelResampler {
- private final Logger log = LoggerFactory.getLogger(getClass());
+ private static final Logger log =
LoggerFactory.getLogger(ModelResampler.class);
protected int maxTopResources = 10000;
Modified:
incubator/stanbol/trunk/enhancer/autotagging/src/test/java/org/apache/stanbol/autotagging/AutotaggingTest.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/autotagging/src/test/java/org/apache/stanbol/autotagging/AutotaggingTest.java?rev=1078371&r1=1078370&r2=1078371&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/autotagging/src/test/java/org/apache/stanbol/autotagging/AutotaggingTest.java
(original)
+++
incubator/stanbol/trunk/enhancer/autotagging/src/test/java/org/apache/stanbol/autotagging/AutotaggingTest.java
Sat Mar 5 22:10:17 2011
@@ -37,6 +37,8 @@ import com.hp.hpl.jena.rdf.model.ModelFa
public class AutotaggingTest {
+ protected File defaultIndexDirectory;
+
private Model model;
private RAMDirectory ramDirectory;
@@ -45,8 +47,6 @@ public class AutotaggingTest {
private IndexWriter writer;
- protected File defaultIndexDirectory;
-
public static InputStream getResource(String name) {
InputStream stream =
Thread.currentThread().getContextClassLoader().getResourceAsStream(
name);
@@ -94,8 +94,10 @@ public class AutotaggingTest {
Document hitDoc = isearcher.doc(hits[0].doc);
assertEquals("http://dbpedia.org/resource/%21Action_Pact%21",
hitDoc.get(ModelIndexer.URI_FIELD));
+
String[] types =
hitDoc.getValues("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
assertEquals(3, types.length);
+
isearcher.close();
}
Modified:
incubator/stanbol/trunk/enhancer/autotagging/src/test/java/org/apache/stanbol/autotagging/ModelResamplerTest.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/autotagging/src/test/java/org/apache/stanbol/autotagging/ModelResamplerTest.java?rev=1078371&r1=1078370&r2=1078371&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/autotagging/src/test/java/org/apache/stanbol/autotagging/ModelResamplerTest.java
(original)
+++
incubator/stanbol/trunk/enhancer/autotagging/src/test/java/org/apache/stanbol/autotagging/ModelResamplerTest.java
Sat Mar 5 22:10:17 2011
@@ -69,7 +69,6 @@ public class ModelResamplerTest {
srcModel.add(r[8], p, r[4]);
srcModel.add(r[4], p, r[8]);
-
}
@Test
@@ -85,8 +84,10 @@ public class ModelResamplerTest {
assertEquals(1.0, r1Score, 0.01);
assertTrue(samplerIterator.hasNext());
+
ri = samplerIterator.next();
assertEquals(r[2], ri.resource);
+
double r2Score = ri.score.doubleValue();
assertEquals(0.86, r2Score, 0.01);
Modified:
incubator/stanbol/trunk/enhancer/clerezza/org.apache.stanbol.enhancer.clerezza/src/main/java/org/apache/stanbol/enhancer/clerezza/ClerezzaStore.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/clerezza/org.apache.stanbol.enhancer.clerezza/src/main/java/org/apache/stanbol/enhancer/clerezza/ClerezzaStore.java?rev=1078371&r1=1078370&r2=1078371&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/clerezza/org.apache.stanbol.enhancer.clerezza/src/main/java/org/apache/stanbol/enhancer/clerezza/ClerezzaStore.java
(original)
+++
incubator/stanbol/trunk/enhancer/clerezza/org.apache.stanbol.enhancer.clerezza/src/main/java/org/apache/stanbol/enhancer/clerezza/ClerezzaStore.java
Sat Mar 5 22:10:17 2011
@@ -36,6 +36,7 @@ import org.apache.stanbol.enhancer.servi
/**
* test
+ *
* @author andreas
*/
@Component
@@ -44,8 +45,10 @@ public class ClerezzaStore implements St
@Reference
DiscobitsHandler handler;
+
@Reference
ContentGraphProvider cgProvider;
+
@Reference
TcManager tcManager;
@@ -59,7 +62,7 @@ public class ClerezzaStore implements St
UriRef uriRef = new UriRef(id);
MGraph metadataGraph;
try {
- metadataGraph = tcManager.createMGraph(uriRef);
+ metadataGraph = tcManager.createMGraph(uriRef);
} catch (EntityAlreadyExistsException ex) {
return null;
}
@@ -80,8 +83,8 @@ public class ClerezzaStore implements St
UriRef uriRef = new UriRef(id);
MGraph metadataGraph;
try {
- metadataGraph = tcManager.getMGraph(uriRef);
- } catch(NoSuchEntityException ex) {
+ metadataGraph = tcManager.getMGraph(uriRef);
+ } catch (NoSuchEntityException ex) {
throw new IllegalArgumentException("Is not a content item");
}
ContentItem contentItem = new ClerezzaContentItem(new GraphNode(uriRef,
Modified:
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/AutotaggingServlet.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/AutotaggingServlet.java?rev=1078371&r1=1078370&r2=1078371&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/AutotaggingServlet.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/AutotaggingServlet.java
Sat Mar 5 22:10:17 2011
@@ -66,20 +66,17 @@ public class AutotaggingServlet extends
@Override
/** Create a ContentItem and queue for enhancement */
- protected void doPost(HttpServletRequest req, HttpServletResponse resp)
- throws ServletException, IOException {
- doGet(req,resp);
+ protected void doPost(HttpServletRequest req, HttpServletResponse resp)
throws ServletException, IOException {
+ doGet(req, resp);
}
-
@Override
- protected void doGet(HttpServletRequest req, HttpServletResponse resp)
- throws ServletException, IOException {
+ protected void doGet(HttpServletRequest req, HttpServletResponse resp)
throws ServletException, IOException {
String labelParam = getEntityName(req);
String typeParam = getEntityType(req);
Autotagger autotagger = provider.getAutotagger();
List<TagInfo> suggestions;
- if(typeParam == null){
+ if (typeParam == null) {
suggestions = autotagger.suggest(labelParam);
} else {
suggestions = autotagger.suggestForType(labelParam, typeParam);
@@ -89,19 +86,19 @@ public class AutotaggingServlet extends
resp.setCharacterEncoding("UTF-8");
Writer writer = resp.getWriter();
JSONObject suggestionList = new JSONObject();
- List<Map<String, Object>> suggestionObjects = new
ArrayList<Map<String,Object>>(suggestions.size());
- for (TagInfo suggestion : suggestions){
- Map<String,Object> map = new HashMap<String, Object>();
+ List<Map<String, Object>> suggestionObjects = new
ArrayList<Map<String, Object>>(suggestions.size());
+ for (TagInfo suggestion : suggestions) {
+ Map<String, Object> map = new HashMap<String, Object>();
map.put("uri", suggestion.getId());
- map.put("label",suggestion.getLabel());
- map.put("type",suggestion.getType());
+ map.put("label", suggestion.getLabel());
+ map.put("type", suggestion.getType());
map.put("confidence", suggestion.getConfidence());
suggestionObjects.add(map);
}
try {
suggestionList.put("suggestion", suggestionObjects);
} catch (JSONException e) {
- log.error("Unable to encode suggestions as JSON",e);
+ log.error("Unable to encode suggestions as JSON", e);
resp.sendError(500, e.getMessage());
return;
}
@@ -111,21 +108,21 @@ public class AutotaggingServlet extends
private String getEntityName(HttpServletRequest r) {
final String result = r.getParameter("name");
if (result == null || result.length() == 0) {
- throw new IllegalArgumentException(
- "Missing Parameter name, request should include parameter
\"name\"");
+ throw new IllegalArgumentException("Missing Parameter name,
request should include parameter \"name\"");
}
return result;
}
private String getEntityType(HttpServletRequest r) {
final String result = r.getParameter("type");
-// if(result == null){
-// return null;
-// } else {
-// //convert the Type to the Ontology
-// }
+ // if(result == null){
+ // return null;
+ // } else {
+ // //convert the Type to the Ontology
+ // }
return result;
}
+
@Activate
protected void activate(ComponentContext ctx) throws Exception {
httpService.registerServlet(ALIAS, this, null, null);
Modified:
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/ConfiguredAutotaggerProvider.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/ConfiguredAutotaggerProvider.java?rev=1078371&r1=1078370&r2=1078371&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/ConfiguredAutotaggerProvider.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/ConfiguredAutotaggerProvider.java
Sat Mar 5 22:10:17 2011
@@ -41,7 +41,7 @@ import org.slf4j.LoggerFactory;
@Service
public class ConfiguredAutotaggerProvider implements AutotaggerProvider {
- private final Logger log = LoggerFactory.getLogger(getClass());
+ private static final Logger log =
LoggerFactory.getLogger(ConfiguredAutotaggerProvider.class);
@Property(value = "")
public static final String LUCENE_INDEX_PATH =
"org.apache.stanbol.enhancer.engines.autotagging.indexPath";
Modified:
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/EnhancementRDFUtils.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/EnhancementRDFUtils.java?rev=1078371&r1=1078370&r2=1078371&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/EnhancementRDFUtils.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/EnhancementRDFUtils.java
Sat Mar 5 22:10:17 2011
@@ -32,6 +32,7 @@ import static org.apache.stanbol.enhance
public class EnhancementRDFUtils {
+ // Utility class.
private EnhancementRDFUtils() {
}
Modified:
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/EntityMentionEnhancementEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/EntityMentionEnhancementEngine.java?rev=1078371&r1=1078370&r2=1078371&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/EntityMentionEnhancementEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/EntityMentionEnhancementEngine.java
Sat Mar 5 22:10:17 2011
@@ -60,7 +60,7 @@ import static org.apache.stanbol.enhance
public class EntityMentionEnhancementEngine implements EnhancementEngine,
ServiceProperties {
- private final Logger log = LoggerFactory.getLogger(getClass());
+ private static final Logger log =
LoggerFactory.getLogger(EntityMentionEnhancementEngine.class);
@Reference
AutotaggerProvider autotaggerProvider;
@@ -111,7 +111,9 @@ public class EntityMentionEnhancementEng
}
}
- protected final Collection<TagInfo>
computeEntityRecommendations(Autotagger autotagger, LiteralFactory
literalFactory, MGraph graph, UriRef contentItemId, UriRef textAnnotation,
List<UriRef> subsumedAnnotations) throws IOException {
+ protected final Collection<TagInfo>
computeEntityRecommendations(Autotagger autotagger,
+ LiteralFactory literalFactory, MGraph graph, UriRef contentItemId,
UriRef textAnnotation,
+ List<UriRef> subsumedAnnotations) throws IOException {
// First get the required properties for the parsed textAnnotation
// ... and check the values
String name = EnhancementEngineHelper.getString(graph, textAnnotation,
Modified:
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/RelatedTopicEnhancementEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/RelatedTopicEnhancementEngine.java?rev=1078371&r1=1078370&r2=1078371&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/RelatedTopicEnhancementEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/RelatedTopicEnhancementEngine.java
Sat Mar 5 22:10:17 2011
@@ -53,7 +53,7 @@ public class RelatedTopicEnhancementEngi
protected static final String TEXT_PLAIN_MIMETYPE = "text/plain";
- private final Logger log = LoggerFactory.getLogger(getClass());
+ private static final Logger log =
LoggerFactory.getLogger(RelatedTopicEnhancementEngine.class);
// TODO: make me configurable through an OSGi property
protected String type = "http://www.w3.org/2004/02/skos/core#Concept";
Modified:
incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java?rev=1078371&r1=1078370&r2=1078371&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
Sat Mar 5 22:10:17 2011
@@ -69,12 +69,14 @@ import org.osgi.service.component.Compon
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.*;
+
/**
- * This class provides an interface to the OpenCalais service for Named Entity
Recognition.
+ * This class provides an interface to the OpenCalais service for Named Entity
Recognition.
* It uses the OpenCalais REST service with the 'paramsXML' structures for
passing
* parameters {@link
http://www.opencalais.com/documentation/calais-web-service-api/api-invocation/rest-using-paramsxml)}.
- *
+ *
* @author <a href="mailto:[email protected]">Walter Kasper</a>
*/
@@ -82,426 +84,428 @@ import org.slf4j.LoggerFactory;
@Service
public class OpenCalaisEngine implements EnhancementEngine, ServiceProperties {
- private static Logger log = LoggerFactory.getLogger(OpenCalaisEngine.class);
+ private static Logger log =
LoggerFactory.getLogger(OpenCalaisEngine.class);
+
+ /**
+ * This contains the directly supported MIME types of this enhancement
engine.
+ * For handling other mime-types the plain text must be contained in the
metadata as by Metaxa.
+ */
+ protected static final List<String> SUPPORTED_MIMETYPES =
Arrays.asList(new String[]{"text/plain", "text/html"});
+
+ /**
+ * This contains a list of languages supported by OpenCalais.
+ * If the metadata don't contain a value for the language as the value of
the {@link Property.DC_LANG property}
+ * it is left to the grace of the OpenCalais whether it accepts the text.
+ * OpenCalais uses its own language identifcation anyway.
+ */
+ protected static final List<String> SUPPORTED_LANGUAGES =
Arrays.asList(new String[]{"en", "fr", "es"});
+
+ /**
+ * The default value for the Execution of this Engine. Currently set to
+ * {@link ServiceProperties#ORDERING_EXTRACTION_ENHANCEMENT} + 10. It
should run after Metaxa and LangId.
+ */
+ public static final Integer defaultOrder =
ServiceProperties.ORDERING_EXTRACTION_ENHANCEMENT + 10;
+
+ @Property
+ public static final String LICENSE_KEY =
"org.apache.stanbol.enhancer.engines.opencalais.license";
+
+ @Property(value = "http://api.opencalais.com/enlighten/rest/")
+ public static final String CALAIS_URL_KEY =
"org.apache.stanbol.enhancer.engines.opencalais.url";
+
+ /**
+ * the URL for the Calais REST Service
+ */
+ private String calaisUrl = "http://api.opencalais.com/enlighten/rest/";
+
+ /**
+ * the license key from OpenCalais for using the service
+ */
+ private String licenseKey = null;
+
+ @Reference
+ TcManager tcManager;
- /**
- * This contains the directly supported MIME types of this enhancement
engine. For handling other mime-types the plain text must be contained in the
metadata as by Metaxa.
- */
- protected static final List<String> SUPPORTED_MIMETYPES = Arrays.asList(new
String[]{"text/plain", "text/html"});
-
- /**
- * This contains a list of languages supported by OpenCalais.
- * If the metadata don't contain a value for the language as the value of
the {@link Property.DC_LANG property}
- * it is left to the grace of the OpenCalais whether it accepts the text.
- * OpenCalais uses its own language identifcation anyway.
- */
- protected static final List<String> SUPPORTED_LANGUAGES = Arrays.asList(new
String[]{"en","fr","es"});
-
- /**
- * The default value for the Execution of this Engine. Currently set to
- * {@link ServiceProperties#ORDERING_EXTRACTION_ENHANCEMENT} + 10. It should
run after Metaxa and LangId.
- */
- public static final Integer defaultOrder =
ServiceProperties.ORDERING_EXTRACTION_ENHANCEMENT+10;
-
- @Property
- public static final String LICENSE_KEY =
"org.apache.stanbol.enhancer.engines.opencalais.license";
-
- @Property(value="http://api.opencalais.com/enlighten/rest/")
- public static final String CALAIS_URL_KEY =
"org.apache.stanbol.enhancer.engines.opencalais.url";
-
- /**
- * the URL for the Calais REST Service
- */
- private String calaisUrl = "http://api.opencalais.com/enlighten/rest/";
-
- /**
- * the license key from OpenCalais for using the service
- */
- private String licenseKey = null;
-
- @Reference
- TcManager tcManager;
-
- BundleContext bundleContext;
-
- public String getLicenseKey() {
- return licenseKey;
- }
-
- public void setLicenseKey(String licenseKey) {
- this.licenseKey = licenseKey;
- }
-
- public String getCalaisUrl() {
- return calaisUrl;
- }
-
- public void setCalaisUrl(String calaisUrl) {
- this.calaisUrl = calaisUrl;
- }
-
- public Map<String, Object> getServiceProperties() {
- // TODO Auto-generated method stub
- return Collections.unmodifiableMap(Collections.singletonMap(
- ServiceProperties.ENHANCEMENT_ENGINE_ORDERING,
- (Object) defaultOrder));
- }
-
- /**
- * {@inheritDoc}
- */
- public int canEnhance(ContentItem ci) throws EngineException {
- if (getLicenseKey() == null || getLicenseKey().trim().length()==0) {
- //do nothing if no license key is defined
- log.warn("No license key defined. The engine will not work!");
- return CANNOT_ENHANCE;
- }
- UriRef subj = new UriRef(ci.getId());
- String mimeType = ci.getMimeType().split(";",2)[0];
- if (SUPPORTED_MIMETYPES.contains(mimeType.toLowerCase())) {
- // check language
- String language = getMetadataLanguage(ci.getMetadata(),null);
- if (language != null && !SUPPORTED_LANGUAGES.contains(language)) {
- log.warn("Wrong language for Calais: {}",language);
+ BundleContext bundleContext;
+
+ public String getLicenseKey() {
+ return licenseKey;
+ }
+
+ public void setLicenseKey(String licenseKey) {
+ this.licenseKey = licenseKey;
+ }
+
+ public String getCalaisUrl() {
+ return calaisUrl;
+ }
+
+ public void setCalaisUrl(String calaisUrl) {
+ this.calaisUrl = calaisUrl;
+ }
+
+ public Map<String, Object> getServiceProperties() {
+ // TODO Auto-generated method stub
+ return Collections.unmodifiableMap(Collections.singletonMap(
+ ServiceProperties.ENHANCEMENT_ENGINE_ORDERING,
+ (Object) defaultOrder));
+ }
+
+ public int canEnhance(ContentItem ci) throws EngineException {
+ if (getLicenseKey() == null || getLicenseKey().trim().length() == 0) {
+ //do nothing if no license key is defined
+ log.warn("No license key defined. The engine will not work!");
+ return CANNOT_ENHANCE;
+ }
+ UriRef subj = new UriRef(ci.getId());
+ String mimeType = ci.getMimeType().split(";", 2)[0];
+ if (SUPPORTED_MIMETYPES.contains(mimeType.toLowerCase())) {
+ // check language
+ String language = getMetadataLanguage(ci.getMetadata(), null);
+ if (language != null && !SUPPORTED_LANGUAGES.contains(language)) {
+ log.warn("Wrong language for Calais: {}", language);
+ return CANNOT_ENHANCE;
+ }
+ return ENHANCE_SYNCHRONOUS;
+ } else {
+ // TODO: check whether the metadata graph contains the text
+ Iterator<Triple> it = ci.getMetadata().filter(subj,
NIE_PLAINTEXTCONTENT, null);
+ if (it.hasNext()) {
+ return ENHANCE_SYNCHRONOUS;
+ }
+ }
return CANNOT_ENHANCE;
- }
- return ENHANCE_SYNCHRONOUS;
}
- else {
- // TODO: check whether the metadata graph contains the text
- Iterator<Triple> it = ci.getMetadata().filter(subj,
Properties.NIE_PLAINTEXTCONTENT, null);
- if (it.hasNext()) {
- return ENHANCE_SYNCHRONOUS;
- }
- }
- return CANNOT_ENHANCE;
- }
-
- /**
- * {@inheritDoc}
- */
- public void computeEnhancements(ContentItem ci) throws EngineException {
- String text = "";
- if
(SUPPORTED_MIMETYPES.contains(ci.getMimeType().split(";",2)[0].toLowerCase())) {
- try {
- text = IOUtils.toString(ci.getStream());
- } catch (IOException e) {
- throw new InvalidContentException(this, ci, e);
- }
- }
- else {
- text = getMetadataText(ci.getMetadata(), new UriRef(ci.getId()));
- }
- if (text == null) {
- log.warn("no text found");
- return;
- }
-
- MGraph calaisModel = getCalaisAnalysis(text,ci);
- if (calaisModel != null) {
- createEnhancements(queryModel(calaisModel),ci);
- }
-
- }
-
- /**
- * This generates enhancement structures for the entities from OpenCalais
and adds them to the content item's metadata. For each entity a TextAnnotation
and an EntityAnnotation are created. An EntityAnnotation can relate to several
TextAnnotations.
- *
- * @param occs a Collection of entity information
- * @param ci the content item
- */
- public void createEnhancements(Collection<CalaisEntityOccurrence> occs,
ContentItem ci) {
- LiteralFactory literalFactory = LiteralFactory.getInstance();
- //TODO create TextEnhancement (form, start, end, type?) and
EntityAnnotation (id, name, type)
- HashMap<Resource, UriRef> entityAnnotationMap = new
HashMap<Resource,UriRef>();
- for (CalaisEntityOccurrence occ: occs) {
- UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement(
- ci, this);
- MGraph model = ci.getMetadata();
- model.add(new TripleImpl(textAnnotation, Properties.DC_TYPE, occ.type));
- model.add(new
TripleImpl(textAnnotation,Properties.ENHANCER_SELECTED_TEXT, occ.exact));
- model.add(new TripleImpl(textAnnotation,Properties.ENHANCER_START,
literalFactory.createTypedLiteral(occ.offset)));
- model.add(new TripleImpl(textAnnotation,Properties.ENHANCER_END,
literalFactory.createTypedLiteral(occ.offset + occ.length)));
- model.add(new
TripleImpl(textAnnotation,Properties.ENHANCER_SELECTED_TEXT, occ.exact));
- model.add(new
TripleImpl(textAnnotation,Properties.ENHANCER_SELECTION_CONTEXT,
literalFactory.createTypedLiteral(occ.context)));
- //create EntityAnnotation only once but add a reference to the
textAnnotation
- if (entityAnnotationMap.containsKey(occ.id)) {
- model.add(new
TripleImpl(entityAnnotationMap.get(occ.id),Properties.DC_RELATION,
textAnnotation));
- }
- else {
- UriRef entityAnnotation =
EnhancementEngineHelper.createEntityEnhancement(ci, this);
- entityAnnotationMap.put(occ.id, entityAnnotation);
- model.add(new TripleImpl(entityAnnotation,Properties.DC_RELATION,
textAnnotation));
- model.add(new
TripleImpl(entityAnnotation,Properties.ENHANCER_ENTITY_LABEL, occ.name));
- model.add(new
TripleImpl(entityAnnotation,Properties.ENHANCER_ENTITY_TYPE, occ.type));
- model.add(new
TripleImpl(entityAnnotation,Properties.ENHANCER_ENTITY_REFERENCE, occ.id));
- }
- }
- }
-
- /**
- * This retrieves the annotations from OpenCalais as RDF/XML. From that an
MGraph is created.
- * @param text the text to send to OpenCalais
- * @return an MGraph with all annotations
- * @throws EngineException
- */
- public MGraph getCalaisAnalysis(String text,ContentItem ci) throws
EngineException {
- String mimeType = ci.getMimeType().split(";",2)[0].toLowerCase();
- if (mimeType.equals("text/plain")) {
- mimeType = "text/raw";
- }
- String calaisParams = "<c:params
xmlns:c=\"http://s.opencalais.com/1/pred/\"
xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">"+
- "<c:processingDirectives c:contentType=\""+mimeType+"\" " +
- // "c:enableMetadataType=\"GenericRelations,SocialTags\" "+
- // "c:enableMetadataType=\"GenericRelations\" "+
- "c:outputFormat=\"rdf/xml\" "+
- "c:calculateRelevanceScore=\"false\" "+
- "c:omitOutputtingOriginalText=\"true\""+
- ">"+
- "</c:processingDirectives>"+
- "</c:params>";
- MGraph model = null;
- try {
- StringBuilder postParams = new StringBuilder();
- postParams
- .append("licenseID=")
- .append(URLEncoder.encode(getLicenseKey(), "UTF-8"))
- .append("&content=")
- .append(URLEncoder.encode(text, "UTF-8"))
- .append("&paramsXML=")
- .append(URLEncoder.encode(calaisParams, "UTF-8"));
- // get annotations from Calais
- String calaisResult =
- doPostRequest(
- this.getCalaisUrl(), null, postParams.toString(),
- "application/x-www-form-urlencoded", "UTF-8");
- log.debug("Calais data:\n{}",calaisResult);
- // build model from Calais result
- InputStream in = new
ByteArrayInputStream(calaisResult.getBytes("utf-8"));
- model = readModel(in,"application/rdf+xml");
- } catch (UnsupportedEncodingException e) {
- throw new EngineException(e.getMessage(), e);
- } catch (IOException e) {
- throw new EngineException(e.getMessage(), e);
- }
- return model;
- }
-
- /**
- * This parses an InputStream of RDF data and produces an MGraph from them
- * @param in The InputStream of RDF data
- * @param format the format of the RDF data
- * @return the resulting MGraph or null if the RDF serialization format is
not supported by the parser
- */
- public MGraph readModel(InputStream in, String format) {
- Parser parser = Parser.getInstance();
- if (parser.getSupportedFormats().contains(format)) {
- Graph graph = parser.parse(in, format);
- MGraph model = new SimpleMGraph(graph);
- return model;
- }
- else {
- log.warn("Unsupported RDF format: {}\nSupported RDF formats:
{}",format,parser.getSupportedFormats());
- }
- return null;
- }
-
- /**
- * This extracts the relevant entity information from the Calais RDF data.
The entities and the relted information is extracted by a Sparql query.
- * @param model the MGraph representing the Calais data
- * @return a Collection of entity information
- */
- public Collection<CalaisEntityOccurrence> queryModel(MGraph model) {
- //TODO extract also Geo info (latitude/longitude)?
- String query =
- "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " +
- "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> " +
- "PREFIX p: <http://s.opencalais.com/1/pred/> " +
- "PREFIX t: <http://s.opencalais.com/1/type/em/e/> " +
- "SELECT DISTINCT ?id ?did ?name ?type ?dtype ?offset ?length ?exact
?context ?score WHERE { " +
- "?id p:name ?name ." +
- "?id rdf:type ?type ." +
- "?y p:subject ?id ." +
- "?y p:offset ?offset ." +
- "?y p:length ?length ." +
- "?y p:exact ?exact ." +
- "?y p:detection ?context ." +
- " OPTIONAL { ?z p:subject ?id . ?z p:relevance ?score . } " +
- // get disambiguated entity references if available
- " OPTIONAL { ?did p:subject ?id . ?did p:name ?name . ?did rdf:type ?dtype
. } " +
- "FILTER (" +
- "?type = t:Person || " +
- "?type = t:City || " +
- "?type = t:Continent || " +
- "?type = t:Country || " +
- "?type = t:ProvinceOrState || " +
- "?type = t:Region || " +
- "?type = t:Company || " +
- "?type = t:Facility || " +
- "?type = t:Organization " +
- ")" +
- "} ";
- Collection<CalaisEntityOccurrence> result = new
ArrayList<CalaisEntityOccurrence>();
- try {
- SelectQuery sQuery = (SelectQuery)QueryParser.getInstance().parse(query);
- ResultSet rs = tcManager.executeSparqlQuery(sQuery, model);
- while(rs.hasNext()) {
- SolutionMapping row = rs.next();
- CalaisEntityOccurrence occ = new CalaisEntityOccurrence();
- Resource disambiguated = row.get("did");
- occ.id = (disambiguated == null?row.get("id"):disambiguated);
- occ.type = (disambiguated == null?row.get("type"):row.get("dtype"));
- occ.name = row.get("name");
- occ.exact = row.get("exact");
- //TODO for html the offsets might not be those of the original
document but refer to a cleaned up version?
- occ.offset =
Integer.valueOf(((Literal)row.get("offset")).getLexicalForm());
- // remove brackets
- occ.context =
((Literal)row.get("context")).getLexicalForm().replaceAll("[\\[\\]]", "");
- occ.length =
Integer.valueOf(((Literal)row.get("length")).getLexicalForm());
- if (row.get("score") != null) {
- occ.relevance =
Double.valueOf(((Literal)row.get("score")).getLexicalForm());
- }
- result.add(occ);
- }
- } catch (ParseException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- return result;
- }
-
-
- /**
- * This sends a POST request to the given url.
- *
- * @param targetUrl a <code>String</code> with the target url
- * @param params a <code>Map<String,String></code> object containing the url
parameters; use <code>null</code> if there are no parameters
- * @param body a <code>String</code> with the body of the post request; use
- * <code>null</code> if the body is empty
- * @param contentType a <code>String</code> with the content type of the post
- * request; use <code>null</code> for the default content type
- * <code>text/xml; charset=utf-8</code>
- * @param responseEncoding a <code>String</code> with the encoding used to
- * read the server response; use <code>null</code> for the default charset
- * @return a <code>String</code> with the server response
- * @throws IOException if an error occurs
- */
- public static String doPostRequest(
- String targetUrl, Map<String,String> params, String body, String
contentType,
- String responseEncoding)
- throws IOException {
-
- StringBuilder urlString = new StringBuilder(targetUrl);
-
- // add parameters to url
- if (params != null) {
- if (!params.isEmpty()) {
- urlString.append("?");
- }
- Iterator<Map.Entry<String, String>> it = params.entrySet().iterator();
- while (it.hasNext()) {
- Map.Entry<String, String> oneParam = it.next();
- urlString
- .append(oneParam.getKey()).append("=")
- .append(URLEncoder.encode(oneParam.getValue(), "UTF-8"));
+
+ public void computeEnhancements(ContentItem ci) throws EngineException {
+ String text = "";
+ if (SUPPORTED_MIMETYPES.contains(ci.getMimeType().split(";",
2)[0].toLowerCase())) {
+ try {
+ text = IOUtils.toString(ci.getStream());
+ } catch (IOException e) {
+ throw new InvalidContentException(this, ci, e);
+ }
+ } else {
+ text = getMetadataText(ci.getMetadata(), new UriRef(ci.getId()));
+ }
+ if (text == null) {
+ log.warn("no text found");
+ return;
+ }
+
+ MGraph calaisModel = getCalaisAnalysis(text, ci);
+ if (calaisModel != null) {
+ createEnhancements(queryModel(calaisModel), ci);
+ }
+
+ }
+
+ /**
+ * This generates enhancement structures for the entities from OpenCalais
+ * and adds them to the content item's metadata.
+ * For each entity a TextAnnotation and an EntityAnnotation are created.
+ * An EntityAnnotation can relate to several TextAnnotations.
+ *
+ * @param occs a Collection of entity information
+ * @param ci the content item
+ */
+ public void createEnhancements(Collection<CalaisEntityOccurrence> occs,
ContentItem ci) {
+ LiteralFactory literalFactory = LiteralFactory.getInstance();
+ //TODO create TextEnhancement (form, start, end, type?) and
EntityAnnotation (id, name, type)
+ HashMap<Resource, UriRef> entityAnnotationMap = new HashMap<Resource,
UriRef>();
+ for (CalaisEntityOccurrence occ : occs) {
+ UriRef textAnnotation =
EnhancementEngineHelper.createTextEnhancement(
+ ci, this);
+ MGraph model = ci.getMetadata();
+ model.add(new TripleImpl(textAnnotation, DC_TYPE, occ.type));
+ model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
occ.exact));
+ model.add(new TripleImpl(textAnnotation, ENHANCER_START,
literalFactory.createTypedLiteral(occ.offset)));
+ model.add(new TripleImpl(textAnnotation, ENHANCER_END,
literalFactory.createTypedLiteral(occ.offset + occ.length)));
+ model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
occ.exact));
+ model.add(new TripleImpl(textAnnotation,
ENHANCER_SELECTION_CONTEXT, literalFactory.createTypedLiteral(occ.context)));
+ //create EntityAnnotation only once but add a reference to the
textAnnotation
+ if (entityAnnotationMap.containsKey(occ.id)) {
+ model.add(new TripleImpl(entityAnnotationMap.get(occ.id),
DC_RELATION, textAnnotation));
+ } else {
+ UriRef entityAnnotation =
EnhancementEngineHelper.createEntityEnhancement(ci, this);
+ entityAnnotationMap.put(occ.id, entityAnnotation);
+ model.add(new TripleImpl(entityAnnotation, DC_RELATION,
textAnnotation));
+ model.add(new TripleImpl(entityAnnotation,
ENHANCER_ENTITY_LABEL, occ.name));
+ model.add(new TripleImpl(entityAnnotation,
ENHANCER_ENTITY_TYPE, occ.type));
+ model.add(new TripleImpl(entityAnnotation,
ENHANCER_ENTITY_REFERENCE, occ.id));
+ }
+ }
+ }
+
+ /**
+ * This retrieves the annotations from OpenCalais as RDF/XML. From that an
MGraph is created.
+ *
+ * @param text the text to send to OpenCalais
+ *
+ * @return an MGraph with all annotations
+ *
+ * @throws EngineException
+ */
+ public MGraph getCalaisAnalysis(String text, ContentItem ci) throws
EngineException {
+ String mimeType = ci.getMimeType().split(";", 2)[0].toLowerCase();
+ if (mimeType.equals("text/plain")) {
+ mimeType = "text/raw";
+ }
+ String calaisParams = "<c:params
xmlns:c=\"http://s.opencalais.com/1/pred/\"
xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">" +
+ "<c:processingDirectives c:contentType=\"" + mimeType + "\" " +
+ // "c:enableMetadataType=\"GenericRelations,SocialTags\" "+
+ // "c:enableMetadataType=\"GenericRelations\" "+
+ "c:outputFormat=\"rdf/xml\" " +
+ "c:calculateRelevanceScore=\"false\" " +
+ "c:omitOutputtingOriginalText=\"true\"" +
+ ">" +
+ "</c:processingDirectives>" +
+ "</c:params>";
+ MGraph model = null;
+ try {
+ StringBuilder postParams = new StringBuilder();
+ postParams
+ .append("licenseID=")
+ .append(URLEncoder.encode(getLicenseKey(), "UTF-8"))
+ .append("&content=")
+ .append(URLEncoder.encode(text, "UTF-8"))
+ .append("&paramsXML=")
+ .append(URLEncoder.encode(calaisParams, "UTF-8"));
+ // get annotations from Calais
+ String calaisResult =
+ doPostRequest(
+ this.getCalaisUrl(), null, postParams.toString(),
+ "application/x-www-form-urlencoded", "UTF-8");
+ log.debug("Calais data:\n{}", calaisResult);
+ // build model from Calais result
+ InputStream in = new
ByteArrayInputStream(calaisResult.getBytes("utf-8"));
+ model = readModel(in, "application/rdf+xml");
+ } catch (UnsupportedEncodingException e) {
+ throw new EngineException(e.getMessage(), e);
+ } catch (IOException e) {
+ throw new EngineException(e.getMessage(), e);
+ }
+ return model;
+ }
+
+ /**
+ * This parses an InputStream of RDF data and produces an MGraph from them
+ *
+ * @param in The InputStream of RDF data
+ * @param format the format of the RDF data
+ *
+ * @return the resulting MGraph or null if the RDF serialization format is
not supported by the parser
+ */
+ public MGraph readModel(InputStream in, String format) {
+ Parser parser = Parser.getInstance();
+ if (parser.getSupportedFormats().contains(format)) {
+ Graph graph = parser.parse(in, format);
+ MGraph model = new SimpleMGraph(graph);
+ return model;
+ } else {
+ log.warn("Unsupported RDF format: {}\nSupported RDF formats: {}",
+ format, parser.getSupportedFormats());
+ }
+ return null;
+ }
+
+ /**
+ * This extracts the relevant entity information from the Calais RDF data.
+ * The entities and the relted information is extracted by a Sparql query.
+ *
+ * @param model the MGraph representing the Calais data
+ *
+ * @return a Collection of entity information
+ */
+ public Collection<CalaisEntityOccurrence> queryModel(MGraph model) {
+ //TODO extract also Geo info (latitude/longitude)?
+ String query =
+ "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " +
+ "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> " +
+ "PREFIX p: <http://s.opencalais.com/1/pred/> " +
+ "PREFIX t: <http://s.opencalais.com/1/type/em/e/> " +
+ "SELECT DISTINCT ?id ?did ?name ?type ?dtype ?offset
?length ?exact ?context ?score WHERE { " +
+ "?id p:name ?name ." +
+ "?id rdf:type ?type ." +
+ "?y p:subject ?id ." +
+ "?y p:offset ?offset ." +
+ "?y p:length ?length ." +
+ "?y p:exact ?exact ." +
+ "?y p:detection ?context ." +
+ " OPTIONAL { ?z p:subject ?id . ?z p:relevance ?score
. } " +
+ // get disambiguated entity references if available
+ " OPTIONAL { ?did p:subject ?id . ?did p:name ?name .
?did rdf:type ?dtype . } " +
+ "FILTER (" +
+ "?type = t:Person || " +
+ "?type = t:City || " +
+ "?type = t:Continent || " +
+ "?type = t:Country || " +
+ "?type = t:ProvinceOrState || " +
+ "?type = t:Region || " +
+ "?type = t:Company || " +
+ "?type = t:Facility || " +
+ "?type = t:Organization " +
+ ")" +
+ "} ";
+ Collection<CalaisEntityOccurrence> result = new
ArrayList<CalaisEntityOccurrence>();
+ try {
+ SelectQuery sQuery = (SelectQuery)
QueryParser.getInstance().parse(query);
+ ResultSet rs = tcManager.executeSparqlQuery(sQuery, model);
+ while (rs.hasNext()) {
+ SolutionMapping row = rs.next();
+ CalaisEntityOccurrence occ = new CalaisEntityOccurrence();
+ Resource disambiguated = row.get("did");
+ occ.id = (disambiguated == null ? row.get("id") :
disambiguated);
+ occ.type = (disambiguated == null ? row.get("type") :
row.get("dtype"));
+ occ.name = row.get("name");
+ occ.exact = row.get("exact");
+ //TODO for html the offsets might not be those of the original
document but refer to a cleaned up version?
+ occ.offset = Integer.valueOf(((Literal)
row.get("offset")).getLexicalForm());
+ // remove brackets
+ occ.context = ((Literal)
row.get("context")).getLexicalForm().replaceAll("[\\[\\]]", "");
+ occ.length = Integer.valueOf(((Literal)
row.get("length")).getLexicalForm());
+ if (row.get("score") != null) {
+ occ.relevance = Double.valueOf(((Literal)
row.get("score")).getLexicalForm());
+ }
+ result.add(occ);
+ }
+ } catch (ParseException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ return result;
+ }
+
+
+ /**
+ * This sends a POST request to the given url.
+ *
+ * @param targetUrl a <code>String</code> with the target url
+ * @param params a <code>Map<String,String></code> object containing the
url parameters;
+ * use <code>null</code> if there are no parameters
+ * @param body a <code>String</code> with the body of the post request; use
+ * <code>null</code> if the body is empty
+ * @param contentType a <code>String</code> with the content type of the
post
+ * request; use <code>null</code> for the default content type
+ * <code>text/xml; charset=utf-8</code>
+ * @param responseEncoding a <code>String</code> with the encoding used to
+ * read the server response; use <code>null</code> for the default
charset
+ *
+ * @return a <code>String</code> with the server response
+ *
+ * @throws IOException if an error occurs
+ */
+ public static String doPostRequest(
+ String targetUrl, Map<String, String> params, String body, String
contentType,
+ String responseEncoding)
+ throws IOException {
+
+ StringBuilder urlString = new StringBuilder(targetUrl);
+
+ // add parameters to url
+ if (params != null) {
+ if (!params.isEmpty()) {
+ urlString.append("?");
+ }
+ Iterator<Map.Entry<String, String>> it =
params.entrySet().iterator();
+ while (it.hasNext()) {
+ Map.Entry<String, String> oneParam = it.next();
+ urlString
+ .append(oneParam.getKey()).append("=")
+ .append(URLEncoder.encode(oneParam.getValue(),
"UTF-8"));
+ if (it.hasNext()) {
+ urlString.append("&");
+ }
+ }
+ }
+
+ // init connection
+ URL url = new URL(urlString.toString());
+ HttpURLConnection urlConn = (HttpURLConnection) url.openConnection();
+ urlConn.setRequestMethod("POST");
+ urlConn.setDoInput(true);
+ if (null != body) {
+ urlConn.setDoOutput(true);
+ } else {
+ urlConn.setDoOutput(false);
+ }
+ urlConn.setUseCaches(false);
+ if (null == contentType) {
+ contentType = "text/xml; charset=utf-8";
+ }
+ urlConn.setRequestProperty("Content-Type", contentType);
+
+ // send POST output
+ if (null != body) {
+ OutputStreamWriter printout = new OutputStreamWriter(
+ urlConn.getOutputStream(), "UTF8");
+ printout.write(body);
+ printout.flush();
+ printout.close();
+ }
+
+ // get response data
+ if (null == responseEncoding) {
+ responseEncoding = Charset.defaultCharset().toString();
+ }
+ return IOUtils.toString(
+ urlConn.getInputStream(), responseEncoding);
+ }
+
+ public String getMetadataText(MGraph model, NonLiteral subj) {
+ String text = "";
+ for (Iterator<Triple> it = model.filter(subj, NIE_PLAINTEXTCONTENT,
null); it.hasNext();) {
+ text += getLexicalForm(it.next().getObject());
+ }
+ if (text.trim().length() > 0) {
+ return text;
+ }
+ return null;
+ }
+
+ public String getMetadataLanguage(MGraph model, NonLiteral subj) {
+ Iterator<Triple> it = model.filter(subj, DC_LANGUAGE, null);
if (it.hasNext()) {
- urlString.append("&");
+ Resource langNode = it.next().getObject();
+ return getLexicalForm(langNode);
+ }
+ return null;
+ }
+
+ public String getLexicalForm(Resource res) {
+ if (res == null) {
+ return null;
+ } else if (res instanceof Literal) {
+ return ((Literal) res).getLexicalForm();
+ } else {
+ return res.toString();
}
- }
}
-
- // init connection
- URL url = new URL(urlString.toString());
- HttpURLConnection urlConn = (HttpURLConnection)url.openConnection();
- urlConn.setRequestMethod("POST");
- urlConn.setDoInput(true);
- if (null != body) {
- urlConn.setDoOutput(true);
- }
- else {
- urlConn.setDoOutput(false);
- }
- urlConn.setUseCaches(false);
- if (null == contentType) {
- contentType = "text/xml; charset=utf-8";
- }
- urlConn.setRequestProperty("Content-Type", contentType);
-
- // send POST output
- if (null != body) {
- OutputStreamWriter printout = new OutputStreamWriter(
- urlConn.getOutputStream(), "UTF8");
- printout.write(body);
- printout.flush();
- printout.close();
- }
-
- // get response data
- if (null == responseEncoding) {
- responseEncoding = Charset.defaultCharset().toString();
- }
- return IOUtils.toString(
- urlConn.getInputStream(), responseEncoding);
- }
-
- public String getMetadataText(MGraph model, NonLiteral subj) {
- String text = "";
- for (Iterator<Triple> it = model.filter(subj,
Properties.NIE_PLAINTEXTCONTENT, null); it.hasNext();) {
- text += getLexicalForm(it.next().getObject());
- }
- if (text.trim().length() > 0) {
- return text;
- }
- return null;
- }
- public String getMetadataLanguage(MGraph model, NonLiteral subj) {
- Iterator<Triple> it = model.filter(subj, Properties.DC_LANGUAGE, null);
- if (it.hasNext()) {
- Resource langNode = it.next().getObject();
- return getLexicalForm(langNode);
- }
- return null;
- }
-
- public String getLexicalForm(Resource res) {
- if (res == null) {
- return null;
- }
- else if (res instanceof Literal) {
- return ((Literal)res).getLexicalForm();
- }
- else
- return res.toString();
- }
-
- /**
- * The activate method.
- *
- * @param ce
- * the {@link ComponentContext}
- */
- protected void activate(@SuppressWarnings("unused") ComponentContext ce) {
- if (ce != null) {
- this.bundleContext = ce.getBundleContext();
- //TODO initialize Extractor
- Dictionary<String,String> properties = ce.getProperties();
- String license = properties.get(LICENSE_KEY);
- String url = properties.get(CALAIS_URL_KEY);
- setLicenseKey(license);
- setCalaisUrl(url);
-// this.tcManager = TcManager.getInstance();
- }
- }
-
- /**
- * The deactivate method.
- *
- * @param ce
- * the {@link ComponentContext}
- */
- protected void deactivate(@SuppressWarnings("unused") ComponentContext ce) {
- }
+ /**
+ * The activate method.
+ *
+ * @param ce the {@link ComponentContext}
+ */
+ protected void activate(@SuppressWarnings("unused") ComponentContext ce) {
+ if (ce != null) {
+ this.bundleContext = ce.getBundleContext();
+ //TODO initialize Extractor
+ Dictionary<String, String> properties = ce.getProperties();
+ String license = properties.get(LICENSE_KEY);
+ String url = properties.get(CALAIS_URL_KEY);
+ setLicenseKey(license);
+ setCalaisUrl(url);
+ // this.tcManager = TcManager.getInstance();
+ }
+ }
+ /**
+ * The deactivate method.
+ *
+ * @param ce the {@link ComponentContext}
+ */
+ protected void deactivate(@SuppressWarnings("unused") ComponentContext ce)
{
+
+ }
}
Modified:
incubator/stanbol/trunk/enhancer/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/StatelessEngineTest.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/StatelessEngineTest.java?rev=1078371&r1=1078370&r2=1078371&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/StatelessEngineTest.java
(original)
+++
incubator/stanbol/trunk/enhancer/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/StatelessEngineTest.java
Sat Mar 5 22:10:17 2011
@@ -74,8 +74,8 @@ public class StatelessEngineTest extends
"text/rdf+nt",
"<urn:enhancement.*www.w3.org/1999/02/22-rdf-syntax-ns#type.*ontology/TextAnnotation>",
};
-
- for(int i=0 ; i < formats.length; i+=3) {
+
+ for (int i = 0; i < formats.length; i += 3) {
executor.execute(
builder.buildPostRequest("/engines")
.withHeader("Accept", formats[i])
Modified:
incubator/stanbol/trunk/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/JCRContentItem.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/JCRContentItem.java?rev=1078371&r1=1078370&r2=1078371&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/JCRContentItem.java
(original)
+++
incubator/stanbol/trunk/enhancer/stores/jcrstore/src/main/java/org/apache/stanbol/enhancer/store/jcr/JCRContentItem.java
Sat Mar 5 22:10:17 2011
@@ -39,14 +39,15 @@ import org.slf4j.LoggerFactory;
public class JCRContentItem implements ContentItem, GraphListener {
+ public static final String ENHANCER_ID_PROP = "stanbolEnhancerId";
+
private static final String OBJECT = "object";
private static final String PREDICATE = "predicate";
private static final String SUBJECT = "subject";
private static final String JCR_DATA = "jcr:data";
private static final String JCR_MIME_TYPE = "jcr:mimeType";
- public static final String ENHANCER_ID_PROP = "stanbolEnhancerId";
- private final Logger log = LoggerFactory.getLogger(getClass());
+ private static final Logger log = LoggerFactory.getLogger(getClass());
private Node jcrNode;