Author: rwesten
Date: Tue May 15 07:05:23 2012
New Revision: 1338567
URL: http://svn.apache.org/viewvc?rev=1338567&view=rev
Log:
STANBOL-583: changed the implementation so that the XML-escaped text is streamed
directly to the HTTP request. While these changes are a clear improvement, they
have not solved the remaining issues (failing unit test of the CELI NER engine).
Modified:
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java
Modified:
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java?rev=1338567&r1=1338566&r2=1338567&view=diff
==============================================================================
---
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java
(original)
+++
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java
Tue May 15 07:05:23 2012
@@ -1,11 +1,15 @@
package org.apache.stanbol.enhancer.engines.celi.ner.impl;
+import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
+import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.URL;
+import java.nio.charset.Charset;
+import java.util.Collections;
import java.util.List;
import java.util.Vector;
@@ -23,11 +27,30 @@ import org.slf4j.LoggerFactory;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
-
public class NERserviceClientHTTP {
-
- private URL serviceEP;
- private String licenseKey;
+ /**
+ * The UTF-8 {@link Charset}
+ */
+ private static final Charset UTF8 = Charset.forName("UTF-8");
+ /**
+ * The content type "text/xml; charset={@link #UTF8}"
+ */
+ private static final String CONTENT_TYPE = "text/xml;
charset="+UTF8.name();
+ /**
+ * The XML version, encoding; SOAP envelope, header and starting element
of the body;
+ * processTextRequest and text starting element.
+ */
+ private static final String REQUEST_PREFIX = "<?xml version=\"1.0\"
encoding=\""+UTF8.name()+"\"?>" +
+ "<soapenv:Envelope
xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\" " +
+
"xmlns:v0u0=\"http://linguagrid.org/ns/namedentityrecognition/v0u0\"><soapenv:Header/>"
+
+ "<soapenv:Body><v0u0:processTextRequest><v0u0:text>";
+ /**
+ * closes the text, processTextRequest, SOAP body and envelope
+ */
+ private static final String REQUEST_SUFFIX =
"</v0u0:text></v0u0:processTextRequest></soapenv:Body></soapenv:Envelope>";
+
+ private final URL serviceEP;
+ private final String licenseKey;
private final Logger log = LoggerFactory.getLogger(getClass());
@@ -35,8 +58,50 @@ public class NERserviceClientHTTP {
this.serviceEP=serviceUrl;
this.licenseKey=licenseKey;
}
-
- public InputStream doPostRequest(URL url, String body) throws
IOException {
+ /**
+ * Creates a POST request to the {@link #serviceEP}. Input and output are
+ * enabled, caching is disabled and the "Content-Type" header is set. If the
+ * {@link #licenseKey} is not null it is sent as Basic "Authorization" header.
+ * Request data can be written to {@link HttpURLConnection#getOutputStream()}.
+ * @return the configured connection
+ * @throws IOException if the connection cannot be opened or configured
+ */
+ private HttpURLConnection createPostRequest() throws IOException {
+ HttpURLConnection urlConn = (HttpURLConnection)
serviceEP.openConnection();
+ urlConn.setRequestMethod("POST");
+ urlConn.setDoInput(true);
+ urlConn.setDoOutput(true);
+ urlConn.setUseCaches(false);
+ if(CONTENT_TYPE != null){
+ urlConn.setRequestProperty("Content-Type",
CONTENT_TYPE);
+ }
+ if(this.licenseKey!=null){
+ String encoded =
Base64.encode(this.licenseKey.getBytes(UTF8));
+ urlConn.setRequestProperty("Authorization", "Basic
"+encoded);
+ }
+ return urlConn;
+ }
+ /**
+ * Performs the request and provides access to the response data.
+ * @param urlConn the connection to read the response from
+ * @return the stream of {@link HttpURLConnection#getInputStream()}
+ * @throws IOException if the response stream cannot be obtained
+ */
+ private InputStream doRequest(HttpURLConnection urlConn) throws
IOException {
+ // NOTE(review): disconnect() is called before the response is read - confirm this is intended
+ urlConn.disconnect();
+
+ // get response data
+ return urlConn.getInputStream();
+
+ }
+ /**
+ * use {@link #createPostRequest()} and {@link
#doRequest(HttpURLConnection)}
+ * to avoid creating in-memory copies of the parsed text with
+ * StringEscapeUtils#escapeXml(String).
+ */
+ @Deprecated
+ private InputStream doPostRequest(URL url, String body) throws
IOException {
HttpURLConnection urlConn = (HttpURLConnection)
url.openConnection();
urlConn.setRequestMethod("POST");
@@ -69,25 +134,32 @@ public class NERserviceClientHTTP {
return urlConn.getInputStream();
}
-
public List<NamedEntity> extractEntities(String text) {
-
+ if(text == null || text.isEmpty()){
+ //no text -> no extractions
+ return Collections.emptyList();
+ }
List<NamedEntity> extractedNE = new Vector<NamedEntity>();
try {
- String txt = StringEscapeUtils.escapeXml(text);
- String xmldata = "<?xml version=\"1.0\"
encoding=\"UTF-8\"?><soapenv:Envelope
xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\"
xmlns:v0u0=\"http://linguagrid.org/ns/namedentityrecognition/v0u0\"><soapenv:Header/><soapenv:Body><v0u0:processTextRequest><v0u0:text>"
- + txt +
"</v0u0:text></v0u0:processTextRequest></soapenv:Body></soapenv:Envelope>";
-
- InputStream resultStream = doPostRequest(serviceEP,
xmldata);
+ //create the POST request
+ HttpURLConnection con = createPostRequest();
+ //write content
+ BufferedWriter writer = new BufferedWriter(new
OutputStreamWriter(con.getOutputStream(),UTF8));
+ writer.write(REQUEST_PREFIX);
+ StringEscapeUtils.escapeXml(writer, text);
+ writer.write(REQUEST_SUFFIX);
+ writer.close();
+ //now perform the request
+ InputStream stream = doRequest(con);
- // Create SoapMessage
+ // Create SoapMessage and parse the results
MessageFactory msgFactory =
MessageFactory.newInstance();
SOAPMessage message = msgFactory.createMessage();
SOAPPart soapPart = message.getSOAPPart();
// Load the SOAP text into a stream source
- StreamSource source = new StreamSource(resultStream);
+ StreamSource source = new StreamSource(stream);
// Set contents of message
soapPart.setContent(source);
@@ -117,4 +189,4 @@ public class NERserviceClientHTTP {
return extractedNE;
}
-}
+}
\ No newline at end of file