Author: kwright
Date: Thu May 11 10:36:57 2017
New Revision: 1794806

URL: http://svn.apache.org/viewvc?rev=1794806&view=rev
Log:
Move tika service functionality to new connector

Added:
    manifoldcf/branches/CONNECTORS-1425/connectors/tikaservice/
      - copied from r1794719, 
manifoldcf/branches/CONNECTORS-1425/connectors/tika/
    manifoldcf/branches/CONNECTORS-1425/connectors/tikaservice/.gitignore
      - copied unchanged from r1794722, 
manifoldcf/branches/CONNECTORS-1425/connectors/tika/.gitignore
    
manifoldcf/branches/CONNECTORS-1425/connectors/tikaservice/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaConfig.java
      - copied unchanged from r1794722, 
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaConfig.java
    
manifoldcf/branches/CONNECTORS-1425/connectors/tikaservice/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaExtractor.java
      - copied, changed from r1794772, 
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaExtractor.java
    
manifoldcf/branches/CONNECTORS-1425/connectors/tikaservice/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_en_US.properties
      - copied unchanged from r1794722, 
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_en_US.properties
    
manifoldcf/branches/CONNECTORS-1425/connectors/tikaservice/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_es_ES.properties
      - copied unchanged from r1794722, 
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_es_ES.properties
    
manifoldcf/branches/CONNECTORS-1425/connectors/tikaservice/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_ja_JP.properties
      - copied unchanged from r1794722, 
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_ja_JP.properties
    
manifoldcf/branches/CONNECTORS-1425/connectors/tikaservice/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_zh_CN.properties
      - copied unchanged from r1794722, 
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_zh_CN.properties
    
manifoldcf/branches/CONNECTORS-1425/connectors/tikaservice/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/editSpecification.js
      - copied unchanged from r1794722, 
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/editSpecification.js
    
manifoldcf/branches/CONNECTORS-1425/connectors/tikaservice/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/editSpecification_TikaType.html
      - copied unchanged from r1794722, 
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/editSpecification_TikaType.html
    
manifoldcf/branches/CONNECTORS-1425/connectors/tikaservice/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/viewSpecification.html
      - copied unchanged from r1794722, 
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/viewSpecification.html
Removed:
    
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/editSpecification_TikaType.html
Modified:
    manifoldcf/branches/CONNECTORS-1425/connectors/pom.xml
    manifoldcf/branches/CONNECTORS-1425/connectors/tika/.gitignore
    
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaConfig.java
    
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaExtractor.java
    
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_en_US.properties
    
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_es_ES.properties
    
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_ja_JP.properties
    
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_zh_CN.properties
    
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/editSpecification.js
    
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/viewSpecification.html
    manifoldcf/branches/CONNECTORS-1425/connectors/tikaservice/build.xml
    manifoldcf/branches/CONNECTORS-1425/connectors/tikaservice/pom.xml

Modified: manifoldcf/branches/CONNECTORS-1425/connectors/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1425/connectors/pom.xml?rev=1794806&r1=1794805&r2=1794806&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1425/connectors/pom.xml (original)
+++ manifoldcf/branches/CONNECTORS-1425/connectors/pom.xml Thu May 11 10:36:57 2017
@@ -63,6 +63,7 @@
     <module>amazoncloudsearch</module>
     <module>forcedmetadata</module>
     <module>tika</module>
+    <module>tikaservice</module>
     <module>documentfilter</module>
     <module>searchblox</module>
     <module>confluence</module>

Modified: manifoldcf/branches/CONNECTORS-1425/connectors/tika/.gitignore
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1425/connectors/tika/.gitignore?rev=1794806&r1=1794805&r2=1794806&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1425/connectors/tika/.gitignore (original)
+++ manifoldcf/branches/CONNECTORS-1425/connectors/tika/.gitignore Thu May 11 10:36:57 2017
@@ -1,4 +1,3 @@
-/target/
 /.classpath
-/.settings/
 /.project
+/.settings/

Modified: manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaConfig.java?rev=1794806&r1=1794805&r2=1794806&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaConfig.java (original)
+++ manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaConfig.java Thu May 11 10:36:57 2017
@@ -37,12 +37,5 @@ public class TikaConfig {
   public static final String ATTRIBUTE_SOURCE = "source";
   public static final String ATTRIBUTE_TARGET = "target";
   public static final String ATTRIBUTE_VALUE = "value";
-  public static final String TIKAHOSTNAME_DEFAULT = "localhost";
-  public static final int TIKAPORT_DEFAULT = 9998;
-  public static final String NODE_TIKAHOSTNAME = "tikaHostname";
-  public static final String NODE_TIKAPORT = "tikaPort";
-  public static final String NODE_TIKASERVER = "tikaServer";
-  public static final long TIKARETRY_DEFAULT = 10000;
-  public static final String NODE_TIKARETRY = "tikaRetry";
   
 }

Modified: manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaExtractor.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaExtractor.java?rev=1794806&r1=1794805&r2=1794806&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaExtractor.java (original)
+++ manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaExtractor.java Thu May 11 10:36:57 2017
@@ -19,47 +19,30 @@
 package org.apache.manifoldcf.agents.transformation.tika;
 
 import org.apache.manifoldcf.core.interfaces.*;
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.io.input.TeeInputStream;
-import org.apache.http.HttpEntity;
-import org.apache.http.HttpHost;
-import org.apache.http.HttpResponse;
-import org.apache.http.client.HttpClient;
-import org.apache.http.client.methods.HttpPost;
-import org.apache.http.client.methods.HttpPut;
-import org.apache.http.entity.InputStreamEntity;
-import org.apache.http.impl.client.HttpClientBuilder;
 import org.apache.manifoldcf.agents.interfaces.*;
 import org.apache.manifoldcf.agents.system.Logging;
 
 import java.io.*;
-import java.net.URI;
-import java.net.URISyntaxException;
 import java.util.*;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaMetadataKeys;
 import org.apache.tika.parser.html.BoilerpipeContentHandler;
-import org.json.simple.JSONObject;
-import org.json.simple.parser.JSONParser;
-import org.json.simple.parser.ParseException;
 
 import de.l3s.boilerpipe.BoilerpipeExtractor;
 
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
-/**
- * This connector works as a transformation connector, but does nothing other
- * than logging.
- *
- */
-public class TikaExtractor extends 
org.apache.manifoldcf.agents.transformation.BaseTransformationConnector {
+/** This connector works as a transformation connector, but does nothing other 
than logging.
+*
+*/
+public class TikaExtractor extends 
org.apache.manifoldcf.agents.transformation.BaseTransformationConnector
+{
   public static final String _rcsid = "@(#)$Id$";
 
   private static final String EDIT_SPECIFICATION_JS = "editSpecification.js";
-  private static final String EDIT_SPECIFICATION_TIKATYPE_HTML = 
"editSpecification_TikaType.html";
   private static final String EDIT_SPECIFICATION_FIELDMAPPING_HTML = 
"editSpecification_FieldMapping.html";
   private static final String EDIT_SPECIFICATION_EXCEPTIONS_HTML = 
"editSpecification_Exceptions.html";
   private static final String EDIT_SPECIFICATION_BOILERPLATE_HTML = 
"editSpecification_Boilerplate.html";
@@ -67,221 +50,159 @@ public class TikaExtractor extends org.a
 
   protected static final String ACTIVITY_EXTRACT = "extract";
 
-  protected static final String[] activitiesList = new String[] { 
ACTIVITY_EXTRACT };
-
+  protected static final String[] activitiesList = new 
String[]{ACTIVITY_EXTRACT};
+  
   /** We handle up to 64K in memory; after that we go to disk. */
   protected static final long inMemoryMaximumFile = 65536;
-
-  /**
-   * Return a list of activities that this connector generates. The connector
-   * does NOT need to be connected before this method is called.
-   * 
-   * @return the set of activities.
-   */
+  
+  /** Return a list of activities that this connector generates.
+  * The connector does NOT need to be connected before this method is called.
+  *@return the set of activities.
+  */
   @Override
-  public String[] getActivitiesList() {
+  public String[] getActivitiesList()
+  {
     return activitiesList;
   }
 
-  /**
-   * Get an output version string, given an output specification. The output
-   * version string is used to uniquely describe the pertinent details of the
-   * output specification and the configuration, to allow the Connector
-   * Framework to determine whether a document will need to be output again.
-   * Note that the contents of the document cannot be considered by this 
method,
-   * and that a different version string (defined in IRepositoryConnector) is
-   * used to describe the version of the actual document.
-   *
-   * This method presumes that the connector object has been configured, and it
-   * is thus able to communicate with the output data store should that be
-   * necessary.
-   * 
-   * @param os
-   *          is the current output specification for the job that is doing the
-   *          crawling.
-   * @return a string, of unlimited length, which uniquely describes output
-   *         configuration and specification in such a way that if two such
-   *         strings are equal, the document will not need to be sent again to
-   *         the output data store.
-   */
-  @Override
-  public VersionContext getPipelineDescription(Specification os) throws 
ManifoldCFException, ServiceInterruption {
+  /** Get an output version string, given an output specification.  The output 
version string is used to uniquely describe the pertinent details of
+  * the output specification and the configuration, to allow the Connector 
Framework to determine whether a document will need to be output again.
+  * Note that the contents of the document cannot be considered by this 
method, and that a different version string (defined in IRepositoryConnector)
+  * is used to describe the version of the actual document.
+  *
+  * This method presumes that the connector object has been configured, and it 
is thus able to communicate with the output data store should that be
+  * necessary.
+  *@param os is the current output specification for the job that is doing the 
crawling.
+  *@return a string, of unlimited length, which uniquely describes output 
configuration and specification in such a way that if two such strings are 
equal,
+  * the document will not need to be sent again to the output data store.
+  */
+  @Override
+  public VersionContext getPipelineDescription(Specification os)
+    throws ManifoldCFException, ServiceInterruption
+  {
     SpecPacker sp = new SpecPacker(os);
-    return new VersionContext(sp.toPackedString(), params, os);
+    return new VersionContext(sp.toPackedString(),params,os);
   }
 
-  // We intercept checks pertaining to the document format and send modified
-  // checks further down
-
-  /**
-   * Detect if a mime type is acceptable or not. This method is used to
-   * determine whether it makes sense to fetch a document in the first place.
-   * 
-   * @param pipelineDescription
-   *          is the document's pipeline version string, for this connection.
-   * @param mimeType
-   *          is the mime type of the document.
-   * @param checkActivity
-   *          is an object including the activities that can be performed by
-   *          this method.
-   * @return true if the mime type can be accepted by this connector.
-   */
-  @Override
-  public boolean checkMimeTypeIndexable(VersionContext pipelineDescription, 
String mimeType,
-      IOutputCheckActivity checkActivity) throws ManifoldCFException, 
ServiceInterruption {
+  // We intercept checks pertaining to the document format and send modified 
checks further down
+  
+  /** Detect if a mime type is acceptable or not.  This method is used to 
determine whether it makes sense to fetch a document
+  * in the first place.
+  *@param pipelineDescription is the document's pipeline version string, for 
this connection.
+  *@param mimeType is the mime type of the document.
+  *@param checkActivity is an object including the activities that can be 
performed by this method.
+  *@return true if the mime type can be accepted by this connector.
+  */
+  @Override
+  public boolean checkMimeTypeIndexable(VersionContext pipelineDescription, 
String mimeType, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption
+  {
     // We should see what Tika will transform
     // MHL
     // Do a downstream check
     return checkActivity.checkMimeTypeIndexable("text/plain;charset=utf-8");
   }
 
-  /**
-   * Pre-determine whether a document (passed here as a File object) is
-   * acceptable or not. This method is used to determine whether a document
-   * needs to be actually transferred. This hook is provided mainly to support
-   * search engines that only handle a small set of accepted file types.
-   * 
-   * @param pipelineDescription
-   *          is the document's pipeline version string, for this connection.
-   * @param localFile
-   *          is the local file to check.
-   * @param checkActivity
-   *          is an object including the activities that can be done by this
-   *          method.
-   * @return true if the file is acceptable, false if not.
-   */
-  @Override
-  public boolean checkDocumentIndexable(VersionContext pipelineDescription, 
File localFile,
-      IOutputCheckActivity checkActivity) throws ManifoldCFException, 
ServiceInterruption {
-    // Document contents are not germane anymore, unless it looks like Tika
-    // won't accept them.
+  /** Pre-determine whether a document (passed here as a File object) is 
acceptable or not.  This method is
+  * used to determine whether a document needs to be actually transferred.  
This hook is provided mainly to support
+  * search engines that only handle a small set of accepted file types.
+  *@param pipelineDescription is the document's pipeline version string, for 
this connection.
+  *@param localFile is the local file to check.
+  *@param checkActivity is an object including the activities that can be done 
by this method.
+  *@return true if the file is acceptable, false if not.
+  */
+  @Override
+  public boolean checkDocumentIndexable(VersionContext pipelineDescription, 
File localFile, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    // Document contents are not germane anymore, unless it looks like Tika 
won't accept them.
     // Not sure how to check that...
     return true;
   }
 
-  /**
-   * Pre-determine whether a document's length is acceptable. This method is
-   * used to determine whether to fetch a document in the first place.
-   * 
-   * @param pipelineDescription
-   *          is the document's pipeline version string, for this connection.
-   * @param length
-   *          is the length of the document.
-   * @param checkActivity
-   *          is an object including the activities that can be done by this
-   *          method.
-   * @return true if the file is acceptable, false if not.
-   */
-  @Override
-  public boolean checkLengthIndexable(VersionContext pipelineDescription, long 
length,
-      IOutputCheckActivity checkActivity) throws ManifoldCFException, 
ServiceInterruption {
+  /** Pre-determine whether a document's length is acceptable.  This method is 
used
+  * to determine whether to fetch a document in the first place.
+  *@param pipelineDescription is the document's pipeline version string, for 
this connection.
+  *@param length is the length of the document.
+  *@param checkActivity is an object including the activities that can be done 
by this method.
+  *@return true if the file is acceptable, false if not.
+  */
+  @Override
+  public boolean checkLengthIndexable(VersionContext pipelineDescription, long 
length, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption
+  {
     // Always true
     return true;
   }
 
-  /**
-   * Add (or replace) a document in the output data store using the connector.
-   * This method presumes that the connector object has been configured, and it
-   * is thus able to communicate with the output data store should that be
-   * necessary. The OutputSpecification is *not* provided to this method,
-   * because the goal is consistency, and if output is done it must be
-   * consistent with the output description, since that was what was partly 
used
-   * to determine if output should be taking place. So it may be necessary for
-   * this method to decode an output description string in order to determine
-   * what should be done.
-   * 
-   * @param documentURI
-   *          is the URI of the document. The URI is presumed to be the unique
-   *          identifier which the output data store will use to process and
-   *          serve the document. This URI is constructed by the repository
-   *          connector which fetches the document, and is thus universal 
across
-   *          all output connectors.
-   * @param outputDescription
-   *          is the description string that was constructed for this document
-   *          by the getOutputDescription() method.
-   * @param document
-   *          is the document data to be processed (handed to the output data
-   *          store).
-   * @param authorityNameString
-   *          is the name of the authority responsible for authorizing any
-   *          access tokens passed in with the repository document. May be 
null.
-   * @param activities
-   *          is the handle to an object that the implementer of a pipeline
-   *          connector may use to perform operations, such as logging
-   *          processing activity, or sending a modified document to the next
-   *          stage in the pipeline.
-   * @return the document status (accepted or permanently rejected).
-   * @throws IOException
-   *           only if there's a stream error reading the document data.
-   */
-  @Override
-  public int addOrReplaceDocumentWithException(String documentURI, 
VersionContext pipelineDescription,
-      RepositoryDocument document, String authorityNameString, 
IOutputAddActivity activities)
-      throws ManifoldCFException, ServiceInterruption, IOException {
-    // First, make sure downstream pipeline will now accept
-    // text/plain;charset=utf-8
-    if (!activities.checkMimeTypeIndexable("text/plain;charset=utf-8")) {
+  /** Add (or replace) a document in the output data store using the connector.
+  * This method presumes that the connector object has been configured, and it 
is thus able to communicate with the output data store should that be
+  * necessary.
+  * The OutputSpecification is *not* provided to this method, because the goal 
is consistency, and if output is done it must be consistent with the
+  * output description, since that was what was partly used to determine if 
output should be taking place.  So it may be necessary for this method to decode
+  * an output description string in order to determine what should be done.
+  *@param documentURI is the URI of the document.  The URI is presumed to be 
the unique identifier which the output data store will use to process
+  * and serve the document.  This URI is constructed by the repository 
connector which fetches the document, and is thus universal across all output 
connectors.
+  *@param outputDescription is the description string that was constructed for 
this document by the getOutputDescription() method.
+  *@param document is the document data to be processed (handed to the output 
data store).
+  *@param authorityNameString is the name of the authority responsible for 
authorizing any access tokens passed in with the repository document.  May be 
null.
+  *@param activities is the handle to an object that the implementer of a 
pipeline connector may use to perform operations, such as logging processing 
activity,
+  * or sending a modified document to the next stage in the pipeline.
+  *@return the document status (accepted or permanently rejected).
+  *@throws IOException only if there's a stream error reading the document 
data.
+  */
+  @Override
+  public int addOrReplaceDocumentWithException(String documentURI, 
VersionContext pipelineDescription, RepositoryDocument document, String 
authorityNameString, IOutputAddActivity activities)
+    throws ManifoldCFException, ServiceInterruption, IOException
+  {
+    // First, make sure downstream pipeline will now accept 
text/plain;charset=utf-8
+    if (!activities.checkMimeTypeIndexable("text/plain;charset=utf-8"))
+    {
       activities.noDocument();
-      activities.recordActivity(null, ACTIVITY_EXTRACT, null, documentURI, 
activities.EXCLUDED_MIMETYPE,
-          "Downstream pipeline rejected mime type 'text/plain;charset=utf-8'");
+      activities.recordActivity(null, ACTIVITY_EXTRACT, null, documentURI,
+        activities.EXCLUDED_MIMETYPE, "Downstream pipeline rejected mime type 
'text/plain;charset=utf-8'");
       return DOCUMENTSTATUS_REJECTED;
     }
 
     SpecPacker sp = new SpecPacker(pipelineDescription.getSpecification());
 
-    // Tika server variables
-    String mime = "";
-    InputStream tikaServerIs = null;
-    int retry = 0;
-    HttpResponse response = null;
-    IOException tikaServerDownException = null;
-
     BoilerpipeExtractor extractorClassInstance = 
sp.getExtractorClassInstance();
-
+    
     // Tika's API reads from an input stream and writes to an output Writer.
-    // Since a RepositoryDocument includes readers and inputstreams 
exclusively,
-    // AND all downstream
-    // processing needs to occur in a ManifoldCF thread, we have some
-    // constraints on the architecture we need to get this done:
-    // (1) The principle worker thread must call the downstream pipeline send()
-    // method.
-    // (2) The callee of the send() method must call a reader in the Repository
-    // Document.
-    // (3) The Reader, if its databuffer is empty, must pull more data from the
-    // original input stream and hand it to Tika, which populates the Reader's
-    // databuffer.
-    // So all this can be done in one thread, with some work, and the creation
-    // of a special InputStream or Reader implementation. Where it fails,
-    // though, is the
-    // requirement that tika-extracted metadata be included in the
-    // RepositoryDocument right from the beginning. Effectively this means that
-    // the entire document
-    // must be parsed before it is handed downstream -- so basically a 
temporary
-    // file (or in-memory buffer if small enough) must be created.
+    // Since a RepositoryDocument includes readers and inputstreams 
exclusively, AND all downstream
+    // processing needs to occur in a ManifoldCF thread, we have some 
constraints on the architecture we need to get this done:
+    // (1) The principle worker thread must call the downstream pipeline 
send() method.
+    // (2) The callee of the send() method must call a reader in the 
Repository Document.
+    // (3) The Reader, if its databuffer is empty, must pull more data from 
the original input stream and hand it to Tika, which populates the Reader's 
databuffer.
+    // So all this can be done in one thread, with some work, and the creation 
of a special InputStream or Reader implementation.  Where it fails, though, is 
the
+    // requirement that tika-extracted metadata be included in the 
RepositoryDocument right from the beginning.  Effectively this means that the 
entire document
+    // must be parsed before it is handed downstream -- so basically a 
temporary file (or in-memory buffer if small enough) must be created.
     // Instead of the elegant flow above, we have the following:
     // (1) Create a temporary file (or in-memory buffer if file is small 
enough)
     // (2) Run Tika to completion, streaming content output to temporary file
-    // (3) Modify RepositoryDocument to read from temporary file, and include
-    // Tika-extracted metadata
+    // (3) Modify RepositoryDocument to read from temporary file, and include 
Tika-extracted metadata
     // (4) Call downstream document processing
-
+      
     DestinationStorage ds;
-
-    if (document.getBinaryLength() <= inMemoryMaximumFile) {
-      ds = new MemoryDestinationStorage((int) document.getBinaryLength());
-    } else {
+      
+    if (document.getBinaryLength() <= inMemoryMaximumFile)
+    {
+      ds = new MemoryDestinationStorage((int)document.getBinaryLength());
+    }
+    else
+    {
       ds = new FileDestinationStorage();
     }
-    try {
+    try
+    {
       Metadata metadata = new Metadata();
-      if (document.getFileName() != null) {
+      if (document.getFileName() != null)
+      {
         metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, 
document.getFileName());
         metadata.add("stream_name", document.getFileName());
       }
-      if (document.getMimeType() != null) {
-        mime = document.getMimeType();
-        metadata.add("Content-Type", mime);
-      }
+      if (document.getMimeType() != null)
+        metadata.add("Content-Type", document.getMimeType());
       metadata.add("stream_size", new 
Long(document.getBinaryLength()).toString());
 
       // We only log the extraction
@@ -289,470 +210,334 @@ public class TikaExtractor extends org.a
       String resultCode = "OK";
       String description = null;
       Long length = null;
-
-      try {
-        if (sp.tikaServer) {
-          try {
-            final HttpClient client = HttpClientBuilder.create().build();
-            final HttpHost tikaHost = new HttpHost(sp.tikaHostname, 
sp.tikaPort);
-
-            // Make a copy of the original stream as it needs to be sent two
-            // times to Tika
-            // one for the metadata and one for the content
-            IOUtils.copy(document.getBinaryStream(), ds.getOutputStream());
-            HttpPut httpPut;
-            HttpEntity entity;
-
-            // Metadata
-            httpPut = new HttpPut(sp.metaURI);
-            if (!mime.isEmpty()) {
-              httpPut.addHeader("Content-Type", mime);
-            }
-            httpPut.addHeader("Accept", "application/json");
-            entity = new InputStreamEntity(ds.getInputStream());
-            httpPut.setEntity(entity);
-            while (retry < 3 && response == null) {
-              try {
-                response = client.execute(tikaHost, httpPut);
-                tikaServerDownException = null;
-              } catch (IOException e) {
-                tikaServerDownException = e;
-                retry++;
-                if (retry < 3) {
-                  try {
-                    Thread.sleep(sp.tikaRetry);
-                  } catch (InterruptedException e1) {
-                    // Should not happen
-                  }
-                }
-              }
-            }
-            if (tikaServerDownException != null) {
-              throw tikaServerDownException;
-            }
-            int responseCode = response.getStatusLine().getStatusCode();
-            if (response.getStatusLine().getStatusCode() == 200 || 
response.getStatusLine().getStatusCode() == 204) {
-              tikaServerIs = response.getEntity().getContent();
-              try {
-                final BufferedReader br = new BufferedReader(new 
InputStreamReader(tikaServerIs));
-                final JSONParser parser = new JSONParser();
-                JSONObject metaJson;
-                final StringBuilder sb = new StringBuilder();
-                String output;
-                while ((output = br.readLine()) != null) {
-                  sb.append(output);
-                }
-                metaJson = (JSONObject) parser.parse(sb.toString());
-                for (Object key : metaJson.keySet()) {
-                  metadata.add(key.toString(), metaJson.get(key).toString());
-                }
-              } finally {
-                tikaServerIs.close();
-              }
-            } else {
-              activities.noDocument();
-              if (responseCode == 422) {
-                resultCode = "TIKASERVERREJECTS";
-                description = "Tika Server rejected document with the 
following reason: "
-                    + response.getStatusLine().getReasonPhrase();
-                handleTikaServerRejects(description);
-              } else {
-                resultCode = "TIKASERVERERROR";
-                description = "Tika Server failed to parse document with the 
following error: "
-                    + response.getStatusLine().getReasonPhrase();
-                handleTikaServerError(description);
-              }
-              return DOCUMENTSTATUS_REJECTED;
-            }
-
-            // Content
-            httpPut = new HttpPut(sp.contentURI);
-            if (!mime.isEmpty()) {
-              httpPut.addHeader("Content-Type", mime);
-            }
-            httpPut.addHeader("Accept", "text/plain");
-            entity = new InputStreamEntity(ds.getInputStream());
-            httpPut.setEntity(entity);
-
-            // Retry mecanism
-            retry = 0;
-            response = null;
-            while (retry < 3 && response == null) {
-              try {
-                response = client.execute(tikaHost, httpPut);
-                tikaServerDownException = null;
-              } catch (IOException e) {
-                tikaServerDownException = e;
-                retry++;
-                if (retry < 3) {
-                  try {
-                    Thread.sleep(sp.tikaRetry);
-                  } catch (InterruptedException e1) {
-                    // Should not happen
-                  }
-                }
-              }
-            }
-            if (tikaServerDownException != null) {
-              throw tikaServerDownException;
-            }
-
-            responseCode = response.getStatusLine().getStatusCode();
-            if (response.getStatusLine().getStatusCode() == 200 || 
response.getStatusLine().getStatusCode() == 204) {
-              tikaServerIs = response.getEntity().getContent();
-              try {
-                ds.close();
-                ds = new FileDestinationStorage();
-                IOUtils.copyLarge(tikaServerIs, ds.getOutputStream(), 0L, 
sp.writeLimit);
-                length = new Long(ds.getBinaryLength());
-              } finally {
-                tikaServerIs.close();
-              }
-            } else {
-              activities.noDocument();
-              if (responseCode == 422) {
-                resultCode = "TIKASERVERREJECTS";
-                description = "Tika Server rejected document with the 
following reason: "
-                    + response.getStatusLine().getReasonPhrase();
-                handleTikaServerRejects(description);
-              } else {
-                resultCode = "TIKASERVERERROR";
-                description = "Tika Server failed to parse document with the 
following error: "
-                    + response.getStatusLine().getReasonPhrase();
-                handleTikaServerError(description);
-              }
-              return DOCUMENTSTATUS_REJECTED;
-            }
-
-          } catch (IOException | ParseException e) {
-            resultCode = "TIKASERVERRESPONSEISSUE";
-            description = e.getMessage();
-            int rval;
-            if (e instanceof IOException) {
-              rval = handleTikaServerException((IOException) e);
-            } else {
-              rval = handleTikaServerException((ParseException) e);
-            }
-            if (rval == DOCUMENTSTATUS_REJECTED) {
-              activities.noDocument();
-            }
-            return rval;
-          }
-        } else {
-
-          OutputStream os = ds.getOutputStream();
-          try {
-            Writer w = new OutputStreamWriter(os, "utf-8");
-            try {
-              // Use tika to parse stuff
-              ContentHandler handler = 
TikaParser.newWriteOutBodyContentHandler(w, sp.writeLimit());
-              if (extractorClassInstance != null)
-                handler = new BoilerpipeContentHandler(handler, 
extractorClassInstance);
-              try {
-                TikaParser.parse(document.getBinaryStream(), metadata, 
handler);
-              } catch (TikaException e) {
-                if (sp.ignoreTikaException()) {
-                  resultCode = 
e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
-                  description = e.getMessage();
-                } else {
-                  resultCode = "TIKAREJECTION";
-                  description = e.getMessage();
-                  int rval = handleTikaException(e);
-                  if (rval == DOCUMENTSTATUS_REJECTED)
-                    activities.noDocument();
-                  return rval;
-                }
-              } catch (SAXException e) {
+      try
+      {
+        OutputStream os = ds.getOutputStream();
+        try
+        {
+          Writer w = new OutputStreamWriter(os,"utf-8");
+          try
+          {
+            // Use tika to parse stuff
+            ContentHandler handler = 
TikaParser.newWriteOutBodyContentHandler(w, sp.writeLimit());
+            if (extractorClassInstance != null)
+              handler = new BoilerpipeContentHandler(handler, 
extractorClassInstance);
+            try
+            {
+              TikaParser.parse(document.getBinaryStream(), metadata, handler);
+            }
+            catch (TikaException e)
+            {
+              if (sp.ignoreTikaException())
+              {
                 resultCode = 
e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
                 description = e.getMessage();
-                int rval = handleSaxException(e);
+              }
+              else
+              {
+                resultCode = "TIKAREJECTION";
+                description = e.getMessage();
+                int rval = handleTikaException(e);
                 if (rval == DOCUMENTSTATUS_REJECTED)
                   activities.noDocument();
                 return rval;
-              } catch (IOException e) {
-                resultCode = 
e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
-                description = e.getMessage();
-                throw e;
               }
-            } finally {
-              w.flush();
             }
-          } finally {
-            os.close();
-            length = new Long(ds.getBinaryLength());
+            catch (SAXException e)
+            {
+              resultCode = 
e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+              description = e.getMessage();
+              int rval = handleSaxException(e);
+              if (rval == DOCUMENTSTATUS_REJECTED)
+                activities.noDocument();
+              return rval;
+            }
+            catch (IOException e)
+            {
+              resultCode = 
e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
+              description = e.getMessage();
+              throw e;
+            }
+          }
+          finally
+          {
+            w.flush();
           }
         }
-
-        if (!activities.checkLengthIndexable(ds.getBinaryLength())) {
+        finally
+        {
+          os.close();
+          length = new Long(ds.getBinaryLength());
+        }
+        
+        // Check to be sure downstream pipeline will accept document of 
specified length
+        if (!activities.checkLengthIndexable(ds.getBinaryLength()))
+        {
           activities.noDocument();
           resultCode = activities.EXCLUDED_LENGTH;
-          description = "Downstream pipeline rejected document with length " + 
ds.getBinaryLength();
+          description = "Downstream pipeline rejected document with length 
"+ds.getBinaryLength();
           return DOCUMENTSTATUS_REJECTED;
         }
 
-      } finally {
+      }
+      finally
+      {
         // Log the extraction processing
-        activities.recordActivity(new Long(startTime), ACTIVITY_EXTRACT, 
length, documentURI, resultCode, description);
+        activities.recordActivity(new Long(startTime), ACTIVITY_EXTRACT, 
length, documentURI,
+          resultCode, description);
       }
-
+      
       // Parsing complete!
       // Create a copy of Repository Document
       RepositoryDocument docCopy = document.duplicate();
-
+        
       // Get new stream length
       long newBinaryLength = ds.getBinaryLength();
       // Open new input stream
       InputStream is = ds.getInputStream();
+      try
+      {
+        docCopy.setBinary(is,newBinaryLength);
 
-      try {
-        docCopy.setBinary(is, newBinaryLength);
-
-        // Set up all metadata from Tika. We may want to run this through a
-        // mapper eventually...
+        // Set up all metadata from Tika.  We may want to run this through a 
mapper eventually...
         String[] metaNames = metadata.names();
-        for (String mName : metaNames) {
+        for(String mName : metaNames){
           String value = metadata.get(mName);
-          if (sp.lowerNames()) {
+          if (sp.lowerNames())
+          {
             StringBuilder sb = new StringBuilder();
-            for (int i = 0; i < mName.length(); i++) {
+            for (int i=0; i<mName.length(); i++) {
               char ch = mName.charAt(i);
-              if (!Character.isLetterOrDigit(ch))
-                ch = '_';
-              else
-                ch = Character.toLowerCase(ch);
+              if (!Character.isLetterOrDigit(ch)) ch='_';
+              else ch=Character.toLowerCase(ch);
               sb.append(ch);
             }
             mName = sb.toString();
           }
           String target = sp.getMapping(mName);
-          if (target != null) {
+          if(target!=null)
+          {
             docCopy.addField(target, value);
-          } else {
-            if (sp.keepAllMetadata()) {
-              docCopy.addField(mName, value);
+          }
+          else
+          {
+            if(sp.keepAllMetadata())
+            {
+             docCopy.addField(mName, value);
             }
           }
         }
 
         // Send new document downstream
-        return activities.sendDocument(documentURI, docCopy);
-      } finally {
+        return activities.sendDocument(documentURI,docCopy);
+      }
+      finally
+      {
         is.close();
       }
-    } finally {
+    }
+    finally
+    {
       ds.close();
     }
 
   }
 
-  /**
-   * Obtain the name of the form check javascript method to call.
-   * 
-   * @param connectionSequenceNumber
-   *          is the unique number of this connection within the job.
-   * @return the name of the form check javascript method.
-   */
-  @Override
-  public String getFormCheckJavascriptMethodName(int connectionSequenceNumber) 
{
-    return "s" + connectionSequenceNumber + "_checkSpecification";
-  }
-
-  /**
-   * Obtain the name of the form presave check javascript method to call.
-   * 
-   * @param connectionSequenceNumber
-   *          is the unique number of this connection within the job.
-   * @return the name of the form presave check javascript method.
-   */
-  @Override
-  public String getFormPresaveCheckJavascriptMethodName(int 
connectionSequenceNumber) {
-    return "s" + connectionSequenceNumber + "_checkSpecificationForSave";
-  }
-
-  /**
-   * Output the specification header section. This method is called in the head
-   * section of a job page which has selected a pipeline connection of the
-   * current type. Its purpose is to add the required tabs to the list, and to
-   * output any javascript methods that might be needed by the job editing 
HTML.
-   * 
-   * @param out
-   *          is the output to which any HTML should be sent.
-   * @param locale
-   *          is the preferred local of the output.
-   * @param os
-   *          is the current pipeline specification for this connection.
-   * @param connectionSequenceNumber
-   *          is the unique number of this connection within the job.
-   * @param tabsArray
-   *          is an array of tab names. Add to this array any tab names that 
are
-   *          specific to the connector.
-   */
-  @Override
-  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, 
Specification os, int connectionSequenceNumber,
-      List<String> tabsArray) throws ManifoldCFException, IOException {
+  /** Obtain the name of the form check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection 
within the job.
+  *@return the name of the form check javascript method.
+  */
+  @Override
+  public String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecification";
+  }
+
+  /** Obtain the name of the form presave check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection 
within the job.
+  *@return the name of the form presave check javascript method.
+  */
+  @Override
+  public String getFormPresaveCheckJavascriptMethodName(int 
connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecificationForSave";
+  }
+
+  /** Output the specification header section.
+  * This method is called in the head section of a job page which has selected 
a pipeline connection of the current type.  Its purpose is to add the required 
tabs
+  * to the list, and to output any javascript methods that might be needed by 
the job editing HTML.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this connection.
+  *@param connectionSequenceNumber is the unique number of this connection 
within the job.
+  *@param tabsArray is an array of tab names.  Add to this array any tab names 
that are specific to the connector.
+  */
+  @Override
+  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, 
Specification os,
+    int connectionSequenceNumber, List<String> tabsArray)
+    throws ManifoldCFException, IOException
+  {
     Map<String, Object> paramMap = new HashMap<String, Object>();
-    paramMap.put("SEQNUM", Integer.toString(connectionSequenceNumber));
+    paramMap.put("SEQNUM",Integer.toString(connectionSequenceNumber));
 
-    tabsArray.add(Messages.getString(locale, "TikaExtractor.TikaTypeTabName"));
     tabsArray.add(Messages.getString(locale, 
"TikaExtractor.FieldMappingTabName"));
     tabsArray.add(Messages.getString(locale, 
"TikaExtractor.ExceptionsTabName"));
     tabsArray.add(Messages.getString(locale, 
"TikaExtractor.BoilerplateTabName"));
 
     // Fill in the specification header map, using data from all tabs.
-    fillInTikaTypeSpecificationMap(paramMap, os);
     fillInFieldMappingSpecificationMap(paramMap, os);
     fillInExceptionsSpecificationMap(paramMap, os);
     fillInBoilerplateSpecificationMap(paramMap, os);
-
-    Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_JS, 
paramMap);
+    
+    
Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_JS,paramMap);
   }
-
-  /**
-   * Output the specification body section. This method is called in the body
-   * section of a job page which has selected a pipeline connection of the
-   * current type. Its purpose is to present the required form elements for
-   * editing. The coder can presume that the HTML that is output from this
-   * configuration will be within appropriate <html>, <body>, and <form> tags.
-   * The name of the form is "editjob".
-   * 
-   * @param out
-   *          is the output to which any HTML should be sent.
-   * @param locale
-   *          is the preferred local of the output.
-   * @param os
-   *          is the current pipeline specification for this job.
-   * @param connectionSequenceNumber
-   *          is the unique number of this connection within the job.
-   * @param actualSequenceNumber
-   *          is the connection within the job that has currently been 
selected.
-   * @param tabName
-   *          is the current tab name.
-   */
-  @Override
-  public void outputSpecificationBody(IHTTPOutput out, Locale locale, 
Specification os, int connectionSequenceNumber,
-      int actualSequenceNumber, String tabName) throws ManifoldCFException, 
IOException {
+  
+  /** Output the specification body section.
+  * This method is called in the body section of a job page which has selected 
a pipeline connection of the current type.  Its purpose is to present the 
required form elements for editing.
+  * The coder can presume that the HTML that is output from this configuration 
will be within appropriate <html>, <body>, and <form> tags.  The name of the
+  * form is "editjob".
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection 
within the job.
+  *@param actualSequenceNumber is the connection within the job that has 
currently been selected.
+  *@param tabName is the current tab name.
+  */
+  @Override
+  public void outputSpecificationBody(IHTTPOutput out, Locale locale, 
Specification os,
+    int connectionSequenceNumber, int actualSequenceNumber, String tabName)
+    throws ManifoldCFException, IOException
+  {
     Map<String, Object> paramMap = new HashMap<String, Object>();
 
     // Set the tab name
     paramMap.put("TABNAME", tabName);
-    paramMap.put("SEQNUM", Integer.toString(connectionSequenceNumber));
-    paramMap.put("SELECTEDNUM", Integer.toString(actualSequenceNumber));
+    paramMap.put("SEQNUM",Integer.toString(connectionSequenceNumber));
+    paramMap.put("SELECTEDNUM",Integer.toString(actualSequenceNumber));
 
     // Fill in the field mapping tab data
-    fillInTikaTypeSpecificationMap(paramMap, os);
     fillInFieldMappingSpecificationMap(paramMap, os);
     fillInExceptionsSpecificationMap(paramMap, os);
     fillInBoilerplateSpecificationMap(paramMap, os);
-
-    Messages.outputResourceWithVelocity(out, locale, 
EDIT_SPECIFICATION_TIKATYPE_HTML, paramMap);
-    Messages.outputResourceWithVelocity(out, locale, 
EDIT_SPECIFICATION_FIELDMAPPING_HTML, paramMap);
-    Messages.outputResourceWithVelocity(out, locale, 
EDIT_SPECIFICATION_EXCEPTIONS_HTML, paramMap);
-    Messages.outputResourceWithVelocity(out, locale, 
EDIT_SPECIFICATION_BOILERPLATE_HTML, paramMap);
-  }
-
-  /**
-   * Process a specification post. This method is called at the start of job's
-   * edit or view page, whenever there is a possibility that form data for a
-   * connection has been posted. Its purpose is to gather form information and
-   * modify the transformation specification accordingly. The name of the 
posted
-   * form is "editjob".
-   * 
-   * @param variableContext
-   *          contains the post data, including binary file-upload information.
-   * @param locale
-   *          is the preferred local of the output.
-   * @param os
-   *          is the current pipeline specification for this job.
-   * @param connectionSequenceNumber
-   *          is the unique number of this connection within the job.
-   * @return null if all is well, or a string error message if there is an 
error
-   *         that should prevent saving of the job (and cause a redirection to
-   *         an error page).
-   */
+    
+    
Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_FIELDMAPPING_HTML,paramMap);
+    
Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_EXCEPTIONS_HTML,paramMap);
+    
Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_BOILERPLATE_HTML,paramMap);
+  }
+
+  /** Process a specification post.
+  * This method is called at the start of job's edit or view page, whenever 
there is a possibility that form data for a connection has been
+  * posted.  Its purpose is to gather form information and modify the 
transformation specification accordingly.
+  * The name of the posted form is "editjob".
+  *@param variableContext contains the post data, including binary file-upload 
information.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection 
within the job.
+  *@return null if all is well, or a string error message if there is an error 
that should prevent saving of the job (and cause a redirection to an error 
page).
+  */
   @Override
   public String processSpecificationPost(IPostParameters variableContext, 
Locale locale, Specification os,
-      int connectionSequenceNumber) throws ManifoldCFException {
-    String seqPrefix = "s" + connectionSequenceNumber + "_";
+    int connectionSequenceNumber)
+    throws ManifoldCFException {
+    String seqPrefix = "s"+connectionSequenceNumber+"_";
 
     String x;
-
-    x = variableContext.getParameter(seqPrefix + "fieldmapping_count");
-    if (x != null && x.length() > 0) {
+        
+    x = variableContext.getParameter(seqPrefix+"fieldmapping_count");
+    if (x != null && x.length() > 0)
+    {
       // About to gather the fieldmapping nodes, so get rid of the old ones.
       int i = 0;
-      while (i < os.getChildCount()) {
+      while (i < os.getChildCount())
+      {
         SpecificationNode node = os.getChild(i);
-        if (node.getType().equals(TikaConfig.NODE_FIELDMAP) || 
node.getType().equals(TikaConfig.NODE_KEEPMETADATA)
-            || node.getType().equals(TikaConfig.NODE_LOWERNAMES) || 
node.getType().equals(TikaConfig.NODE_WRITELIMIT))
+        if (node.getType().equals(TikaConfig.NODE_FIELDMAP)
+          || node.getType().equals(TikaConfig.NODE_KEEPMETADATA)
+          || node.getType().equals(TikaConfig.NODE_LOWERNAMES)
+          || node.getType().equals(TikaConfig.NODE_WRITELIMIT))
           os.removeChild(i);
         else
           i++;
       }
       int count = Integer.parseInt(x);
       i = 0;
-      while (i < count) {
-        String prefix = seqPrefix + "fieldmapping_";
-        String suffix = "_" + Integer.toString(i);
-        String op = variableContext.getParameter(prefix + "op" + suffix);
-        if (op == null || !op.equals("Delete")) {
+      while (i < count)
+      {
+        String prefix = seqPrefix+"fieldmapping_";
+        String suffix = "_"+Integer.toString(i);
+        String op = variableContext.getParameter(prefix+"op"+suffix);
+        if (op == null || !op.equals("Delete"))
+        {
           // Gather the fieldmap etc.
-          String source = variableContext.getParameter(prefix + "source" + 
suffix);
-          String target = variableContext.getParameter(prefix + "target" + 
suffix);
+          String source = variableContext.getParameter(prefix+"source"+suffix);
+          String target = variableContext.getParameter(prefix+"target"+suffix);
           if (target == null)
             target = "";
           SpecificationNode node = new 
SpecificationNode(TikaConfig.NODE_FIELDMAP);
-          node.setAttribute(TikaConfig.ATTRIBUTE_SOURCE, source);
-          node.setAttribute(TikaConfig.ATTRIBUTE_TARGET, target);
-          os.addChild(os.getChildCount(), node);
+          node.setAttribute(TikaConfig.ATTRIBUTE_SOURCE,source);
+          node.setAttribute(TikaConfig.ATTRIBUTE_TARGET,target);
+          os.addChild(os.getChildCount(),node);
         }
         i++;
       }
-
-      String addop = variableContext.getParameter(seqPrefix + 
"fieldmapping_op");
-      if (addop != null && addop.equals("Add")) {
-        String source = variableContext.getParameter(seqPrefix + 
"fieldmapping_source");
-        String target = variableContext.getParameter(seqPrefix + 
"fieldmapping_target");
+      
+      String addop = variableContext.getParameter(seqPrefix+"fieldmapping_op");
+      if (addop != null && addop.equals("Add"))
+      {
+        String source = 
variableContext.getParameter(seqPrefix+"fieldmapping_source");
+        String target = 
variableContext.getParameter(seqPrefix+"fieldmapping_target");
         if (target == null)
           target = "";
         SpecificationNode node = new 
SpecificationNode(TikaConfig.NODE_FIELDMAP);
-        node.setAttribute(TikaConfig.ATTRIBUTE_SOURCE, source);
-        node.setAttribute(TikaConfig.ATTRIBUTE_TARGET, target);
-        os.addChild(os.getChildCount(), node);
+        node.setAttribute(TikaConfig.ATTRIBUTE_SOURCE,source);
+        node.setAttribute(TikaConfig.ATTRIBUTE_TARGET,target);
+        os.addChild(os.getChildCount(),node);
       }
-
+      
       // Gather the keep all metadata parameter to be the last one
       SpecificationNode node = new 
SpecificationNode(TikaConfig.NODE_KEEPMETADATA);
-      String keepAll = variableContext.getParameter(seqPrefix + 
"keepallmetadata");
-      if (keepAll != null) {
+      String keepAll = 
variableContext.getParameter(seqPrefix+"keepallmetadata");
+      if (keepAll != null)
+      {
         node.setAttribute(TikaConfig.ATTRIBUTE_VALUE, keepAll);
-      } else {
+      }
+      else
+      {
         node.setAttribute(TikaConfig.ATTRIBUTE_VALUE, "false");
       }
-      // Add the new keepallmetadata config parameter
+      // Add the new keepallmetadata config parameter 
       os.addChild(os.getChildCount(), node);
-
+      
       SpecificationNode node2 = new 
SpecificationNode(TikaConfig.NODE_LOWERNAMES);
-      String lower = variableContext.getParameter(seqPrefix + "lowernames");
-      if (lower != null) {
+      String lower = variableContext.getParameter(seqPrefix+"lowernames");
+      if (lower != null)
+      {
         node2.setAttribute(TikaConfig.ATTRIBUTE_VALUE, lower);
-      } else {
+      }
+      else
+      {
         node2.setAttribute(TikaConfig.ATTRIBUTE_VALUE, "false");
       }
       os.addChild(os.getChildCount(), node2);
-
+      
       SpecificationNode node3 = new 
SpecificationNode(TikaConfig.NODE_WRITELIMIT);
-      String writeLimit = variableContext.getParameter(seqPrefix + 
"writelimit");
-      if (writeLimit != null) {
+      String writeLimit = variableContext.getParameter(seqPrefix+"writelimit");
+      if (writeLimit != null)
+      {
         node3.setAttribute(TikaConfig.ATTRIBUTE_VALUE, writeLimit);
-      } else {
+      }
+      else
+      {
         node3.setAttribute(TikaConfig.ATTRIBUTE_VALUE, "");
       }
       os.addChild(os.getChildCount(), node3);
     }
-
-    if (variableContext.getParameter(seqPrefix + 
"ignoretikaexceptions_present") != null) {
+    
+    if (variableContext.getParameter(seqPrefix+"ignoretikaexceptions_present") 
!= null)
+    {
       int i = 0;
-      while (i < os.getChildCount()) {
+      while (i < os.getChildCount())
+      {
         SpecificationNode node = os.getChild(i);
         if (node.getType().equals(TikaConfig.NODE_IGNORETIKAEXCEPTION))
           os.removeChild(i);
@@ -760,7 +545,7 @@ public class TikaExtractor extends org.a
           i++;
       }
 
-      String value = variableContext.getParameter(seqPrefix + 
"ignoretikaexceptions");
+      String value = 
variableContext.getParameter(seqPrefix+"ignoretikaexceptions");
       if (value == null)
         value = "false";
 
@@ -768,11 +553,13 @@ public class TikaExtractor extends org.a
       node.setAttribute(TikaConfig.ATTRIBUTE_VALUE, value);
       os.addChild(os.getChildCount(), node);
     }
-
-    x = variableContext.getParameter(seqPrefix + "boilerplateclassname");
-    if (x != null) {
+    
+    x = variableContext.getParameter(seqPrefix+"boilerplateclassname");
+    if (x != null)
+    {
       int i = 0;
-      while (i < os.getChildCount()) {
+      while (i < os.getChildCount())
+      {
         SpecificationNode node = os.getChild(i);
         if (node.getType().equals(TikaConfig.NODE_BOILERPLATEPROCESSOR))
           os.removeChild(i);
@@ -780,265 +567,183 @@ public class TikaExtractor extends org.a
           i++;
       }
 
-      if (x.length() > 0) {
+      if (x.length() > 0)
+      {
         SpecificationNode node = new 
SpecificationNode(TikaConfig.NODE_BOILERPLATEPROCESSOR);
         node.setAttribute(TikaConfig.ATTRIBUTE_VALUE, x);
         os.addChild(os.getChildCount(), node);
       }
     }
-
-    x = variableContext.getParameter(seqPrefix + "tikaserver");
-    if (x != null) {
-      int i = 0;
-      while (i < os.getChildCount()) {
-        SpecificationNode node = os.getChild(i);
-        if (node.getType().equals(TikaConfig.NODE_TIKASERVER) || 
node.getType().equals(TikaConfig.NODE_TIKAHOSTNAME)
-            || node.getType().equals(TikaConfig.NODE_TIKAPORT) || 
node.getType().equals(TikaConfig.NODE_TIKARETRY))
-          os.removeChild(i);
-        else
-          i++;
-      }
-
-      SpecificationNode node = new 
SpecificationNode(TikaConfig.NODE_TIKASERVER);
-      String tikaServer = variableContext.getParameter(seqPrefix + 
"tikaserver");
-      if (tikaServer != null) {
-        node.setAttribute(TikaConfig.ATTRIBUTE_VALUE, tikaServer);
-      } else {
-        node.setAttribute(TikaConfig.ATTRIBUTE_VALUE, "false");
-      }
-      // Add the new tikaserver config parameter
-      os.addChild(os.getChildCount(), node);
-
-      SpecificationNode node2 = new 
SpecificationNode(TikaConfig.NODE_TIKAHOSTNAME);
-      String tikaHostname = variableContext.getParameter(seqPrefix + 
"tikahostname");
-      if (tikaHostname != null) {
-        node2.setAttribute(TikaConfig.ATTRIBUTE_VALUE, tikaHostname);
-      } else {
-        node2.setAttribute(TikaConfig.ATTRIBUTE_VALUE, "");
-      }
-      // Add the new tikahostname config parameter
-      os.addChild(os.getChildCount(), node2);
-
-      SpecificationNode node3 = new 
SpecificationNode(TikaConfig.NODE_TIKAPORT);
-      String tikaPort = variableContext.getParameter(seqPrefix + "tikaport");
-      if (tikaPort != null) {
-        node3.setAttribute(TikaConfig.ATTRIBUTE_VALUE, tikaPort);
-      } else {
-        node3.setAttribute(TikaConfig.ATTRIBUTE_VALUE, "");
-      }
-      // Add the new tikaport config parameter
-      os.addChild(os.getChildCount(), node3);
-
-      SpecificationNode node4 = new 
SpecificationNode(TikaConfig.NODE_TIKARETRY);
-      String tikaRetry = variableContext.getParameter(seqPrefix + "tikaretry");
-      if (tikaRetry != null) {
-        node4.setAttribute(TikaConfig.ATTRIBUTE_VALUE, tikaRetry);
-      } else {
-        node4.setAttribute(TikaConfig.ATTRIBUTE_VALUE, "");
-      }
-      // Add the new tikaport config parameter
-      os.addChild(os.getChildCount(), node4);
-    }
-
+    
     return null;
   }
+  
 
-  /**
-   * View specification. This method is called in the body section of a job's
-   * view page. Its purpose is to present the pipeline specification 
information
-   * to the user. The coder can presume that the HTML that is output from this
-   * configuration will be within appropriate <html> and <body> tags.
-   * 
-   * @param out
-   *          is the output to which any HTML should be sent.
-   * @param locale
-   *          is the preferred local of the output.
-   * @param connectionSequenceNumber
-   *          is the unique number of this connection within the job.
-   * @param os
-   *          is the current pipeline specification for this job.
-   */
-  @Override
-  public void viewSpecification(IHTTPOutput out, Locale locale, Specification 
os, int connectionSequenceNumber)
-      throws ManifoldCFException, IOException {
+  /** View specification.
+  * This method is called in the body section of a job's view page.  Its 
purpose is to present the pipeline specification information to the user.
+  * The coder can presume that the HTML that is output from this configuration 
will be within appropriate <html> and <body> tags.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred local of the output.
+  *@param connectionSequenceNumber is the unique number of this connection 
within the job.
+  *@param os is the current pipeline specification for this job.
+  */
+  @Override
+  public void viewSpecification(IHTTPOutput out, Locale locale, Specification 
os,
+    int connectionSequenceNumber)
+    throws ManifoldCFException, IOException
+  {
     Map<String, Object> paramMap = new HashMap<String, Object>();
-    paramMap.put("SEQNUM", Integer.toString(connectionSequenceNumber));
+    paramMap.put("SEQNUM",Integer.toString(connectionSequenceNumber));
 
     // Fill in the map with data from all tabs
-    fillInTikaTypeSpecificationMap(paramMap, os);
     fillInFieldMappingSpecificationMap(paramMap, os);
     fillInExceptionsSpecificationMap(paramMap, os);
     fillInBoilerplateSpecificationMap(paramMap, os);
 
-    Messages.outputResourceWithVelocity(out, locale, VIEW_SPECIFICATION_HTML, 
paramMap);
-
-  }
-
-  protected static void fillInTikaTypeSpecificationMap(Map<String, Object> 
paramMap, Specification os) {
-    String tikaServer = "false";
-    String tikaHostname = TikaConfig.TIKAHOSTNAME_DEFAULT;
-    String tikaPort = String.valueOf(TikaConfig.TIKAPORT_DEFAULT);
-    String tikaRetry = String.valueOf(TikaConfig.TIKARETRY_DEFAULT);
-    for (int i = 0; i < os.getChildCount(); i++) {
-      SpecificationNode sn = os.getChild(i);
-      if (sn.getType().equals(TikaConfig.NODE_TIKASERVER)) {
-        tikaServer = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
-      } else if (sn.getType().equals(TikaConfig.NODE_TIKAHOSTNAME)) {
-        tikaHostname = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
-      } else if (sn.getType().equals(TikaConfig.NODE_TIKAPORT)) {
-        tikaPort = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
-      } else if (sn.getType().equals(TikaConfig.NODE_TIKARETRY)) {
-        tikaRetry = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
-      }
-    }
-    paramMap.put("TIKASERVER", tikaServer);
-    paramMap.put("TIKAHOSTNAME", tikaHostname);
-    paramMap.put("TIKAPORT", tikaPort);
-    paramMap.put("TIKARETRY", tikaRetry);
+    
Messages.outputResourceWithVelocity(out,locale,VIEW_SPECIFICATION_HTML,paramMap);
+    
   }
 
-  protected static void fillInFieldMappingSpecificationMap(Map<String, Object> 
paramMap, Specification os) {
+  protected static void fillInFieldMappingSpecificationMap(Map<String,Object> 
paramMap, Specification os)
+  {
     // Prep for field mappings
-    List<Map<String, String>> fieldMappings = new ArrayList<Map<String, 
String>>();
+    List<Map<String,String>> fieldMappings = new 
ArrayList<Map<String,String>>();
     String keepAllMetadataValue = "true";
     String lowernamesValue = "false";
     String writeLimitValue = "";
-    for (int i = 0; i < os.getChildCount(); i++) {
+    for (int i = 0; i < os.getChildCount(); i++)
+    {
       SpecificationNode sn = os.getChild(i);
       if (sn.getType().equals(TikaConfig.NODE_FIELDMAP)) {
         String source = sn.getAttributeValue(TikaConfig.ATTRIBUTE_SOURCE);
         String target = sn.getAttributeValue(TikaConfig.ATTRIBUTE_TARGET);
         String targetDisplay;
-        if (target == null) {
+        if (target == null)
+        {
           target = "";
           targetDisplay = "(remove)";
-        } else
+        }
+        else
           targetDisplay = target;
-        Map<String, String> fieldMapping = new HashMap<String, String>();
-        fieldMapping.put("SOURCE", source);
-        fieldMapping.put("TARGET", target);
-        fieldMapping.put("TARGETDISPLAY", targetDisplay);
+        Map<String,String> fieldMapping = new HashMap<String,String>();
+        fieldMapping.put("SOURCE",source);
+        fieldMapping.put("TARGET",target);
+        fieldMapping.put("TARGETDISPLAY",targetDisplay);
         fieldMappings.add(fieldMapping);
-      } else if (sn.getType().equals(TikaConfig.NODE_KEEPMETADATA)) {
+      }
+      else if (sn.getType().equals(TikaConfig.NODE_KEEPMETADATA))
+      {
         keepAllMetadataValue = 
sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
-      } else if (sn.getType().equals(TikaConfig.NODE_LOWERNAMES)) {
+      }
+      else if (sn.getType().equals(TikaConfig.NODE_LOWERNAMES))
+      {
         lowernamesValue = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
-      } else if (sn.getType().equals(TikaConfig.NODE_WRITELIMIT)) {
+      }
+      else if (sn.getType().equals(TikaConfig.NODE_WRITELIMIT))
+      {
         writeLimitValue = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
       }
     }
-    paramMap.put("FIELDMAPPINGS", fieldMappings);
-    paramMap.put("KEEPALLMETADATA", keepAllMetadataValue);
-    paramMap.put("LOWERNAMES", lowernamesValue);
-    paramMap.put("WRITELIMIT", writeLimitValue);
+    paramMap.put("FIELDMAPPINGS",fieldMappings);
+    paramMap.put("KEEPALLMETADATA",keepAllMetadataValue);
+    paramMap.put("LOWERNAMES",lowernamesValue);
+    paramMap.put("WRITELIMIT",writeLimitValue);
   }
 
-  protected static void fillInExceptionsSpecificationMap(Map<String, Object> 
paramMap, Specification os) {
+  protected static void fillInExceptionsSpecificationMap(Map<String,Object> 
paramMap, Specification os)
+  {
     String ignoreTikaExceptions = "true";
-    for (int i = 0; i < os.getChildCount(); i++) {
+    for (int i = 0; i < os.getChildCount(); i++)
+    {
       SpecificationNode sn = os.getChild(i);
-      if (sn.getType().equals(TikaConfig.NODE_IGNORETIKAEXCEPTION)) {
+      if (sn.getType().equals(TikaConfig.NODE_IGNORETIKAEXCEPTION))
+      {
         ignoreTikaExceptions = 
sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
       }
     }
-    paramMap.put("IGNORETIKAEXCEPTIONS", ignoreTikaExceptions);
+    paramMap.put("IGNORETIKAEXCEPTIONS",ignoreTikaExceptions);
   }
 
-  protected static void fillInBoilerplateSpecificationMap(Map<String, Object> 
paramMap, Specification os) {
+  protected static void fillInBoilerplateSpecificationMap(Map<String,Object> 
paramMap, Specification os)
+  {
     String boilerplateClassName = "";
-    for (int i = 0; i < os.getChildCount(); i++) {
+    for (int i = 0; i < os.getChildCount(); i++)
+    {
       SpecificationNode sn = os.getChild(i);
-      if (sn.getType().equals(TikaConfig.NODE_BOILERPLATEPROCESSOR)) {
+      if (sn.getType().equals(TikaConfig.NODE_BOILERPLATEPROCESSOR))
+      {
         boilerplateClassName = 
sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
       }
     }
-    paramMap.put("BOILERPLATECLASSNAME", boilerplateClassName);
+    paramMap.put("BOILERPLATECLASSNAME",boilerplateClassName);
   }
 
   protected static int handleTikaException(TikaException e)
-      throws IOException, ManifoldCFException, ServiceInterruption {
-    // MHL - what does Tika throw if it gets an IOException reading the 
stream??
-    Logging.ingest.warn("Tika: Tika exception extracting: " + e.getMessage(), 
e);
-    return DOCUMENTSTATUS_REJECTED;
-  }
-
-  protected static int handleTikaServerRejects(String reason)
-      throws IOException, ManifoldCFException, ServiceInterruption {
-    // MHL - what does Tika throw if it gets an IOException reading the 
stream??
-    Logging.ingest.warn("Tika Server: Tika Server rejects: " + reason);
-    return DOCUMENTSTATUS_REJECTED;
-  }
-
-  protected static int handleTikaServerError(String description)
-      throws IOException, ManifoldCFException, ServiceInterruption {
-    // MHL - what does Tika throw if it gets an IOException reading the 
stream??
-    Logging.ingest.warn("Tika Server: Tika Server error: " + description);
-    return DOCUMENTSTATUS_REJECTED;
-  }
-
-  protected static int handleTikaServerException(IOException e)
-      throws IOException, ManifoldCFException, ServiceInterruption {
+    throws IOException, ManifoldCFException, ServiceInterruption
+  {
     // MHL - what does Tika throw if it gets an IOException reading the 
stream??
-    Logging.ingest.warn("Tika: Tika exception extracting: " + e.getMessage(), 
e);
+    Logging.ingest.warn("Tika: Tika exception extracting: "+e.getMessage(),e);
     return DOCUMENTSTATUS_REJECTED;
   }
-
-  protected static int handleTikaServerException(ParseException e)
-      throws IOException, ManifoldCFException, ServiceInterruption {
-    // MHL - what does Tika throw if it gets an IOException reading the 
stream??
-    Logging.ingest.warn("Tika: Tika exception extracting: " + e.getMessage(), 
e);
-    return DOCUMENTSTATUS_REJECTED;
-  }
-
-  protected static int handleSaxException(SAXException e) throws IOException, 
ManifoldCFException, ServiceInterruption {
+  
+  protected static int handleSaxException(SAXException e)
+    throws IOException, ManifoldCFException, ServiceInterruption
+  {
     // MHL - what does this mean?
-    Logging.ingest.warn("Tika: SAX exception extracting: " + e.getMessage(), 
e);
+    Logging.ingest.warn("Tika: SAX exception extracting: "+e.getMessage(),e);
     return DOCUMENTSTATUS_REJECTED;
   }
-
-  protected static int handleIOException(IOException e) throws 
ManifoldCFException {
+  
+  protected static int handleIOException(IOException e)
+    throws ManifoldCFException
+  {
     // IOException reading from our local storage...
     if (e instanceof InterruptedIOException)
-      throw new ManifoldCFException(e.getMessage(), e, 
ManifoldCFException.INTERRUPTED);
-    throw new ManifoldCFException(e.getMessage(), e);
+      throw new 
ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+    throw new ManifoldCFException(e.getMessage(),e);
   }
-
-  protected static interface DestinationStorage {
-    /**
-     * Get the output stream to write to. Caller should explicitly close this
-     * stream when done writing.
-     */
-    public OutputStream getOutputStream() throws ManifoldCFException;
-
-    /**
-     * Get new binary length.
-     */
-    public long getBinaryLength() throws ManifoldCFException;
-
-    /**
-     * Get the input stream to read from. Caller should explicitly close this
-     * stream when done reading.
-     */
-    public InputStream getInputStream() throws ManifoldCFException;
-
-    /**
-     * Close the object and clean up everything. This should be called when the
-     * data is no longer needed.
-     */
-    public void close() throws ManifoldCFException;
-  }
-
-  protected static class FileDestinationStorage implements DestinationStorage {
+  
+  protected static interface DestinationStorage
+  {
+    /** Get the output stream to write to.  Caller should explicitly close 
this stream when done writing.
+    */
+    public OutputStream getOutputStream()
+      throws ManifoldCFException;
+    
+    /** Get new binary length.
+    */
+    public long getBinaryLength()
+      throws ManifoldCFException;
+
+    /** Get the input stream to read from.  Caller should explicitly close 
this stream when done reading.
+    */
+    public InputStream getInputStream()
+      throws ManifoldCFException;
+    
+    /** Close the object and clean up everything.
+    * This should be called when the data is no longer needed.
+    */
+    public void close()
+      throws ManifoldCFException;
+  }
+  
+  protected static class FileDestinationStorage implements DestinationStorage
+  {
     protected final File outputFile;
     protected final OutputStream outputStream;
 
-    public FileDestinationStorage() throws ManifoldCFException {
+    public FileDestinationStorage()
+      throws ManifoldCFException
+    {
       File outputFile;
       OutputStream outputStream;
-      try {
-        outputFile = File.createTempFile("mcftika", "tmp");
+      try
+      {
+        outputFile = File.createTempFile("mcftika","tmp");
         outputStream = new FileOutputStream(outputFile);
-      } catch (IOException e) {
+      }
+      catch (IOException e)
+      {
         handleIOException(e);
         outputFile = null;
         outputStream = null;
@@ -1046,127 +751,121 @@ public class TikaExtractor extends org.a
       this.outputFile = outputFile;
       this.outputStream = outputStream;
     }
-
+    
     @Override
-    public OutputStream getOutputStream() throws ManifoldCFException {
+    public OutputStream getOutputStream()
+      throws ManifoldCFException
+    {
       return outputStream;
     }
-
-    /**
-     * Get new binary length.
-     */
+    
+    /** Get new binary length.
+    */
     @Override
-    public long getBinaryLength() throws ManifoldCFException {
+    public long getBinaryLength()
+      throws ManifoldCFException
+    {
       return outputFile.length();
     }
 
-    /**
-     * Get the input stream to read from. Caller should explicitly close this
-     * stream when done reading.
-     */
+    /** Get the input stream to read from.  Caller should explicitly close 
this stream when done reading.
+    */
     @Override
-    public InputStream getInputStream() throws ManifoldCFException {
-      try {
+    public InputStream getInputStream()
+      throws ManifoldCFException
+    {
+      try
+      {
         return new FileInputStream(outputFile);
-      } catch (IOException e) {
+      }
+      catch (IOException e)
+      {
         handleIOException(e);
         return null;
       }
     }
-
-    /**
-     * Close the object and clean up everything. This should be called when the
-     * data is no longer needed.
-     */
+    
+    /** Close the object and clean up everything.
+    * This should be called when the data is no longer needed.
+    */
     @Override
-    public void close() throws ManifoldCFException {
+    public void close()
+      throws ManifoldCFException
+    {
       outputFile.delete();
     }
 
   }
-
-  protected static class MemoryDestinationStorage implements 
DestinationStorage {
+  
+  protected static class MemoryDestinationStorage implements DestinationStorage
+  {
     protected final ByteArrayOutputStream outputStream;
-
-    public MemoryDestinationStorage(int sizeHint) {
+    
+    public MemoryDestinationStorage(int sizeHint)
+    {
       outputStream = new ByteArrayOutputStream(sizeHint);
     }
-
+    
     @Override
-    public OutputStream getOutputStream() throws ManifoldCFException {
+    public OutputStream getOutputStream()
+      throws ManifoldCFException
+    {
       return outputStream;
     }
 
-    /**
-     * Get new binary length.
-     */
+    /** Get new binary length.
+    */
     @Override
-    public long getBinaryLength() throws ManifoldCFException {
+    public long getBinaryLength()
+      throws ManifoldCFException
+    {
       return outputStream.size();
     }
-
-    /**
-     * Get the input stream to read from. Caller should explicitly close this
-     * stream when done reading.
-     */
+    
+    /** Get the input stream to read from.  Caller should explicitly close 
this stream when done reading.
+    */
     @Override
-    public InputStream getInputStream() throws ManifoldCFException {
+    public InputStream getInputStream()
+      throws ManifoldCFException
+    {
       return new ByteArrayInputStream(outputStream.toByteArray());
     }
-
-    /**
-     * Close the object and clean up everything. This should be called when the
-     * data is no longer needed.
-     */
-    public void close() throws ManifoldCFException {
+    
+    /** Close the object and clean up everything.
+    * This should be called when the data is no longer needed.
+    */
+    public void close()
+      throws ManifoldCFException
+    {
     }
 
   }
 
   protected static class SpecPacker {
-
-    private final Map<String, String> sourceTargets = new HashMap<String, 
String>();
+    
+    private final Map<String,String> sourceTargets = new 
HashMap<String,String>();
     private final boolean keepAllMetadata;
     private final boolean lowerNames;
     private final int writeLimit;
     private final boolean ignoreTikaException;
     private final String extractorClassName;
-    private URI metaURI;
-    private URI contentURI;
-    private final String tikaHostname;
-    private final int tikaPort;
-    private final boolean tikaServer;
-    private final long tikaRetry;
-
+    
     public SpecPacker(Specification os) {
       boolean keepAllMetadata = true;
       boolean lowerNames = false;
       int writeLimit = TikaConfig.WRITELIMIT_DEFAULT;
       boolean ignoreTikaException = true;
       String extractorClassName = null;
-      String tikaHostname = TikaConfig.TIKAHOSTNAME_DEFAULT;
-      int tikaPort = TikaConfig.TIKAPORT_DEFAULT;
-      boolean tikaServer = false;
-      long tikaRetry = TikaConfig.TIKARETRY_DEFAULT;
-      try {
-        metaURI = new URI("/meta");
-        contentURI = new URI("/tika");
-      } catch (URISyntaxException e) {
-        // Should be impossible
-        metaURI = null;
-        contentURI = null;
-      }
-
       for (int i = 0; i < os.getChildCount(); i++) {
         SpecificationNode sn = os.getChild(i);
-
-        if (sn.getType().equals(TikaConfig.NODE_KEEPMETADATA)) {
+        
+        if(sn.getType().equals(TikaConfig.NODE_KEEPMETADATA)) {
           String value = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
           keepAllMetadata = Boolean.parseBoolean(value);
-        } else if (sn.getType().equals(TikaConfig.NODE_LOWERNAMES)) {
+        } else if(sn.getType().equals(TikaConfig.NODE_LOWERNAMES)) {
           String value = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
           lowerNames = Boolean.parseBoolean(value);
-        } else if (sn.getType().equals(TikaConfig.NODE_WRITELIMIT)) {
+        } else if(sn.getType().equals(TikaConfig.NODE_WRITELIMIT)) {
           String value = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
           if (value.length() == 0) {
             writeLimit = TikaConfig.WRITELIMIT_DEFAULT;
@@ -1176,7 +875,7 @@ public class TikaExtractor extends org.a
         } else if (sn.getType().equals(TikaConfig.NODE_FIELDMAP)) {
           String source = sn.getAttributeValue(TikaConfig.ATTRIBUTE_SOURCE);
           String target = sn.getAttributeValue(TikaConfig.ATTRIBUTE_TARGET);
-
+          
           if (target == null) {
             target = "";
           }
@@ -1186,34 +885,6 @@ public class TikaExtractor extends org.a
           ignoreTikaException = Boolean.parseBoolean(value);
         } else if (sn.getType().equals(TikaConfig.NODE_BOILERPLATEPROCESSOR)) {
           extractorClassName = 
sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
-        } else if (sn.getType().equals(TikaConfig.NODE_TIKAHOSTNAME)) {
-          String value = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
-          if (value.length() == 0) {
-            tikaHostname = TikaConfig.TIKAHOSTNAME_DEFAULT;
-          } else {
-            tikaHostname = value;
-          }
-        } else if (sn.getType().equals(TikaConfig.NODE_TIKAPORT)) {
-          String value = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
-          if (value.length() == 0) {
-            tikaPort = TikaConfig.TIKAPORT_DEFAULT;
-          } else {
-            tikaPort = Integer.parseInt(value);
-          }
-        } else if (sn.getType().equals(TikaConfig.NODE_TIKASERVER)) {
-          String value = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
-          if (value.length() == 0) {
-            tikaServer = false;
-          } else {
-            tikaServer = Boolean.parseBoolean(value);
-          }
-        } else if (sn.getType().equals(TikaConfig.NODE_TIKARETRY)) {
-          String value = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
-          if (value.length() == 0) {
-            tikaRetry = TikaConfig.TIKARETRY_DEFAULT;
-          } else {
-            tikaRetry = Long.parseLong(value);
-          }
         }
       }
       this.keepAllMetadata = keepAllMetadata;
@@ -1221,16 +892,12 @@ public class TikaExtractor extends org.a
       this.writeLimit = writeLimit;
       this.ignoreTikaException = ignoreTikaException;
       this.extractorClassName = extractorClassName;
-      this.tikaHostname = tikaHostname;
-      this.tikaPort = tikaPort;
-      this.tikaServer = tikaServer;
-      this.tikaRetry = tikaRetry;
     }
-
+    
     public String toPackedString() {
       StringBuilder sb = new StringBuilder();
       int i;
-
+      
       // Mappings
       final String[] sortArray = new String[sourceTargets.size()];
       i = 0;
@@ -1238,7 +905,7 @@ public class TikaExtractor extends org.a
         sortArray[i++] = source;
       }
       java.util.Arrays.sort(sortArray);
-
+      
       List<String> packedMappings = new ArrayList<String>();
       String[] fixedList = new String[2];
       for (String source : sortArray) {
@@ -1246,10 +913,10 @@ public class TikaExtractor extends org.a
         StringBuilder localBuffer = new StringBuilder();
         fixedList[0] = source;
         fixedList[1] = target;
-        packFixedList(localBuffer, fixedList, ':');
+        packFixedList(localBuffer,fixedList,':');
         packedMappings.add(localBuffer.toString());
       }
-      packList(sb, packedMappings, '+');
+      packList(sb,packedMappings,'+');
 
       // Keep all metadata
       if (keepAllMetadata)
@@ -1257,11 +924,12 @@ public class TikaExtractor extends org.a
       else
         sb.append('-');
       if (lowerNames)
-        sb.append('+');
-      else
-        sb.append('-');
+          sb.append('+');
+        else
+          sb.append('-');
 
-      if (writeLimit != TikaConfig.WRITELIMIT_DEFAULT) {
+      if (writeLimit != TikaConfig.WRITELIMIT_DEFAULT)
+      {
         sb.append('+');
         sb.append(writeLimit);
       }
@@ -1271,60 +939,55 @@ public class TikaExtractor extends org.a
       else
         sb.append('-');
 
-      if (extractorClassName != null) {
+      if (extractorClassName != null)
+      {
         sb.append('+');
         sb.append(extractorClassName);
-      } else
+      }
+      else
         sb.append('-');
-
+      
       return sb.toString();
     }
-
-    public URI metaURI() {
-      return metaURI;
-    }
-
-    public URI contentURI() {
-      return contentURI;
-    }
-
+    
     public String getMapping(String source) {
       return sourceTargets.get(source);
     }
-
+    
     public boolean keepAllMetadata() {
       return keepAllMetadata;
     }
-
+    
     public boolean lowerNames() {
       return lowerNames;
     }
-
+    
     public int writeLimit() {
       return writeLimit;
     }
-
+    
     public boolean ignoreTikaException() {
       return ignoreTikaException;
     }
-
-    public BoilerpipeExtractor getExtractorClassInstance() throws 
ManifoldCFException {
+    
+    public BoilerpipeExtractor getExtractorClassInstance()
+      throws ManifoldCFException {
       if (extractorClassName == null)
         return null;
       try {
         ClassLoader loader = BoilerpipeExtractor.class.getClassLoader();
         Class extractorClass = loader.loadClass(extractorClassName);
         java.lang.reflect.Field f = extractorClass.getField("INSTANCE");
-        return (BoilerpipeExtractor) f.get(null);
+        return (BoilerpipeExtractor)f.get(null);
       } catch (ClassNotFoundException e) {
-        throw new ManifoldCFException(
-            "Boilerpipe extractor class '" + extractorClassName + "' not 
found: " + e.getMessage(), e);
+        throw new ManifoldCFException("Boilerpipe extractor class 
'"+extractorClassName+"' not found: "+e.getMessage(),e);
       } catch (Exception e) {
-        throw new ManifoldCFException(
-            "Boilerpipe extractor class '" + extractorClassName + "' exception 
on instantiation: " + e.getMessage(), e);
+        throw new ManifoldCFException("Boilerpipe extractor class 
'"+extractorClassName+"' exception on instantiation: "+e.getMessage(),e);
       }
     }
 
   }
 
 }
+
+

Modified: 
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_en_US.properties
URL: 
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_en_US.properties?rev=1794806&r1=1794805&r2=1794806&view=diff
==============================================================================
--- 
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_en_US.properties
 (original)
+++ 
manifoldcf/branches/CONNECTORS-1425/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_en_US.properties
 Thu May 11 10:36:57 2017
@@ -13,13 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-TikaExtractor.TikaHostname=Tika hostname:
-TikaExtractor.TikaPort=Tika port:
-TikaExtractor.TikaRetry=Retry interval (ms):
-TikaExtractor.TikaParsersSelected=Tika Parsers
-TikaExtractor.TikaServerSelected=Tika Server
-TikaExtractor.TikaTypeTabName=Tika type
-TikaExtractor.TikaType=Tika type:
 TikaExtractor.FieldMappingTabName=Field mapping
 TikaExtractor.ExceptionsTabName=Exceptions
 TikaExtractor.BoilerplateTabName=Boilerplate


Reply via email to