Author: rharo
Date: Tue Jan 26 15:47:12 2016
New Revision: 1726831

URL: http://svn.apache.org/viewvc?rev=1726831&view=rev
Log:
OpenNLP Transformation Connector Initial Import

Added:
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/README.md
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_en_US.properties
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_ja_JP.properties
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_zh_CN.properties
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification.js
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification_FieldMapping.html
    
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/viewSpecification.html
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/download-models.sh
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/pom.xml
Modified:
    manifoldcf/branches/CONNECTORS-1270/connectors/pom.xml

Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/README.md
URL: 
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/README.md?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/README.md (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/README.md Tue Jan 26 
15:47:12 2016
@@ -0,0 +1,42 @@
+# OpenNLP Transformation Connector for Apache ManifoldCF
+
+The OpenNLP connector extracts named entities (people, locations and organizations) from document content and attaches them as metadata (ner_people, ner_locations and ner_organizations) to the repository document.
+
+
+## Building the Connector
+---
+
+```
+git clone https://github.com/apache/manifoldcf.git
+cd manifoldcf/
+git checkout release-2.2-branch
+mvn clean install 
+
+git clone https://github.com/ChalithaUdara/OpenNLP-Manifold-Connector.git
+cd OpenNLP-Manifold-Connector
+mvn clean install -DskipTests=true
+```
+
+## Configure Connector with ManifoldCF
+---
+
+Copy mcf-opennlp-connector-2.2-jar-with-dependencies.jar to **$MANIFOLD_DIR/connectors-lib**.
+To register the connector with ManifoldCF, add the following to the **$MANIFOLD_DIR/connectors.xml** file.
+
+```
+<transformationconnector name="OpenNLP Extractor" 
class="org.apache.manifoldcf.agents.transformation.opennlp.OpenNlpExtractor" />
+```
+---
+
+In order to extract named entities with OpenNLP, you first need to download the required OpenNLP models. Run the **download-models.sh** script to download them.
+
+```
+sh download-models.sh
+```
+
+This will download the models to the nlpmodels directory.
+
+In the ManifoldCF job configuration, you need to configure the paths to the corresponding models.
+
+
+

Added: 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java
URL: 
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java?rev=1726831&view=auto
==============================================================================
--- 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java
 (added)
+++ 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java
 Tue Jan 26 15:47:12 2016
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.agents.transformation.opennlp;
+
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+
+public class Messages extends org.apache.manifoldcf.ui.i18n.Messages
+{
+    public static final String 
DEFAULT_BUNDLE_NAME="org.apache.manifoldcf.agents.transformation.opennlp.common";
+    public static final String 
DEFAULT_PATH_NAME="org.apache.manifoldcf.agents.transformation.opennlp";
+    
+    /** Constructor - do no instantiate
+     */
+     protected Messages()
+     {
+     }
+     
+     public static String getString(Locale locale, String messageKey)
+     {
+       return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+     }
+
+     public static String getAttributeString(Locale locale, String messageKey)
+     {
+       return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, 
null);
+     }
+
+     public static String getBodyString(Locale locale, String messageKey)
+     {
+       return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+     }
+
+     public static String getAttributeJavascriptString(Locale locale, String 
messageKey)
+     {
+       return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, 
messageKey, null);
+     }
+
+     public static String getBodyJavascriptString(Locale locale, String 
messageKey)
+     {
+       return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, 
null);
+     }
+
+     public static String getString(Locale locale, String messageKey, Object[] 
args)
+     {
+       return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+     }
+
+     public static String getAttributeString(Locale locale, String messageKey, 
Object[] args)
+     {
+       return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, 
args);
+     }
+     
+     public static String getBodyString(Locale locale, String messageKey, 
Object[] args)
+     {
+       return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+     }
+
+     public static String getAttributeJavascriptString(Locale locale, String 
messageKey, Object[] args)
+     {
+       return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, 
messageKey, args);
+     }
+
+     public static String getBodyJavascriptString(Locale locale, String 
messageKey, Object[] args)
+     {
+       return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, 
args);
+     }
+
+     // More general methods which allow bundlenames and class loaders to be 
specified.
+     
+     public static String getString(String bundleName, Locale locale, String 
messageKey, Object[] args)
+     {
+       return getString(Messages.class, bundleName, locale, messageKey, args);
+     }
+
+     public static String getAttributeString(String bundleName, Locale locale, 
String messageKey, Object[] args)
+     {
+       return getAttributeString(Messages.class, bundleName, locale, 
messageKey, args);
+     }
+
+     public static String getBodyString(String bundleName, Locale locale, 
String messageKey, Object[] args)
+     {
+       return getBodyString(Messages.class, bundleName, locale, messageKey, 
args);
+     }
+     
+     public static String getAttributeJavascriptString(String bundleName, 
Locale locale, String messageKey, Object[] args)
+     {
+       return getAttributeJavascriptString(Messages.class, bundleName, locale, 
messageKey, args);
+     }
+
+     public static String getBodyJavascriptString(String bundleName, Locale 
locale, String messageKey, Object[] args)
+     {
+       return getBodyJavascriptString(Messages.class, bundleName, locale, 
messageKey, args);
+     }
+
+     // Resource output
+     
+     public static void outputResource(IHTTPOutput output, Locale locale, 
String resourceKey,
+       Map<String,String> substitutionParameters, boolean mapToUpperCase)
+       throws ManifoldCFException
+     {
+       
outputResource(output,Messages.class,DEFAULT_PATH_NAME,locale,resourceKey,
+         substitutionParameters,mapToUpperCase);
+     }
+     
+     public static void outputResourceWithVelocity(IHTTPOutput output, Locale 
locale, String resourceKey,
+       Map<String,String> substitutionParameters, boolean mapToUpperCase)
+       throws ManifoldCFException
+     {
+       
outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+         substitutionParameters,mapToUpperCase);
+     }
+
+     public static void outputResourceWithVelocity(IHTTPOutput output, Locale 
locale, String resourceKey,
+       Map<String,Object> contextObjects)
+       throws ManifoldCFException
+     {
+       
outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+         contextObjects);
+     }
+
+}

Added: 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java
URL: 
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java?rev=1726831&view=auto
==============================================================================
--- 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java
 (added)
+++ 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java
 Tue Jan 26 15:47:12 2016
@@ -0,0 +1,513 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.agents.transformation.opennlp;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.sentdetect.SentenceDetector;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.util.Span;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
+import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
+import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
+import org.apache.manifoldcf.agents.system.Logging;
+import org.apache.manifoldcf.agents.transformation.BaseTransformationConnector;
+import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
+import org.apache.manifoldcf.core.interfaces.IPostParameters;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+import org.apache.manifoldcf.core.interfaces.Specification;
+import org.apache.manifoldcf.core.interfaces.SpecificationNode;
+import org.apache.manifoldcf.core.interfaces.VersionContext;
+
+public class OpenNlpExtractor extends BaseTransformationConnector {
+       private static final String EDIT_SPECIFICATION_JS = 
"editSpecification.js"; // resource rendered by outputSpecificationHeader()
+       private static final String EDIT_SPECIFICATION_FIELDMAPPING_HTML = 
"editSpecification_FieldMapping.html"; // resource rendered by outputSpecificationBody()
+       private static final String VIEW_SPECIFICATION_HTML = 
"viewSpecification.html"; // resource rendered by viewSpecification()
+
+       // Meta-data fields added by this connector
+       private static final String PERSONS = "ner_people"; // field holding the people finder's matches
+       private static final String LOCATIONS = "ner_locations"; // field holding the location finder's matches
+       private static final String ORGANIZATIONS = "ner_organizations"; // field holding the organization finder's matches
+
+       protected static final String ACTIVITY_EXTRACT = "extract"; // the only activity name listed below
+
+       protected static final String[] activitiesList = new String[] { 
ACTIVITY_EXTRACT }; // returned as-is by getActivitiesList()
+
+       /**
+        * Lists the activity names this connector can report. The connector does
+        * NOT need to be connected before this method is called.
+        *
+        * @return the activity names; the shared {@code activitiesList} array is
+        *         returned directly, not a copy.
+        */
+       @Override
+       public String[] getActivitiesList() {
+               final String[] supportedActivities = activitiesList;
+               return supportedActivities;
+       }
+
+       /**
+        * Build the pipeline version context for a pipeline specification. The
+        * version string describes the pertinent details of the specification and
+        * configuration so that the Connector Framework can decide whether a
+        * document needs to be processed again. Document contents cannot be
+        * considered here; only configuration and specification information can.
+        *
+        * This method presumes that the underlying connector object has been
+        * configured.
+        *
+        * @param os
+        *            is the current pipeline specification object for this
+        *            connection for the job that is doing the crawling.
+        * @return a version context whose version string is the packed form of
+        *         {@code os} as produced by {@code SpecPacker}; if two such
+        *         strings are equal, nothing that affects how or whether the
+        *         document is indexed should be different.
+        */
+       @Override
+       public VersionContext getPipelineDescription(Specification os)
+                       throws ManifoldCFException, ServiceInterruption {
+               final String packedSpecification = new SpecPacker(os).toPackedString();
+               return new VersionContext(packedSpecification, params, os);
+       }
+
+       /**
+        * Run named-entity extraction over a document and pass an enriched copy
+        * to the next pipeline stage.
+        *
+        * The document content is read fully into memory, decoded as UTF-8 text,
+        * split into sentences and tokens, and scanned with the people, location
+        * and organization name finders. The matches are attached to a duplicate
+        * of the document as the fields {@code ner_people}, {@code ner_locations}
+        * and {@code ner_organizations}, and the duplicate (carrying the buffered
+        * content) is sent on through {@code activities}.
+        *
+        * The model paths are taken from the specification held by
+        * {@code pipelineDescription}, since the specification itself is not
+        * passed to this method.
+        *
+        * @param documentURI
+        *            is the URI of the document. It is passed on unchanged to the
+        *            next pipeline stage.
+        * @param pipelineDescription
+        *            is the version context that was constructed for this
+        *            document by the getPipelineDescription() method.
+        * @param document
+        *            is the document data to be processed.
+        * @param authorityNameString
+        *            is the name of the authority responsible for authorizing any
+        *            access tokens passed in with the repository document. May be
+        *            null. Not used by this connector.
+        * @param activities
+        *            is the handle used to send the modified document to the next
+        *            stage in the pipeline.
+        * @return the document status returned by the next pipeline stage.
+        * @throws IOException
+        *             only if there's a stream error reading the document data.
+        */
+       @Override
+       public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription,
+                       RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+                       throws ManifoldCFException, ServiceInterruption, IOException {
+               // assumes use of Tika extractor before using this connector
+               Logging.agents.debug("Starting OpenNlp extraction");
+
+               SpecPacker sp = new SpecPacker(pipelineDescription.getSpecification());
+
+               // Buffer the whole content: it is analysed here and replayed downstream.
+               byte[] bytes = IOUtils.toByteArray(document.getBinaryStream());
+
+               SentenceDetector sentenceDetector =
+                               OpenNlpExtractorConfig.sentenceDetector(sp.getSModelPath());
+               Tokenizer tokenizer = OpenNlpExtractorConfig.tokenizer(sp.getTModelPath());
+               NameFinderME peopleFinder = OpenNlpExtractorConfig.peopleFinder(sp.getPModelPath());
+               NameFinderME locationFinder = OpenNlpExtractorConfig.locationFinder(sp.getLModelPath());
+               NameFinderME organizationFinder =
+                               OpenNlpExtractorConfig.organizationFinder(sp.getOModelPath());
+
+               // create a duplicate
+               RepositoryDocument docCopy = document.duplicate();
+               Map<String, List<String>> nerMap = new HashMap<>();
+
+               if (document.getBinaryLength() > 0) {
+                       String textContent = new String(bytes, StandardCharsets.UTF_8);
+                       List<String> peopleList = new ArrayList<>();
+                       List<String> locationsList = new ArrayList<>();
+                       List<String> organizationsList = new ArrayList<>();
+
+                       try {
+                               for (String sentence : sentenceDetector.sentDetect(textContent)) {
+                                       String[] tokens = tokenizer.tokenize(sentence);
+
+                                       Span[] spans = peopleFinder.find(tokens);
+                                       peopleList.addAll(Arrays.asList(Span.spansToStrings(spans, tokens)));
+
+                                       spans = locationFinder.find(tokens);
+                                       locationsList.addAll(Arrays.asList(Span.spansToStrings(spans, tokens)));
+
+                                       spans = organizationFinder.find(tokens);
+                                       organizationsList.addAll(Arrays.asList(Span.spansToStrings(spans, tokens)));
+                               }
+                       } finally {
+                               // The name finders accumulate adaptive data across find() calls.
+                               // OpenNLP requires it to be cleared after every document, otherwise
+                               // detection quality degrades on later documents if a finder is reused.
+                               peopleFinder.clearAdaptiveData();
+                               locationFinder.clearAdaptiveData();
+                               organizationFinder.clearAdaptiveData();
+                       }
+
+                       nerMap.put(PERSONS, peopleList);
+                       nerMap.put(LOCATIONS, locationsList);
+                       nerMap.put(ORGANIZATIONS, organizationsList);
+               }
+               // the original stream has been consumed; hand the buffered bytes to the copy
+               docCopy.setBinary(new ByteArrayInputStream(bytes), bytes.length);
+
+               // add named entity meta-data (nothing to iterate when the document was empty)
+               for (Entry<String, List<String>> entry : nerMap.entrySet()) {
+                       List<String> neList = entry.getValue();
+                       docCopy.addField(entry.getKey(), neList.toArray(new String[neList.size()]));
+               }
+
+               return activities.sendDocument(documentURI, docCopy);
+       }
+
+       // ////////////////////////
+       // UI Methods
+       // ////////////////////////
+
+       /**
+        * Name of the javascript function the job form calls to check this
+        * connection's specification.
+        *
+        * @param connectionSequenceNumber
+        *            is the unique number of this connection within the job.
+        * @return {@code "s" + connectionSequenceNumber + "_checkSpecification"}.
+        */
+       @Override
+       public String getFormCheckJavascriptMethodName(int connectionSequenceNumber) {
+               final StringBuilder methodName = new StringBuilder("s");
+               methodName.append(connectionSequenceNumber);
+               methodName.append("_checkSpecification");
+               return methodName.toString();
+       }
+
+       /**
+        * Name of the javascript function the job form calls to check this
+        * connection's specification before saving.
+        *
+        * @param connectionSequenceNumber
+        *            is the unique number of this connection within the job.
+        * @return {@code "s" + connectionSequenceNumber + "_checkSpecificationForSave"}.
+        */
+       @Override
+       public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber) {
+               final StringBuilder methodName = new StringBuilder("s");
+               methodName.append(connectionSequenceNumber);
+               methodName.append("_checkSpecificationForSave");
+               return methodName.toString();
+       }
+
+       /**
+        * Output the specification header section. Called in the head section of
+        * a job page that has selected a connection of this type. It registers
+        * this connector's tab (its localized name is looked up under the key
+        * {@code OpenNlpExtractor.FieldMappingTabName}) and renders the
+        * {@code editSpecification.js} resource with the sequence number
+        * available as {@code SEQNUM}.
+        *
+        * @param out
+        *            is the output to which any HTML should be sent.
+        * @param locale
+        *            is the preferred locale of the output.
+        * @param os
+        *            is the current specification for this job. Not read here.
+        * @param connectionSequenceNumber
+        *            is the unique number of this connection within the job.
+        * @param tabsArray
+        *            is the list of tab names; this connector's tab name is
+        *            appended to it.
+        */
+       @Override
+       public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
+                       int connectionSequenceNumber, List<String> tabsArray)
+                       throws ManifoldCFException, IOException {
+               final Map<String, Object> velocityContext = new HashMap<>();
+               velocityContext.put("SEQNUM", String.valueOf(connectionSequenceNumber));
+
+               final String tabName = Messages.getString(locale, "OpenNlpExtractor.FieldMappingTabName");
+               tabsArray.add(tabName);
+
+               Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_JS, velocityContext);
+       }
+
+       /**
+        * Output the specification body section. Called in the body section of a
+        * job page that has selected a connection of this type, to present the
+        * form elements for editing. The HTML that is output will be within
+        * appropriate &lt;html&gt;, &lt;body&gt;, and &lt;form&gt; tags; the name
+        * of the form is "editjob".
+        *
+        * The template {@code editSpecification_FieldMapping.html} is rendered
+        * with {@code TABNAME}, {@code SEQNUM} and {@code SELECTEDNUM}, plus
+        * whatever fillInFieldMappingSpecificationMap() adds from {@code os}.
+        *
+        * @param out
+        *            is the output to which any HTML should be sent.
+        * @param locale
+        *            is the preferred locale of the output.
+        * @param os
+        *            is the current specification for this job.
+        * @param connectionSequenceNumber
+        *            is the unique number of this connection within the job.
+        * @param actualSequenceNumber
+        *            is the connection within the job that has currently been
+        *            selected.
+        * @param tabName
+        *            is the current tab name.
+        */
+       @Override
+       public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os,
+                       int connectionSequenceNumber, int actualSequenceNumber, String tabName)
+                       throws ManifoldCFException, IOException {
+               final Map<String, Object> velocityContext = new HashMap<>();
+
+               velocityContext.put("SEQNUM", String.valueOf(connectionSequenceNumber));
+               velocityContext.put("SELECTEDNUM", String.valueOf(actualSequenceNumber));
+               velocityContext.put("TABNAME", tabName);
+
+               // model paths currently stored in the specification
+               fillInFieldMappingSpecificationMap(velocityContext, os);
+
+               Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_FIELDMAPPING_HTML,
+                               velocityContext);
+       }
+
+       /**
+        * Process a specification post. This method is called at the start of a
+        * job's edit or view page, whenever there is a possibility that form data
+        * for a connection has been posted. Its purpose is to gather form
+        * information and modify the specification accordingly. The name of the
+        * posted form is "editjob".
+        *
+        * Five form parameters are read, each prefixed with
+        * {@code "s" + connectionSequenceNumber + "_"}: {@code smodelpath},
+        * {@code tmodelpath}, {@code pmodelpath}, {@code lmodelpath} and
+        * {@code omodelpath}. For every parameter that is present in the post, any
+        * existing node of the matching type is removed from {@code os} and a
+        * single node carrying the posted value is appended, so repeated posts do
+        * not accumulate duplicate nodes. A parameter that is absent from the post
+        * leaves the specification untouched for that node type, so a post that
+        * does not carry this connector's fields cannot blank out stored paths.
+        *
+        * @param variableContext
+        *            contains the post data, including binary file-upload
+        *            information.
+        * @param locale
+        *            is the preferred locale of the output.
+        * @param os
+        *            is the current specification for this job.
+        * @param connectionSequenceNumber
+        *            is the unique number of this connection within the job.
+        * @return null if all is well, or a string error message if there is an
+        *         error that should prevent saving of the job (and cause a
+        *         redirection to an error page). This implementation always
+        *         returns null.
+        */
+       @Override
+       public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
+                       int connectionSequenceNumber) throws ManifoldCFException {
+               String seqPrefix = "s" + connectionSequenceNumber + "_";
+
+               replaceModelPathNode(os, OpenNlpExtractorConfig.NODE_SMODEL_PATH,
+                               variableContext.getParameter(seqPrefix + "smodelpath"));
+               replaceModelPathNode(os, OpenNlpExtractorConfig.NODE_TMODEL_PATH,
+                               variableContext.getParameter(seqPrefix + "tmodelpath"));
+               replaceModelPathNode(os, OpenNlpExtractorConfig.NODE_PMODEL_PATH,
+                               variableContext.getParameter(seqPrefix + "pmodelpath"));
+               replaceModelPathNode(os, OpenNlpExtractorConfig.NODE_LMODEL_PATH,
+                               variableContext.getParameter(seqPrefix + "lmodelpath"));
+               replaceModelPathNode(os, OpenNlpExtractorConfig.NODE_OMODEL_PATH,
+                               variableContext.getParameter(seqPrefix + "omodelpath"));
+
+               return null;
+       }
+
+       /**
+        * Replace every node of type {@code nodeType} in {@code os} with a single
+        * node whose value attribute is {@code postedValue}. Does nothing when
+        * {@code postedValue} is null, i.e. when the parameter was not posted.
+        */
+       private static void replaceModelPathNode(Specification os, String nodeType, String postedValue) {
+               if (postedValue == null) {
+                       return;
+               }
+
+               // Drop earlier nodes of this type; the index only advances when nothing was removed.
+               int i = 0;
+               while (i < os.getChildCount()) {
+                       if (nodeType.equals(os.getChild(i).getType())) {
+                               os.removeChild(i);
+                       } else {
+                               i++;
+                       }
+               }
+
+               SpecificationNode node = new SpecificationNode(nodeType);
+               node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, postedValue);
+               os.addChild(os.getChildCount(), node);
+       }
+
+       /**
+        * View specification. This method is called in the body section of a 
job's
+        * view page. Its purpose is to present the output specification 
information
+        * to the user. The coder can presume that the HTML that is output from 
this
+        * configuration will be within appropriate <html> and <body> tags.
+        * 
+        * @param out
+        *            is the output to which any HTML should be sent.
+        * @param locale
+        *            is the preferred local of the output.
+        * @param connectionSequenceNumber
+        *            is the unique number of this connection within the job.
+        * @param os
+        *            is the current output specification for this job.
+        */
+       @Override
+       public void viewSpecification(IHTTPOutput out, Locale locale, 
Specification os, int connectionSequenceNumber)
+                       throws ManifoldCFException, IOException {
+               Map<String, Object> paramMap = new HashMap<String, Object>();
+               paramMap.put("SEQNUM", 
Integer.toString(connectionSequenceNumber));
+
+               fillInFieldMappingSpecificationMap(paramMap, os);
+               Messages.outputResourceWithVelocity(out, locale, 
VIEW_SPECIFICATION_HTML, paramMap);
+       }
+
+       protected static void fillInFieldMappingSpecificationMap(Map<String, 
Object> paramMap, Specification os) {
+               String sModelPath = "";
+               String tModelPath = "";
+               String pModelPath = "";
+               String lModelPath = "";
+               String oModelPath = "";
+
+               for (int i = 0; i < os.getChildCount(); i++) {
+                       SpecificationNode sn = os.getChild(i);
+                       if 
(sn.getType().equals(OpenNlpExtractorConfig.NODE_SMODEL_PATH)) {
+                               sModelPath = 
sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+                               if (sModelPath == null) {
+                                       sModelPath = "";
+                               }
+                       } else if 
(sn.getType().equals(OpenNlpExtractorConfig.NODE_TMODEL_PATH)) {
+                               tModelPath = 
sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+                               if (tModelPath == null) {
+                                       tModelPath = "";
+                               }
+                       } else if 
(sn.getType().equals(OpenNlpExtractorConfig.NODE_PMODEL_PATH)) {
+                               pModelPath = 
sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+                               if (pModelPath == null) {
+                                       pModelPath = "";
+                               }
+                       } else if 
(sn.getType().equals(OpenNlpExtractorConfig.NODE_LMODEL_PATH)) {
+                               lModelPath = 
sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+                               if (lModelPath == null) {
+                                       lModelPath = "";
+                               }
+                       } else if 
(sn.getType().equals(OpenNlpExtractorConfig.NODE_OMODEL_PATH)) {
+                               oModelPath = 
sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+                               if (oModelPath == null) {
+                                       oModelPath = "";
+                               }
+                       }
+
+               }
+               paramMap.put("SMODELPATH", sModelPath);
+               paramMap.put("TMODELPATH", tModelPath);
+               paramMap.put("PMODELPATH", pModelPath);
+               paramMap.put("LMODELPATH", lModelPath);
+               paramMap.put("OMODELPATH", oModelPath);
+       }
+
+       /**
+        * Immutable snapshot of the five model paths stored in a specification.
+        * Each path is the value attribute of the matching specification node,
+        * or null when no such node exists (or the attribute itself is null).
+        * When several nodes of the same type exist, the last one wins.
+        */
+       protected static class SpecPacker {
+
+               private final String sModelPath;
+               private final String tModelPath;
+               private final String pModelPath;
+               private final String lModelPath;
+               private final String oModelPath;
+
+               /**
+                * Reads the model paths out of the given specification.
+                *
+                * @param os
+                *            is the specification whose child nodes are scanned.
+                */
+               public SpecPacker(Specification os) {
+                       String sModelPath = null;
+                       String tModelPath = null;
+                       String pModelPath = null;
+                       String lModelPath = null;
+                       String oModelPath = null;
+
+                       for (int i = 0; i < os.getChildCount(); i++) {
+                               SpecificationNode sn = os.getChild(i);
+                               String type = sn.getType();
+
+                               // The node type names are distinct, so at most one branch applies.
+                               if (type.equals(OpenNlpExtractorConfig.NODE_SMODEL_PATH)) {
+                                       sModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+                               } else if (type.equals(OpenNlpExtractorConfig.NODE_TMODEL_PATH)) {
+                                       tModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+                               } else if (type.equals(OpenNlpExtractorConfig.NODE_PMODEL_PATH)) {
+                                       pModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+                               } else if (type.equals(OpenNlpExtractorConfig.NODE_LMODEL_PATH)) {
+                                       lModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+                               } else if (type.equals(OpenNlpExtractorConfig.NODE_OMODEL_PATH)) {
+                                       oModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+                               }
+                       }
+                       this.sModelPath = sModelPath;
+                       this.tModelPath = tModelPath;
+                       this.pModelPath = pModelPath;
+                       this.lModelPath = lModelPath;
+                       this.oModelPath = oModelPath;
+               }
+
+               /**
+                * Packs the five paths into a single string, always in the order
+                * sentence, tokenizer, people, locations, organizations.
+                * Each field is written as {@code <length>:<value>}, or as
+                * {@code -} when the path is null. Length-prefixing keeps the
+                * result unambiguous: plain concatenation would pack "a"+"bc" and
+                * "ab"+"c" (or a null and an empty path) to the same string.
+                * NOTE(review): presumably this feeds the document version string;
+                * confirm against the caller.
+                *
+                * @return the packed representation; never null.
+                */
+               public String toPackedString() {
+                       StringBuilder sb = new StringBuilder();
+
+                       packField(sb, sModelPath);
+                       packField(sb, tModelPath);
+                       packField(sb, pModelPath);
+                       packField(sb, lModelPath);
+                       packField(sb, oModelPath);
+
+                       return sb.toString();
+               }
+
+               /** Appends one field as "length:value", or "-" when the value is null. */
+               private static void packField(StringBuilder sb, String value) {
+                       if (value == null) {
+                               sb.append('-');
+                       } else {
+                               sb.append(value.length()).append(':').append(value);
+                       }
+               }
+
+               /** @return the sentence detector model path, or null if unset. */
+               public String getSModelPath() {
+                       return sModelPath;
+               }
+
+               /** @return the tokenizer model path, or null if unset. */
+               public String getTModelPath() {
+                       return tModelPath;
+               }
+
+               /** @return the people model path, or null if unset. */
+               public String getPModelPath() {
+                       return pModelPath;
+               }
+
+               /** @return the locations model path, or null if unset. */
+               public String getLModelPath() {
+                       return lModelPath;
+               }
+
+               /** @return the organizations model path, or null if unset. */
+               public String getOModelPath() {
+                       return oModelPath;
+               }
+
+       }
+
+}

Added: 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java
URL: 
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java?rev=1726831&view=auto
==============================================================================
--- 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java
 (added)
+++ 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java
 Tue Jan 26 15:47:12 2016
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.agents.transformation.opennlp;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.sentdetect.SentenceDetector;
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.util.InvalidFormatException;
+
+public class OpenNlpExtractorConfig
+{
+       private static enum MODEL{
+               SENTENCE, TOKENIZER, PEOPLE, LOCATIONS, ORGANIZATIONS;
+       }
+       
+       // Specification nodes and values
+    public static final String NODE_SMODEL_PATH = "SModelPath";
+    public static final String NODE_TMODEL_PATH = "TModelPath";
+    public static final String NODE_PMODEL_PATH = "PModelPath";
+    public static final String NODE_LMODEL_PATH = "LModelPath";
+    public static final String NODE_OMODEL_PATH = "OModelPath";
+
+    public static final String ATTRIBUTE_VALUE = "value";
+    
+    private static SentenceModel sModel = null;
+    private static TokenizerModel tModel = null;
+    private static TokenNameFinderModel pModel = null;
+    private static TokenNameFinderModel lModel = null;
+    private static TokenNameFinderModel oModel = null;
+    
+    private static synchronized void initializeModel(MODEL m, String path) 
throws InvalidFormatException, FileNotFoundException, IOException{
+       if(sModel == null && m == MODEL.SENTENCE)
+               sModel = new SentenceModel(new FileInputStream(path));
+       if(tModel == null && m == MODEL.TOKENIZER)
+               tModel = new TokenizerModel(new FileInputStream(path));
+       if(pModel == null && m == MODEL.PEOPLE)
+               pModel = new TokenNameFinderModel(new FileInputStream(path));
+       if(lModel == null && m == MODEL.LOCATIONS)
+               lModel = new TokenNameFinderModel(new FileInputStream(path));
+       if(oModel == null && m == MODEL.ORGANIZATIONS)
+               oModel = new TokenNameFinderModel(new FileInputStream(path));
+    }
+    
+    public static final SentenceDetector sentenceDetector(String path) throws 
InvalidFormatException, FileNotFoundException, IOException{
+       if(sModel == null)
+               initializeModel(MODEL.SENTENCE, path);
+        return new SentenceDetectorME(sModel);
+    }
+    
+    public static final Tokenizer tokenizer(String path) throws 
InvalidFormatException, FileNotFoundException, IOException{
+       if(tModel == null)
+               initializeModel(MODEL.TOKENIZER, path);
+        return new TokenizerME(tModel);
+    }
+    
+    public static final NameFinderME peopleFinder(String path) throws 
InvalidFormatException, FileNotFoundException, IOException{
+       if(pModel == null)
+               initializeModel(MODEL.PEOPLE, path);
+        return new NameFinderME(pModel);
+    }
+    
+    public static final NameFinderME locationFinder(String path) throws 
InvalidFormatException, FileNotFoundException, IOException{
+       if(lModel == null)
+               initializeModel(MODEL.LOCATIONS, path);
+        return new NameFinderME(lModel);
+    }
+    
+    public static final NameFinderME organizationFinder(String path) throws 
InvalidFormatException, FileNotFoundException, IOException{
+       if(oModel == null)
+               initializeModel(MODEL.ORGANIZATIONS, path);
+        return new NameFinderME(oModel);
+    }
+
+
+    
+
+}

Added: 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_en_US.properties
URL: 
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_en_US.properties?rev=1726831&view=auto
==============================================================================
--- 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_en_US.properties
 (added)
+++ 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_en_US.properties
 Tue Jan 26 15:47:12 2016
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+OpenNlpExtractor.FieldMappingTabName=OpenNlp Extraction
+OpenNlpExtractor.SentenceDetectorModelPath=Sentence Detector Model Path:
+OpenNlpExtractor.TokenizerModelPath=Tokenizer Model Path:
+OpenNlpExtractor.PeopleModelPath=People Model Path:
+OpenNlpExtractor.LocationsModelPath=Locations Model Path:
+OpenNlpExtractor.OraganizationsModelPath=Organizations Model Path:
\ No newline at end of file

Added: 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_ja_JP.properties
URL: 
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_ja_JP.properties?rev=1726831&view=auto
==============================================================================
--- 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_ja_JP.properties
 (added)
+++ 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_ja_JP.properties
 Tue Jan 26 15:47:12 2016
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+OpenNlpExtractor.FieldMappingTabName=OpenNlp Extraction
+OpenNlpExtractor.SentenceDetectorModelPath=Sentence Detector Model Path:
+OpenNlpExtractor.TokenizerModelPath=Tokenizer Model Path:
+OpenNlpExtractor.PeopleModelPath=People Model Path:
+OpenNlpExtractor.LocationsModelPath=Locations Model Path:
+OpenNlpExtractor.OraganizationsModelPath=Organizations Model Path:
\ No newline at end of file

Added: 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_zh_CN.properties
URL: 
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_zh_CN.properties?rev=1726831&view=auto
==============================================================================
--- 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_zh_CN.properties
 (added)
+++ 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_zh_CN.properties
 Tue Jan 26 15:47:12 2016
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+OpenNlpExtractor.FieldMappingTabName=OpenNlp Extraction
+OpenNlpExtractor.SentenceDetectorModelPath=Sentence Detector Model Path:
+OpenNlpExtractor.TokenizerModelPath=Tokenizer Model Path:
+OpenNlpExtractor.PeopleModelPath=People Model Path:
+OpenNlpExtractor.LocationsModelPath=Locations Model Path:
+OpenNlpExtractor.OraganizationsModelPath=Organizations Model Path:
\ No newline at end of file

Added: 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification.js
URL: 
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification.js?rev=1726831&view=auto
==============================================================================
--- 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification.js
 (added)
+++ 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification.js
 Tue Jan 26 15:47:12 2016
@@ -0,0 +1,25 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<script type="text/javascript">
+<!--
+// Specification check for this connection's form fields. It always returns
+// true, so this function never rejects what the user entered.
+function s${SEQNUM}_checkSpecification()
+{
+  return true;
+}
+//-->
+</script>

Added: 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification_FieldMapping.html
URL: 
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification_FieldMapping.html?rev=1726831&view=auto
==============================================================================
--- 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification_FieldMapping.html
 (added)
+++ 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification_FieldMapping.html
 Tue Jan 26 15:47:12 2016
@@ -0,0 +1,72 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+#if($TABNAME ==
+$ResourceBundle.getString('OpenNlpExtractor.FieldMappingTabName') &&
+${SEQNUM} == ${SELECTEDNUM})
+
+<table class="displaytable">
+       <tr>
+               <td class="separator" colspan="2"><hr /></td>
+       </tr>
+       <tr>
+               <td 
class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.SentenceDetectorModelPath'))</nobr></td>
+               <td class="value"><input type="text" 
name="s${SEQNUM}_smodelpath"
+                       size="128" 
value="$Encoder.attributeEscape($SMODELPATH)" /></td>
+       </tr>
+       <tr>
+               <td class="separator" colspan="2"><hr /></td>
+       </tr>
+       <tr>
+               <td 
class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.TokenizerModelPath'))</nobr></td>
+               <td class="value"><input type="text" 
name="s${SEQNUM}_tmodelpath"
+                       size="128" 
value="$Encoder.attributeEscape($TMODELPATH)" /></td>
+       </tr>
+       <tr>
+               <td class="separator" colspan="2"><hr /></td>
+       </tr>
+       <tr>
+               <td 
class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.PeopleModelPath'))</nobr></td>
+               <td class="value"><input type="text" 
name="s${SEQNUM}_pmodelpath"
+                       size="128" 
value="$Encoder.attributeEscape($PMODELPATH)" /></td>
+       </tr>
+       <tr>
+               <td class="separator" colspan="2"><hr /></td>
+       </tr>
+       <tr>
+               <td 
class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.LocationsModelPath'))</nobr></td>
+               <td class="value"><input type="text" 
name="s${SEQNUM}_lmodelpath"
+                       size="128" 
value="$Encoder.attributeEscape($LMODELPATH)" /></td>
+       </tr>
+       <tr>
+               <td class="separator" colspan="2"><hr /></td>
+       </tr>
+       <tr>
+               <td 
class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.OraganizationsModelPath'))</nobr></td>
+               <td class="value"><input type="text" 
name="s${SEQNUM}_omodelpath"
+                       size="128" 
value="$Encoder.attributeEscape($OMODELPATH)" /></td>
+       </tr>
+
+</table>
+
+#else
+## Tab not selected: carry the current values in hidden fields. They are HTML
+## attribute values, so they use attributeEscape like the visible inputs above.
+<input type="hidden" name="s${SEQNUM}_smodelpath" value="$Encoder.attributeEscape($SMODELPATH)"/>
+<input type="hidden" name="s${SEQNUM}_tmodelpath" value="$Encoder.attributeEscape($TMODELPATH)"/>
+<input type="hidden" name="s${SEQNUM}_pmodelpath" value="$Encoder.attributeEscape($PMODELPATH)"/>
+<input type="hidden" name="s${SEQNUM}_lmodelpath" value="$Encoder.attributeEscape($LMODELPATH)"/>
+<input type="hidden" name="s${SEQNUM}_omodelpath" value="$Encoder.attributeEscape($OMODELPATH)"/>
+#end

Added: 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/viewSpecification.html
URL: 
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/viewSpecification.html?rev=1726831&view=auto
==============================================================================
--- 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/viewSpecification.html
 (added)
+++ 
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/viewSpecification.html
 Tue Jan 26 15:47:12 2016
@@ -0,0 +1,58 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<table class="displaytable">
+       <tr>
+               <td 
class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.SentenceDetectorModelPath'))</nobr></td>
+               <td 
class="value"><nobr>$Encoder.bodyEscape($SMODELPATH)</nobr></td>
+       </tr>
+
+       <tr>
+               <td class="separator" colspan="2"><hr /></td>
+       </tr>
+
+       <tr>
+               <td 
class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.TokenizerModelPath'))</nobr></td>
+               <td 
class="value"><nobr>$Encoder.bodyEscape($TMODELPATH)</nobr></td>
+       </tr>
+
+       <tr>
+               <td class="separator" colspan="2"><hr /></td>
+       </tr>
+       <tr>
+               <td 
class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.PeopleModelPath'))</nobr></td>
+               <td 
class="value"><nobr>$Encoder.bodyEscape($PMODELPATH)</nobr></td>
+       </tr>
+       
+       <tr>
+               <td class="separator" colspan="2"><hr /></td>
+       </tr>
+       <tr>
+               <td 
class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.LocationsModelPath'))</nobr></td>
+               <td 
class="value"><nobr>$Encoder.bodyEscape($LMODELPATH)</nobr></td>
+       </tr>
+       
+       <tr>
+               <td class="separator" colspan="2"><hr /></td>
+       </tr>
+       <tr>
+               <td 
class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.OraganizationsModelPath'))</nobr></td>
+               <td 
class="value"><nobr>$Encoder.bodyEscape($OMODELPATH)</nobr></td>
+       </tr>
+
+
+</table>

Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/download-models.sh
URL: 
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/download-models.sh?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/download-models.sh 
(added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/download-models.sh 
Tue Jan 26 15:47:12 2016
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Downloads the English OpenNLP 1.5 models into the nlpmodels directory.
+MODELS_DIR=nlpmodels
+
+if [ ! -d "$MODELS_DIR" ]; then
+  echo "$MODELS_DIR does not exist..."
+  echo "creating $MODELS_DIR ..."
+  mkdir -p "${MODELS_DIR}"
+fi
+# Plain ASCII quotes are used so the shell, not the terminal, handles quoting.
+echo "downloading models..."
+wget -O "${MODELS_DIR}/en-sent.bin" http://opennlp.sourceforge.net/models-1.5/en-sent.bin
+wget -O "${MODELS_DIR}/en-token.bin" http://opennlp.sourceforge.net/models-1.5/en-token.bin
+wget -O "${MODELS_DIR}/en-ner-person.bin" http://opennlp.sourceforge.net/models-1.5/en-ner-person.bin
+wget -O "${MODELS_DIR}/en-ner-location.bin" http://opennlp.sourceforge.net/models-1.5/en-ner-location.bin
+wget -O "${MODELS_DIR}/en-ner-organization.bin" http://opennlp.sourceforge.net/models-1.5/en-ner-organization.bin
+echo "downloading finished..."
\ No newline at end of file

Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/pom.xml
URL: 
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/pom.xml?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/pom.xml (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/pom.xml Tue Jan 26 
15:47:12 2016
@@ -0,0 +1,279 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+       <parent>
+               <groupId>org.apache.manifoldcf</groupId>
+               <artifactId>mcf-connectors</artifactId>
+               <version>2.4-SNAPSHOT</version>
+       </parent>
+       <modelVersion>4.0.0</modelVersion>
+
+       <properties>
+               
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+               
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+       </properties>
+
+       <artifactId>mcf-opennlp-connector</artifactId>
+       <name>ManifoldCF - Connectors - OpenNlp Extractor</name>
+
+       <build>
+               <defaultGoal>integration-test</defaultGoal>
+               
<sourceDirectory>${basedir}/connector/src/main/java</sourceDirectory>
+               
<testSourceDirectory>${basedir}/connector/src/test/java</testSourceDirectory>
+               <resources>
+                       <resource>
+                               
<directory>${basedir}/connector/src/main/native2ascii</directory>
+                               <includes>
+                                       <include>**/*.properties</include>
+                               </includes>
+                       </resource>
+                       <resource>
+                               
<directory>${basedir}/connector/src/main/resources</directory>
+                               <includes>
+                                       <include>**/*.html</include>
+                                       <include>**/*.js</include>
+                               </includes>
+                       </resource>
+               </resources>
+               <testResources>
+                       <testResource>
+                               
<directory>${basedir}/connector/src/test/resources</directory>
+                       </testResource>
+               </testResources>
+
+               <plugins>
+
+                       <plugin>
+                               <groupId>org.codehaus.mojo</groupId>
+                               
<artifactId>native2ascii-maven-plugin</artifactId>
+                               <version>1.0-beta-1</version>
+                               <configuration>
+                                       <workDir>target/classes</workDir>
+                               </configuration>
+                               <executions>
+                                       <execution>
+                                               <id>native2ascii-utf8</id>
+                                               <goals>
+                                                       
<goal>native2ascii</goal>
+                                               </goals>
+                                               <configuration>
+                                                       
<encoding>UTF8</encoding>
+                                                       <includes>
+                                                               
<include>**/*.properties</include>
+                                                       </includes>
+                                               </configuration>
+                                       </execution>
+                               </executions>
+                       </plugin>
+
+                       <plugin>
+                               <artifactId>maven-assembly-plugin</artifactId>
+                               <configuration>
+                                       <descriptorRefs>
+                                               
<descriptorRef>jar-with-dependencies</descriptorRef>
+                                       </descriptorRefs>
+                               </configuration>
+                               <executions>
+                                       <execution>
+                                               <id>make-assembly</id> <!-- 
this is used for inheritance merges -->
+                                               <phase>package</phase> <!-- 
bind to the packaging phase -->
+                                               <goals>
+                                                       <goal>single</goal>
+                                               </goals>
+                                       </execution>
+                               </executions>
+                       </plugin>
+
+
+                       <!-- Test plugin configuration -->
+                       <plugin>
+                               <artifactId>maven-dependency-plugin</artifactId>
+                               <executions>
+                                       <execution>
+                                               <id>copy-war</id>
+                                               
<phase>generate-resources</phase>
+                                               <goals>
+                                                       <goal>copy</goal>
+                                               </goals>
+                                               <configuration>
+                                                       
<outputDirectory>target/dependency</outputDirectory>
+                                                       <artifactItems>
+                                                               <artifactItem>
+                                                                       
<groupId>${project.groupId}</groupId>
+                                                                       
<artifactId>mcf-api-service</artifactId>
+                                                                       
<version>${project.version}</version>
+                                                                       
<type>war</type>
+                                                                       
<overWrite>false</overWrite>
+                                                                       
<destFileName>mcf-api-service.war</destFileName>
+                                                               </artifactItem>
+                                                               <artifactItem>
+                                                                       
<groupId>${project.groupId}</groupId>
+                                                                       
<artifactId>mcf-authority-service</artifactId>
+                                                                       
<version>${project.version}</version>
+                                                                       
<type>war</type>
+                                                                       
<overWrite>false</overWrite>
+                                                                       
<destFileName>mcf-authority-service.war</destFileName>
+                                                               </artifactItem>
+                                                               <artifactItem>
+                                                                       
<groupId>${project.groupId}</groupId>
+                                                                       
<artifactId>mcf-crawler-ui</artifactId>
+                                                                       
<version>${project.version}</version>
+                                                                       
<type>war</type>
+                                                                       
<overWrite>false</overWrite>
+                                                                       
<destFileName>mcf-crawler-ui.war</destFileName>
+                                                               </artifactItem>
+                                                       </artifactItems>
+                                               </configuration>
+                                       </execution>
+                               </executions>
+                       </plugin>
+
+                       <plugin>
+                               <groupId>org.apache.maven.plugins</groupId>
+                               <artifactId>maven-surefire-plugin</artifactId>
+                               <configuration>
+                                       <excludes>
+                                               
<exclude>**/*Postgresql*.java</exclude>
+                                               
<exclude>**/*MySQL*.java</exclude>
+                                       </excludes>
+                                       <forkMode>always</forkMode>
+                                       
<workingDirectory>target/test-output</workingDirectory>
+                               </configuration>
+                       </plugin>
+
+                       <plugin>
+                               <groupId>org.apache.maven.plugins</groupId>
+                               <artifactId>maven-failsafe-plugin</artifactId>
+                               <version>2.12.3</version>
+                               <configuration>
+                                       <skipTests>${skipITs}</skipTests>
+                                       <systemPropertyVariables>
+                                               
<crawlerWarPath>../dependency/mcf-crawler-ui.war</crawlerWarPath>
+                                               
<authorityserviceWarPath>../dependency/mcf-authority-service.war</authorityserviceWarPath>
+                                               
<apiWarPath>../dependency/mcf-api-service.war</apiWarPath>
+                                       </systemPropertyVariables>
+                                       <excludes>
+                                               
<exclude>**/*Postgresql*.java</exclude>
+                                               
<exclude>**/*MySQL*.java</exclude>
+                                       </excludes>
+                                       <forkMode>always</forkMode>
+                                       
<workingDirectory>target/test-output</workingDirectory>
+                               </configuration>
+                               <executions>
+                                       <execution>
+                                               <id>integration-test</id>
+                                               <goals>
+                                                       
<goal>integration-test</goal>
+                                               </goals>
+                                       </execution>
+                                       <execution>
+                                               <id>verify</id>
+                                               <goals>
+                                                       <goal>verify</goal>
+                                               </goals>
+                                       </execution>
+                               </executions>
+                       </plugin>
+
+               </plugins>
+
+               <pluginManagement>
+                       <plugins>
+                               <!--This plugin's configuration is used to 
store Eclipse m2e settings 
+                                       only. It has no influence on the Maven 
build itself. -->
+                               <plugin>
+                                       <groupId>org.eclipse.m2e</groupId>
+                                       
<artifactId>lifecycle-mapping</artifactId>
+                                       <version>1.0.0</version>
+                                       <configuration>
+                                               <lifecycleMappingMetadata>
+                                                       <pluginExecutions>
+                                                               
<pluginExecution>
+                                                                       
<pluginExecutionFilter>
+                                                                               
<groupId>
+                                                                               
        org.apache.maven.plugins
+                                                                               
</groupId>
+                                                                               
<artifactId>
+                                                                               
        maven-dependency-plugin
+                                                                               
</artifactId>
+                                                                               
<versionRange>[2.1,)</versionRange>
+                                                                               
<goals>
+                                                                               
        <goal>copy</goal>
+                                                                               
</goals>
+                                                                       
</pluginExecutionFilter>
+                                                                       <action>
+                                                                               
<ignore></ignore>
+                                                                       
</action>
+                                                               
</pluginExecution>
+                                                               
<pluginExecution>
+                                                                       
<pluginExecutionFilter>
+                                                                               
<groupId>org.codehaus.mojo</groupId>
+                                                                               
<artifactId>
+                                                                               
        native2ascii-maven-plugin
+                                                                               
</artifactId>
+                                                                               
<versionRange>
+                                                                               
        [1.0-beta-1,)
+                                                                               
</versionRange>
+                                                                               
<goals>
+                                                                               
        <goal>native2ascii</goal>
+                                                                               
</goals>
+                                                                       
</pluginExecutionFilter>
+                                                                       <action>
+                                                                               
<ignore></ignore>
+                                                                       
</action>
+                                                               
</pluginExecution>
+                                                               
<pluginExecution>
+                                                                       
<pluginExecutionFilter>
+                                                                               
<groupId>
+                                                                               
        org.apache.maven.plugins
+                                                                               
</groupId>
+                                                                               
<artifactId>
+                                                                               
        maven-remote-resources-plugin
+                                                                               
</artifactId>
+                                                                               
<versionRange>[1.1,)</versionRange>
+                                                                               
<goals>
+                                                                               
        <goal>process</goal>
+                                                                               
</goals>
+                                                                       
</pluginExecutionFilter>
+                                                                       <action>
+                                                                               
<ignore></ignore>
+                                                                       
</action>
+                                                               
</pluginExecution>
+                                                       </pluginExecutions>
+                                               </lifecycleMappingMetadata>
+                                       </configuration>
+                               </plugin>
+                       </plugins>
+               </pluginManagement>
+       </build>
+
+       <dependencies>
+               <dependency>
+                       <groupId>${project.groupId}</groupId>
+                       <artifactId>mcf-core</artifactId>
+                       <version>${project.version}</version>
+               </dependency>
+               <dependency>
+                       <groupId>${project.groupId}</groupId>
+                       <artifactId>mcf-connector-common</artifactId>
+                       <version>${project.version}</version>
+               </dependency>
+               <dependency>
+                       <groupId>${project.groupId}</groupId>
+                       <artifactId>mcf-agents</artifactId>
+                       <version>${project.version}</version>
+               </dependency>
+               <dependency>
+                       <groupId>${project.groupId}</groupId>
+                       <artifactId>mcf-ui-core</artifactId>
+                       <version>${project.version}</version>
+               </dependency>
+
+               <dependency>
+                       <groupId>org.apache.opennlp</groupId>
+                       <artifactId>opennlp-tools</artifactId>
+                       <version>1.6.0</version>
+               </dependency>
+               
+       </dependencies>
+
+</project>
\ No newline at end of file

Modified: manifoldcf/branches/CONNECTORS-1270/connectors/pom.xml
URL: 
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/pom.xml?rev=1726831&r1=1726830&r2=1726831&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/pom.xml (original)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/pom.xml Tue Jan 26 15:47:12 
2016
@@ -68,6 +68,7 @@
     <module>confluence</module>
     <module>amazons3</module>
     <module>kafka</module>
+    <module>opennlp</module>
   </modules>
 
 </project>


Reply via email to