Author: kwright
Date: Fri Oct 6 07:24:43 2023
New Revision: 1912766
URL: http://svn.apache.org/viewvc?rev=1912766&view=rev
Log:
Add standalone CSV connector, thanks to Guylaine Bassett
Added:
manifoldcf/trunk/connectors/csv/
manifoldcf/trunk/connectors/csv/.gitignore
manifoldcf/trunk/connectors/csv/build.xml
manifoldcf/trunk/connectors/csv/connector/
manifoldcf/trunk/connectors/csv/connector/src/
manifoldcf/trunk/connectors/csv/connector/src/main/
manifoldcf/trunk/connectors/csv/connector/src/main/java/
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/CSVConfig.java
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/CSVConnector.java
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/CSVUtils.java
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/Messages.java
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_en_US.properties
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_es_ES.properties
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_fr_FR.properties
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_ja_JP.properties
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_zh_CN.properties
manifoldcf/trunk/connectors/csv/connector/src/main/resources/
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/csv/
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/csv/editSpecification.js
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/csv/editSpecification_CSV.html
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/csv/viewSpecification_CSV.html
manifoldcf/trunk/connectors/csv/pom.xml
Modified:
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/pom.xml
Modified: manifoldcf/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1912766&r1=1912765&r2=1912766&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Fri Oct 6 07:24:43 2023
@@ -3,6 +3,10 @@ $Id$
======================= 2.26-dev =====================
+pr-152: Add new CSV-only connector. This is in addition to
+existing CSV support in the JDBC connector, which is now deprecated.
+(Guylaine Bassett)
+
pr-149: Add Webconnector feature:
The "Force the inclusion of redirection" option allows you to include hosts
redirected from original seeds. You might want to use this option if the site
you are crawling is subject to redirections. Note that it is not required if
the previous option is not checked. Here are the possible behaviors:
Added: manifoldcf/trunk/connectors/csv/.gitignore
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/csv/.gitignore?rev=1912766&view=auto
==============================================================================
--- manifoldcf/trunk/connectors/csv/.gitignore (added)
+++ manifoldcf/trunk/connectors/csv/.gitignore Fri Oct 6 07:24:43 2023
@@ -0,0 +1,4 @@
+/.project
+/.settings/
+/target/
+/.classpath
Added: manifoldcf/trunk/connectors/csv/build.xml
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/csv/build.xml?rev=1912766&view=auto
==============================================================================
--- manifoldcf/trunk/connectors/csv/build.xml (added)
+++ manifoldcf/trunk/connectors/csv/build.xml Fri Oct 6 07:24:43 2023
@@ -0,0 +1,40 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+<project name="csv" default="all">
+
+ <property environment="env"/>
+ <condition property="mcf-dist" value="${env.MCFDISTPATH}">
+ <isset property="env.MCFDISTPATH"/>
+ </condition>
+ <property name="abs-dist" location="../../dist"/>
+ <condition property="mcf-dist" value="${abs-dist}">
+ <not>
+ <isset property="env.MCFDISTPATH"/>
+ </not>
+ </condition>
+
+ <import file="${mcf-dist}/connector-build.xml"/>
+
+ <target name="deliver-connector"
depends="mcf-connector-build.deliver-connector">
+ <antcall target="general-add-repository-connector">
+ <param name="connector-label" value="CSV"/>
+ <param name="connector-class"
value="org.apache.manifoldcf.crawler.connectors.csv.CSVConnector"/>
+ </antcall>
+ </target>
+
+</project>
\ No newline at end of file
Added:
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/CSVConfig.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/CSVConfig.java?rev=1912766&view=auto
==============================================================================
---
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/CSVConfig.java
(added)
+++
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/CSVConfig.java
Fri Oct 6 07:24:43 2023
@@ -0,0 +1,29 @@
+/* $Id$ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+* http://www.apache.org/licenses/LICENSE-2.0
+ *
+* Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
under
+ * the License.
+ */
+package org.apache.manifoldcf.crawler.connectors.csv;
+
+/**
+ * Names of the specification node types and of the attribute used by the CSV
+ * connector to store its job specification.
+ */
+public class CSVConfig {
+
+ // Specification nodes and values
+ /** Type of the specification nodes that each carry one CSV file path. */
+ public static final String NODE_FILEPATH = "filepath";
+ /** Type of the specification node that names the content column. */
+ public static final String NODE_CONTENT_COLUMN = "contentcolumn";
+ /** Type of the specification node that names the id column. */
+ public static final String NODE_ID_COLUMN = "idcolumn";
+ /** Type of the specification node that holds the column separator. */
+ public static final String NODE_SEPARATOR = "separator";
+ /** Name of the attribute in which every node above stores its value. */
+ public static final String ATTRIBUTE_VALUE = "value";
+
+}
Added:
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/CSVConnector.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/CSVConnector.java?rev=1912766&view=auto
==============================================================================
---
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/CSVConnector.java
(added)
+++
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/CSVConnector.java
Fri Oct 6 07:24:43 2023
@@ -0,0 +1,521 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
under
+ * the License.
+ */
+package org.apache.manifoldcf.crawler.connectors.csv;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
+import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector;
+import org.apache.manifoldcf.crawler.interfaces.IExistingVersions;
+import org.apache.manifoldcf.crawler.interfaces.IProcessActivity;
+import org.apache.manifoldcf.crawler.interfaces.ISeedingActivity;
+
+import java.io.*;
+import java.util.*;
+
+public class CSVConnector extends BaseRepositoryConnector {
+
+ private static final Logger LOGGER =
LogManager.getLogger(CSVConnector.class.getName());
+ private static Level DOCPROCESSLEVEL = Level.forName("DOCPROCESS", 450);
+ private static final String EDIT_SPECIFICATION_JS = "editSpecification.js";
+ private static final String EDIT_SPECIFICATION_CSV_HTML =
"editSpecification_CSV.html";
+ private static final String VIEW_SPECIFICATION_CSV_HTML =
"viewSpecification_CSV.html";
+
+ protected final static String ACTIVITY_READ = "read";
+ private static final String DOCUMENT_ID_SEPARATOR = ";;";
+
+ /**
+ * Constructor. Performs no initialization of its own.
+ */
+ public CSVConnector() {
+ }
+
+ /**
+ * Returns the constant 20.
+ * NOTE(review): presumably the framework uses this as the maximum number of
+ * document identifiers handed to one processDocuments call - confirm against
+ * the repository connector API.
+ */
+ @Override
+ public int getMaxDocumentRequest() {
+ return 20;
+ }
+
+ /**
+ * Reports the connector model, which is always MODEL_ADD_CHANGE_DELETE.
+ */
+ @Override
+ public int getConnectorModel() {
+ return CSVConnector.MODEL_ADD_CHANGE_DELETE;
+ }
+
+ /**
+ * Lists the activity names of this connector. The only one is ACTIVITY_READ
+ * ("read"), the activity recorded when a CSV row is ingested or cannot be read.
+ */
+ @Override
+ public String[] getActivitiesList() {
+ return new String[] { ACTIVITY_READ };
+ }
+
+ /**
+ * For any given document, list the bins that it is a member of.
+ * The identifier is not inspected: every document is reported in the single
+ * bin "CSV".
+ */
+ @Override
+ public String[] getBinNames(final String documentIdentifier) {
+ // Same constant bin for every document
+ return new String[] { "CSV" };
+ }
+
+ // All methods below this line will ONLY be called if a connect() call
succeeded
+ // on this instance!
+ /**
+ * Connect. The configuration parameters are included.
+ * This implementation only delegates to the superclass.
+ *
+ * @param configParams are the configuration parameters for this connection.
+ * Note well: There are no exceptions allowed from this call, since it is
+ * expected to mainly establish connection parameters.
+ */
+ @Override
+ public void connect(final ConfigParams configParams) {
+ super.connect(configParams);
+
+ }
+
+ /**
+ * Disconnect. Delegates to the superclass; nothing connector-specific is
+ * released here.
+ */
+ @Override
+ public void disconnect() throws ManifoldCFException {
+ super.disconnect();
+ }
+
+ /**
+ * Check the connection. Returns whatever the superclass reports; no
+ * CSV-specific verification (such as file readability) is performed here.
+ */
+ @Override
+ public String check() throws ManifoldCFException {
+ return super.check();
+ }
+
+ /**
+ * Seeds one document per data row of every CSV file of the specification.
+ * Line 0 of each file is treated as the header and is never seeded; rows are
+ * identified by their line number and file path (see getDocumentIdentifier).
+ * The previous seed version is not used: every call reseeds all rows.
+ *
+ * @param activities is the interface used to add the seed documents.
+ * @param spec is the job specification holding the CSV file paths.
+ * @param lastSeedVersion is the previous seed version; ignored.
+ * @param seedTime is the time of this seeding; returned as the new seed version.
+ * @param jobMode is the job mode; ignored.
+ * @return the seed time as a string.
+ * @throws ManifoldCFException if a CSV file cannot be read.
+ */
+ @Override
+ public String addSeedDocuments(final ISeedingActivity activities, final Specification spec,
+     final String lastSeedVersion, final long seedTime, final int jobMode)
+     throws ManifoldCFException, ServiceInterruption {
+
+   final CSVSpecs csvSpecs = new CSVSpecs(spec);
+   for (final String csvPath : csvSpecs.getCSVMap().keySet()) {
+     try {
+       final long numberOfLines = CSVUtils.getCSVLinesNumber(csvPath);
+       // Start at 1: line 0 is the header row
+       for (long i = 1L; i < numberOfLines; i++) {
+         activities.addSeedDocument(getDocumentIdentifier(i, csvPath));
+       }
+     } catch (final IOException | UncheckedIOException e) {
+       // Counting lines through a stream reports read/decoding failures as UncheckedIOException
+       throw new ManifoldCFException("Could not read CSV file " + csvPath + " : " + e.getMessage(), e);
+     }
+   }
+
+   return String.valueOf(seedTime);
+ }
+
+ /**
+ * Processes a batch of document identifiers. Identifiers are grouped by CSV
+ * file so that each file is read once. Rows of files that are no longer part
+ * of the specification (or whose header could not be read) are deleted; the
+ * other rows are handed to processToIngestDocument.
+ *
+ * @param documentIdentifiers are identifiers built by getDocumentIdentifier.
+ * @param statuses are the existing versions; not used.
+ * @param spec is the job specification.
+ * @param activities is the interface used to ingest, delete and record.
+ * @param jobMode is the job mode; not used.
+ * @param usesDefaultAuthority is not used.
+ * @throws ManifoldCFException if an identifier is malformed or processing fails.
+ */
+ @Override
+ public void processDocuments(final String[] documentIdentifiers, final IExistingVersions statuses,
+     final Specification spec, final IProcessActivity activities, final int jobMode,
+     final boolean usesDefaultAuthority) throws ManifoldCFException, ServiceInterruption {
+
+   // Check if we should abort
+   activities.checkJobStillActive();
+
+   final CSVSpecs csvSpecs = new CSVSpecs(spec);
+   final long startFetchTime = System.currentTimeMillis();
+
+   final Map<String, List<Long>> linesToReadPerDoc = new HashMap<>();
+   for (final String documentIdentifier : documentIdentifiers) {
+     LOGGER.log(DOCPROCESSLEVEL, "DOC_PROCESS_START|CSV|" + documentIdentifier);
+     // Limit of 2: the line number comes first, everything after the first
+     // separator is the path, even if the path itself contains the separator
+     final String[] parts = documentIdentifier.split(DOCUMENT_ID_SEPARATOR, 2);
+     if (parts.length != 2) {
+       throw new ManifoldCFException("Malformed CSV document identifier: " + documentIdentifier);
+     }
+     final long lineToRead;
+     try {
+       lineToRead = Long.parseLong(parts[0]);
+     } catch (final NumberFormatException e) {
+       throw new ManifoldCFException("Malformed CSV document identifier: " + documentIdentifier, e);
+     }
+     linesToReadPerDoc.computeIfAbsent(parts[1], path -> new ArrayList<>()).add(lineToRead);
+   }
+
+   for (final Map.Entry<String, List<Long>> entry : linesToReadPerDoc.entrySet()) {
+     final String docPath = entry.getKey();
+
+     if (csvSpecs.getCSVMap().get(docPath) == null) {
+       // File is not (or no longer) usable for this job: drop all of its rows
+       for (final Long lineToRead : entry.getValue()) {
+         activities.deleteDocument(getDocumentIdentifier(lineToRead, docPath));
+       }
+       continue;
+     }
+
+     try {
+       processToIngestDocument(csvSpecs, docPath, linesToReadPerDoc, activities, startFetchTime);
+     } catch (final IOException e) {
+       final String errorCode = "KO";
+       final String description = "Unable to read file " + docPath + " : " + e.getMessage();
+       // The failure is recorded once per file, against its first requested row;
+       // rows ingested before the failure already have their own record
+       final String documentIdentifier = getDocumentIdentifier(entry.getValue().get(0), docPath);
+       activities.recordActivity(startFetchTime, ACTIVITY_READ, 0L, documentIdentifier, errorCode, description, null);
+       LOGGER.error(description, e);
+     }
+   }
+
+ }
+
+ /**
+ * Builds the identifier of a CSV row: the line number, then
+ * DOCUMENT_ID_SEPARATOR, then the path of the file.
+ */
+ private String getDocumentIdentifier(long lineToRead, String docPath) {
+   final StringBuilder identifier = new StringBuilder();
+   identifier.append(lineToRead).append(DOCUMENT_ID_SEPARATOR).append(docPath);
+   return identifier.toString();
+ }
+
+ /**
+ * Reads the requested rows of one CSV file and ingests each of them as a
+ * document.
+ *
+ * The file is read once, front to back, as UTF-8 (the charset CSVUtils uses to
+ * count its lines). Each row is split with the configured separator, which is
+ * interpreted as a regular expression by String.split. Every value becomes a
+ * field named after its header label, except the value of the content column,
+ * which becomes the document binary (empty when the row has no such column).
+ * The value of the id column, when present, is passed to
+ * ingestDocumentWithException in place of the line number; it is also kept as
+ * a field. Values without a matching header label are ignored.
+ *
+ * Requested rows that lie beyond the end of the file no longer exist and are
+ * deleted.
+ *
+ * @param csvSpecs is the parsed job specification.
+ * @param docPath is the path of the CSV file.
+ * @param linesToReadPerDoc maps each file path to the line numbers to read.
+ * @param activities is the interface used to ingest, delete and record.
+ * @param startFetchTime is the start time reported with each activity record.
+ * @throws IOException if the file cannot be opened or read.
+ */
+ private void processToIngestDocument(final CSVSpecs csvSpecs, final String docPath,
+     final Map<String,List<Long>> linesToReadPerDoc, final IProcessActivity activities,
+     final long startFetchTime) throws ManifoldCFException, IOException, ServiceInterruption {
+   final String[] docLabels = csvSpecs.getCSVMap().get(docPath);
+   // Sorted and duplicate-free, so the file can be consumed in one forward pass
+   final Iterator<Long> wantedLines = new TreeSet<>(linesToReadPerDoc.get(docPath)).iterator();
+   if (!wantedLines.hasNext()) {
+     return;
+   }
+
+   final java.nio.charset.Charset charset = java.nio.charset.StandardCharsets.UTF_8;
+   // Empty version string: rows are always (re)ingested
+   final String versionString = "";
+
+   long lineToRead = wantedLines.next();
+   // True while lineToRead has not been found in the file yet
+   boolean pending = true;
+   long cptLine = 0;
+
+   try (InputStream in = new FileInputStream(new File(docPath));
+       BufferedReader br = new BufferedReader(new InputStreamReader(in, charset))) {
+     String line;
+     while (pending && (line = br.readLine()) != null) {
+       final long currentLine = cptLine++;
+       if (currentLine < lineToRead) {
+         continue;
+       }
+
+       // Rebuild documentIdentifier (MCF id)
+       final String documentIdentifier = getDocumentIdentifier(lineToRead, docPath);
+       String ingestId = String.valueOf(lineToRead);
+       final RepositoryDocument rd = new RepositoryDocument();
+       byte[] contentBytes = new byte[0];
+       final String[] values = line.split(csvSpecs.getSeparator());
+       // A row may hold more values than there are header labels
+       final int columns = Math.min(values.length, docLabels.length);
+       for (int i = 0; i < columns; i++) {
+         final String value = values[i];
+         final String label = docLabels[i];
+         if (label.equals(csvSpecs.getContentColumnLabel())) {
+           contentBytes = value.getBytes(charset);
+         } else {
+           if (label.equals(csvSpecs.getIdColumnLabel())) {
+             ingestId = value;
+           }
+           rd.addField(label, value);
+         }
+       }
+
+       if (versionString.length() == 0 || activities.checkDocumentNeedsReindexing(documentIdentifier, versionString)) {
+         // Ingest document
+         try (ByteArrayInputStream inputStream = new ByteArrayInputStream(contentBytes)) {
+           rd.setBinary(inputStream, contentBytes.length);
+           activities.ingestDocumentWithException(documentIdentifier, versionString, ingestId, rd);
+           final String errorCode = "OK";
+           activities.recordActivity(startFetchTime, ACTIVITY_READ, (long) contentBytes.length, documentIdentifier, errorCode, StringUtils.EMPTY, null);
+         } finally {
+           LOGGER.log(DOCPROCESSLEVEL, "DOC_PROCESS_END|CSV|" + documentIdentifier);
+         }
+       }
+
+       // Move on to the next requested line, or stop once all of them were read
+       if (wantedLines.hasNext()) {
+         lineToRead = wantedLines.next();
+       } else {
+         pending = false;
+       }
+     }
+   }
+
+   if (pending) {
+     // End of file reached first: the remaining requested rows are gone
+     activities.deleteDocument(getDocumentIdentifier(lineToRead, docPath));
+     while (wantedLines.hasNext()) {
+       activities.deleteDocument(getDocumentIdentifier(wantedLines.next(), docPath));
+     }
+   }
+
+ }
+
+ /**
+ * Processes a specification post: copies the posted form values of this
+ * connection into the job specification.
+ *
+ * File paths are rebuilt from the indexed "filepath_" parameters when
+ * "filepath_count" is posted (entries whose operation is "Delete" are dropped,
+ * and a path is appended when the "filepath_op" operation is "Add"). The id
+ * column, content column and separator nodes are each replaced when their
+ * parameter is posted, and removed when it is posted empty.
+ *
+ * @param variableContext holds the posted parameters.
+ * @param locale is the locale; not used.
+ * @param os is the specification to update.
+ * @param connectionSequenceNumber is the number of this connection within the
+ * job; it prefixes every parameter name.
+ * @return null in all cases.
+ * @throws ManifoldCFException if the posted file path count is not a number.
+ */
+ @Override
+ public String processSpecificationPost(final IPostParameters variableContext, final Locale locale,
+     final Specification os, final int connectionSequenceNumber) throws ManifoldCFException {
+   final String seqPrefix = "s" + connectionSequenceNumber + "_";
+
+   final String postedCount = variableContext.getParameter(seqPrefix + "filepath_count");
+   if (postedCount != null && postedCount.length() > 0) {
+     // Parse before touching the specification so a bad post leaves it intact
+     final int count;
+     try {
+       count = Integer.parseInt(postedCount);
+     } catch (final NumberFormatException e) {
+       throw new ManifoldCFException("Invalid file path count: " + postedCount, e);
+     }
+
+     // About to gather the filepath nodes, so get rid of the old ones.
+     removeNodesOfType(os, CSVConfig.NODE_FILEPATH);
+
+     final String prefix = seqPrefix + "filepath_";
+     for (int i = 0; i < count; i++) {
+       final String suffix = "_" + Integer.toString(i);
+       final String op = variableContext.getParameter(prefix + "op" + suffix);
+       if (!"Delete".equals(op)) {
+         final String value = variableContext.getParameter(prefix + CSVConfig.ATTRIBUTE_VALUE + suffix);
+         appendValueNode(os, CSVConfig.NODE_FILEPATH, value);
+       }
+     }
+
+     final String addop = variableContext.getParameter(seqPrefix + "filepath_op");
+     if ("Add".equals(addop)) {
+       final String newPath = variableContext.getParameter(seqPrefix + "filepath_value");
+       appendValueNode(os, CSVConfig.NODE_FILEPATH, newPath);
+     }
+   }
+
+   replaceSingleValueNode(variableContext, os, seqPrefix, CSVConfig.NODE_ID_COLUMN);
+   replaceSingleValueNode(variableContext, os, seqPrefix, CSVConfig.NODE_CONTENT_COLUMN);
+   replaceSingleValueNode(variableContext, os, seqPrefix, CSVConfig.NODE_SEPARATOR);
+
+   return null;
+ }
+
+ /** Removes every child of the given type from the specification. */
+ private static void removeNodesOfType(final Specification os, final String nodeType) throws ManifoldCFException {
+   int i = 0;
+   while (i < os.getChildCount()) {
+     final SpecificationNode node = os.getChild(i);
+     if (node.getType().equals(nodeType)) {
+       // The next child shifts into position i, so the index must not advance
+       os.removeChild(i);
+     } else {
+       i++;
+     }
+   }
+ }
+
+ /** Appends a node of the given type whose "value" attribute is the given value. */
+ private static void appendValueNode(final Specification os, final String nodeType, final String value)
+     throws ManifoldCFException {
+   final SpecificationNode node = new SpecificationNode(nodeType);
+   node.setAttribute(CSVConfig.ATTRIBUTE_VALUE, value);
+   os.addChild(os.getChildCount(), node);
+ }
+
+ /**
+ * Replaces the node of the given type by the posted value of the parameter of
+ * the same name; does nothing when the parameter was not posted.
+ */
+ private static void replaceSingleValueNode(final IPostParameters variableContext, final Specification os,
+     final String seqPrefix, final String nodeType) throws ManifoldCFException {
+   final String posted = variableContext.getParameter(seqPrefix + nodeType);
+   if (posted == null) {
+     return;
+   }
+   removeNodesOfType(os, nodeType);
+   // An empty value only clears the node
+   if (posted.length() > 0) {
+     appendValueNode(os, nodeType, posted);
+   }
+ }
+
+ /**
+ * Output the specification header section. Adds the CSV tab name to the list
+ * of tabs and renders the editSpecification.js resource, to which the
+ * connection sequence number is passed as SEQNUM.
+ *
+ * @param out is the output to which any HTML should be sent.
+ * @param locale is the preferred locale of the output.
+ * @param os is the current specification for this connection; not used here.
+ * @param connectionSequenceNumber is the unique number of this connection
+ * within the job.
+ * @param tabsArray is an array of tab names; the connector tab is added to it.
+ */
+ @Override
+ public void outputSpecificationHeader(final IHTTPOutput out, final Locale locale, final Specification os,
+     final int connectionSequenceNumber, final List<String> tabsArray)
+     throws ManifoldCFException, IOException {
+   final Map<String, Object> velocityContext = new HashMap<>();
+   velocityContext.put("SEQNUM", String.valueOf(connectionSequenceNumber));
+
+   final String csvTabName = Messages.getString(locale, "CSV.CSVTabName");
+   tabsArray.add(csvTabName);
+
+   Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_JS, velocityContext);
+ }
+
+ /**
+ * Output the specification body section. Renders the
+ * editSpecification_CSV.html resource with the tab name, the two sequence
+ * numbers and the CSV values read from the specification.
+ *
+ * @param out is the output to which any HTML should be sent.
+ * @param locale is the preferred locale of the output.
+ * @param os is the current specification for this job.
+ * @param connectionSequenceNumber is the unique number of this connection
+ * within the job.
+ * @param actualSequenceNumber is the connection within the job that has
+ * currently been selected.
+ * @param tabName is the current tab name.
+ */
+ @Override
+ public void outputSpecificationBody(final IHTTPOutput out, final Locale locale, final Specification os,
+     final int connectionSequenceNumber, final int actualSequenceNumber, final String tabName)
+     throws ManifoldCFException, IOException {
+   final Map<String, Object> velocityContext = new HashMap<>();
+
+   // Page state: selected tab and sequence numbers
+   velocityContext.put("SELECTEDNUM", String.valueOf(actualSequenceNumber));
+   velocityContext.put("SEQNUM", String.valueOf(connectionSequenceNumber));
+   velocityContext.put("TABNAME", tabName);
+
+   // Values of the CSV tab, taken from the specification
+   fillInCSVSpecificationMap(velocityContext, os);
+
+   Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_CSV_HTML, velocityContext);
+ }
+
+ /**
+ * View specification. Renders the viewSpecification_CSV.html resource with
+ * the connection sequence number and the CSV values read from the
+ * specification.
+ *
+ * @param out is the output to which any HTML should be sent.
+ * @param locale is the preferred locale of the output.
+ * @param os is the current specification for this job.
+ * @param connectionSequenceNumber is the unique number of this connection
+ * within the job.
+ */
+ @Override
+ public void viewSpecification(final IHTTPOutput out, final Locale locale, final Specification os,
+     final int connectionSequenceNumber) throws ManifoldCFException, IOException {
+   final Map<String, Object> velocityContext = new HashMap<>();
+   final String sequenceNumber = String.valueOf(connectionSequenceNumber);
+   velocityContext.put("SEQNUM", sequenceNumber);
+
+   // Add the values shown on the CSV tab
+   fillInCSVSpecificationMap(velocityContext, os);
+
+   Messages.outputResourceWithVelocity(out, locale, VIEW_SPECIFICATION_CSV_HTML, velocityContext);
+ }
+
+ /**
+ * Copies the CSV settings of the specification into the template parameters:
+ * FILEPATHS (list of paths), CONTENTCOLUMN, IDCOLUMN and SEPARATOR. Settings
+ * that are absent, or whose value attribute is null, keep their defaults
+ * ("content", "id" and ",").
+ */
+ private void fillInCSVSpecificationMap(final Map<String, Object> paramMap, final Specification os) {
+
+   final List<String> paths = new ArrayList<>();
+   String contentColumnName = "content";
+   String idColumnName = "id";
+   String columnSeparator = ",";
+
+   for (int idx = 0; idx < os.getChildCount(); idx++) {
+     final SpecificationNode child = os.getChild(idx);
+
+     switch (child.getType()) {
+       case CSVConfig.NODE_FILEPATH: {
+         final String path = child.getAttributeValue(CSVConfig.ATTRIBUTE_VALUE);
+         if (path != null) {
+           paths.add(path);
+         }
+         break;
+       }
+       case CSVConfig.NODE_ID_COLUMN:
+         if (child.getAttributeValue(CSVConfig.ATTRIBUTE_VALUE) != null) {
+           idColumnName = child.getAttributeValue(CSVConfig.ATTRIBUTE_VALUE);
+         }
+         break;
+       case CSVConfig.NODE_CONTENT_COLUMN:
+         if (child.getAttributeValue(CSVConfig.ATTRIBUTE_VALUE) != null) {
+           contentColumnName = child.getAttributeValue(CSVConfig.ATTRIBUTE_VALUE);
+         }
+         break;
+       case CSVConfig.NODE_SEPARATOR:
+         if (child.getAttributeValue(CSVConfig.ATTRIBUTE_VALUE) != null) {
+           columnSeparator = child.getAttributeValue(CSVConfig.ATTRIBUTE_VALUE);
+         }
+         break;
+       default:
+         // Other node types do not belong to this tab
+         break;
+     }
+   }
+
+   paramMap.put("FILEPATHS", paths);
+   paramMap.put("CONTENTCOLUMN", contentColumnName);
+   paramMap.put("IDCOLUMN", idColumnName);
+   paramMap.put("SEPARATOR", columnSeparator);
+ }
+
+ /**
+ * Parsed view of a job specification: the CSV files with their header labels,
+ * the names of the content and id columns, and the column separator.
+ * Settings absent from the specification keep the defaults "content", "id"
+ * and ",".
+ */
+ private static class CSVSpecs {
+
+   // Maps each readable CSV file path to the labels of its header line
+   private final Map<String, String[]> CSVMap = new HashMap<>();
+   private String contentColumnLabel = "content";
+   private String idColumnLabel = "id";
+   private String separator = ",";
+
+   /**
+    * Reads the settings from the specification, then the header line of every
+    * configured file. A file whose header cannot be read is logged and left
+    * out of the map, so that the other files can still be crawled.
+    *
+    * @param os is the job specification.
+    */
+   public CSVSpecs(final Specification os) {
+
+     final List<String> csvFiles = new ArrayList<>();
+
+     for (int i = 0; i < os.getChildCount(); i++) {
+       final SpecificationNode sn = os.getChild(i);
+       final String type = sn.getType();
+       final String value = sn.getAttributeValue(CSVConfig.ATTRIBUTE_VALUE);
+       if (value == null) {
+         // Nothing usable in this node: keep the default
+         continue;
+       }
+
+       if (CSVConfig.NODE_FILEPATH.equals(type)) {
+         csvFiles.add(value);
+       } else if (CSVConfig.NODE_CONTENT_COLUMN.equals(type)) {
+         contentColumnLabel = value;
+       } else if (CSVConfig.NODE_SEPARATOR.equals(type)) {
+         // An empty separator would split rows into single characters
+         if (!value.isEmpty()) {
+           separator = value;
+         }
+       } else if (CSVConfig.NODE_ID_COLUMN.equals(type)) {
+         idColumnLabel = value;
+       }
+     }
+
+     // The separator is final at this point, whatever the order of the nodes
+     for (final String csvFilePath : csvFiles) {
+       try {
+         CSVMap.put(csvFilePath, CSVUtils.getColumnsLabel(csvFilePath, separator));
+       } catch (final IOException e) {
+         LOGGER.error("Could not read the header of CSV file " + csvFilePath + " : " + e.getMessage(), e);
+       }
+     }
+
+   }
+
+   /** @return the header labels of each readable CSV file, keyed by file path. */
+   public Map<String, String[]> getCSVMap() {
+     return CSVMap;
+   }
+
+   /** @return the label of the column holding the document content. */
+   public String getContentColumnLabel() {
+     return contentColumnLabel;
+   }
+
+   /** @return the label of the column holding the document id. */
+   public String getIdColumnLabel() {
+     return idColumnLabel;
+   }
+
+   /** @return the column separator; never null. */
+   public String getSeparator() {
+     return separator;
+   }
+
+ }
+}
Added:
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/CSVUtils.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/CSVUtils.java?rev=1912766&view=auto
==============================================================================
---
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/CSVUtils.java
(added)
+++
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/CSVUtils.java
Fri Oct 6 07:24:43 2023
@@ -0,0 +1,36 @@
+package org.apache.manifoldcf.crawler.connectors.csv;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.util.stream.Stream;
+
+/**
+ * File helpers of the CSV connector. Files are read as UTF-8.
+ */
+public class CSVUtils {
+
+  /**
+   * Reads the header line of a CSV file and splits it into column labels.
+   * A leading UTF-8 byte order mark is not part of the first label.
+   *
+   * @param csvFilePath is the path of the CSV file.
+   * @param separator is the column separator; String.split interprets it as
+   *        a regular expression.
+   * @return the labels of the header line.
+   * @throws IOException if the file does not exist, cannot be read or is empty.
+   */
+  public static String[] getColumnsLabel(final String csvFilePath, final String separator)
+      throws FileNotFoundException, IOException {
+    final File csvFile = new File(csvFilePath);
+    if (!csvFile.exists() || !csvFile.canRead()) {
+      throw new IOException("Cannot read file " + csvFilePath);
+    }
+    try (BufferedReader br = Files.newBufferedReader(csvFile.toPath(), StandardCharsets.UTF_8)) {
+      String firstLine = br.readLine();
+      if (firstLine == null) {
+        // readLine() returns null on an empty file
+        throw new IOException("CSV file " + csvFilePath + " is empty");
+      }
+      if (!firstLine.isEmpty() && firstLine.charAt(0) == '\uFEFF') {
+        firstLine = firstLine.substring(1);
+      }
+      return firstLine.split(separator);
+    }
+  }
+
+  /**
+   * Counts the lines of a CSV file, header line included.
+   *
+   * @param csvFilePath is the path of the CSV file.
+   * @return the number of lines of the file.
+   * @throws IOException if the file cannot be opened, read or decoded.
+   */
+  public static long getCSVLinesNumber(final String csvFilePath) throws IOException {
+    final File csvFile = new File(csvFilePath);
+    try (Stream<String> lines = Files.lines(csvFile.toPath(), StandardCharsets.UTF_8)) {
+      return lines.count();
+    } catch (final java.io.UncheckedIOException e) {
+      // The stream wraps read failures; restore the declared checked exception
+      throw e.getCause();
+    }
+  }
+
+}
Added:
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/Messages.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/Messages.java?rev=1912766&view=auto
==============================================================================
---
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/Messages.java
(added)
+++
manifoldcf/trunk/connectors/csv/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/csv/Messages.java
Fri Oct 6 07:24:43 2023
@@ -0,0 +1,114 @@
+/* $Id: Messages.java 1295926 2012-03-01 21:56:27Z kwright $ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+* http://www.apache.org/licenses/LICENSE-2.0
+ *
+* Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
under
+ * the License.
+ */
+package org.apache.manifoldcf.crawler.connectors.csv;
+
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+
+public class Messages extends org.apache.manifoldcf.ui.i18n.Messages {
+ /** Bundle name that the locale/key-only lookup helpers below pass as {@code bundleName}. */
+ public static final String DEFAULT_BUNDLE_NAME =
"org.apache.manifoldcf.crawler.connectors.csv.common";
+ /** Path name handed to the resource-output helpers at the bottom of this class. */
+ public static final String DEFAULT_PATH_NAME =
"org.apache.manifoldcf.crawler.connectors.csv";
+
+ /**
+ * Constructor - do not instantiate; every helper declared in this class is static.
+ */
+ protected Messages() {
+ }
+ /** Looks up {@code messageKey} in {@link #DEFAULT_BUNDLE_NAME}, passing {@code null} as the argument array. */
+ public static String getString(final Locale locale, final String messageKey)
{
+ return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+ /** Default-bundle, no-argument form of {@code getAttributeString}; presumably yields attribute-safe text (escaping is done outside this file). */
+ public static String getAttributeString(final Locale locale, final String
messageKey) {
+ return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+ /** Default-bundle, no-argument form of {@code getBodyString}; presumably yields body-safe text (escaping is done outside this file). */
+ public static String getBodyString(final Locale locale, final String
messageKey) {
+ return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+ /** Default-bundle, no-argument form of {@code getAttributeJavascriptString}. */
+ public static String getAttributeJavascriptString(final Locale locale, final
String messageKey) {
+ return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale,
messageKey, null);
+ }
+ /** Default-bundle, no-argument form of {@code getBodyJavascriptString}. */
+ public static String getBodyJavascriptString(final Locale locale, final
String messageKey) {
+ return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey,
null);
+ }
+ /** Same lookup as {@code getString(locale, messageKey)}, but forwards the caller's {@code args} array. */
+ public static String getString(final Locale locale, final String messageKey,
final Object[] args) {
+ return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+ /** Default-bundle form of {@code getAttributeString} that forwards {@code args}. */
+ public static String getAttributeString(final Locale locale, final String
messageKey, final Object[] args) {
+ return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+ /** Default-bundle form of {@code getBodyString} that forwards {@code args}. */
+ public static String getBodyString(final Locale locale, final String
messageKey, final Object[] args) {
+ return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+ /** Default-bundle form of {@code getAttributeJavascriptString} that forwards {@code args}. */
+ public static String getAttributeJavascriptString(final Locale locale, final
String messageKey, final Object[] args) {
+ return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale,
messageKey, args);
+ }
+ /** Default-bundle form of {@code getBodyJavascriptString} that forwards {@code args}. */
+ public static String getBodyJavascriptString(final Locale locale, final
String messageKey, final Object[] args) {
+ return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey,
args);
+ }
+
+ // More general methods which allow bundlenames and class loaders to be
specified.
+ public static String getString(final String bundleName, final Locale locale,
final String messageKey, final Object[] args) {
+ return getString(Messages.class, bundleName, locale, messageKey, args);
+ }
+ /** Explicit-bundle form; forwards with {@code Messages.class} prepended to an overload not declared in this file (presumably inherited). */
+ public static String getAttributeString(final String bundleName, final
Locale locale, final String messageKey, final Object[] args) {
+ return getAttributeString(Messages.class, bundleName, locale, messageKey,
args);
+ }
+ /** Explicit-bundle form; forwards with {@code Messages.class} prepended to an overload not declared in this file (presumably inherited). */
+ public static String getBodyString(final String bundleName, final Locale
locale, final String messageKey, final Object[] args) {
+ return getBodyString(Messages.class, bundleName, locale, messageKey, args);
+ }
+ /** Explicit-bundle form; forwards with {@code Messages.class} prepended to an overload not declared in this file (presumably inherited). */
+ public static String getAttributeJavascriptString(final String bundleName,
final Locale locale, final String messageKey, final Object[] args) {
+ return getAttributeJavascriptString(Messages.class, bundleName, locale,
messageKey, args);
+ }
+ /** Explicit-bundle form; forwards with {@code Messages.class} prepended to an overload not declared in this file (presumably inherited). */
+ public static String getBodyJavascriptString(final String bundleName, final
Locale locale, final String messageKey, final Object[] args) {
+ return getBodyJavascriptString(Messages.class, bundleName, locale,
messageKey, args);
+ }
+
+ // Resource output: each helper below forwards with Messages.class and DEFAULT_PATH_NAME filled in.
+ public static void outputResource(final IHTTPOutput output, final Locale
locale, final String resourceKey, final Map<String, String>
substitutionParameters, final boolean mapToUpperCase)
+ throws ManifoldCFException {
+ outputResource(output, Messages.class, DEFAULT_PATH_NAME, locale,
resourceKey, substitutionParameters, mapToUpperCase);
+ }
+ /** Velocity variant taking string substitution parameters; additionally passes {@link #DEFAULT_BUNDLE_NAME}. */
+ public static void outputResourceWithVelocity(final IHTTPOutput output,
final Locale locale, final String resourceKey, final Map<String, String>
substitutionParameters, final boolean mapToUpperCase)
+ throws ManifoldCFException {
+ outputResourceWithVelocity(output, Messages.class, DEFAULT_BUNDLE_NAME,
DEFAULT_PATH_NAME, locale, resourceKey, substitutionParameters, mapToUpperCase);
+ }
+ /** Velocity variant taking a map of context objects instead of string substitution parameters. */
+ public static void outputResourceWithVelocity(final IHTTPOutput output,
final Locale locale, final String resourceKey, final Map<String, Object>
contextObjects) throws ManifoldCFException {
+ outputResourceWithVelocity(output, Messages.class, DEFAULT_BUNDLE_NAME,
DEFAULT_PATH_NAME, locale, resourceKey, contextObjects);
+ }
+
+}
Added:
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_en_US.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_en_US.properties?rev=1912766&view=auto
==============================================================================
---
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_en_US.properties
(added)
+++
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_en_US.properties
Fri Oct 6 07:24:43 2023
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+CSV.CSVTabName=CSV
+CSV.FilesPaths=CSV Files Paths
+CSV.FilePath=File path
+CSV.NoFilePathSpecified=No file path specified
+CSV.Add=Add
+CSV.Delete=Delete
+CSV.AddFilePath=Add this CSV file path
+CSV.DeleteFilePath=Delete CSV file path #
+CSV.ContentColumn=Content Column Label
+CSV.IdColumn=Id Column Label
+CSV.Separator=Separator character
+CSV.ContentColumnMustNotBeEmpty=The Content Column Label must not be empty !
+CSV.IdColumnMustNotBeEmpty=The Id Column Label must not be empty !
\ No newline at end of file
Added:
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_es_ES.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_es_ES.properties?rev=1912766&view=auto
==============================================================================
---
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_es_ES.properties
(added)
+++
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_es_ES.properties
Fri Oct 6 07:24:43 2023
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+CSV.CSVTabName=CSV
+CSV.FilesPaths=CSV Files Paths
+CSV.FilePath=File path
+CSV.NoFilePathSpecified=No file path specified
+CSV.Add=Add
+CSV.Delete=Delete
+CSV.AddFilePath=Add this CSV file path
+CSV.ContentColumn=Content Column Label
+CSV.IdColumn=Id Column Label
+CSV.Separator=Separator character
\ No newline at end of file
Added:
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_fr_FR.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_fr_FR.properties?rev=1912766&view=auto
==============================================================================
---
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_fr_FR.properties
(added)
+++
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_fr_FR.properties
Fri Oct 6 07:24:43 2023
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+CSV.CSVTabName=CSV
+CSV.FilesPaths=Chemin fichiers CSV
+CSV.FilePath=Chemin Fichier
+CSV.NoFilePathSpecified=Aucun chemin de fichier spécifié
+CSV.Add=Ajouter
+CSV.Delete=Supprimer
+CSV.AddFilePath=Ajouter ce chemin de fichier CSV
+CSV.ContentColumn=Label de la colonne 'contenu'
+CSV.IdColumn=Label de la colonne 'id'
+CSV.Separator=Caractère de séparation
\ No newline at end of file
Added:
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_ja_JP.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_ja_JP.properties?rev=1912766&view=auto
==============================================================================
---
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_ja_JP.properties
(added)
+++
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_ja_JP.properties
Fri Oct 6 07:24:43 2023
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+CSV.CSVTabName=CSV
+CSV.FilesPaths=CSV Files Paths
+CSV.FilePath=File path
+CSV.NoFilePathSpecified=No file path specified
+CSV.Add=Add
+CSV.Delete=Delete
+CSV.AddFilePath=Add this CSV file path
+CSV.ContentColumn=Content Column Label
+CSV.IdColumn=Id Column Label
+CSV.Separator=Separator character
\ No newline at end of file
Added:
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_zh_CN.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_zh_CN.properties?rev=1912766&view=auto
==============================================================================
---
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_zh_CN.properties
(added)
+++
manifoldcf/trunk/connectors/csv/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/csv/common_zh_CN.properties
Fri Oct 6 07:24:43 2023
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+CSV.CSVTabName=CSV
+CSV.FilesPaths=CSV Files Paths
+CSV.FilePath=File path
+CSV.NoFilePathSpecified=No file path specified
+CSV.Add=Add
+CSV.Delete=Delete
+CSV.AddFilePath=Add this CSV file path
+CSV.ContentColumn=Content Column Label
+CSV.IdColumn=Id Column Label
+CSV.Separator=Separator character
\ No newline at end of file
Added:
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/csv/editSpecification.js
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/csv/editSpecification.js?rev=1912766&view=auto
==============================================================================
---
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/csv/editSpecification.js
(added)
+++
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/csv/editSpecification.js
Fri Oct 6 07:24:43 2023
@@ -0,0 +1,47 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<script type="text/javascript">
+//<!--
+
+function s${SEQNUM}_addFilePath()
+{
+  // Text field holding the path the user wants to add.
+  var newPathField = editjob.s${SEQNUM}_filepath_value;
+  if (newPathField.value == "")
+  {
+    // Nothing was typed: warn the user and put the cursor back in the field.
+    alert("$Encoder.bodyEscape($ResourceBundle.getString('CSV.NoFilePathSpecified'))");
+    newPathField.focus();
+    return;
+  }
+  // Flag the pending operation, then submit the form at the file path anchor.
+  editjob.s${SEQNUM}_filepath_op.value="Add";
+  postFormSetAnchor("s${SEQNUM}_filepath");
+}
+
+function s${SEQNUM}_deleteFilePath(i)
+{
+  // Hidden per-row operation field, looked up by name on the form instead of through eval().
+  var rowOp = editjob["s${SEQNUM}_filepath_op_"+i];
+  // Set the operation
+  rowOp.value="Delete";
+  // Submit
+  if (editjob.s${SEQNUM}_filepath_count.value==i)
+    postFormSetAnchor("s${SEQNUM}_filepath");
+  else
+    postFormSetAnchor("s${SEQNUM}_filepath_"+i);
+  // Undo, so we won't get two deletes next time
+  rowOp.value="Continue";
+}
+
+//-->
+</script>
Added:
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/csv/editSpecification_CSV.html
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/csv/editSpecification_CSV.html?rev=1912766&view=auto
==============================================================================
---
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/csv/editSpecification_CSV.html
(added)
+++
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/csv/editSpecification_CSV.html
Fri Oct 6 07:24:43 2023
@@ -0,0 +1,113 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+#if($TABNAME == $ResourceBundle.getString('CSV.CSVTabName') && ${SEQNUM} ==
${SELECTEDNUM})
+
+
+<table class="displaytable">
+
+ <tr>
+ <td
class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('CSV.FilesPaths'))</nobr></td>
+ <td class="boxcell">
+ <table class="formtable">
+ <tr class="formheaderrow">
+ <td class="formcolumnheader"></td>
+ <td
class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('CSV.FilePath'))</nobr></td>
+ </tr>
+
+ #set($filepathcounter = 0)
+ #foreach($filepath in $FILEPATHS)
+ #set($filepathcounterdisplay = $filepathcounter + 1)
+ #if(($filepathcounter % 2) == 0)
+ <tr class="evenformrow">
+ #else
+ <tr class="oddformrow">
+ #end
+ <td class="formcolumncell">
+ <a name="s${SEQNUM}_filepath_$filepathcounter">
+ <input type="button"
value="$Encoder.attributeEscape($ResourceBundle.getString('CSV.Delete'))"
alt="$Encoder.attributeEscape($ResourceBundle.getString('CSV.DeleteFilePath'))$filepathcounterdisplay"
onclick='javascript:s${SEQNUM}_deleteFilePath("$filepathcounter");'/>
+ <input type="hidden"
name="s${SEQNUM}_filepath_op_$filepathcounter" value="Continue"/>
+ <input type="hidden"
name="s${SEQNUM}_filepath_value_$filepathcounter"
value="$Encoder.attributeEscape($filepath)"/>
+ </a>
+ </td>
+ <td class="formcolumncell">
+ <nobr>$Encoder.bodyEscape($filepath)</nobr>
+ </td>
+ </tr>
+ #set($filepathcounter = $filepathcounter + 1)
+ #end
+
+ #if($filepathcounter == 0)
+ <tr class="formrow"><td class="formmessage"
colspan="3">$Encoder.bodyEscape($ResourceBundle.getString('CSV.NoFilePathSpecified'))</td></tr>
+ #end
+
+ <tr class="formrow"><td class="formseparator"
colspan="3"><hr/></td></tr>
+ <tr class="formrow">
+ <td class="formcolumncell">
+ <a name="s${SEQNUM}_filepath">
+ <input type="button"
value="$Encoder.attributeEscape($ResourceBundle.getString('CSV.Add'))"
alt="$Encoder.attributeEscape($ResourceBundle.getString('CSV.AddFilePath'))"
onclick="javascript:s${SEQNUM}_addFilePath();"/>
+ </a>
+ <input type="hidden" name="s${SEQNUM}_filepath_count"
value="$filepathcounter"/>
+ <input type="hidden" name="s${SEQNUM}_filepath_op"
value="Continue"/>
+ </td>
+ <td class="formcolumncell">
+ <nobr><input type="text" size="35"
name="s${SEQNUM}_filepath_value" value=""/></nobr>
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ <tr>
+ <td class="description">
+
<nobr>$Encoder.bodyEscape($ResourceBundle.getString('CSV.Separator'))</nobr>
+ </td>
+ <td class="value">
+ <input type="text" name="s${SEQNUM}_separator" size="10"
value="$Encoder.attributeEscape($SEPARATOR)"/>
+ </td>
+ </tr>
+ <tr>
+ <td class="description">
+
<nobr>$Encoder.bodyEscape($ResourceBundle.getString('CSV.ContentColumn'))</nobr>
+ </td>
+ <td class="value">
+ <input type="text" name="s${SEQNUM}_contentcolumn" size="25"
value="$Encoder.attributeEscape($CONTENTCOLUMN)"/>
+ </td>
+ </tr>
+ <tr>
+ <td class="description">
+
<nobr>$Encoder.bodyEscape($ResourceBundle.getString('CSV.IdColumn'))</nobr>
+ </td>
+ <td class="value">
+ <input type="text" name="s${SEQNUM}_idcolumn" size="25"
value="$Encoder.attributeEscape($IDCOLUMN)"/>
+ </td>
+ </tr>
+
+</table>
+#else
+
+ #set($filepathcounter = 0)
+ #foreach($filepath in $FILEPATHS)
+<input type="hidden" name="s${SEQNUM}_filepath_value_$filepathcounter"
value="$Encoder.attributeEscape($filepath)"/>
+ #set($filepathcounter = $filepathcounter + 1)
+ #end
+<input type="hidden" name="s${SEQNUM}_filepath_count"
value="$filepathcounter"/>
+<input type="hidden" name="s${SEQNUM}_contentcolumn" value="$Encoder.attributeEscape($CONTENTCOLUMN)"/>
+<input type="hidden" name="s${SEQNUM}_idcolumn" value="$Encoder.attributeEscape($IDCOLUMN)"/>
+<input type="hidden" name="s${SEQNUM}_separator" value="$Encoder.attributeEscape($SEPARATOR)"/>
+
+
+#end
\ No newline at end of file
Added:
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/csv/viewSpecification_CSV.html
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/csv/viewSpecification_CSV.html?rev=1912766&view=auto
==============================================================================
---
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/csv/viewSpecification_CSV.html
(added)
+++
manifoldcf/trunk/connectors/csv/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/csv/viewSpecification_CSV.html
Fri Oct 6 07:24:43 2023
@@ -0,0 +1,77 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<table class="displaytable">
+
+ <tr>
+ <td class="description">
+
<nobr>$Encoder.bodyEscape($ResourceBundle.getString('CSV.FilesPaths'))</nobr>
+ </td>
+ <td class="boxcell">
+ <table class="formtable">
+ <tr class="formheaderrow">
+ <td
class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('CSV.FilePath'))</nobr></td>
+ </tr>
+
+ #set($filepathcounter = 0)
+ #foreach($filepath in $FILEPATHS)
+ #if(($filepathcounter % 2) == 0)
+ <tr class="evenformrow">
+ #else
+ <tr class="oddformrow">
+ #end
+ <td class="formcolumncell">
+ <nobr>$Encoder.bodyEscape($filepath)</nobr>
+ </td>
+ </tr>
+ #set($filepathcounter = $filepathcounter + 1)
+ #end
+
+ #if($filepathcounter == 0)
+ <tr class="formrow"><td class="formmessage"
colspan="3">$Encoder.bodyEscape($ResourceBundle.getString('CSV.NoFilePathSpecified'))</td></tr>
+ #end
+ </table>
+ </td>
+ </tr>
+ <tr>
+ <td class="description" style="max-width: 5em;">
+ $Encoder.bodyEscape($ResourceBundle.getString('CSV.Separator'))
+ </td>
+ <td class="message">
+ <nobr>$Encoder.bodyEscape($SEPARATOR)</nobr>
+ </td>
+ </tr>
+ <tr>
+ <td class="description" style="max-width: 5em;">
+ $Encoder.bodyEscape($ResourceBundle.getString('CSV.ContentColumn'))
+ </td>
+ <td class="message">
+ <nobr>$Encoder.bodyEscape($CONTENTCOLUMN)</nobr>
+ </td>
+ </tr>
+ <tr>
+ <td class="description" style="max-width: 5em;">
+ $Encoder.bodyEscape($ResourceBundle.getString('CSV.IdColumn'))
+ </td>
+ <td class="message">
+ <nobr>$Encoder.bodyEscape($IDCOLUMN)</nobr>
+ </td>
+ </tr>
+
+
+
+</table>
Added: manifoldcf/trunk/connectors/csv/pom.xml
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/csv/pom.xml?rev=1912766&view=auto
==============================================================================
--- manifoldcf/trunk/connectors/csv/pom.xml (added)
+++ manifoldcf/trunk/connectors/csv/pom.xml Fri Oct 6 07:24:43 2023
@@ -0,0 +1,91 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.manifoldcf</groupId>
+ <artifactId>mcf-connectors</artifactId>
+ <version>2.26-SNAPSHOT</version>
+ </parent>
+
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+ </properties>
+
+ <artifactId>mcf-csv-connector</artifactId>
+ <name>ManifoldCF - Connectors - CSV</name>
+
+ <build>
+ <defaultGoal>integration-test</defaultGoal>
+ <sourceDirectory>${basedir}/connector/src/main/java</sourceDirectory>
+
<testSourceDirectory>${basedir}/connector/src/test/java</testSourceDirectory>
+ <resources>
+ <resource>
+ <directory>${basedir}/connector/src/main/native2ascii</directory>
+ <includes>
+ <include>**/*.properties</include>
+ </includes>
+ </resource>
+ <resource>
+ <directory>${basedir}/connector/src/main/resources</directory>
+ <includes>
+ <include>**/*.html</include>
+ <include>**/*.js</include>
+ </includes>
+ </resource>
+ </resources>
+ <testResources>
+ <testResource>
+ <directory>${basedir}/connector/src/test/resources</directory>
+ </testResource>
+ </testResources>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-agents</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-connector-common</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-pull-agent</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-ui-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ </dependencies>
+</project>
\ No newline at end of file
Modified: manifoldcf/trunk/connectors/pom.xml
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/pom.xml?rev=1912766&r1=1912765&r2=1912766&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/pom.xml (original)
+++ manifoldcf/trunk/connectors/pom.xml Fri Oct 6 07:24:43 2023
@@ -80,6 +80,7 @@
<module>html-extractor</module>
<module>mongodb</module>
<module>csws</module>
+ <module>csv</module>
</modules>
-</project>
+</project>
\ No newline at end of file