Author: kwright
Date: Sun Jan 27 14:54:17 2013
New Revision: 1439092
URL: http://svn.apache.org/viewvc?rev=1439092&view=rev
Log:
Change the appropriate interfaces to allow the forced parameters to make it
through to the incremental ingester.
Modified:
manifoldcf/branches/CONNECTORS-552-2/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
manifoldcf/branches/CONNECTORS-552-2/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
manifoldcf/branches/CONNECTORS-552-2/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
Modified:
manifoldcf/branches/CONNECTORS-552-2/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
URL:
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-552-2/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java?rev=1439092&r1=1439091&r2=1439092&view=diff
==============================================================================
---
manifoldcf/branches/CONNECTORS-552-2/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
(original)
+++
manifoldcf/branches/CONNECTORS-552-2/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
Sun Jan 27 14:54:17 2013
@@ -358,6 +358,48 @@ public class IncrementalIngester extends
IOutputActivity activities)
throws ManifoldCFException, ServiceInterruption
{
+ return documentIngest(outputConnectionName,
+ identifierClass,
+ identifierHash,
+ documentVersion,
+ outputVersion,
+ authorityName,
+ new HashMap<String,Set<String>>(),
+ data,
+ ingestTime,
+ documentURI,
+ activities);
+ }
+
+ /** Ingest a document.
+ * This ingests the document, and notes it. If this is a repeat ingestion of the document, this
+ * method also REMOVES ALL OLD METADATA. When complete, the index will contain only the metadata
+ * described by the RepositoryDocument object passed to this method.
+ * ServiceInterruption is thrown if the document ingestion must be rescheduled.
+ *@param outputConnectionName is the name of the output connection associated with this action.
+ *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
+ *@param identifierHash is the hashed document identifier.
+ *@param documentVersion is the document version.
+ *@param outputVersion is the output version string constructed from the output specification by the output connector.
+ *@param authorityName is the name of the authority associated with the document, if any.
+ *@param forcedParameters are the indexing parameters related to the job itself.
+ *@param data is the document data. The data is closed after ingestion is complete.
+ *@param ingestTime is the time at which the ingestion took place, in milliseconds since epoch.
+ *@param documentURI is the URI of the document, which will be used as the key of the document in the index.
+ *@param activities is an object providing a set of methods that the implementer can use to perform the operation.
+ *@return true if the ingest was ok, false if the ingest is illegal (and should not be repeated).
+ */
+ public boolean documentIngest(String outputConnectionName,
+ String identifierClass, String identifierHash,
+ String documentVersion,
+ String outputVersion,
+ String authorityName,
+ Map<String,Set<String>> forcedParameters,
+ RepositoryDocument data,
+ long ingestTime, String documentURI,
+ IOutputActivity activities)
+ throws ManifoldCFException, ServiceInterruption
+ {
IOutputConnection connection =
connectionManager.load(outputConnectionName);
String docKey = makeKey(identifierClass,identifierHash);
Modified:
manifoldcf/branches/CONNECTORS-552-2/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
URL:
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-552-2/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java?rev=1439092&r1=1439091&r2=1439092&view=diff
==============================================================================
---
manifoldcf/branches/CONNECTORS-552-2/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
(original)
+++
manifoldcf/branches/CONNECTORS-552-2/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
Sun Jan 27 14:54:17 2013
@@ -20,6 +20,7 @@ package org.apache.manifoldcf.agents.int
import org.apache.manifoldcf.core.interfaces.*;
import java.io.*;
+import java.util.*;
/** This interface describes the incremental ingestion API.
* SOME NOTES:
@@ -148,6 +149,35 @@ public interface IIncrementalIngester
IOutputActivity activities)
throws ManifoldCFException, ServiceInterruption;
+ /** Ingest a document.
+ * This ingests the document, and notes it. If this is a repeat ingestion of the document, this
+ * method also REMOVES ALL OLD METADATA. When complete, the index will contain only the metadata
+ * described by the RepositoryDocument object passed to this method.
+ * ServiceInterruption is thrown if the document ingestion must be rescheduled.
+ *@param outputConnectionName is the name of the output connection associated with this action.
+ *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
+ *@param identifierHash is the hashed document identifier.
+ *@param documentVersion is the document version.
+ *@param outputVersion is the output version string constructed from the output specification by the output connector.
+ *@param authorityName is the name of the authority associated with the document, if any.
+ *@param forcedParameters are the indexing parameters related to the job itself.
+ *@param data is the document data. The data is closed after ingestion is complete.
+ *@param ingestTime is the time at which the ingestion took place, in milliseconds since epoch.
+ *@param documentURI is the URI of the document, which will be used as the key of the document in the index.
+ *@param activities is an object providing a set of methods that the implementer can use to perform the operation.
+ *@return true if the ingest was ok, false if the ingest is illegal (and should not be repeated).
+ */
+ public boolean documentIngest(String outputConnectionName,
+ String identifierClass, String identifierHash,
+ String documentVersion,
+ String outputVersion,
+ String authorityName,
+ Map<String,Set<String>> forcedParameters,
+ RepositoryDocument data,
+ long ingestTime, String documentURI,
+ IOutputActivity activities)
+ throws ManifoldCFException, ServiceInterruption;
+
/** Note the fact that we checked a document (and found that it did not need to be ingested, because the
* versions agreed).
*@param outputConnectionName is the name of the output connection associated with this action.
Modified:
manifoldcf/branches/CONNECTORS-552-2/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
URL:
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-552-2/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java?rev=1439092&r1=1439091&r2=1439092&view=diff
==============================================================================
---
manifoldcf/branches/CONNECTORS-552-2/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
(original)
+++
manifoldcf/branches/CONNECTORS-552-2/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
Sun Jan 27 14:54:17 2013
@@ -1652,6 +1652,7 @@ public class WorkerThread extends Thread
job.getConnectionName(),documentIdentifierHash,
version,outputVersion,
connection.getACLAuthority(),
+ job.getForcedMetadata(),
data,currentTime,
documentURI,
ingestLogger);