Author: kwright
Date: Thu Jan 17 13:02:36 2013
New Revision: 1434653
URL: http://svn.apache.org/viewvc?rev=1434653&view=rev
Log:
Provide a mechanism to communicate a mime type (if known) from repository
connector to output connector, and hook this up into the Solr, Web, and RSS
connectors. Part of CONNECTORS-613.
Modified:
manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/RepositoryDocument.java
Modified:
manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java?rev=1434653&r1=1434652&r2=1434653&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
(original)
+++
manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
Thu Jan 17 13:02:36 2013
@@ -1355,6 +1355,9 @@ public class RSSConnector extends org.ap
long dataSize = cache.getDataLength(urlValue);
RepositoryDocument rd = new RepositoryDocument();
+ // Set content type
+ rd.setMimeType(cache.getContentType(urlValue));
+
// Turn into acls and add into description
String[] aclArray = new String[acls.size()];
int j = 0;
Modified:
manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java?rev=1434653&r1=1434652&r2=1434653&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
(original)
+++
manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
Thu Jan 17 13:02:36 2013
@@ -736,6 +736,7 @@ public class HttpPoster
{
long length = document.getBinaryLength();
InputStream is = document.getBinaryStream();
+ String contentType = document.getMimeType();
try
{
@@ -786,7 +787,7 @@ public class HttpPoster
contentStreamUpdateRequest.setParams(out);
- contentStreamUpdateRequest.addContentStream(new RepositoryDocumentStream(is,length));
+ contentStreamUpdateRequest.addContentStream(new RepositoryDocumentStream(is,length,contentType));
// Fire off the request.
// Note: I need to know whether the document has been permanently rejected or not, but we currently have
@@ -1089,13 +1090,15 @@ public class HttpPoster
*/
protected static class RepositoryDocumentStream extends ContentStreamBase
{
- protected InputStream is;
- protected long length;
+ protected final InputStream is;
+ protected final long length;
+ protected final String contentType;
- public RepositoryDocumentStream(InputStream is, long length)
+ public RepositoryDocumentStream(InputStream is, long length, String contentType)
{
this.is = is;
this.length = length;
+ this.contentType = contentType;
}
@Override
@@ -1119,7 +1122,7 @@ public class HttpPoster
@Override
public String getContentType()
{
- return "application/octet-stream";
+ return contentType;
}
}
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java?rev=1434653&r1=1434652&r2=1434653&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
Thu Jan 17 13:02:36 2013
@@ -1349,6 +1349,9 @@ public class WebcrawlerConnector extends
RepositoryDocument rd = new RepositoryDocument();
+ // Set the content type
+ rd.setMimeType(cache.getContentType(documentIdentifier));
+
// Turn into acls and add into description
String[] aclArray = new String[acls.size()];
int j = 0;
Modified:
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/RepositoryDocument.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/RepositoryDocument.java?rev=1434653&r1=1434652&r2=1434653&view=diff
==============================================================================
---
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/RepositoryDocument.java
(original)
+++
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/RepositoryDocument.java
Thu Jan 17 13:02:36 2013
@@ -45,6 +45,7 @@ public class RepositoryDocument
protected Security shareSecurity = new Security();
protected List<Security> directorySecurity = new ArrayList<Security>();
protected String fileName = "docname";
+ protected String contentMimeType = "application/octet-stream";
/** Constructor.
*/
@@ -52,6 +53,22 @@ public class RepositoryDocument
{
}
+ /** Set the document's mime type.
+ *@param mimeType is the mime type.
+ */
+ public void setMimeType(String mimeType)
+ {
+ contentMimeType = mimeType;
+ }
+
+ /** Get the document's mime type.
+ *@return the mime type.
+ */
+ public String getMimeType()
+ {
+ return contentMimeType;
+ }
+
/** Set the document's "file" allow acls.
*@param acl is the allowed "file" access control token list for the document.
*/