Author: kwright
Date: Thu Jul 21 08:27:31 2022
New Revision: 1902902
URL: http://svn.apache.org/viewvc?rev=1902902&view=rev
Log:
CONNECTORS-1541, revisited
Modified:
manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/agents/output/cmisoutput/CmisOutputConnector.java
manifoldcf/trunk/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java
Modified:
manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/agents/output/cmisoutput/CmisOutputConnector.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/agents/output/cmisoutput/CmisOutputConnector.java?rev=1902902&r1=1902901&r2=1902902&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/agents/output/cmisoutput/CmisOutputConnector.java
(original)
+++
manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/agents/output/cmisoutput/CmisOutputConnector.java
Thu Jul 21 08:27:31 2022
@@ -1093,7 +1093,11 @@ public class CmisOutputConnector extends
*/
private String getContentPath(String documentURI) throws URISyntaxException,
UnsupportedEncodingException {
String contentPath = StringUtils.EMPTY;
- String documentURIWithFixedEncoding = StringUtils.replace(documentURI, " ", "%20");
+ String documentURIWithFixedEncoding = StringUtils.replace(documentURI,
"%", "_");
+ documentURIWithFixedEncoding =
StringUtils.replace(documentURIWithFixedEncoding, " ", "%20");
+ documentURIWithFixedEncoding =
StringUtils.replace(documentURIWithFixedEncoding, "`", "%60");
+ documentURIWithFixedEncoding =
StringUtils.replace(documentURIWithFixedEncoding, "#", "%23");
+
List<NameValuePair> params = URLEncodedUtils.parse(new
URI(documentURIWithFixedEncoding), StandardCharsets.UTF_8);
Iterator<NameValuePair> paramsIterator = params.iterator();
while (paramsIterator.hasNext()) {
Modified:
manifoldcf/trunk/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java?rev=1902902&r1=1902901&r2=1902902&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java
(original)
+++
manifoldcf/trunk/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java
Thu Jul 21 08:27:31 2022
@@ -1149,6 +1149,14 @@ public class GoogleDriveRepositoryConnec
break;
}
+ // Google native format documents may exist, but have 0 byte in size.
+ // In cases like this, there is no way to export it, and because of that, it is going to be ignored
+ if (documentURI == null) {
+ errorCode = "NOLENGTH";
+ errorDesc = "Document "+nodeId+" had no length; skipping";
+ continue;
+ }
+
String fullContentPath = getDocumentContentPath(googleFile,
documentURI);
// Append the new parameters in the query string
@@ -1158,8 +1166,6 @@ public class GoogleDriveRepositoryConnec
documentURI = documentURI + "?" + CONTENT_PATH_PARAM + "=" +
fullContentPath;
}
- System.out.println("documentURI: " + documentURI);
-
if (!activities.checkLengthIndexable(fileLength)) {
errorCode = activities.EXCLUDED_LENGTH;
errorDesc = "Excluding document because of file length ('"+fileLength+"')";
@@ -1215,13 +1221,24 @@ public class GoogleDriveRepositoryConnec
if (StringUtils.endsWithIgnoreCase(title, "." + extension)) {
rd.setFileName(title);
} else {
- rd.setFileName(title + "." + extension);
+ String name = title + "." + extension;
+
+ if (StringUtils.endsWithIgnoreCase(name, ".")) {
+ name = StringUtils.chomp(name, ".");
+ }
+
+ rd.setFileName(name);
}
} else {
if (title == null)
title = "";
- rd.setFileName(title + "." + getExtensionByMimeType(mimeType));
+ String name = title + "." + getExtensionByMimeType(mimeType);
+
+ if (StringUtils.endsWithIgnoreCase(name, ".")) {
+ name = StringUtils.chomp(name, ".");
+ }
+ rd.setFileName(name);
}
// Get general document metadata
@@ -1362,7 +1379,11 @@ public class GoogleDriveRepositoryConnec
name = title + "." +
getExtensionByMimeType(googleFile.getMimeType());
}
- fullContentPath = path + SLASH + name;
+ if (StringUtils.endsWithIgnoreCase(name, ".")) {
+ name = StringUtils.chomp(name, ".");
+ }
+
+ fullContentPath = path + SLASH + StringUtils.trim(name);
}
} else {
String path = getFilePath(googleFile);
@@ -1472,7 +1493,7 @@ public class GoogleDriveRepositoryConnec
if (googleFile.containsKey("fileSize")) {
return googleFile.getDownloadUrl();
} else {
- return googleFile.getExportLinks().get(exportType);
+ return (googleFile.getExportLinks() != null) ?
googleFile.getExportLinks().get(exportType) : null;
}
}
@@ -1568,7 +1589,10 @@ public class GoogleDriveRepositoryConnec
private String cleanupFileFolderName(String name) {
name = name.trim();
- name = name.replaceAll("[\\\\/:*?\"<>|]", "_");
+ name = name.replaceAll("[\\\\/:*?\"<>%|]", "_");
+ if (StringUtils.endsWithIgnoreCase(name, ".")) {
+ name = StringUtils.chomp(name, ".");
+ }
return name;
}
}
\ No newline at end of file