Author: kwright
Date: Thu Jul 21 08:27:31 2022
New Revision: 1902902

URL: http://svn.apache.org/viewvc?rev=1902902&view=rev
Log:
CONNECTORS-1541, revisited

Modified:
    
manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/agents/output/cmisoutput/CmisOutputConnector.java
    
manifoldcf/trunk/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java

Modified: 
manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/agents/output/cmisoutput/CmisOutputConnector.java
URL: 
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/agents/output/cmisoutput/CmisOutputConnector.java?rev=1902902&r1=1902901&r2=1902902&view=diff
==============================================================================
--- 
manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/agents/output/cmisoutput/CmisOutputConnector.java
 (original)
+++ 
manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/agents/output/cmisoutput/CmisOutputConnector.java
 Thu Jul 21 08:27:31 2022
@@ -1093,7 +1093,11 @@ public class CmisOutputConnector extends
    */
   private String getContentPath(String documentURI) throws URISyntaxException, UnsupportedEncodingException {
     String contentPath = StringUtils.EMPTY;
-    String documentURIWithFixedEncoding = StringUtils.replace(documentURI, " ", "%20");
+    String documentURIWithFixedEncoding = StringUtils.replace(documentURI, "%", "_");
+    documentURIWithFixedEncoding = StringUtils.replace(documentURIWithFixedEncoding, " ", "%20");
+    documentURIWithFixedEncoding = StringUtils.replace(documentURIWithFixedEncoding, "`", "%60");
+    documentURIWithFixedEncoding = StringUtils.replace(documentURIWithFixedEncoding, "#", "%23");
+    
     List<NameValuePair> params = URLEncodedUtils.parse(new URI(documentURIWithFixedEncoding), StandardCharsets.UTF_8);
     Iterator<NameValuePair> paramsIterator = params.iterator();
     while (paramsIterator.hasNext()) {

Modified: 
manifoldcf/trunk/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java
URL: 
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java?rev=1902902&r1=1902901&r2=1902902&view=diff
==============================================================================
--- 
manifoldcf/trunk/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java
 (original)
+++ 
manifoldcf/trunk/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java
 Thu Jul 21 08:27:31 2022
@@ -1149,6 +1149,14 @@ public class GoogleDriveRepositoryConnec
                   break;
               }
 
+              // Google native format documents may exist, but have 0 byte in size.
+              // In cases like this, there is no way to export it, and because of that, it is going to be ignored
+              if (documentURI == null) {
+                  errorCode = "NOLENGTH";
+                  errorDesc = "Document "+nodeId+" had no length; skipping";
+                  continue;
+              }
+
               String fullContentPath = getDocumentContentPath(googleFile, documentURI);
               
               // Append the new parameters in the query string
@@ -1158,8 +1166,6 @@ public class GoogleDriveRepositoryConnec
                 documentURI = documentURI + "?" + CONTENT_PATH_PARAM + "=" + fullContentPath;
               }
 
-              System.out.println("documentURI: " + documentURI);
-
               if (!activities.checkLengthIndexable(fileLength)) {
                 errorCode = activities.EXCLUDED_LENGTH;
                 errorDesc = "Excluding document because of file length ('"+fileLength+"')";
@@ -1215,13 +1221,24 @@ public class GoogleDriveRepositoryConnec
                 if (StringUtils.endsWithIgnoreCase(title, "." + extension)) {
                   rd.setFileName(title);
                 } else {
-                  rd.setFileName(title + "." + extension);
+                  String name = title + "." + extension;
+                  
+                  if (StringUtils.endsWithIgnoreCase(name, ".")) {
+                    name = StringUtils.chomp(name, ".");
+                  }
+                  
+                  rd.setFileName(name);
                 }
               } else {
                 if (title == null)
                   title = "";
               
-                rd.setFileName(title + "." + getExtensionByMimeType(mimeType));
+                String name = title + "." + getExtensionByMimeType(mimeType);
+                
+                if (StringUtils.endsWithIgnoreCase(name, ".")) {
+                    name = StringUtils.chomp(name, ".");
+                }
+                rd.setFileName(name);
               }
 
               // Get general document metadata
@@ -1362,7 +1379,11 @@ public class GoogleDriveRepositoryConnec
             name = title + "." + getExtensionByMimeType(googleFile.getMimeType());
           }
 
-          fullContentPath = path + SLASH + name;
+          if (StringUtils.endsWithIgnoreCase(name, ".")) {
+            name = StringUtils.chomp(name, ".");
+          }
+          
+          fullContentPath = path + SLASH + StringUtils.trim(name);
         }
       } else {
         String path = getFilePath(googleFile);
@@ -1472,7 +1493,7 @@ public class GoogleDriveRepositoryConnec
     if (googleFile.containsKey("fileSize")) {
       return googleFile.getDownloadUrl();
     } else {
-      return googleFile.getExportLinks().get(exportType);
+      return (googleFile.getExportLinks() != null) ? googleFile.getExportLinks().get(exportType) : null;
     }
   }
 
@@ -1568,7 +1589,10 @@ public class GoogleDriveRepositoryConnec
   
   private String cleanupFileFolderName(String name) {
          name = name.trim();
-         name = name.replaceAll("[\\\\/:*?\"<>|]", "_");
+         name = name.replaceAll("[\\\\/:*?\"<>%|]", "_");
+         if (StringUtils.endsWithIgnoreCase(name, ".")) {
+           name = StringUtils.chomp(name, ".");
+         }
          return name;
   }
 }
\ No newline at end of file


Reply via email to