Author: kwright
Date: Thu Oct  4 18:41:28 2018
New Revision: 1842842

URL: http://svn.apache.org/viewvc?rev=1842842&view=rev
Log:
Fix for CONNECTORS-1537.

Modified:
    manifoldcf/trunk/CHANGES.txt
    
manifoldcf/trunk/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java

Modified: manifoldcf/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1842842&r1=1842841&r2=1842842&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Thu Oct  4 18:41:28 2018
@@ -3,6 +3,9 @@ $Id$
 
 ======================= 2.12-dev =====================
 
+CONNECTORS-1537: Numerous changes to GoogleDrive connector to support paths 
etc.
+(Douglas C. R. Paes)
+
 CONNECTORS-1536: Update GoogleDrive connector to support Teams.
 (Douglas C. R. Paes, Karl Wright)
 

Modified: 
manifoldcf/trunk/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java
URL: 
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java?rev=1842842&r1=1842841&r2=1842842&view=diff
==============================================================================
--- 
manifoldcf/trunk/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java
 (original)
+++ 
manifoldcf/trunk/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java
 Thu Oct  4 18:41:28 2018
@@ -19,50 +19,46 @@
 
 package org.apache.manifoldcf.crawler.connectors.googledrive;
 
-import org.apache.manifoldcf.core.common.*;
-import org.apache.manifoldcf.connectorcommon.common.*;
-
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InterruptedIOException;
+import java.security.GeneralSecurityException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
-import java.util.Date;
+import java.util.Map.Entry;
 import java.util.Set;
-import java.util.Iterator;
 
-import org.apache.manifoldcf.crawler.system.Logging;
-import org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector;
-import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
-import org.apache.manifoldcf.core.interfaces.ConfigParams;
-import org.apache.manifoldcf.core.interfaces.Specification;
-import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
 import org.apache.commons.lang.StringUtils;
 import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
+import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
+import org.apache.manifoldcf.connectorcommon.common.XThreadInputStream;
+import org.apache.manifoldcf.connectorcommon.common.XThreadStringBuffer;
+import org.apache.manifoldcf.core.interfaces.ConfigParams;
 import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
 import org.apache.manifoldcf.core.interfaces.IPasswordMapperActivity;
 import org.apache.manifoldcf.core.interfaces.IPostParameters;
 import org.apache.manifoldcf.core.interfaces.IThreadContext;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+import org.apache.manifoldcf.core.interfaces.Specification;
 import org.apache.manifoldcf.core.interfaces.SpecificationNode;
-import org.apache.manifoldcf.crawler.interfaces.ISeedingActivity;
-import org.apache.manifoldcf.crawler.interfaces.IProcessActivity;
+import org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector;
 import org.apache.manifoldcf.crawler.interfaces.IExistingVersions;
-import org.apache.log4j.Logger;
+import org.apache.manifoldcf.crawler.interfaces.IProcessActivity;
+import org.apache.manifoldcf.crawler.interfaces.ISeedingActivity;
+import org.apache.manifoldcf.crawler.system.Logging;
 
-import com.google.api.services.drive.model.File;
 import com.google.api.client.repackaged.com.google.common.base.Objects;
 import com.google.api.client.util.DateTime;
+import com.google.api.services.drive.model.File;
+import com.google.api.services.drive.model.ParentReference;
 
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.FileOutputStream;
-import java.io.OutputStream;
-import java.util.Map.Entry;
-import java.security.GeneralSecurityException;
 /**
  *
  * @author andrew
@@ -123,6 +119,11 @@ public class GoogleDriveRepositoryConnec
    */
   private static final String VIEW_SPEC_FORWARD = 
"viewSpecification_googledrive.html";
   
+  /** The content path param used for managing content migration deletion **/
+  private static final String CONTENT_PATH_PARAM = "contentPath";
+  
+  private String SLASH = "/";
+       
   /**
    * Endpoint server name
    */
@@ -980,6 +981,23 @@ public class GoogleDriveRepositoryConnec
 
     for (String documentIdentifier : documentIdentifiers) {
       File googleFile = getObject(documentIdentifier);
+
+      // StringBuilder log = new StringBuilder();
+      // log.append("File Original Name: " + googleFile.getOriginalFilename());
+      // log.append(System.getProperty("line.separator"));
+      // log.append("File Title: " + googleFile.getTitle());
+      // log.append(System.getProperty("line.separator"));
+      // log.append("File Description: " + googleFile.getDescription());
+      // log.append(System.getProperty("line.separator"));
+      // log.append("File Extension: " + googleFile.getFileExtension());
+      // log.append(System.getProperty("line.separator"));
+      // log.append("File MimeType: " + googleFile.getMimeType());
+      // log.append(System.getProperty("line.separator"));
+      // log.append("File Version: " + googleFile.getVersion());
+      // log.append(System.getProperty("line.separator"));
+      //
+      // System.out.println(log);
+
       String versionString;
       
       if (googleFile == null || (googleFile.containsKey("explicitlyTrashed") 
&& googleFile.getExplicitlyTrashed())) {
@@ -1106,14 +1124,43 @@ public class GoogleDriveRepositoryConnec
               Date modifiedDate = (modifiedDateObject==null)?null:new 
Date(modifiedDateObject.getValue());
               // We always direct to the PDF except for Spreadsheets
               String documentURI = null;
-              if (!mimeType.equals("application/vnd.google-apps.spreadsheet")) 
{
-                documentURI = getUrl(googleFile, "application/pdf");
+              // if 
(!mimeType.equals("application/vnd.google-apps.spreadsheet")) {
+              // documentURI = getUrl(googleFile, "application/pdf");
+              // } else {
+              // documentURI = getUrl(googleFile,
+              // 
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+              // }
+
+              switch (mimeType) {
+                case "application/vnd.google-apps.spreadsheet":
+                  documentURI = getUrl(googleFile, 
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+                  break;
+
+                case "application/vnd.google-apps.document":
+                  documentURI = getUrl(googleFile, 
"application/vnd.openxmlformats-officedocument.wordprocessingml.document");
+                  break;
+
+                case "application/vnd.google-apps.presentation":
+                  documentURI = getUrl(googleFile, 
"application/vnd.openxmlformats-officedocument.presentationml.presentation");
+                  break;
+
+                default:
+                  documentURI = getUrl(googleFile, "application/pdf");
+                  break;
+              }
+
+              String fullContentPath = getDocumentContentPath(googleFile, 
documentURI);
+              
+              // Append the new parameters in the query string
+              if (StringUtils.contains(documentURI, '?')) {
+                documentURI = documentURI + "&" + CONTENT_PATH_PARAM + "=" + 
fullContentPath;
               } else {
-                documentURI = getUrl(googleFile, 
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+                documentURI = documentURI + "?" + CONTENT_PATH_PARAM + "=" + 
fullContentPath;
               }
 
-              if (!activities.checkLengthIndexable(fileLength))
-              {
+              System.out.println("documentURI: " + documentURI);
+
+              if (!activities.checkLengthIndexable(fileLength)) {
                 errorCode = activities.EXCLUDED_LENGTH;
                 errorDesc = "Excluding document because of file length 
('"+fileLength+"')";
                 activities.noDocument(nodeId,version);
@@ -1155,7 +1202,7 @@ public class GoogleDriveRepositoryConnec
               }
               
               if (mimeType != null)
-                rd.setMimeType(mimeType);
+                rd.setMimeType(getFixedMimeType(mimeType));
               if (createdDate != null)
                 rd.setCreatedDate(createdDate);
               if (modifiedDate != null)
@@ -1164,7 +1211,17 @@ public class GoogleDriveRepositoryConnec
               {
                 if (title == null)
                   title = "";
-                rd.setFileName(title + "." + extension);
+
+                if (StringUtils.endsWithIgnoreCase(title, "." + extension)) {
+                  rd.setFileName(title);
+                } else {
+                  rd.setFileName(title + "." + extension);
+                }
+              } else {
+                if (title == null)
+                  title = "";
+              
+                rd.setFileName(title + "." + getExtensionByMimeType(mimeType));
               }
 
               // Get general document metadata
@@ -1181,6 +1238,13 @@ public class GoogleDriveRepositoryConnec
                   InputStream is = t.getSafeInputStream();
                   try {
                     // Can only index while background thread is running!
+                         
+                       //filter the fields selected in the query
+                       List<String> sourcePath = new ArrayList<>();
+                       sourcePath.add(fullContentPath);
+                       rd.setSourcePath(sourcePath);
+                    //ingestion
+                         
                     rd.setBinary(is, fileLength);
                     activities.ingestDocumentWithException(nodeId, version, 
documentURI, rd);
                   } finally {
@@ -1232,13 +1296,130 @@ public class GoogleDriveRepositoryConnec
         }
       }
     }
-    
+
   }
-  
+
+  private String getFixedMimeType(String mimeType) {
+    switch (mimeType) {
+      case "application/vnd.google-apps.spreadsheet":
+        return 
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
+
+      case "application/vnd.google-apps.document":
+        return 
"application/vnd.openxmlformats-officedocument.wordprocessingml.document";
+               
+      case "application/vnd.google-apps.presentation":
+        return 
"application/vnd.openxmlformats-officedocument.presentationml.presentation";
+               
+      default:
+        return mimeType;
+    }
+  }
+
+  private String getExtensionByMimeType(String mimeType) {
+    switch (mimeType) {
+      case "application/vnd.google-apps.spreadsheet":
+      return "xlsx";
+
+      case "application/vnd.google-apps.document":
+        return "docx";
+
+      case "application/vnd.google-apps.presentation":
+        return "pptx";
+
+      default:
+        return null;
+    }
+  }
+
+  private String getDocumentContentPath(File googleFile, String documentURI) {
+    String fullContentPath = null;
+       try {
+      if (!isDir(googleFile)) {
+        if (googleFile.getParents() != null && 
!googleFile.getParents().isEmpty()) {
+          ParentReference parentRef = googleFile.getParents().get(0);
+          File parent;
+
+          parent = getObject(parentRef.getId());
+
+          String path = getFilePath(parent);
+          String name;
+          String title = googleFile.getTitle();
+
+          String extension = googleFile.getFileExtension();
+
+          if (extension != null) {
+            if (title == null)
+              title = "";
+
+            if (StringUtils.endsWithIgnoreCase(title, "." + extension)) {
+              name = title;
+            } else {
+              name = title + "." + extension;
+            }
+          } else {
+            if (title == null)
+              title = "";
+            name = title + "." + 
getExtensionByMimeType(googleFile.getMimeType());
+          }
+
+          fullContentPath = path + SLASH + name;
+        }
+      } else {
+        String path = getFilePath(googleFile);
+        String name = googleFile.getTitle();
+        fullContentPath = path + SLASH + name;
+      }
+    } catch (ManifoldCFException e) {
+      // TODO Auto-generated catch block
+      e.printStackTrace();
+    } catch (ServiceInterruption e) {
+      // TODO Auto-generated catch block
+      e.printStackTrace();
+    } catch (IOException e) {
+      // TODO Auto-generated catch block
+      e.printStackTrace();
+       }
+    return fullContentPath;
+  }
+
+  private String getFilePath(File file) throws IOException, 
ManifoldCFException, ServiceInterruption {
+    String folderPath = "";
+    String fullFilePath = null;
+
+    List<ParentReference> parentReferencesList = file.getParents();
+    List<String> folderList = new ArrayList<String>();
+
+    List<String> finalFolderList = getfoldersList(parentReferencesList, 
folderList);
+    Collections.reverse(finalFolderList);
+
+    for (String folder : finalFolderList) {
+      folderPath += "/" + folder;
+    }
+
+    fullFilePath = folderPath + "/" + file.getTitle();
+
+    return fullFilePath;
+  }
+
+  private List<String> getfoldersList(List<ParentReference> 
parentReferencesList, List<String> folderList)
+    throws IOException, ManifoldCFException, ServiceInterruption {
+    for (int i = 0; i < parentReferencesList.size(); i++) {
+      String id = parentReferencesList.get(i).getId();
+
+      File file = getObject(id);
+      folderList.add(file.getTitle());
+
+      if (!(file.getParents().isEmpty())) {
+        List<ParentReference> parentReferenceslist2 = file.getParents();
+        getfoldersList(parentReferenceslist2, folderList);
+      }
+    }
+    return folderList;
+  }
+
   protected class DocumentReadingThread extends Thread {
 
-    protected Throwable exception = null;
-    protected final String fileURL;
+    protected Throwable exception = null;    protected final String fileURL;
     protected final XThreadInputStream stream;
     
     public DocumentReadingThread(String fileURL) {


Reply via email to