Xuanwo commented on code in PR #2892:
URL: 
https://github.com/apache/incubator-opendal/pull/2892#discussion_r1299940382


##########
core/src/services/gdrive/core.rs:
##########
@@ -56,91 +57,225 @@ impl Debug for GdriveCore {
 }
 
 impl GdriveCore {
-    async fn get_abs_root_id(&self) -> Result<String> {
-        let root = "root";
+    /// Get the file id by path.
+    /// Including file and folder.
+    ///
+    /// The path is rooted at the root of the Google Drive.
+    ///
+    /// # Notes
+    ///
+    /// - A path is a sequence of file names separated by slashes.
+    /// - A file only knows its parent id, but not its name.
+    /// - To find the file id of a file, we need to traverse the path from the 
root to the file.
+    pub(crate) async fn get_file_id_by_path(&self, file_path: &str) -> 
Result<String> {
+        let path = build_rooted_abs_path(&self.root, file_path);
 
-        if let Some(root_id) = self.path_cache.lock().await.get(root) {
-            return Ok(root_id.to_string());
-        }
+        let mut parent_id = "root".to_owned();
+        let file_path_items: Vec<&str> = path.split('/').filter(|&x| 
!x.is_empty()).collect();
 
-        let req = self
-            .sign(Request::get(
-                "https://www.googleapis.com/drive/v3/files/root";,
-            ))
-            .body(AsyncBody::Empty)
-            .map_err(new_request_build_error)?;
+        for (i, item) in file_path_items.iter().enumerate() {
+            let mut query = format!(
+                "name = \"{}\" and \"{}\" in parents and trashed = false",
+                item, parent_id
+            );
+            if i != file_path_items.len() - 1 {
+                query += " and mimeType = 
'application/vnd.google-apps.folder'";
+            }
 
-        let resp = self.client.send(req).await?;
-        let status = resp.status();
+            let req = self.sign(
+                Request::get(format!(
+                    "https://www.googleapis.com/drive/v3/files?q={}";,
+                    percent_encode_path(&query)
+                ))
+                .body(AsyncBody::default())
+                .map_err(new_request_build_error)?,
+            );
 
-        match status {
-            StatusCode::OK => {
-                let resp_body = &resp.into_body().bytes().await?;
+            let resp = self.client.send(req).await?;
+            let status = resp.status();
+
+            match status {
+                StatusCode::OK => {
+                    let resp_body = &resp.into_body().bytes().await?;
 
-                let gdrive_file: GdriveFile =
-                    
serde_json::from_slice(resp_body).map_err(new_json_deserialize_error)?;
+                    let gdrive_file_list: GdriveFileList =
+                        
serde_json::from_slice(resp_body).map_err(new_json_deserialize_error)?;
 
-                let root_id = gdrive_file.id;
+                    if gdrive_file_list.files.is_empty() {
+                        return Err(Error::new(
+                            ErrorKind::NotFound,
+                            &format!("path not found: {}", item),
+                        ));
+                    }
 
-                let mut cache_guard = self.path_cache.lock().await;
-                cache_guard.insert(root.to_owned(), root_id.clone());
+                    if gdrive_file_list.files.len() > 1 {
+                        return Err(Error::new(ErrorKind::Unexpected, 
&format!("please ensure that the file corresponding to the path exists and is 
unique. the response body is {}", String::from_utf8_lossy(resp_body))));
+                    }
 
-                Ok(root_id)
+                    parent_id = gdrive_file_list.files[0].id.clone();
+                }
+                _ => {
+                    return Err(parse_error(resp).await?);
+                }
             }
-            _ => Err(parse_error(resp).await?),
         }
-    }
 
-    async fn get_file_id_by_path(&self, file_path: &str) -> Result<String> {
-        let path = build_rooted_abs_path(&self.root, file_path);
+        Ok(parent_id)
+    }
 
-        if let Some(file_id) = self.path_cache.lock().await.get(&path) {
-            return Ok(file_id.to_string());
-        }
+    /// Ensure the parent path exists.
+    /// If the parent path does not exist, create it.
+    ///
+    /// # Notes
+    ///
+    /// - The path is rooted at the root of the Google Drive.
+    /// - Will create the parent path recursively.
+    pub(crate) async fn ensure_parent_path(&self, path: &str) -> 
Result<String> {
+        let path = build_rooted_abs_path(&self.root, path);
 
-        let mut parent_id = self.get_abs_root_id().await?;
-        let file_path_items: Vec<&str> = path.split('/').filter(|&x| 
!x.is_empty()).collect();
+        let mut parent: String = "root".to_owned();
+        let mut file_path_items: Vec<&str> = path.split('/').filter(|&x| 
!x.is_empty()).collect();
+        file_path_items.pop();
 
         for (i, item) in file_path_items.iter().enumerate() {
-            let mut query = format!(
-                "name = '{}' and parents = '{}' and trashed = false",
-                item, parent_id
+            let query = format!(
+                "name = \"{}\" and \"{}\" in parents and trashed = false and 
mimeType = 'application/vnd.google-apps.folder'",
+                item, parent
             );
-            if i != file_path_items.len() - 1 {
-                query += "and mimeType = 'application/vnd.google-apps.folder'";
-            }
 
-            let req = self
-                .sign(Request::get(format!(
+            let req = self.sign(

Review Comment:
   Please don't use sign in this way. Instead:
   
   ```rust
   let mut req = Request::get("xxx").xxx().yyy();
   let _ = self.sign(&mut req)?;
   ```



##########
core/src/services/gdrive/core.rs:
##########
@@ -20,30 +20,31 @@ use std::fmt::Debug;
 use std::fmt::Formatter;
 use std::sync::Arc;
 
+use bytes;
 use http::header;
-use http::request::Builder;
 use http::Request;
 use http::Response;
 use http::StatusCode;
 use serde::Deserialize;
+use serde_json::json;
 use tokio::sync::Mutex;
 
 use super::error::parse_error;
-use crate::raw::build_rooted_abs_path;
-use crate::raw::new_json_deserialize_error;
-use crate::raw::new_request_build_error;
-use crate::raw::percent_encode_path;
-use crate::raw::AsyncBody;
-use crate::raw::HttpClient;
-use crate::raw::IncomingAsyncBody;
+use crate::raw::*;
 use crate::types::Result;
 use crate::Error;
 use crate::ErrorKind;
 
 pub struct GdriveCore {
     pub root: String,
     pub access_token: String,
+
     pub client: HttpClient,
+
+    /// Cache the mapping from path to file id
+    ///
+    /// Google Drive uses file id to identify a file.
+    /// As the path is immutable, we can cache the mapping from path to file 
id.

Review Comment:
   We can extract those path to id logic to `raw`, since many service will have 
similiar logic.



##########
core/src/services/gdrive/backend.rs:
##########
@@ -93,20 +163,71 @@ impl Accessor for GdriveBackend {
             ));
         }
 
+        // As Google Drive allows files have the same name, we need to check 
if the file exists.
+        // If the file exists, we will keep its ID and update it.
+        let mut file_id: Option<String> = None;
+
+        let resp = self.core.gdrive_stat(path).await;
+        // We don't care about the error here.
+        // As long as the file doesn't exist, we will create a new one.
+        if let Ok(resp) = resp {
+            let status = resp.status();
+
+            if status == StatusCode::OK {
+                let body = resp.into_body().bytes().await?;
+                let meta = serde_json::from_slice::<GdriveFile>(&body)
+                    .map_err(new_json_deserialize_error)?;
+
+                file_id = if meta.id.is_empty() {
+                    None
+                } else {
+                    Some(meta.id)
+                };
+            }
+        }
+
         Ok((
             RpWrite::default(),
-            GdriveWriter::new(self.core.clone(), args, String::from(path)),
+            GdriveWriter::new(self.core.clone(), String::from(path), file_id),
         ))
     }
 
     async fn delete(&self, path: &str, _: OpDelete) -> Result<RpDelete> {
-        let resp = self.core.gdrive_delete(path).await?;
+        let resp = self.core.gdrive_delete(path).await;
+        if let Ok(resp) = resp {
+            let status = resp.status();
 
-        let status = resp.status();
+            match status {
+                StatusCode::NO_CONTENT => return Ok(RpDelete::default()),
+                _ => return Err(parse_error(resp).await?),
+            }
+        };
 
-        match status {
-            StatusCode::NO_CONTENT => Ok(RpDelete::default()),
-            _ => Err(parse_error(resp).await?),
+        let e = resp.err().unwrap();
+        if e.kind() == ErrorKind::NotFound {
+            Ok(RpDelete::default())
+        } else {
+            Err(e)
         }
     }
 }
+
+impl GdriveBackend {
+    pub(crate) fn parse_metadata(&self, body: bytes::Bytes) -> 
Result<Metadata> {
+        let metadata =
+            
serde_json::from_slice::<GdriveFile>(&body).map_err(new_json_deserialize_error)?;
+
+        let mut meta = Metadata::new(match metadata.mime_type.as_str() {
+            "application/vnd.google-apps.folder" => EntryMode::DIR,
+            _ => EntryMode::FILE,
+        });
+        meta = 
meta.with_content_length(metadata.size.parse::<u64>().unwrap_or(0));
+        meta = meta.with_last_modified(
+            metadata
+                .modified_time
+                .parse::<chrono::DateTime<Utc>>()
+                .unwrap_or_default(),

Review Comment:
   We should throw an error instead of use default.



##########
core/src/services/gdrive/core.rs:
##########
@@ -190,29 +326,112 @@ impl GdriveCore {
             self.get_file_id_by_path(path).await?
         );
 
-        let req = self
-            .sign(Request::delete(&url))
+        let mut req = Request::delete(&url)
             .body(AsyncBody::Empty)
             .map_err(new_request_build_error)?;
 
+        req = self.sign(req);
+
+        self.client.send(req).await
+    }
+
+    pub async fn gdrive_upload_simple_request(
+        &self,
+        path: &str,
+        size: u64,
+        body: bytes::Bytes,
+    ) -> Result<Response<IncomingAsyncBody>> {
+        let parent = self.ensure_parent_path(path).await?;
+
+        let url = 
"https://www.googleapis.com/upload/drive/v3/files?uploadType=multipart";;
+
+        let file_name = path.split('/').filter(|&x| 
!x.is_empty()).last().unwrap();
+
+        let metadata = &json!({
+            "name": file_name,
+            "parents": [parent],
+        });
+
+        let req = Request::post(url).header("X-Upload-Content-Length", size);
+
+        let multipart = Multipart::new()
+            .part(
+                FormDataPart::new("metadata")
+                    .header(
+                        header::CONTENT_TYPE,
+                        "application/json; charset=UTF-8".parse().unwrap(),
+                    )
+                    .content(metadata.to_string()),
+            )
+            .part(
+                FormDataPart::new("file")
+                    .header(
+                        header::CONTENT_TYPE,
+                        "application/octet-stream".parse().unwrap(),
+                    )
+                    .content(body),
+            );
+
+        let mut req = multipart.apply(req)?;
+
+        req = self.sign(req);
+
         self.client.send(req).await
     }
 
-    fn sign(&self, mut req: Builder) -> Builder {
+    pub async fn gdrive_upload_overwrite_simple_request(
+        &self,
+        file_id: &str,
+        size: u64,
+        body: bytes::Bytes,
+    ) -> Result<Response<IncomingAsyncBody>> {
+        let url = format!(
+            
"https://www.googleapis.com/upload/drive/v3/files/{}?uploadType=media";,
+            file_id
+        );
+
+        let mut req = Request::patch(url)
+            .header(header::CONTENT_TYPE, "application/octet-stream")
+            .header(header::CONTENT_LENGTH, size)
+            .header("X-Upload-Content-Length", size)
+            .body(AsyncBody::Bytes(body))
+            .map_err(new_request_build_error)?;
+
+        req = self.sign(req);
+
+        self.client.send(req).await
+    }
+
+    fn sign<T>(&self, mut req: Request<T>) -> Request<T> {
         let auth_header_content = format!("Bearer {}", self.access_token);
-        req = req.header(header::AUTHORIZATION, auth_header_content);
+        req.headers_mut().insert(
+            header::AUTHORIZATION,
+            auth_header_content
+                .parse()
+                .expect("failed to parse auth header"),
+        );
         req
     }
 }
 
+// This is the file struct returned by the Google Drive API.
+// This is a complex struct, but we only add the fields we need.
 // refer to 
https://developers.google.com/drive/api/reference/rest/v3/files#File
 #[derive(Deserialize)]
-struct GdriveFile {
-    id: String,
+#[serde(rename_all = "camelCase")]
+pub struct GdriveFile {
+    pub mime_type: String,
+    pub id: String,
+    pub name: String,
+    #[serde(default)]
+    pub size: String,

Review Comment:
   If the field could be empty, we should use `Option<String>` here.



##########
core/src/services/gdrive/core.rs:
##########
@@ -190,29 +326,112 @@ impl GdriveCore {
             self.get_file_id_by_path(path).await?
         );
 
-        let req = self
-            .sign(Request::delete(&url))
+        let mut req = Request::delete(&url)
             .body(AsyncBody::Empty)
             .map_err(new_request_build_error)?;
 
+        req = self.sign(req);
+
+        self.client.send(req).await
+    }
+
+    pub async fn gdrive_upload_simple_request(
+        &self,
+        path: &str,
+        size: u64,
+        body: bytes::Bytes,
+    ) -> Result<Response<IncomingAsyncBody>> {
+        let parent = self.ensure_parent_path(path).await?;
+
+        let url = 
"https://www.googleapis.com/upload/drive/v3/files?uploadType=multipart";;
+
+        let file_name = path.split('/').filter(|&x| 
!x.is_empty()).last().unwrap();
+
+        let metadata = &json!({
+            "name": file_name,
+            "parents": [parent],
+        });
+
+        let req = Request::post(url).header("X-Upload-Content-Length", size);
+
+        let multipart = Multipart::new()
+            .part(
+                FormDataPart::new("metadata")
+                    .header(
+                        header::CONTENT_TYPE,
+                        "application/json; charset=UTF-8".parse().unwrap(),
+                    )
+                    .content(metadata.to_string()),
+            )
+            .part(
+                FormDataPart::new("file")
+                    .header(
+                        header::CONTENT_TYPE,
+                        "application/octet-stream".parse().unwrap(),
+                    )
+                    .content(body),
+            );
+
+        let mut req = multipart.apply(req)?;
+
+        req = self.sign(req);
+
         self.client.send(req).await
     }
 
-    fn sign(&self, mut req: Builder) -> Builder {
+    pub async fn gdrive_upload_overwrite_simple_request(
+        &self,
+        file_id: &str,
+        size: u64,
+        body: bytes::Bytes,
+    ) -> Result<Response<IncomingAsyncBody>> {
+        let url = format!(
+            
"https://www.googleapis.com/upload/drive/v3/files/{}?uploadType=media";,
+            file_id
+        );
+
+        let mut req = Request::patch(url)
+            .header(header::CONTENT_TYPE, "application/octet-stream")
+            .header(header::CONTENT_LENGTH, size)
+            .header("X-Upload-Content-Length", size)
+            .body(AsyncBody::Bytes(body))
+            .map_err(new_request_build_error)?;
+
+        req = self.sign(req);
+
+        self.client.send(req).await
+    }
+
+    fn sign<T>(&self, mut req: Request<T>) -> Request<T> {
         let auth_header_content = format!("Bearer {}", self.access_token);
-        req = req.header(header::AUTHORIZATION, auth_header_content);
+        req.headers_mut().insert(
+            header::AUTHORIZATION,
+            auth_header_content
+                .parse()
+                .expect("failed to parse auth header"),

Review Comment:
   `access_token` is provided by users so it's possible to be invalid.
   
   Thus, our sign API should be `sign<T>(&self, req: &mut Request<T>) -> 
Result<()>`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to