This is an automated email from the ASF dual-hosted git repository.
xuanwo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opendal.git
The following commit(s) were added to refs/heads/main by this push:
new 577912dfe refactor: remove uuid dependency when creating a temp path
(#6324)
577912dfe is described below
commit 577912dfeaa2fcdb0de794607d8c45bf76cb419b
Author: Erick Guan <[email protected]>
AuthorDate: Sun Jun 22 05:24:03 2025 +0200
refactor: remove uuid dependency when creating a temp path (#6324)
---
core/src/raw/path.rs | 69 +++++++++++++++++++++++++++++++++++++++
core/src/services/fs/backend.rs | 20 ------------
core/src/services/fs/core.rs | 31 +++++-------------
core/src/services/ftp/backend.rs | 9 +----
core/src/services/hdfs/backend.rs | 18 +++-------
5 files changed, 83 insertions(+), 64 deletions(-)
diff --git a/core/src/raw/path.rs b/core/src/raw/path.rs
index c49c94e3e..55733f47c 100644
--- a/core/src/raw/path.rs
+++ b/core/src/raw/path.rs
@@ -16,6 +16,7 @@
// under the License.
use crate::*;
+use std::hash::{BuildHasher, Hasher};
/// build_abs_path will build an absolute path with root.
///
@@ -210,6 +211,49 @@ pub fn get_parent(path: &str) -> &str {
}
}
+// Sets the size of random generated postfix for random file names
+const RANDOM_TMP_PATH_POSTFIX_LENGTH: usize = 8;
+// Allowed characters for choices in a random-generated char
+const CHARS: &[u8] =
b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+const CHARS_LENGTH: u64 = CHARS.len() as u64;
+
+/// Build a temporary path of a file path.
+///
+/// `build_tmp_path_of` appends a dot following a random generated postfix.
+/// Don't use it with a path to a folder.
+#[inline]
+pub fn build_tmp_path_of(path: &str) -> String {
+ let name = get_basename(path);
+ let mut buf = String::with_capacity(name.len() +
RANDOM_TMP_PATH_POSTFIX_LENGTH);
+ buf.push_str(name);
+ buf.push('.');
+
+ // Uses `std` for the random number generator instead of external crates.
+ //
+ // `RandomState::new` builds a hasher that generates a different random
sequence each time.
+ // Calling `RandomState::new` each time produces a `RandomState` from
+ // a per-thread pseudo seed pools that Rust manages.
+ // The default hasher, `SipHasher13`, has some notable properties:
+ //
+ // 1. `fastrand` is roughly 10x faster than `SipHasher13`, but it adds an
extra dependency.
+ // 2. While `fastrand` is faster, `SipHasher13` is fast enough for our
needs since
+ // we're only generating a few characters.
+ // 3. This is not a cryptographically secure pseudorandom number generator
(CSPRNG).
+ //
+ // If we need stronger randomness in the future, we can:
+ // 1. Increase the output length.
+ // 2. Use the `getrandom` crate to source randomness from the OS.
+ for _ in 0..RANDOM_TMP_PATH_POSTFIX_LENGTH {
+ let random = std::collections::hash_map::RandomState::new()
+ .build_hasher()
+ .finish();
+ let choice: usize = (random % CHARS_LENGTH).try_into().unwrap();
+ buf.push(CHARS[choice] as char);
+ }
+
+ buf
+}
+
/// Validate given path is match with given EntryMode.
#[inline]
pub fn validate_path(path: &str, mode: EntryMode) -> bool {
@@ -379,4 +423,29 @@ mod tests {
assert_eq!(actual, expect, "{name}")
}
}
+
+ #[test]
+ fn test_build_tmp_path_of() {
+ let cases = vec![
+ ("a file path", "example.txt", "example.txt."),
+ (
+ "a file path in a directory",
+ "folder/example.txt",
+ "example.txt.",
+ ),
+ ];
+
+ for (name, path, expect_starts_with) in cases {
+ let actual = build_tmp_path_of(path);
+ assert!(
+ actual.starts_with(expect_starts_with),
+ "{name}: got `{actual}`, but expect `{expect_starts_with}`"
+ );
+ assert_eq!(
+ actual.len(),
+ expect_starts_with.len() + 8, // See
RANDOM_TMP_PATH_POSTFIX_SIZE
+ "{name}: got `{actual}`, but expect `{expect_starts_with}`"
+ )
+ }
+ }
}
diff --git a/core/src/services/fs/backend.rs b/core/src/services/fs/backend.rs
index 08e679cd9..9f6762d6e 100644
--- a/core/src/services/fs/backend.rs
+++ b/core/src/services/fs/backend.rs
@@ -275,23 +275,3 @@ impl Access for FsBackend {
Ok(RpRename::default())
}
}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn test_tmp_file_of() {
- let cases = vec![
- ("hello.txt", "hello.txt"),
- ("/tmp/opendal.log", "opendal.log"),
- ("/abc/def/hello.parquet", "hello.parquet"),
- ];
-
- for (path, expected_prefix) in cases {
- let tmp_file = tmp_file_of(path);
- assert!(tmp_file.len() > expected_prefix.len());
- assert!(tmp_file.starts_with(expected_prefix));
- }
- }
-}
diff --git a/core/src/services/fs/core.rs b/core/src/services/fs/core.rs
index 40cb50a63..eb29b5e4c 100644
--- a/core/src/services/fs/core.rs
+++ b/core/src/services/fs/core.rs
@@ -21,7 +21,6 @@ use std::path::PathBuf;
use std::sync::Arc;
use chrono::DateTime;
-use uuid::Uuid;
use super::error::*;
use crate::raw::*;
@@ -118,28 +117,24 @@ impl FsCore {
path: &str,
op: &OpWrite,
) -> Result<(PathBuf, Option<PathBuf>)> {
- let (target_path, tmp_path) = if let Some(atomic_write_dir) =
&self.atomic_write_dir {
+ if let Some(atomic_write_dir) = &self.atomic_write_dir {
let target_path = self.ensure_write_abs_path(&self.root,
path).await?;
let tmp_path = self
- .ensure_write_abs_path(atomic_write_dir, &tmp_file_of(path))
+ .ensure_write_abs_path(atomic_write_dir,
&build_tmp_path_of(path))
.await?;
// If the target file exists, we should append to the end of it
directly.
- if op.append()
+ let should_append = op.append()
&& tokio::fs::try_exists(&target_path)
.await
- .map_err(new_std_io_error)?
- {
- (target_path, None)
- } else {
- (target_path, Some(tmp_path))
- }
+ .map_err(new_std_io_error)?;
+ let tmp_path = should_append.then_some(tmp_path);
+
+ Ok((target_path, tmp_path))
} else {
let p = self.ensure_write_abs_path(&self.root, path).await?;
- (p, None)
- };
-
- Ok((target_path, tmp_path))
+ Ok((p, None))
+ }
}
pub async fn fs_write(
@@ -236,11 +231,3 @@ impl FsCore {
Ok(())
}
}
-
-#[inline]
-pub fn tmp_file_of(path: &str) -> String {
- let name = get_basename(path);
- let uuid = Uuid::new_v4().to_string();
-
- format!("{name}.{uuid}")
-}
diff --git a/core/src/services/ftp/backend.rs b/core/src/services/ftp/backend.rs
index 960efee76..24324e072 100644
--- a/core/src/services/ftp/backend.rs
+++ b/core/src/services/ftp/backend.rs
@@ -29,7 +29,6 @@ use suppaftp::types::Response;
use suppaftp::FtpError;
use suppaftp::Status;
use tokio::sync::OnceCell;
-use uuid::Uuid;
use super::core::FtpCore;
use super::delete::FtpDeleter;
@@ -300,13 +299,7 @@ impl Access for FtpBackend {
}
}
- let tmp_path = if op.append() {
- None
- } else {
- let uuid = Uuid::new_v4().to_string();
- Some(format!("{}.{}", path, uuid))
- };
-
+ let tmp_path = op.append().then_some(build_tmp_path_of(path));
let w = FtpWriter::new(ftp_stream, path.to_string(), tmp_path);
Ok((RpWrite::new(), w))
diff --git a/core/src/services/hdfs/backend.rs
b/core/src/services/hdfs/backend.rs
index bd3275991..53136ef33 100644
--- a/core/src/services/hdfs/backend.rs
+++ b/core/src/services/hdfs/backend.rs
@@ -23,7 +23,6 @@ use std::path::PathBuf;
use std::sync::Arc;
use log::debug;
-use uuid::Uuid;
use super::delete::HdfsDeleter;
use super::lister::HdfsLister;
@@ -207,14 +206,6 @@ impl Builder for HdfsBuilder {
}
}
-#[inline]
-fn tmp_file_of(path: &str) -> String {
- let name = get_basename(path);
- let uuid = Uuid::new_v4().to_string();
-
- format!("{name}.{uuid}")
-}
-
/// Backend for hdfs services.
#[derive(Debug, Clone)]
pub struct HdfsBackend {
@@ -309,11 +300,10 @@ impl Access for HdfsBackend {
let should_append = op.append() && target_exists;
let tmp_path =
self.atomic_write_dir.as_ref().and_then(|atomic_write_dir| {
// If the target file exists, we should append to the end of it
directly.
- if should_append {
- None
- } else {
- Some(build_rooted_abs_path(atomic_write_dir,
&tmp_file_of(path)))
- }
+ should_append.then_some(build_rooted_abs_path(
+ atomic_write_dir,
+ &build_tmp_path_of(path),
+ ))
});
if !target_exists {