This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new a8419ab  feat: support picking up storage options from `HOODIE_ENV_` 
env vars (#410)
a8419ab is described below

commit a8419ab5f53306fd5df78724001e06d59ee30125
Author: Yunchi Pang <[email protected]>
AuthorDate: Sat Sep 27 12:32:56 2025 -0700

    feat: support picking up storage options from `HOODIE_ENV_` env vars (#410)
    
    
    ---------
    
    Signed-off-by: Yunchi Pang <[email protected]>
    Co-authored-by: Shiyan Xu <[email protected]>
---
 crates/core/src/table/builder.rs | 96 ++++++++++++++++++++++++++++++++++------
 1 file changed, 83 insertions(+), 13 deletions(-)

diff --git a/crates/core/src/table/builder.rs b/crates/core/src/table/builder.rs
index 8b5bf92..d6b61b4 100644
--- a/crates/core/src/table/builder.rs
+++ b/crates/core/src/table/builder.rs
@@ -19,7 +19,6 @@
 
 use paste::paste;
 use std::collections::HashMap;
-use std::env;
 use std::path::PathBuf;
 use std::sync::Arc;
 
@@ -159,10 +158,13 @@ impl OptionResolver {
     /// env vars, and global Hudi configs. The precedence order is as follows:
     ///
     /// 1. hoodie.properties
-    /// 2. Explicit options provided by the user
-    /// 3. Generic options provided by the user
-    /// 4. Env vars
-    /// 5. Global Hudi configs
+    /// 2. User-provided options
+    ///    - Explicit Hudi options provided by the user
+    ///    - Generic options provided by the user
+    /// 3. Env vars for storage options
+    ///    - With HOODIE_ENV_ prefix
+    ///    - Cloud storage options specified via env vars
+    /// 4. Global Hudi configs
     ///
     /// [note] Error may occur when 1 and 2 have conflicts.
     pub async fn resolve_options(&mut self) -> Result<()> {
@@ -170,7 +172,7 @@ impl OptionResolver {
 
         // If any user-provided options are intended for cloud storage and in 
uppercase,
         // convert them to lowercase. This is to allow `object_store` to pick 
them up.
-        self.resolve_cloud_env_vars();
+        self.resolve_env_vars();
 
         // At this point, we have resolved the storage options needed for 
accessing the storage layer.
         // We can now resolve the hudi options
@@ -197,14 +199,27 @@ impl OptionResolver {
         extend_if_absent(&mut self.storage_options, &generic_other_opts)
     }
 
-    fn resolve_cloud_env_vars(&mut self) {
-        for (key, value) in env::vars() {
-            if Storage::CLOUD_STORAGE_PREFIXES
+    /// Resolve env vars for keys starting with `HOODIE_ENV_` or cloud storage 
options.
+    ///
+    /// For example: `HOODIE_ENV_fs_DOT_s3a_DOT_access_DOT_key` will be 
converted to `fs.s3a.access.key`.
+    ///
+    /// Also supports standard cloud storage env vars like 
`AWS_ACCESS_KEY_ID`, `GOOGLE_APPLICATION_CREDENTIALS`, 
`AZURE_STORAGE_ACCOUNT_KEY`, etc.
+    ///
+    /// [note] All keys will be converted to lowercase.
+    fn resolve_env_vars(&mut self) {
+        for (env_key, env_value) in std::env::vars() {
+            let lower_option_key = if let Some(stripped) = 
env_key.strip_prefix("HOODIE_ENV_") {
+                Some(stripped.replace("_DOT_", ".").to_ascii_lowercase())
+            } else if Storage::CLOUD_STORAGE_PREFIXES
                 .iter()
-                .any(|prefix| key.starts_with(prefix))
-                && 
!self.storage_options.contains_key(&key.to_ascii_lowercase())
+                .any(|prefix| env_key.starts_with(prefix))
             {
-                self.storage_options.insert(key.to_ascii_lowercase(), value);
+                Some(env_key.to_ascii_lowercase())
+            } else {
+                None
+            };
+            if let Some(key) = lower_option_key {
+                self.storage_options.entry(key).or_insert(env_value);
             }
         }
     }
@@ -247,7 +262,7 @@ impl OptionResolver {
         options: &mut HashMap<String, String>,
         storage: Arc<Storage>,
     ) -> Result<()> {
-        let global_config_path = env::var(HUDI_CONF_DIR)
+        let global_config_path = std::env::var(HUDI_CONF_DIR)
             .map(PathBuf::from)
             .unwrap_or_else(|_| PathBuf::from("/etc/hudi/conf"))
             .join("hudi-defaults.conf");
@@ -368,4 +383,59 @@ mod tests {
             "s3.us-east-1.amazonaws.com"
         );
     }
+
+    #[test]
+    fn test_resolve_cloud_env_vars_with_hudi_style() {
+        std::env::remove_var("HOODIE_ENV_fs_DOT_s3a_DOT_access_DOT_key");
+        std::env::remove_var("HOODIE_ENV_fs_DOT_s3a_DOT_secret_DOT_key");
+
+        std::env::set_var(
+            "HOODIE_ENV_fs_DOT_s3a_DOT_access_DOT_key",
+            "test_access_key",
+        );
+        std::env::set_var(
+            "HOODIE_ENV_fs_DOT_s3a_DOT_secret_DOT_key",
+            "test_secret_key",
+        );
+
+        let mut resolver = OptionResolver::new("test_uri");
+        resolver.resolve_env_vars();
+
+        assert_eq!(
+            resolver.storage_options.get("fs.s3a.access.key"),
+            Some(&"test_access_key".to_string())
+        );
+        assert_eq!(
+            resolver.storage_options.get("fs.s3a.secret.key"),
+            Some(&"test_secret_key".to_string())
+        );
+
+        std::env::remove_var("HOODIE_ENV_fs_DOT_s3a_DOT_access_DOT_key");
+        std::env::remove_var("HOODIE_ENV_fs_DOT_s3a_DOT_secret_DOT_key");
+    }
+
+    #[test]
+    fn test_resolve_cloud_env_vars_precedence() {
+        std::env::remove_var("HOODIE_ENV_fs_DOT_s3a_DOT_access_DOT_key");
+        std::env::remove_var("AWS_ACCESS_KEY_ID");
+
+        // Test that manually set storage options take precedence over env vars
+        std::env::set_var("HOODIE_ENV_fs_DOT_s3a_DOT_access_DOT_key", 
"env_access_key");
+        std::env::set_var("AWS_ACCESS_KEY_ID", "standard_access_key");
+
+        let mut resolver = OptionResolver::new("test_uri");
+        resolver.storage_options.insert(
+            "fs.s3a.access.key".to_string(),
+            "manual_access_key".to_string(),
+        );
+        resolver.resolve_env_vars();
+
+        assert_eq!(
+            resolver.storage_options.get("fs.s3a.access.key"),
+            Some(&"manual_access_key".to_string())
+        );
+
+        std::env::remove_var("HOODIE_ENV_fs_DOT_s3a_DOT_access_DOT_key");
+        std::env::remove_var("AWS_ACCESS_KEY_ID");
+    }
 }

Reply via email to