comphead commented on code in PR #3802:
URL: https://github.com/apache/datafusion-comet/pull/3802#discussion_r3060602315
##########
native/core/src/parquet/parquet_support.rs:
##########
@@ -467,17 +520,45 @@ pub(crate) fn prepare_object_store_with_configs(
&url[url::Position::BeforeHost..url::Position::AfterPort],
);
- let (object_store, object_store_path): (Box<dyn ObjectStore>, Path) = if
is_hdfs_scheme {
- create_hdfs_object_store(&url)
- } else if scheme == "s3" {
- objectstore::s3::create_store(&url, object_store_configs,
Duration::from_secs(300))
- } else {
- parse_url(&url)
- }
- .map_err(|e| ExecutionError::GeneralError(e.to_string()))?;
+ let config_hash = hash_object_store_configs(object_store_configs);
+ let cache_key = (url_key.clone(), config_hash);
+
+ // Check the cache first to reuse existing object store instances.
+ // This enables HTTP connection pooling and avoids redundant DNS lookups.
+ let cached = {
+ let cache = object_store_cache()
+ .read()
+ .map_err(|e| ExecutionError::GeneralError(format!("Object store
cache error: {e}")))?;
+ cache.get(&cache_key).cloned()
+ };
+
+ let (object_store, object_store_path): (Arc<dyn ObjectStore>, Path) =
+ if let Some(store) = cached {
+ debug!("Reusing cached object store for {url_key}");
+ let path = Path::from_url_path(url.path())
+ .map_err(|e| ExecutionError::GeneralError(e.to_string()))?;
+ (store, path)
+ } else {
+ debug!("Creating new object store for {url_key}");
+ let (store, path): (Box<dyn ObjectStore>, Path) = if
is_hdfs_scheme {
+ create_hdfs_object_store(&url)
+ } else if scheme == "s3" {
+ objectstore::s3::create_store(&url, object_store_configs,
Duration::from_secs(300))
+ } else {
+ parse_url(&url)
+ }
+ .map_err(|e| ExecutionError::GeneralError(e.to_string()))?;
+
+ let store: Arc<dyn ObjectStore> = Arc::from(store);
+ // Insert into cache
+ if let Ok(mut cache) = object_store_cache().write() {
Review Comment:
Why is the `write()` here?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]