helxsz commented on issue #9280:
URL:
https://github.com/apache/arrow-datafusion/issues/9280#issuecomment-1959110383
I used `object_store` with S3, which works well.
```
object_store = { version = "0.9.0", features =["aws"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion", branch = "main", features = ["backtrace"] }
datafusion-common = { version = "35.0.0" }
```
```
/// Queries the Parquet files under `s3://test/test_write/` through DataFusion
/// and prints the result.
///
/// Builds an S3 client from the `DEFAULT_S3_*` constants, wraps it in a
/// `LimitStore`, registers it for the `s3://test` URL, registers the
/// `test_write/` prefix as the listing table `my_table`, then runs
/// `SELECT * FROM my_table` and shows the rows.
///
/// # Errors
///
/// Returns the first error raised while building the store, parsing the
/// store URL, registering the listing table, running the query, or
/// displaying the results.
async fn get_data_multiple_parquet_s3() -> Result<(), Box<dyn std::error::Error>> {
    const MAX_OBJECT_STORE_REQUESTS: usize = 1000;

    // create local execution context
    let ctx = SessionContext::new();

    // NOTE(review): the client is built for DEFAULT_S3_BUCKET while the URLs
    // below hard-code the bucket "test" -- presumably the same bucket; confirm.
    let s3 = AmazonS3Builder::new()
        .with_bucket_name(DEFAULT_S3_BUCKET)
        //.with_region(DEFAULT_S3_REGION)
        .with_access_key_id(DEFAULT_S3_ACCESS_KEY)
        .with_secret_access_key(DEFAULT_S3_SECRET_KEY)
        .with_endpoint(DEFAULT_S3_URL)
        .with_allow_http(true)
        .build()?;
    // Wrap the client in a `LimitStore` configured with MAX_OBJECT_STORE_REQUESTS.
    let s3 = LimitStore::new(s3, MAX_OBJECT_STORE_REQUESTS);

    let bucket_name = "test";
    let path = format!("s3://{bucket_name}");
    // Propagate a malformed URL as an error instead of panicking.
    let s3_url = Url::parse(&path)?;
    ctx.runtime_env().register_object_store(&s3_url, Arc::new(s3));

    // Configure listing options
    let out_path = format!("s3://{bucket_name}/test_write/");
    let file_format = ParquetFormat::default().with_enable_pruning(Some(true));
    let listing_options =
        ListingOptions::new(Arc::new(file_format)).with_file_extension(".parquet");

    // Register a listing table - this will use all files in the directory as
    // data sources for the query
    ctx.register_listing_table("my_table", &out_path, listing_options, None, None)
        .await?;
    println!("get_data_multiple_parquet_s3 。。。");

    // execute the query
    let df = ctx
        .sql(
            "SELECT * \
             FROM my_table \
             ",
        )
        .await?;

    // print the results
    df.show().await?;
    Ok(())
}
```
However, the same approach with the local file system and an absolute path
does not work; it fails with this error:
called `Result::unwrap()` on an `Err` value: ObjectStore(NotFound { path:
"/Users/Desktop/rust/rustapi-master/resources/user", source: Custom { kind:
NotFound, error: "is directory" } })
at this call:
```
// Registers the local `user` directory as the listing table `my_table`.
// The trailing `.unwrap()` is what turns the returned `Err` into the panic
// quoted above.
// NOTE(review): the `file://` URL has no trailing `/`; presumably DataFusion
// then treats it as a single file rather than a directory, which would match
// the "is directory" error -- confirm.
ctx.register_listing_table(
"my_table",
&format!("file://{}","/Users/Desktop/rust/rustapi-master/resources/user"),
listing_options,
None,
None,
)
.await
.unwrap();
```
Full code for the local file system case:
```
/// Attempts to query the Parquet files in a local directory through DataFusion.
///
/// Registers a `LocalFileSystem` store for the `file://` URL of the `user`
/// resources directory, registers that same URL as the listing table
/// `my_table`, then runs `SELECT * FROM my_table LIMIT 1` and prints the row.
///
/// # Errors
///
/// Returns an error if the URL cannot be parsed, or if running the query or
/// displaying its result fails.
///
/// # Panics
///
/// Panics if registering the listing table fails, because that result is
/// unwrapped rather than propagated.
async fn get_listing_file() -> Result<(), Box<dyn std::error::Error>> {
    use object_store::local::LocalFileSystem;

    // NOTE(review): this URL has no trailing `/`; presumably DataFusion then
    // treats it as a single file rather than a directory, which would match
    // the reported `NotFound ... "is directory"` error -- confirm.
    let table_url = format!("file://{}", "/Users/Desktop/rust/rustapi-master/resources/user");

    // Local execution context backed by the default local file system store.
    let ctx = SessionContext::new();
    let store = Arc::new(LocalFileSystem::default());
    let store_url = url::Url::parse(&table_url)?;
    ctx.runtime_env().register_object_store(&store_url, store);

    // Listing options: Parquet with pruning enabled, `.parquet` files only.
    // Partition columns and statistics collection are left at their defaults.
    let parquet = ParquetFormat::default().with_enable_pruning(Some(true));
    let listing_options = ListingOptions::new(Arc::new(parquet)).with_file_extension(".parquet");

    // Register the listing table; every matching file in the directory is
    // meant to become a data source for the query.
    let registration = ctx
        .register_listing_table("my_table", &table_url, listing_options, None, None)
        .await;
    registration.unwrap();

    // Run the query and print what comes back.
    let query = "SELECT * FROM my_table LIMIT 1";
    let df = ctx.sql(query).await?;
    df.show().await?;

    Ok(())
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]