helxsz commented on issue #9280:
URL: 
https://github.com/apache/arrow-datafusion/issues/9280#issuecomment-1959110383

   I used `object_store` with S3, which works well.
   
   ```
   object_store = { version = "0.9.0", features =["aws"] }
   datafusion = { git = "https://github.com/apache/arrow-datafusion", branch = "main", features = ["backtrace"] }
   datafusion-common = { version = "35.0.0" }
   ```
   
   ```
   /// Registers the Parquet files under `s3://test/test_write/` as the listing
   /// table `my_table`, runs `SELECT *` over it and prints the result.
   ///
   /// # Errors
   ///
   /// Returns an error if the S3 client cannot be built, the bucket URL cannot
   /// be parsed, the listing table cannot be registered, or the query fails.
   async fn get_data_multiple_parquet_s3() -> Result<(), Box<dyn std::error::Error>> {
       // Limit handed to `LimitStore` below.
       const MAX_OBJECT_STORE_REQUESTS: usize = 1000;

       // Create the local execution context.
       let ctx = SessionContext::new();

       // Region is intentionally left unset; a custom endpoint is used instead.
       let s3 = AmazonS3Builder::new()
           .with_bucket_name(DEFAULT_S3_BUCKET)
           .with_access_key_id(DEFAULT_S3_ACCESS_KEY)
           .with_secret_access_key(DEFAULT_S3_SECRET_KEY)
           .with_endpoint(DEFAULT_S3_URL)
           .with_allow_http(true)
           .build()?;
       let s3 = LimitStore::new(s3, MAX_OBJECT_STORE_REQUESTS);

       // Register the store under the bucket URL so `s3://test/...` paths resolve to it.
       let bucket_name = "test";
       let s3_url = Url::parse(&format!("s3://{bucket_name}"))?;
       ctx.runtime_env().register_object_store(&s3_url, Arc::new(s3));

       // Configure listing options.
       let out_path = format!("s3://{bucket_name}/test_write/");
       let file_format = ParquetFormat::default().with_enable_pruning(Some(true));
       let listing_options =
           ListingOptions::new(Arc::new(file_format)).with_file_extension(".parquet");

       // Register a listing table - this will use all files in the directory
       // as data sources for the query.
       ctx.register_listing_table("my_table", &out_path, listing_options, None, None)
           .await?;

       println!("get_data_multiple_parquet_s3 。。。");

       // Execute the query.
       let df = ctx
           .sql("SELECT * \
           FROM my_table \
           ",
           )
           .await?;

       // Print the results.
       df.show().await?;

       Ok(())
   }
   ```
   However, trying the same with the local file system and an absolute path does not work; it fails with this error:
   
   called `Result::unwrap()` on an `Err` value: ObjectStore(NotFound { path: 
"/Users/Desktop/rust/rustapi-master/resources/user", source: Custom { kind: 
NotFound, error: "is directory" } })
   
   on this line:
   
   ```
     // NOTE(review): the table URL below has no trailing `/`. DataFusion's
     // `ListingTableUrl` appears to treat such a URL as a single object rather
     // than a directory, which would explain the "is directory" NotFound
     // error - confirm against the DataFusion documentation.
     ctx.register_listing_table(
         "my_table",
         
&format!("file://{}","/Users/Desktop/rust/rustapi-master/resources/user"),
         listing_options,
         None,
         None,
     )
     .await
     .unwrap();
   ```
   
   Full code for the local file system case:
   
   ```
   /// Registers the Parquet files in a local directory as the listing table
   /// `my_table`, selects one row from it and prints the result.
   ///
   /// # Errors
   ///
   /// Returns an error if the directory URL cannot be parsed, the listing
   /// table cannot be registered, or the query fails.
   async fn get_listing_file() -> Result<(), Box<dyn std::error::Error>> {
       use object_store::local::LocalFileSystem;

       // The trailing `/` is required: a table URL that does not end in `/` is
       // treated as a single object, and looking up a directory that way fails
       // with `ObjectStore(NotFound { .. error: "is directory" })`.
       const TABLE_DIR_URL: &str = "file:///Users/Desktop/rust/rustapi-master/resources/user/";

       // Create the local execution context.
       let ctx = SessionContext::new();

       // Register the local file system for `file://` URLs.
       let url = url::Url::parse(TABLE_DIR_URL)?;
       ctx.runtime_env()
           .register_object_store(&url, Arc::new(LocalFileSystem::default()));

       // Configure listing options.
       let file_format = ParquetFormat::default().with_enable_pruning(Some(true));
       let listing_options =
           ListingOptions::new(Arc::new(file_format)).with_file_extension(".parquet");

       // Register a listing table - this will use all files in the directory
       // as data sources for the query.
       ctx.register_listing_table("my_table", TABLE_DIR_URL, listing_options, None, None)
           .await?;

       // Execute the query.
       let df = ctx
           .sql("SELECT * \
           FROM my_table \
           LIMIT 1",
           )
           .await?;

       // Print the results.
       df.show().await?;

       Ok(())
   }
   ```
   
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to