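Is `iceberg-test/warehouse/test/metadata` a Parquet file? I only ask because it has no extension. The commented-out FileSystemDatasetFactory call only accesses bucket_uri, so it could succeed even if the metadata file did not exist. ParquetDatasetFactory, by contrast, has to open the metadata file at the given path, which is where the "Path does not exist" error comes from.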
On Fri, Apr 8, 2022 at 1:48 AM 1057445597 <1057445...@qq.com.invalid> wrote: > > I want to use ParquetDatasetFactory to create a dataset for S3, but it failed! The > error message is as follows: > > > /build/apache-arrow-7.0.0/cpp/src/arrow/result.cc:28: ValueOrDie called on an > error: IOError: Path does not exist 'iceberg-test/warehouse/test/metadata' > /lib/x86_64-linux-gnu/libarrow.so.700(+0x10430bb)[0x7f4ee6fe50bb] > /lib/x86_64-linux-gnu/libarrow.so.700(_ZN5arrow4util8ArrowLogD1Ev+0xed)[0x7f4ee6fe52fd] > > /lib/x86_64-linux-gnu/libarrow.so.700(_ZN5arrow8internal17InvalidValueOrDieERKNS_6StatusE+0x17e)[0x7f4ee7104a2e] > ./example(+0xd97d)[0x564087f3e97d] ./example(+0x8bc2)[0x564087f39bc2] > ./example(+0x94c8)[0x564087f3a4c8] ./example(+0x9fb4)[0x564087f3afb4] > /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf3)[0x7f4ee572b0b3] > ./example(+0x69fe)[0x564087f379fe] Aborted (core dumped) > > > In the following code snippet, there is a commented-out line that uses > FileSystemDatasetFactory to create the dataset, and it works well. Can't a dataset be > created through a ParquetDatasetFactory? 
> > > std::shared_ptr<ds::Dataset> GetDatasetFromS3(const std::string& > access_key, > const std::string& > secret_key, > const std::string& > endpoint_override, > const std::string& > bucket_uri) { > EnsureS3Initialized(); > > S3Options s3Options = S3Options::FromAccessKey(access_key, secret_key); > s3Options.endpoint_override = endpoint_override; > s3Options.scheme = "http"; > > std::shared_ptr<S3FileSystem> s3fs = > S3FileSystem::Make(s3Options).ValueOrDie(); > > std::string path; > std::stringstream ss; > ss << "s3://" << access_key << ":" << secret_key > << "@" << K_METADATA_PATH > << "?scheme=http&endpoint_override=" << endpoint_override; > auto fs = arrow::fs::FileSystemFromUri(ss.str(), &path).ValueOrDie(); > // auto fileInfo = fs->GetFileInfo().ValueOrDie(); > > auto format = std::make_shared<ParquetFileFormat>(); > > // FileSelector selector; > // selector.base_dir = bucket_uri; > > // FileSystemFactoryOptions options; > ds::ParquetFactoryOptions options; > > std::string metadata_path = bucket_uri; > > ds::FileSource source(bucket_uri, s3fs); > //auto factory = ds::ParquetDatasetFactory::Make(source, bucket_uri, fs, > format, options).ValueOrDie(); > auto factory = ds::ParquetDatasetFactory::Make(path, fs, format, > options).ValueOrDie(); > > //auto factory = FileSystemDatasetFactory::Make(s3fs, selector, format, > options).ValueOrDie(); > return factory->Finish().ValueOrDie(); > }