[GitHub] [arrow] fsaintjacques commented on a change in pull request #7073: ARROW-8318: [C++][Dataset] Construct FileSystemDataset from fragments
fsaintjacques commented on a change in pull request #7073: URL: https://github.com/apache/arrow/pull/7073#discussion_r418564299 ## File path: cpp/src/arrow/dataset/file_base.cc ## @@ -83,131 +83,67 @@ Result FileFragment::Scan(std::shared_ptr options FileSystemDataset::FileSystemDataset(std::shared_ptr schema, std::shared_ptr root_partition, - std::shared_ptr format, - std::shared_ptr filesystem, - fs::PathForest forest, - ExpressionVector file_partitions) + std::vector> fragments) : Dataset(std::move(schema), std::move(root_partition)), - format_(std::move(format)), - filesystem_(std::move(filesystem)), - forest_(std::move(forest)), - partitions_(std::move(file_partitions)) { - DCHECK_EQ(static_cast(forest_.size()), partitions_.size()); -} - -Result> FileSystemDataset::Make( -std::shared_ptr schema, std::shared_ptr root_partition, -std::shared_ptr format, std::shared_ptr filesystem, -std::vector infos) { - ExpressionVector partitions(infos.size(), scalar(true)); - return Make(std::move(schema), std::move(root_partition), std::move(format), - std::move(filesystem), std::move(infos), std::move(partitions)); -} + fragments_(std::move(fragments)) {} Result> FileSystemDataset::Make( std::shared_ptr schema, std::shared_ptr root_partition, -std::shared_ptr format, std::shared_ptr filesystem, -std::vector infos, ExpressionVector partitions) { - ARROW_ASSIGN_OR_RAISE(auto forest, fs::PathForest::Make(std::move(infos), )); - return Make(std::move(schema), std::move(root_partition), std::move(format), - std::move(filesystem), std::move(forest), std::move(partitions)); -} +FragmentVector fragments) { + std::vector> file_fragments; + for (const auto& fragment : fragments) { +auto file_fragment = internal::checked_pointer_cast(fragment); Review comment: Resolved by changing the to vector and moving the checks at callsite. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [arrow] fsaintjacques commented on a change in pull request #7073: ARROW-8318: [C++][Dataset] Construct FileSystemDataset from fragments
fsaintjacques commented on a change in pull request #7073: URL: https://github.com/apache/arrow/pull/7073#discussion_r418556583 ## File path: cpp/src/arrow/dataset/file_base.cc ## @@ -221,42 +157,34 @@ Result> FileSystemDataset::Write( filesystem = std::make_shared(); } - std::vector files(plan.paths.size()); - ExpressionVector partition_expressions(plan.paths.size(), scalar(true)); auto task_group = scan_context->TaskGroup(); - auto partition_base_dir = fs::internal::EnsureTrailingSlash(plan.partition_base_dir); auto extension = "." + plan.format->type_name(); + FragmentVector fragments; for (size_t i = 0; i < plan.paths.size(); ++i) { const auto& op = plan.fragment_or_partition_expressions[i]; -if (util::holds_alternative>(op)) { - files[i].set_type(fs::FileType::Directory); - files[i].set_path(partition_base_dir + plan.paths[i]); +if (util::holds_alternative>(op)) { Review comment: This will be done part of ARROW-8382. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [arrow] fsaintjacques commented on a change in pull request #7073: ARROW-8318: [C++][Dataset] Construct FileSystemDataset from fragments
fsaintjacques commented on a change in pull request #7073: URL: https://github.com/apache/arrow/pull/7073#discussion_r418556583 ## File path: cpp/src/arrow/dataset/file_base.cc ## @@ -221,42 +157,34 @@ Result> FileSystemDataset::Write( filesystem = std::make_shared(); } - std::vector files(plan.paths.size()); - ExpressionVector partition_expressions(plan.paths.size(), scalar(true)); auto task_group = scan_context->TaskGroup(); - auto partition_base_dir = fs::internal::EnsureTrailingSlash(plan.partition_base_dir); auto extension = "." + plan.format->type_name(); + FragmentVector fragments; for (size_t i = 0; i < plan.paths.size(); ++i) { const auto& op = plan.fragment_or_partition_expressions[i]; -if (util::holds_alternative>(op)) { - files[i].set_type(fs::FileType::Directory); - files[i].set_path(partition_base_dir + plan.paths[i]); +if (util::holds_alternative>(op)) { Review comment: This will be part of ARROW-8382. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [arrow] fsaintjacques commented on a change in pull request #7073: ARROW-8318: [C++][Dataset] Construct FileSystemDataset from fragments
fsaintjacques commented on a change in pull request #7073: URL: https://github.com/apache/arrow/pull/7073#discussion_r418554202 ## File path: cpp/src/arrow/dataset/file_base.cc ## @@ -83,131 +83,67 @@ Result FileFragment::Scan(std::shared_ptr options FileSystemDataset::FileSystemDataset(std::shared_ptr schema, std::shared_ptr root_partition, - std::shared_ptr format, - std::shared_ptr filesystem, - fs::PathForest forest, - ExpressionVector file_partitions) + std::vector> fragments) : Dataset(std::move(schema), std::move(root_partition)), - format_(std::move(format)), - filesystem_(std::move(filesystem)), - forest_(std::move(forest)), - partitions_(std::move(file_partitions)) { - DCHECK_EQ(static_cast(forest_.size()), partitions_.size()); -} - -Result> FileSystemDataset::Make( -std::shared_ptr schema, std::shared_ptr root_partition, -std::shared_ptr format, std::shared_ptr filesystem, -std::vector infos) { - ExpressionVector partitions(infos.size(), scalar(true)); - return Make(std::move(schema), std::move(root_partition), std::move(format), - std::move(filesystem), std::move(infos), std::move(partitions)); -} + fragments_(std::move(fragments)) {} Result> FileSystemDataset::Make( std::shared_ptr schema, std::shared_ptr root_partition, -std::shared_ptr format, std::shared_ptr filesystem, -std::vector infos, ExpressionVector partitions) { - ARROW_ASSIGN_OR_RAISE(auto forest, fs::PathForest::Make(std::move(infos), )); - return Make(std::move(schema), std::move(root_partition), std::move(format), - std::move(filesystem), std::move(forest), std::move(partitions)); -} +FragmentVector fragments) { + std::vector> file_fragments; + for (const auto& fragment : fragments) { +auto file_fragment = internal::checked_pointer_cast(fragment); Review comment: derp, I actually forgot to check. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org