This is an automated email from the ASF dual-hosted git repository.
lixueclaire pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git
The following commit(s) were added to refs/heads/main by this push:
new 146eb615 feat(c++): support reading certain set of properties and auto
select FileReader or Scanner (#706)
146eb615 is described below
commit 146eb6159dc3df14f15f1ea6f91584d561b4507f
Author: Xiaokang Yang <[email protected]>
AuthorDate: Thu Jun 19 13:11:07 2025 +0800
feat(c++): support reading certain set of properties and auto select
FileReader or Scanner (#706)
---
.github/workflows/ci.yml | 1 +
cpp/benchmarks/arrow_chunk_reader_benchmark.cc | 283 ++++++++++++++++--
cpp/examples/mid_level_reader_example.cc | 140 ++++++++-
cpp/src/graphar/arrow/chunk_reader.cc | 207 ++++++++++++-
cpp/src/graphar/arrow/chunk_reader.h | 76 ++++-
cpp/src/graphar/filesystem.cc | 29 ++
cpp/src/graphar/filesystem.h | 4 +
cpp/src/graphar/fwd.h | 3 +
cpp/src/graphar/high-level/graph_reader.cc | 9 +-
cpp/src/graphar/high-level/graph_reader.h | 7 +-
cpp/test/test_arrow_chunk_reader.cc | 390 +++++++++++++++++++++++--
cpp/test/test_multi_label.cc | 5 +-
docs/libraries/cpp/getting-started.md | 2 +-
13 files changed, 1089 insertions(+), 67 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 309e37ab..b1a8ff51 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -166,6 +166,7 @@ jobs:
run: |
./graph_info_benchmark
./arrow_chunk_reader_benchmark
+ ./label_filter_benchmark
- name: Use Static Arrow
working-directory: "cpp"
diff --git a/cpp/benchmarks/arrow_chunk_reader_benchmark.cc b/cpp/benchmarks/arrow_chunk_reader_benchmark.cc
index 833224a0..24fd4fc3 100644
--- a/cpp/benchmarks/arrow_chunk_reader_benchmark.cc
+++ b/cpp/benchmarks/arrow_chunk_reader_benchmark.cc
@@ -21,14 +21,17 @@
#include "./benchmark_util.h"
#include "graphar/api/arrow_reader.h"
+#include "graphar/fwd.h"
namespace graphar {
BENCHMARK_DEFINE_F(BenchmarkFixture, CreateVertexPropertyArrowChunkReader)
(::benchmark::State& state) { // NOLINT
for (auto _ : state) {
- auto maybe_reader = VertexPropertyArrowChunkReader::Make(
- graph_info_, "person", "firstName");
+ auto gp =
+ graph_info_->GetVertexInfo("person")->GetPropertyGroup("firstName");
+ auto maybe_reader =
+ VertexPropertyArrowChunkReader::Make(graph_info_, "person", gp);
if (maybe_reader.has_error()) {
state.SkipWithError(maybe_reader.status().message().c_str());
return;
@@ -75,10 +78,10 @@ BENCHMARK_DEFINE_F(BenchmarkFixture, CreateAdjListPropertyArrowChunkReader)
}
}
-BENCHMARK_DEFINE_F(BenchmarkFixture, VertexPropertyArrowChunkReaderReadChunk)
+BENCHMARK_DEFINE_F(BenchmarkFixture, AdjListArrowChunkReaderReadChunk)
(::benchmark::State& state) { // NOLINT
- auto maybe_reader =
- VertexPropertyArrowChunkReader::Make(graph_info_, "person", "firstName");
+ auto maybe_reader = AdjListArrowChunkReader::Make(
+ graph_info_, "person", "knows", "person", AdjListType::ordered_by_source);
if (maybe_reader.has_error()) {
state.SkipWithError(maybe_reader.status().message().c_str());
return;
@@ -91,9 +94,9 @@ BENCHMARK_DEFINE_F(BenchmarkFixture, VertexPropertyArrowChunkReaderReadChunk)
}
}
-BENCHMARK_DEFINE_F(BenchmarkFixture, AdjListArrowChunkReaderReadChunk)
+BENCHMARK_DEFINE_F(BenchmarkFixture, AdjListOffsetArrowChunkReaderReadChunk)
(::benchmark::State& state) { // NOLINT
- auto maybe_reader = AdjListArrowChunkReader::Make(
+ auto maybe_reader = AdjListOffsetArrowChunkReader::Make(
graph_info_, "person", "knows", "person", AdjListType::ordered_by_source);
if (maybe_reader.has_error()) {
state.SkipWithError(maybe_reader.status().message().c_str());
@@ -107,10 +110,11 @@ BENCHMARK_DEFINE_F(BenchmarkFixture, AdjListArrowChunkReaderReadChunk)
}
}
-BENCHMARK_DEFINE_F(BenchmarkFixture, AdjListOffsetArrowChunkReaderReadChunk)
+BENCHMARK_DEFINE_F(BenchmarkFixture, AdjListPropertyArrowChunkReaderReadChunk)
(::benchmark::State& state) { // NOLINT
- auto maybe_reader = AdjListOffsetArrowChunkReader::Make(
- graph_info_, "person", "knows", "person", AdjListType::ordered_by_source);
+ auto maybe_reader = AdjListPropertyArrowChunkReader::Make(
+ graph_info_, "person", "knows", "person", "creationDate",
+ AdjListType::ordered_by_source);
if (maybe_reader.has_error()) {
state.SkipWithError(maybe_reader.status().message().c_str());
return;
@@ -123,11 +127,13 @@ BENCHMARK_DEFINE_F(BenchmarkFixture, AdjListOffsetArrowChunkReaderReadChunk)
}
}
-BENCHMARK_DEFINE_F(BenchmarkFixture, AdjListPropertyArrowChunkReaderReadChunk)
+BENCHMARK_DEFINE_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_firstGraph_AllColumns_V1)
(::benchmark::State& state) { // NOLINT
- auto maybe_reader = AdjListPropertyArrowChunkReader::Make(
- graph_info_, "person", "knows", "person", "creationDate",
- AdjListType::ordered_by_source);
+ auto gp = graph_info_->GetVertexInfo("person")->GetPropertyGroup("firstName");
+ auto maybe_reader =
+ VertexPropertyArrowChunkReader::Make(graph_info_, "person", gp);
if (maybe_reader.has_error()) {
state.SkipWithError(maybe_reader.status().message().c_str());
return;
@@ -135,7 +141,216 @@ BENCHMARK_DEFINE_F(BenchmarkFixture, AdjListPropertyArrowChunkReaderReadChunk)
auto reader = maybe_reader.value();
for (auto _ : state) {
assert(reader->seek(0).ok());
- assert(reader->GetChunk().status().ok());
+ assert(reader->GetChunk(GetChunkVersion::V1).status().ok());
+ assert(reader->next_chunk().ok());
+ }
+}
+// select one columns and internal ID column
+BENCHMARK_DEFINE_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_firstGraph_OneColumns_V1)
+(::benchmark::State& state) { // NOLINT
+ auto maybe_reader =
+ VertexPropertyArrowChunkReader::Make(graph_info_, "person", "firstName");
+ if (maybe_reader.has_error()) {
+ state.SkipWithError(maybe_reader.status().message().c_str());
+ return;
+ }
+ auto reader = maybe_reader.value();
+ for (auto _ : state) {
+ assert(reader->seek(0).ok());
+ assert(reader->GetChunk(GetChunkVersion::V1).status().ok());
+ assert(reader->next_chunk().ok());
+ }
+}
+
+// select tow columns and internal ID column
+BENCHMARK_DEFINE_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_firstGraph_TwoColumns_V1)
+(::benchmark::State& state) { // NOLINT
+ auto maybe_reader = VertexPropertyArrowChunkReader::Make(
+ graph_info_, "person", {"firstName", "lastName"}, SelectType::PROPERTIES);
+ if (maybe_reader.has_error()) {
+ state.SkipWithError(maybe_reader.status().message().c_str());
+ return;
+ }
+ auto reader = maybe_reader.value();
+ for (auto _ : state) {
+ assert(reader->seek(0).ok());
+ assert(reader->GetChunk(GetChunkVersion::V1).status().ok());
+ assert(reader->next_chunk().ok());
+ }
+}
+
+BENCHMARK_DEFINE_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_firstGraph_AllColumns_V2)
+(::benchmark::State& state) { // NOLINT
+ auto gp = graph_info_->GetVertexInfo("person")->GetPropertyGroup("firstName");
+ auto maybe_reader =
+ VertexPropertyArrowChunkReader::Make(graph_info_, "person", gp);
+ if (maybe_reader.has_error()) {
+ state.SkipWithError(maybe_reader.status().message().c_str());
+ return;
+ }
+ auto reader = maybe_reader.value();
+ for (auto _ : state) {
+ assert(reader->seek(0).ok());
+ assert(reader->GetChunk(GetChunkVersion::V2).status().ok());
+ assert(reader->next_chunk().ok());
+ }
+}
+// select one columns and internal ID column
+BENCHMARK_DEFINE_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_firstGraph_OneColumns_V2)
+(::benchmark::State& state) { // NOLINT
+ auto maybe_reader =
+ VertexPropertyArrowChunkReader::Make(graph_info_, "person", "firstName");
+ if (maybe_reader.has_error()) {
+ state.SkipWithError(maybe_reader.status().message().c_str());
+ return;
+ }
+ auto reader = maybe_reader.value();
+ for (auto _ : state) {
+ assert(reader->seek(0).ok());
+ assert(reader->GetChunk(GetChunkVersion::V2).status().ok());
+ assert(reader->next_chunk().ok());
+ }
+}
+
+// select tow columns and internal ID column
+BENCHMARK_DEFINE_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_firstGraph_TwoColumns_V2)
+(::benchmark::State& state) { // NOLINT
+ auto maybe_reader = VertexPropertyArrowChunkReader::Make(
+ graph_info_, "person", {"firstName", "lastName"}, SelectType::PROPERTIES);
+ if (maybe_reader.has_error()) {
+ state.SkipWithError(maybe_reader.status().message().c_str());
+ return;
+ }
+ auto reader = maybe_reader.value();
+ for (auto _ : state) {
+ assert(reader->seek(0).ok());
+ assert(reader->GetChunk(GetChunkVersion::V2).status().ok());
+ assert(reader->next_chunk().ok());
+ }
+}
+
+BENCHMARK_DEFINE_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_secondGraph_AllColumns_V1)
+(::benchmark::State& state) { // NOLINT
+ auto gp =
+ second_graph_info_->GetVertexInfo("organisation")->GetPropertyGroup("id");
+ auto maybe_reader = VertexPropertyArrowChunkReader::Make(second_graph_info_,
+ "organisation", gp);
+ if (maybe_reader.has_error()) {
+ state.SkipWithError(maybe_reader.status().message().c_str());
+ return;
+ }
+ auto reader = maybe_reader.value();
+ for (auto _ : state) {
+ assert(reader->seek(0).ok());
+ assert(reader->GetChunk(GetChunkVersion::V1).status().ok());
+ assert(reader->next_chunk().ok());
+ }
+}
+// select one columns and internal ID column
+BENCHMARK_DEFINE_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_secondGraph_OneColumns_V1)
+(::benchmark::State& state) { // NOLINT
+ auto maybe_reader = VertexPropertyArrowChunkReader::Make(
+ second_graph_info_, "organisation", "id");
+ if (maybe_reader.has_error()) {
+ state.SkipWithError(maybe_reader.status().message().c_str());
+ return;
+ }
+ auto reader = maybe_reader.value();
+ for (auto _ : state) {
+ assert(reader->seek(0).ok());
+ assert(reader->GetChunk(GetChunkVersion::V1).status().ok());
+ assert(reader->next_chunk().ok());
+ }
+}
+
+// select tow columns and internal ID column
+BENCHMARK_DEFINE_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_secondGraph_TwoColumns_V1)
+(::benchmark::State& state) { // NOLINT
+ auto maybe_reader = VertexPropertyArrowChunkReader::Make(
+ second_graph_info_, "organisation", {"id", "name"},
+ SelectType::PROPERTIES);
+ if (maybe_reader.has_error()) {
+ state.SkipWithError(maybe_reader.status().message().c_str());
+ return;
+ }
+ auto reader = maybe_reader.value();
+ for (auto _ : state) {
+ assert(reader->seek(0).ok());
+ assert(reader->GetChunk(GetChunkVersion::V1).status().ok());
+ assert(reader->next_chunk().ok());
+ }
+}
+
+BENCHMARK_DEFINE_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_secondGraph_AllColumns_V2)
+(::benchmark::State& state) { // NOLINT
+ auto gp =
+ second_graph_info_->GetVertexInfo("organisation")->GetPropertyGroup("id");
+ auto maybe_reader = VertexPropertyArrowChunkReader::Make(second_graph_info_,
+ "organisation", gp);
+ if (maybe_reader.has_error()) {
+ state.SkipWithError(maybe_reader.status().message().c_str());
+ return;
+ }
+ auto reader = maybe_reader.value();
+ for (auto _ : state) {
+ assert(reader->seek(0).ok());
+ assert(reader->GetChunk(GetChunkVersion::V2).status().ok());
+ assert(reader->next_chunk().ok());
+ }
+}
+// select one columns and internal ID column
+BENCHMARK_DEFINE_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_secondGraph_OneColumns_V2)
+(::benchmark::State& state) { // NOLINT
+ auto maybe_reader = VertexPropertyArrowChunkReader::Make(
+ second_graph_info_, "organisation", "id");
+ if (maybe_reader.has_error()) {
+ state.SkipWithError(maybe_reader.status().message().c_str());
+ return;
+ }
+ auto reader = maybe_reader.value();
+ for (auto _ : state) {
+ assert(reader->seek(0).ok());
+ assert(reader->GetChunk(GetChunkVersion::V2).status().ok());
+ assert(reader->next_chunk().ok());
+ }
+}
+
+// select tow columns and internal ID column
+BENCHMARK_DEFINE_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_secondGraph_TwoColumns_V2)
+(::benchmark::State& state) { // NOLINT
+ auto maybe_reader = VertexPropertyArrowChunkReader::Make(
+ second_graph_info_, "organisation", {"id", "name"},
+ SelectType::PROPERTIES);
+ if (maybe_reader.has_error()) {
+ state.SkipWithError(maybe_reader.status().message().c_str());
+ return;
+ }
+ auto reader = maybe_reader.value();
+ for (auto _ : state) {
+ assert(reader->seek(0).ok());
+ assert(reader->GetChunk(GetChunkVersion::V2).status().ok());
assert(reader->next_chunk().ok());
}
}
@@ -145,9 +360,43 @@ BENCHMARK_REGISTER_F(BenchmarkFixture, CreateAdjListArrowChunkReader);
BENCHMARK_REGISTER_F(BenchmarkFixture, CreateAdjListOffsetArrowChunkReader);
BENCHMARK_REGISTER_F(BenchmarkFixture,
AdjListPropertyArrowChunkReaderReadChunk);
-
-BENCHMARK_REGISTER_F(BenchmarkFixture, VertexPropertyArrowChunkReaderReadChunk);
BENCHMARK_REGISTER_F(BenchmarkFixture, AdjListArrowChunkReaderReadChunk);
BENCHMARK_REGISTER_F(BenchmarkFixture, AdjListOffsetArrowChunkReaderReadChunk);
BENCHMARK_REGISTER_F(BenchmarkFixture, AdjListOffsetArrowChunkReaderReadChunk);
+BENCHMARK_REGISTER_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_firstGraph_AllColumns_V1);
+BENCHMARK_REGISTER_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_firstGraph_TwoColumns_V1);
+BENCHMARK_REGISTER_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_firstGraph_OneColumns_V1);
+BENCHMARK_REGISTER_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_firstGraph_AllColumns_V2);
+BENCHMARK_REGISTER_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_firstGraph_TwoColumns_V2);
+BENCHMARK_REGISTER_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_firstGraph_OneColumns_V2);
+BENCHMARK_REGISTER_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_secondGraph_AllColumns_V1);
+BENCHMARK_REGISTER_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_secondGraph_TwoColumns_V1);
+BENCHMARK_REGISTER_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_secondGraph_OneColumns_V1);
+BENCHMARK_REGISTER_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_secondGraph_AllColumns_V2);
+BENCHMARK_REGISTER_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_secondGraph_TwoColumns_V2);
+BENCHMARK_REGISTER_F(
+ BenchmarkFixture,
+ VertexPropertyArrowChunkReaderReadChunk_secondGraph_OneColumns_V2);
} // namespace graphar
diff --git a/cpp/examples/mid_level_reader_example.cc b/cpp/examples/mid_level_reader_example.cc
index 5cc341de..1789139d 100644
--- a/cpp/examples/mid_level_reader_example.cc
+++ b/cpp/examples/mid_level_reader_example.cc
@@ -18,24 +18,28 @@
*/
#include <iostream>
+#include <vector>
#include "arrow/api.h"
#include "arrow/filesystem/api.h"
#include "./config.h"
#include "graphar/api/arrow_reader.h"
+#include "graphar/fwd.h"
void vertex_property_chunk_reader(
const std::shared_ptr<graphar::GraphInfo>& graph_info) {
- // create reader
+ // create reader (property group)
std::string type = "person", property_name = "gender";
+ auto property_group =
+ graph_info->GetVertexInfo(type)->GetPropertyGroup(property_name);
auto maybe_reader = graphar::VertexPropertyArrowChunkReader::Make(
- graph_info, type, property_name);
+ graph_info, type, property_group);
ASSERT(maybe_reader.status().ok());
auto reader = maybe_reader.value();
// use reader
- auto result = reader->GetChunk();
+ auto result = reader->GetChunk(graphar::GetChunkVersion::V1);
ASSERT(!result.has_error());
std::cout << "chunk number: " << reader->GetChunkNum() << std::endl;
auto table = result.value();
@@ -50,7 +54,7 @@ void vertex_property_chunk_reader(
<< std::endl;
// seek vertex id
ASSERT(reader->seek(100).ok());
- result = reader->GetChunk();
+ result = reader->GetChunk(graphar::GetChunkVersion::V1);
ASSERT(!result.has_error());
table = result.value();
index_col = table->GetColumnByName(graphar::GeneralParams::kVertexIndexCol);
@@ -59,7 +63,7 @@ void vertex_property_chunk_reader(
<< index_col->ToString() << " " << std::endl;
// next chunk
ASSERT(reader->next_chunk().ok());
- result = reader->GetChunk();
+ result = reader->GetChunk(graphar::GetChunkVersion::V1);
ASSERT(!result.has_error());
table = result.value();
index_col = table->GetColumnByName(graphar::GeneralParams::kVertexIndexCol);
@@ -67,12 +71,118 @@ void vertex_property_chunk_reader(
std::cout << "Internal id column of next chunk: " << index_col->ToString()
<< " " << std::endl;
+ // read specific one column
+ std::string specific_col_name = "lastName";
+ auto maybe_specific_reader = graphar::VertexPropertyArrowChunkReader::Make(
+ graph_info, type, specific_col_name);
+ ASSERT(maybe_specific_reader.status().ok());
+ auto specific_reader = maybe_specific_reader.value();
+ auto specific_result =
+ specific_reader->GetChunk(graphar::GetChunkVersion::V1);
+ ASSERT(!result.has_error());
+ auto specific_table = specific_result.value();
+ std::cout << "rows number of first specificed vertex property chunk: "
+ << specific_table->num_rows() << std::endl;
+ ASSERT(specific_table->num_columns() == 2);
+ std::cout << "schema of first specificed vertex property chunk: " << std::endl
+ << specific_table->schema()->ToString() << std::endl;
+ index_col =
+ specific_table->GetColumnByName(graphar::GeneralParams::kVertexIndexCol);
+ ASSERT(index_col != nullptr);
+ std::cout << "Internal id column: " << index_col->ToString() << " "
+ << std::endl;
+ auto specific_col = specific_table->GetColumnByName("lastName");
+ ASSERT(specific_col != nullptr);
+ std::cout << "Internal id column: " << specific_col->ToString() << " "
+ << std::endl;
+ // read specific one column V2
+ specific_col_name = "lastName";
+ maybe_specific_reader = graphar::VertexPropertyArrowChunkReader::Make(
+ graph_info, type, specific_col_name);
+ ASSERT(maybe_specific_reader.status().ok());
+ specific_reader = maybe_specific_reader.value();
+ specific_result = specific_reader->GetChunk(graphar::GetChunkVersion::V2);
+ ASSERT(!specific_result.has_error());
+ specific_table = specific_result.value();
+ std::cout << "rows number of first specificed vertex property chunk (V2): "
+ << specific_table->num_rows() << std::endl;
+ ASSERT(specific_table->num_columns() == 2);
+ std::cout << "schema of first specificed vertex property chunk (V2): "
+ << std::endl
+ << specific_table->schema()->ToString() << std::endl;
+ index_col =
+ specific_table->GetColumnByName(graphar::GeneralParams::kVertexIndexCol);
+ ASSERT(index_col != nullptr);
+ std::cout << "Internal id column: " << index_col->ToString() << " "
+ << std::endl;
+ specific_col = specific_table->GetColumnByName("lastName");
+ ASSERT(specific_col != nullptr);
+ std::cout << "Internal id column: " << specific_col->ToString() << " "
+ << std::endl;
+
+ // read specific columns
+ std::vector<std::string> specific_cols = {"firstName", "lastName"};
+ maybe_specific_reader = graphar::VertexPropertyArrowChunkReader::Make(
+ graph_info, type, specific_cols, graphar::SelectType::PROPERTIES);
+ ASSERT(maybe_specific_reader.status().ok());
+ specific_reader = maybe_specific_reader.value();
+ specific_result = specific_reader->GetChunk(graphar::GetChunkVersion::V1);
+ ASSERT(!result.has_error());
+ specific_table = specific_result.value();
+ std::cout << "rows number of specificed vertex properties chunk: "
+ << specific_table->num_rows() << std::endl;
+ ASSERT(specific_table->num_columns() == specific_cols.size() + 1);
+ std::cout << "schema of specificed vertex properties chunk: " << std::endl
+ << specific_table->schema()->ToString() << std::endl;
+ index_col =
+ specific_table->GetColumnByName(graphar::GeneralParams::kVertexIndexCol);
+ ASSERT(index_col != nullptr);
+ std::cout << "Internal id column: " << index_col->ToString() << " "
+ << std::endl;
+ specific_col = specific_table->GetColumnByName("firstName");
+ ASSERT(specific_col != nullptr);
+ std::cout << "firstName column: " << specific_col->ToString() << " "
+ << std::endl;
+ specific_col = specific_table->GetColumnByName("lastName");
+ ASSERT(specific_col != nullptr);
+ std::cout << "lastName column: " << specific_col->ToString() << " "
+ << std::endl;
+
+ // read specific columns V2
+ specific_cols = {"firstName", "lastName"};
+ maybe_specific_reader = graphar::VertexPropertyArrowChunkReader::Make(
+ graph_info, type, specific_cols, graphar::SelectType::PROPERTIES);
+ ASSERT(maybe_specific_reader.status().ok());
+ specific_reader = maybe_specific_reader.value();
+ specific_result = specific_reader->GetChunk(graphar::GetChunkVersion::V2);
+ ASSERT(!specific_result.has_error());
+ specific_table = specific_result.value();
+ std::cout << "rows number of specificed vertex properties chunk (V2): "
+ << specific_table->num_rows() << std::endl;
+ ASSERT(specific_table->num_columns() == specific_cols.size() + 1);
+ std::cout << "schema of specificed vertex properties chunk (V2): "
+ << std::endl
+ << specific_table->schema()->ToString() << std::endl;
+ index_col =
+ specific_table->GetColumnByName(graphar::GeneralParams::kVertexIndexCol);
+ ASSERT(index_col != nullptr);
+ std::cout << "Internal id column: " << index_col->ToString() << " "
+ << std::endl;
+ specific_col = specific_table->GetColumnByName("firstName");
+ ASSERT(specific_col != nullptr);
+ std::cout << "firstName column: " << specific_col->ToString() << " "
+ << std::endl;
+ specific_col = specific_table->GetColumnByName("lastName");
+ ASSERT(specific_col != nullptr);
+ std::cout << "lastName column: " << specific_col->ToString() << " "
+ << std::endl;
+
// reader with filter pushdown
auto filter = graphar::_Equal(graphar::_Property("gender"),
graphar::_Literal("female"));
std::vector<std::string> expected_cols{"firstName", "lastName"};
auto maybe_filter_reader = graphar::VertexPropertyArrowChunkReader::Make(
- graph_info, type, property_name);
+ graph_info, type, property_group);
ASSERT(maybe_filter_reader.status().ok());
auto filter_reader = maybe_filter_reader.value();
filter_reader->Filter(filter);
@@ -84,6 +194,24 @@ void vertex_property_chunk_reader(
<< filter_table->num_rows() << std::endl;
std::cout << "schema of first filtered vertex property chunk: " << std::endl
<< filter_table->schema()->ToString() << std::endl;
+ // reader with filter pushdown && select specific column
+ maybe_filter_reader = graphar::VertexPropertyArrowChunkReader::Make(
+ graph_info, type, {"firstName", "lastName"},
+ graphar::SelectType::PROPERTIES);
+ ASSERT(maybe_filter_reader.status().ok());
+ filter_reader = maybe_filter_reader.value();
+ filter_reader->Filter(filter);
+ filter_reader->Select(expected_cols);
+ filter_result = filter_reader->GetChunk();
+ ASSERT(!result.has_error());
+ filter_table = filter_result.value();
+ std::cout << "rows number of first filtered vertex property chunk (select "
+ "specific column): "
+ << filter_table->num_rows() << std::endl;
+ std::cout << "schema of first filtered vertex property chunk (select "
+ "specific column): "
+ << std::endl
+ << filter_table->schema()->ToString() << std::endl;
}
void adj_list_chunk_reader(
diff --git a/cpp/src/graphar/arrow/chunk_reader.cc b/cpp/src/graphar/arrow/chunk_reader.cc
index ff6adab4..a5c77cd2 100644
--- a/cpp/src/graphar/arrow/chunk_reader.cc
+++ b/cpp/src/graphar/arrow/chunk_reader.cc
@@ -17,13 +17,17 @@
* under the License.
*/
+#include <algorithm>
+#include <string>
#include <utility>
+#include <vector>
#include "arrow/api.h"
#include "arrow/compute/api.h"
#include "graphar/arrow/chunk_reader.h"
#include "graphar/filesystem.h"
+#include "graphar/fwd.h"
#include "graphar/general_params.h"
#include "graphar/graph_info.h"
#include "graphar/reader_util.h"
@@ -106,15 +110,21 @@ Status CastTableWithSchema(const std::shared_ptr<arrow::Table>& table,
if (table->schema()->Equals(*schema)) {
*out_table = table;
}
+ std::vector<std::shared_ptr<arrow::Field>> fields;
std::vector<std::shared_ptr<arrow::ChunkedArray>> columns;
for (int64_t i = 0; i < table->num_columns(); ++i) {
auto column = table->column(i);
- if (table->field(i)->type()->Equals(schema->field(i)->type())) {
+ auto table_field = table->field(i);
+ auto field_name = table_field->name();
+
+ auto schema_field = schema->GetFieldByName(field_name);
+ if (table_field->type()->Equals(schema_field->type())) {
columns.push_back(column);
+ fields.push_back(table_field);
continue;
}
- auto from_t = table->field(i)->type();
- auto to_t = schema->field(i)->type();
+ auto from_t = table_field->type();
+ auto to_t = schema_field->type();
std::vector<std::shared_ptr<arrow::Array>> chunks;
// process cast for each chunk
for (int64_t j = 0; j < column->num_chunks(); ++j) {
@@ -129,10 +139,11 @@ Status CastTableWithSchema(const std::shared_ptr<arrow::Table>& table,
chunks.push_back(out);
}
}
+ fields.push_back(arrow::field(field_name, to_t));
columns.push_back(std::make_shared<arrow::ChunkedArray>(chunks, to_t));
}
-
- *out_table = arrow::Table::Make(schema, columns);
+ auto new_schema = std::make_shared<arrow::Schema>(fields);
+ *out_table = arrow::Table::Make(new_schema, columns);
return Status::OK();
}
} // namespace
@@ -141,8 +152,17 @@ VertexPropertyArrowChunkReader::VertexPropertyArrowChunkReader(
const std::shared_ptr<VertexInfo>& vertex_info,
const std::shared_ptr<PropertyGroup>& property_group,
const std::string& prefix, const util::FilterOptions& options)
+ : VertexPropertyArrowChunkReader(vertex_info, property_group, {}, prefix,
+ options) {}
+
+VertexPropertyArrowChunkReader::VertexPropertyArrowChunkReader(
+ const std::shared_ptr<VertexInfo>& vertex_info,
+ const std::shared_ptr<PropertyGroup>& property_group,
+ const std::vector<std::string>& property_names, const std::string& prefix,
+ const util::FilterOptions& options)
: vertex_info_(std::move(vertex_info)),
property_group_(std::move(property_group)),
+ property_names_(std::move(property_names)),
chunk_index_(0),
seek_id_(0),
schema_(nullptr),
@@ -201,16 +221,81 @@ Status VertexPropertyArrowChunkReader::seek(IdType id) {
}
Result<std::shared_ptr<arrow::Table>>
-VertexPropertyArrowChunkReader::GetChunk() {
- GAR_RETURN_NOT_OK(util::CheckFilterOptions(filter_options_, property_group_));
+VertexPropertyArrowChunkReader::GetChunkV2() {
if (chunk_table_ == nullptr) {
GAR_ASSIGN_OR_RAISE(
auto chunk_file_path,
vertex_info_->GetFilePath(property_group_, chunk_index_));
+ std::vector<int> column_indices = {};
+ std::vector<std::string> property_names;
+ if (!filter_options_.columns && !property_names_.empty()) {
+ property_names = property_names_;
+ } else {
+ if (!property_names_.empty()) {
+ for (const auto& col : filter_options_.columns.value().get()) {
+ if (std::find(property_names_.begin(), property_names_.end(), col) ==
+ property_names_.end()) {
+ return Status::Invalid("Column ", col,
+ " is not in select properties.");
+ }
+ property_names.push_back(col);
+ }
+ }
+ }
+ for (const auto& col : property_names) {
+ auto field_index = schema_->GetFieldIndex(col);
+ if (field_index == -1) {
+ return Status::Invalid("Column ", col, " is not in select properties.");
+ }
+ column_indices.push_back(field_index);
+ }
std::string path = prefix_ + chunk_file_path;
GAR_ASSIGN_OR_RAISE(
chunk_table_, fs_->ReadFileToTable(path, property_group_->GetFileType(),
- filter_options_));
+ column_indices));
+ if (schema_ != nullptr && filter_options_.filter == nullptr) {
+ GAR_RETURN_NOT_OK(
+ CastTableWithSchema(chunk_table_, schema_, &chunk_table_));
+ }
+ }
+ IdType row_offset = seek_id_ - chunk_index_ * vertex_info_->GetChunkSize();
+ return chunk_table_->Slice(row_offset);
+}
+
+Result<std::shared_ptr<arrow::Table>>
+VertexPropertyArrowChunkReader::GetChunkV1() {
+ GAR_RETURN_NOT_OK(util::CheckFilterOptions(filter_options_, property_group_));
+ if (chunk_table_ == nullptr) {
+ GAR_ASSIGN_OR_RAISE(
+ auto chunk_file_path,
+ vertex_info_->GetFilePath(property_group_, chunk_index_));
+ std::string path = prefix_ + chunk_file_path;
+ if (property_names_.empty()) {
+ GAR_ASSIGN_OR_RAISE(
+ chunk_table_,
+ fs_->ReadFileToTable(path, property_group_->GetFileType(),
+ filter_options_));
+ } else {
+ util::FilterOptions temp_filter_options;
+ temp_filter_options.filter = filter_options_.filter;
+ std::vector<std::string> intersection_columns;
+ if (!filter_options_.columns) {
+ temp_filter_options.columns = std::ref(property_names_);
+ } else {
+ for (const auto& col : filter_options_.columns.value().get()) {
+ if (std::find(property_names_.begin(), property_names_.end(), col) ==
+ property_names_.end()) {
+ return Status::Invalid("Column ", col,
+ " is not in select properties.");
+ }
+ }
+ temp_filter_options.columns = filter_options_.columns;
+ }
+ GAR_ASSIGN_OR_RAISE(
+ chunk_table_,
+ fs_->ReadFileToTable(path, property_group_->GetFileType(),
+ temp_filter_options));
+ }
// TODO(acezen): filter pushdown doesn't support cast schema now
if (schema_ != nullptr && filter_options_.filter == nullptr) {
GAR_RETURN_NOT_OK(
@@ -221,6 +306,24 @@ VertexPropertyArrowChunkReader::GetChunk() {
return chunk_table_->Slice(row_offset);
}
+Result<std::shared_ptr<arrow::Table>> VertexPropertyArrowChunkReader::GetChunk(
+ GetChunkVersion version) {
+ switch (version) {
+ case GetChunkVersion::V1:
+ return GetChunkV1();
+ case GetChunkVersion::V2:
+ return GetChunkV2();
+ case GetChunkVersion::AUTO:
+ if (filter_options_.filter != nullptr) {
+ return GetChunkV1();
+ } else {
+ return GetChunkV2();
+ }
+ default:
+ return Status::Invalid("unsupport GetChunkVersion ", version);
+ }
+}
+
Result<std::shared_ptr<arrow::Table>>
VertexPropertyArrowChunkReader::GetLabelChunk() {
FileType filetype = FileType::PARQUET;
@@ -268,6 +371,16 @@ VertexPropertyArrowChunkReader::Make(
vertex_info, property_group, prefix, options);
}
+Result<std::shared_ptr<VertexPropertyArrowChunkReader>>
+VertexPropertyArrowChunkReader::Make(
+ const std::shared_ptr<VertexInfo>& vertex_info,
+ const std::shared_ptr<PropertyGroup>& property_group,
+ const std::vector<std::string>& property_names, const std::string& prefix,
+ const util::FilterOptions& options) {
+ return std::make_shared<VertexPropertyArrowChunkReader>(
+ vertex_info, property_group, property_names, prefix, options);
+}
+
Result<std::shared_ptr<VertexPropertyArrowChunkReader>>
VertexPropertyArrowChunkReader::Make(
const std::shared_ptr<GraphInfo>& graph_info, const std::string& type,
@@ -297,9 +410,83 @@ VertexPropertyArrowChunkReader::Make(
return Status::KeyError("The property ", property_name,
" doesn't exist in vertex type ", type, ".");
}
- return Make(vertex_info, property_group, graph_info->GetPrefix(), options);
+ std::vector<std::string> property_names = {property_name};
+ if (property_name != graphar::GeneralParams::kVertexIndexCol) {
+ property_names.insert(property_names.begin(),
+ graphar::GeneralParams::kVertexIndexCol);
+ }
+ return Make(vertex_info, property_group, property_names,
+ graph_info->GetPrefix(), options);
}
+Result<std::shared_ptr<VertexPropertyArrowChunkReader>>
+VertexPropertyArrowChunkReader::Make(
+ const std::shared_ptr<GraphInfo>& graph_info, const std::string& type,
+ const std::vector<std::string>& property_names_or_labels,
+ const SelectType select_type, const util::FilterOptions& options) {
+ switch (select_type) {
+ case SelectType::LABELS:
+ return MakeForLabels(graph_info, type, property_names_or_labels, options);
+ case SelectType::PROPERTIES:
+ return MakeForProperties(graph_info, type, property_names_or_labels,
+ options);
+ }
+}
+
+Result<std::shared_ptr<VertexPropertyArrowChunkReader>>
+VertexPropertyArrowChunkReader::MakeForProperties(
+ const std::shared_ptr<GraphInfo>& graph_info, const std::string& type,
+ const std::vector<std::string>& property_names,
+ const util::FilterOptions& options) {
+ auto vertex_info = graph_info->GetVertexInfo(type);
+ if (!vertex_info) {
+ return Status::KeyError("The vertex type ", type,
+ " doesn't exist in graph ", graph_info->GetName(),
+ ".");
+ }
+ if (property_names.empty()) {
+ return Status::Invalid("The property names cannot be empty.");
+ }
+ bool hasIndexCol = false;
+ std::vector<std::string> property_names_mutable = property_names;
+ if (property_names_mutable[property_names_mutable.size() - 1] ==
+ graphar::GeneralParams::kVertexIndexCol) {
+ hasIndexCol = true;
+ std::iter_swap(property_names_mutable.begin(),
+ property_names_mutable.end() - 1);
+ }
+ auto property_group = vertex_info->GetPropertyGroup(
+ property_names_mutable[property_names_mutable.size() - 1]);
+ if (!property_group) {
+ return Status::KeyError(
+ "The property ",
+ property_names_mutable[property_names_mutable.size() - 1],
+ " doesn't exist in vertex type ", type, ".");
+ }
+ for (int i = 0; i < property_names_mutable.size() - 1; i++) {
+ if (property_names_mutable[i] == graphar::GeneralParams::kVertexIndexCol) {
+ hasIndexCol = true;
+ }
+ auto pg = vertex_info->GetPropertyGroup(property_names_mutable[i]);
+ if (!pg) {
+ return Status::KeyError("The property ", property_names_mutable[i],
+ " doesn't exist in vertex type ", type, ".");
+ }
+ if (pg != property_group) {
+ return Status::Invalid(
+ "The properties ", property_names_mutable[i], " and ",
+ property_names_mutable[property_names_mutable.size() - 1],
+ " are not in the same property group, please use Make with "
+ "property_group instead.");
+ }
+ }
+ if (!hasIndexCol) {
+ property_names_mutable.insert(property_names_mutable.begin(),
+ graphar::GeneralParams::kVertexIndexCol);
+ }
+ return Make(vertex_info, property_group, property_names_mutable,
+ graph_info->GetPrefix(), options);
+}
Result<std::shared_ptr<VertexPropertyArrowChunkReader>>
VertexPropertyArrowChunkReader::Make(
const std::shared_ptr<VertexInfo>& vertex_info,
@@ -310,7 +497,7 @@ VertexPropertyArrowChunkReader::Make(
}
Result<std::shared_ptr<VertexPropertyArrowChunkReader>>
-VertexPropertyArrowChunkReader::Make(
+VertexPropertyArrowChunkReader::MakeForLabels(
const std::shared_ptr<GraphInfo>& graph_info, const std::string& type,
const std::vector<std::string>& labels,
const util::FilterOptions& options) {
diff --git a/cpp/src/graphar/arrow/chunk_reader.h
b/cpp/src/graphar/arrow/chunk_reader.h
index 739995ec..de2cf41f 100644
--- a/cpp/src/graphar/arrow/chunk_reader.h
+++ b/cpp/src/graphar/arrow/chunk_reader.h
@@ -26,6 +26,7 @@
#include "graphar/fwd.h"
#include "graphar/reader_util.h"
+#include "graphar/status.h"
// forward declaration
namespace arrow {
@@ -52,6 +53,19 @@ class VertexPropertyArrowChunkReader {
const std::shared_ptr<VertexInfo>& vertex_info,
const std::shared_ptr<PropertyGroup>& property_group,
const std::string& prefix, const util::FilterOptions& options = {});
+ /**
+ * @brief Initialize the VertexPropertyArrowChunkReader.
+ *
+ * @param vertex_info The vertex info that describes the vertex type.
+ * @param property_group The property group that describes the property
group.
+ * @param property_names Only these properties will be read.
+ * @param prefix The absolute prefix.
+ */
+ VertexPropertyArrowChunkReader(
+ const std::shared_ptr<VertexInfo>& vertex_info,
+ const std::shared_ptr<PropertyGroup>& property_group,
+ const std::vector<std::string>& property_names, const std::string&
prefix,
+ const util::FilterOptions& options = {});
VertexPropertyArrowChunkReader() : vertex_info_(nullptr), prefix_("") {}
@@ -79,7 +93,8 @@ class VertexPropertyArrowChunkReader {
/**
* @brief Return the current arrow chunk table of chunk position indicator.
*/
- Result<std::shared_ptr<arrow::Table>> GetChunk();
+ Result<std::shared_ptr<arrow::Table>> GetChunk(
+ GetChunkVersion version = GetChunkVersion::AUTO);
/**
* @brief Return the current arrow label chunk table of chunk position
* indicator.
@@ -126,6 +141,21 @@ class VertexPropertyArrowChunkReader {
const std::shared_ptr<PropertyGroup>& property_group,
const std::string& prefix, const util::FilterOptions& options = {});
+ /**
+ * @brief Create a VertexPropertyArrowChunkReader instance from vertex info.
+ *
+ * @param vertex_info The vertex info.
+ * @param property_group The property group of the vertex property.
+ * @param property_names If not empty, only these properties will be read.
+ * @param prefix The absolute prefix of the graph.
+ * @param options The filter options, default is empty.
+ */
+ static Result<std::shared_ptr<VertexPropertyArrowChunkReader>> Make(
+ const std::shared_ptr<VertexInfo>& vertex_info,
+ const std::shared_ptr<PropertyGroup>& property_group,
+ const std::vector<std::string>& property_names, const std::string&
prefix,
+ const util::FilterOptions& options = {});
+
/**
* @brief Create a VertexPropertyArrowChunkReader instance from graph info
and
* property group.
@@ -154,6 +184,21 @@ class VertexPropertyArrowChunkReader {
const std::shared_ptr<GraphInfo>& graph_info, const std::string& type,
const std::string& property_name,
const util::FilterOptions& options = {});
+ /**
+ * @brief Create a VertexPropertyArrowChunkReader instance from graph info
+ * for either properties or labels.
+ *
+ * @param graph_info The graph info.
+ * @param type The vertex type.
+ * @param property_names_or_labels The property names or labels to read.
+ * @param select_type The select type, properties or labels.
+ * @param options The filter options, default is empty.
+ */
+
+ static Result<std::shared_ptr<VertexPropertyArrowChunkReader>> Make(
+ const std::shared_ptr<GraphInfo>& graph_info, const std::string& type,
+ const std::vector<std::string>& property_names_or_labels,
+ const SelectType select_type, const util::FilterOptions& options = {});
/**
* @brief Create a VertexPropertyArrowChunkReader instance from vertex info
@@ -169,6 +214,22 @@ class VertexPropertyArrowChunkReader {
const std::vector<std::string>& labels, const std::string& prefix,
const util::FilterOptions& options = {});
+ /**
+ * @brief Create a VertexPropertyArrowChunkReader instance from graph info
+ * for properties.
+ *
+ * @param graph_info The graph info.
+ * @param type The vertex type.
+ * @param property_names The names of the properties you want to read; they
+ * must all belong to the same property group.
+ * @param options The filter options, default is empty.
+ */
+ static Result<std::shared_ptr<VertexPropertyArrowChunkReader>>
+ MakeForProperties(const std::shared_ptr<GraphInfo>& graph_info,
+ const std::string& type,
+ const std::vector<std::string>& property_names,
+ const util::FilterOptions& options = {});
+
/**
* @brief Create a VertexPropertyArrowChunkReader instance from graph info
* for labels.
@@ -179,14 +240,25 @@ class VertexPropertyArrowChunkReader {
* @param prefix The absolute prefix of the graph.
* @param options The filter options, default is empty.
*/
- static Result<std::shared_ptr<VertexPropertyArrowChunkReader>> Make(
+ static Result<std::shared_ptr<VertexPropertyArrowChunkReader>> MakeForLabels(
const std::shared_ptr<GraphInfo>& graph_info, const std::string& type,
const std::vector<std::string>& labels,
const util::FilterOptions& options = {});
+ private:
+ /**
+ * @brief Read the chunk through the scanner.
+ */
+ Result<std::shared_ptr<arrow::Table>> GetChunkV1();
+ /**
+ * @brief Read the chunk through the reader.
+ */
+ Result<std::shared_ptr<arrow::Table>> GetChunkV2();
+
private:
std::shared_ptr<VertexInfo> vertex_info_;
std::shared_ptr<PropertyGroup> property_group_;
+ std::vector<std::string> property_names_;
std::string prefix_;
std::vector<std::string> labels_;
IdType chunk_index_;
diff --git a/cpp/src/graphar/filesystem.cc b/cpp/src/graphar/filesystem.cc
index dfbbcb1d..e246eeb4 100644
--- a/cpp/src/graphar/filesystem.cc
+++ b/cpp/src/graphar/filesystem.cc
@@ -23,6 +23,7 @@
#include "arrow/api.h"
#include "arrow/csv/api.h"
#include "arrow/dataset/api.h"
+#include "parquet/arrow/reader.h"
#if defined(ARROW_VERSION) && ARROW_VERSION <= 12000000
#include "arrow/dataset/file_json.h"
#endif
@@ -106,6 +107,34 @@ std::shared_ptr<ds::FileFormat> FileSystem::GetFileFormat(
}
}
+Result<std::shared_ptr<arrow::Table>> FileSystem::ReadFileToTable(
+ const std::string& path, FileType file_type,
+ const std::vector<int>& column_indices) const noexcept {
+ parquet::arrow::FileReaderBuilder builder;
+ auto open_file_status = builder.OpenFile(path);
+ if (!open_file_status.ok()) {
+ return Status::Invalid("Failed to open file: ", path, " - ",
+ open_file_status.ToString());
+ }
+ builder.memory_pool(arrow::default_memory_pool());
+ GAR_RETURN_ON_ARROW_ERROR_AND_ASSIGN(auto reader, builder.Build());
+ std::shared_ptr<arrow::Table> table;
+ if (column_indices.empty()) {
+ arrow::Status read_status = reader->ReadTable(&table);
+ if (!read_status.ok()) {
+ return Status::Invalid("Failed to read table from file: ", path, " - ",
+ read_status.ToString());
+ }
+ } else {
+ arrow::Status read_status = reader->ReadTable(column_indices, &table);
+ if (!read_status.ok()) {
+ return Status::Invalid("Failed to read table from file: ", path, " - ",
+ read_status.ToString());
+ }
+ }
+ return table;
+}
+
Result<std::shared_ptr<arrow::Table>> FileSystem::ReadFileToTable(
const std::string& path, FileType file_type,
const util::FilterOptions& options) const noexcept {
diff --git a/cpp/src/graphar/filesystem.h b/cpp/src/graphar/filesystem.h
index 939b73c6..986b7d59 100644
--- a/cpp/src/graphar/filesystem.h
+++ b/cpp/src/graphar/filesystem.h
@@ -80,6 +80,10 @@ class FileSystem {
const std::string& path, FileType file_type,
const util::FilterOptions& options = {}) const noexcept;
+ Result<std::shared_ptr<arrow::Table>> ReadFileToTable(
+ const std::string& path, FileType file_type,
+ const std::vector<int>& column_indices) const noexcept;
+
/**
* @brief Read a file and convert its bytes to a value of type T.
*
diff --git a/cpp/src/graphar/fwd.h b/cpp/src/graphar/fwd.h
index fdb34b20..f7b5d34a 100644
--- a/cpp/src/graphar/fwd.h
+++ b/cpp/src/graphar/fwd.h
@@ -74,6 +74,9 @@ enum class Type;
class DataType;
/** Type of file format */
enum FileType { CSV = 0, PARQUET = 1, ORC = 2, JSON = 3 };
+enum SelectType { PROPERTIES = 0, LABELS = 1 };
+/** GetChunkVersion: V1 uses the Scanner, V2 uses the FileReader */
+enum GetChunkVersion { AUTO = 0, V1 = 1, V2 = 2 };
enum class AdjListType : uint8_t;
template <typename T>
diff --git a/cpp/src/graphar/high-level/graph_reader.cc
b/cpp/src/graphar/high-level/graph_reader.cc
index 66438af2..5460dfdd 100644
--- a/cpp/src/graphar/high-level/graph_reader.cc
+++ b/cpp/src/graphar/high-level/graph_reader.cc
@@ -81,7 +81,8 @@ Vertex::Vertex(IdType id,
: id_(id) {
// get the first row of table
for (auto& reader : readers) {
- GAR_ASSIGN_OR_RAISE_ERROR(auto chunk_table, reader.GetChunk());
+ GAR_ASSIGN_OR_RAISE_ERROR(auto chunk_table,
+ reader.GetChunk(graphar::GetChunkVersion::V1));
auto schema = chunk_table->schema();
for (int i = 0; i < schema->num_fields(); ++i) {
auto field = chunk_table->field(i);
@@ -280,7 +281,8 @@ Result<std::vector<IdType>> VerticesCollection::filter(
for (int chunk_idx : valid_chunk_) {
// how to itetate valid_chunk_?
filter_reader->seek(chunk_idx * CHUNK_SIZE);
- auto filter_result = filter_reader->GetChunk();
+ auto filter_result =
+ filter_reader->GetChunk(graphar::GetChunkVersion::V1);
auto filter_table = filter_result.value();
int count = filter_table->num_rows();
if (count != 0 && new_valid_chunk != nullptr) {
@@ -301,7 +303,8 @@ Result<std::vector<IdType>> VerticesCollection::filter(
} else {
for (int chunk_idx = 0; chunk_idx * CHUNK_SIZE < TOT_ROWS_NUM;
++chunk_idx) {
- auto filter_result = filter_reader->GetChunk();
+ auto filter_result =
+ filter_reader->GetChunk(graphar::GetChunkVersion::V1);
auto filter_table = filter_result.value();
int count = filter_table->num_rows();
filter_reader->next_chunk();
diff --git a/cpp/src/graphar/high-level/graph_reader.h
b/cpp/src/graphar/high-level/graph_reader.h
index 31a64ff0..e9f76e41 100644
--- a/cpp/src/graphar/high-level/graph_reader.h
+++ b/cpp/src/graphar/high-level/graph_reader.h
@@ -25,7 +25,6 @@
#include <memory>
#include <string>
#include <utility>
-#include <variant>
#include <vector>
#include "graphar/arrow/chunk_reader.h"
@@ -236,7 +235,8 @@ class VertexIter {
if (is_filtered_) {
for (auto& reader : readers_) {
reader.seek(filtered_ids_[cur_offset_]);
- GAR_ASSIGN_OR_RAISE(auto chunk_table, reader.GetChunk());
+ GAR_ASSIGN_OR_RAISE(auto chunk_table,
+ reader.GetChunk(graphar::GetChunkVersion::V1));
column = util::GetArrowColumnByName(chunk_table, property);
if (column != nullptr) {
break;
@@ -245,7 +245,8 @@ class VertexIter {
} else {
for (auto& reader : readers_) {
reader.seek(cur_offset_);
- GAR_ASSIGN_OR_RAISE(auto chunk_table, reader.GetChunk());
+ GAR_ASSIGN_OR_RAISE(auto chunk_table,
+ reader.GetChunk(graphar::GetChunkVersion::V1));
column = util::GetArrowColumnByName(chunk_table, property);
if (column != nullptr) {
break;
diff --git a/cpp/test/test_arrow_chunk_reader.cc
b/cpp/test/test_arrow_chunk_reader.cc
index c68b194e..8fcdfb6a 100644
--- a/cpp/test/test_arrow_chunk_reader.cc
+++ b/cpp/test/test_arrow_chunk_reader.cc
@@ -21,10 +21,10 @@
#include "arrow/api.h"
+#include <catch2/catch_test_macros.hpp>
#include "./util.h"
#include "graphar/api/arrow_reader.h"
-#include <catch2/catch_test_macros.hpp>
namespace graphar {
TEST_CASE_METHOD(GlobalFixture, "ArrowChunkReader") {
@@ -33,6 +33,7 @@ TEST_CASE_METHOD(GlobalFixture, "ArrowChunkReader") {
test_data_dir + "/ldbc_sample/parquet/ldbc_sample.graph.yml";
std::string src_type = "person", edge_type = "knows", dst_type = "person";
std::string vertex_property_name = "id";
+ std::vector<std::string> vertex_property_names = {"firstName", "gender"};
std::string edge_property_name = "creationDate";
auto maybe_graph_info = GraphInfo::Load(path);
REQUIRE(maybe_graph_info.status().ok());
@@ -46,41 +47,89 @@ TEST_CASE_METHOD(GlobalFixture, "ArrowChunkReader") {
auto e_pg = edge_info->GetPropertyGroup(edge_property_name);
REQUIRE(e_pg != nullptr);
- SECTION("VertexPropertyArrowChunkReader") {
- auto maybe_reader = VertexPropertyArrowChunkReader::Make(
- graph_info, src_type, vertex_property_name);
+ SECTION("VertexPropertyArrowChunkReader through propertyGroup") {
+ auto maybe_reader =
+ VertexPropertyArrowChunkReader::Make(graph_info, src_type, v_pg);
REQUIRE(maybe_reader.status().ok());
auto reader = maybe_reader.value();
REQUIRE(reader->GetChunkNum() == 10);
SECTION("Basics") {
- auto result = reader->GetChunk();
+ auto result = reader->GetChunk(graphar::GetChunkVersion::V1);
REQUIRE(!result.has_error());
auto table = result.value();
REQUIRE(table->num_rows() == 100);
+ std::cout << table->num_columns() << std::endl;
+ std::cout << v_pg->GetProperties().size() << std::endl;
+ REQUIRE(table->num_columns() == v_pg->GetProperties().size() + 1);
REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
nullptr);
// seek
REQUIRE(reader->seek(100).ok());
- result = reader->GetChunk();
+ result = reader->GetChunk(graphar::GetChunkVersion::V1);
REQUIRE(!result.has_error());
table = result.value();
REQUIRE(table->num_rows() == 100);
+ REQUIRE(table->num_columns() == v_pg->GetProperties().size() + 1);
REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
nullptr);
REQUIRE(reader->next_chunk().ok());
- result = reader->GetChunk();
+ result = reader->GetChunk(graphar::GetChunkVersion::V1);
REQUIRE(!result.has_error());
table = result.value();
REQUIRE(table->num_rows() == 100);
+ REQUIRE(table->num_columns() == v_pg->GetProperties().size() + 1);
REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
nullptr);
REQUIRE(reader->seek(900).ok());
- result = reader->GetChunk();
+ result = reader->GetChunk(graphar::GetChunkVersion::V1);
+ REQUIRE(!result.has_error());
+ table = result.value();
+ REQUIRE(table->num_rows() == 3);
+ REQUIRE(table->num_columns() == v_pg->GetProperties().size() + 1);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ REQUIRE(reader->GetChunkNum() == 10);
+ REQUIRE(reader->next_chunk().IsIndexError());
+
+ REQUIRE(reader->seek(1024).IsIndexError());
+ }
+
+ SECTION("GetChunkV2") {
+ auto result = reader->GetChunk(graphar::GetChunkVersion::V2);
+ REQUIRE(!result.has_error());
+ auto table = result.value();
+ REQUIRE(table->num_rows() == 100);
+ std::cout << table->num_columns() << std::endl;
+ std::cout << v_pg->GetProperties().size() << std::endl;
+ REQUIRE(table->num_columns() == v_pg->GetProperties().size() + 1);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+
+ // seek
+ REQUIRE(reader->seek(100).ok());
+ result = reader->GetChunk(graphar::GetChunkVersion::V2);
+ REQUIRE(!result.has_error());
+ table = result.value();
+ REQUIRE(table->num_rows() == 100);
+ REQUIRE(table->num_columns() == v_pg->GetProperties().size() + 1);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ REQUIRE(reader->next_chunk().ok());
+ result = reader->GetChunk(graphar::GetChunkVersion::V2);
+ REQUIRE(!result.has_error());
+ table = result.value();
+ REQUIRE(table->num_rows() == 100);
+ REQUIRE(table->num_columns() == v_pg->GetProperties().size() + 1);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ REQUIRE(reader->seek(900).ok());
+ result = reader->GetChunk(graphar::GetChunkVersion::V2);
REQUIRE(!result.has_error());
table = result.value();
REQUIRE(table->num_rows() == 3);
+ REQUIRE(table->num_columns() == v_pg->GetProperties().size() + 1);
REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
nullptr);
REQUIRE(reader->GetChunkNum() == 10);
@@ -88,6 +137,33 @@ TEST_CASE_METHOD(GlobalFixture, "ArrowChunkReader") {
REQUIRE(reader->seek(1024).IsIndexError());
}
+ SECTION("Auto select GetChunkVersion") {
+ auto v_pg_name = vertex_info->GetPropertyGroup("firstName");
+ auto maybe_reader_auto_select =
+ VertexPropertyArrowChunkReader::Make(graph_info, src_type,
v_pg_name);
+ REQUIRE(maybe_reader_auto_select.status().ok());
+ auto reader_auto_select = maybe_reader_auto_select.value();
+ // VERSION V2
+ auto result = reader_auto_select->GetChunk();
+ REQUIRE(!result.has_error());
+ auto table = result.value();
+ REQUIRE(table->num_rows() == 100);
+ REQUIRE(table->num_columns() == v_pg_name->GetProperties().size() + 1);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ // VERSION V1
+ auto filter = graphar::_Equal(graphar::_Property("gender"),
+ graphar::_Literal("female"));
+ reader_auto_select->Filter(filter);
+ reader_auto_select->next_chunk();
+ result = reader_auto_select->GetChunk();
+ REQUIRE(!result.has_error());
+ table = result.value();
+ REQUIRE(table->num_rows() == 48);
+ REQUIRE(table->num_columns() == v_pg_name->GetProperties().size() + 1);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ }
SECTION("CastDataType") {
std::string prefix = test_data_dir + "/modern_graph/";
@@ -116,7 +192,7 @@ TEST_CASE_METHOD(GlobalFixture, "ArrowChunkReader") {
VertexPropertyArrowChunkReader::Make(vertex_info, new_pg, prefix);
REQUIRE(maybe_reader.status().ok());
auto reader = maybe_reader.value();
- auto result = reader->GetChunk();
+ auto result = reader->GetChunk(graphar::GetChunkVersion::V1);
REQUIRE(!result.has_error());
auto table = result.value();
REQUIRE(table->schema()->GetFieldByName("id")->type()->id() ==
@@ -125,6 +201,8 @@ TEST_CASE_METHOD(GlobalFixture, "ArrowChunkReader") {
SECTION("PropertyPushDown") {
std::string filter_property = "gender";
+ auto filter_pg = graph_info->GetVertexInfo(src_type)->GetPropertyGroup(
+ filter_property);
auto filter = _Equal(_Property(filter_property), _Literal("female"));
std::vector<std::string> expected_cols;
expected_cols.push_back("firstName");
@@ -136,7 +214,7 @@ TEST_CASE_METHOD(GlobalFixture, "ArrowChunkReader") {
std::shared_ptr<arrow::Table> table;
do {
- auto result = reader->GetChunk();
+ auto result = reader->GetChunk(graphar::GetChunkVersion::V1);
REQUIRE(!result.has_error());
table = result.value();
std::cout << "Chunk: " << idx << ",\tNums: " << table->num_rows()
@@ -166,16 +244,20 @@ TEST_CASE_METHOD(GlobalFixture, "ArrowChunkReader") {
util::FilterOptions options;
options.filter = filter;
options.columns = expected_cols;
+ auto pg = graph_info->GetVertexInfo(src_type)->GetPropertyGroup(
+ filter_property);
auto maybe_reader = VertexPropertyArrowChunkReader::Make(
- graph_info, src_type, filter_property, options);
+ graph_info, src_type, pg, options);
REQUIRE(maybe_reader.status().ok());
walkReader(maybe_reader.value());
}
SECTION("pushdown by function Filter() & Select()") {
std::cout << "Vertex property pushdown by Filter() & Select():\n";
- auto maybe_reader = VertexPropertyArrowChunkReader::Make(
- graph_info, src_type, filter_property);
+ auto pg = graph_info->GetVertexInfo(src_type)->GetPropertyGroup(
+ filter_property);
+ auto maybe_reader =
+ VertexPropertyArrowChunkReader::Make(graph_info, src_type, pg);
REQUIRE(maybe_reader.status().ok());
auto reader = maybe_reader.value();
reader->Filter(filter);
@@ -190,10 +272,10 @@ TEST_CASE_METHOD(GlobalFixture, "ArrowChunkReader") {
options.filter = filter;
options.columns = expected_cols;
auto maybe_reader = VertexPropertyArrowChunkReader::Make(
- graph_info, src_type, filter_property, options);
+ graph_info, src_type, filter_pg, options);
REQUIRE(maybe_reader.status().ok());
auto reader = maybe_reader.value();
- auto result = reader->GetChunk();
+ auto result = reader->GetChunk(graphar::GetChunkVersion::V1);
REQUIRE(result.error().IsInvalid());
std::cerr << result.error().message() << std::endl;
}
@@ -207,10 +289,10 @@ TEST_CASE_METHOD(GlobalFixture, "ArrowChunkReader") {
options.filter = filter;
options.columns = expected_cols_2;
auto maybe_reader = VertexPropertyArrowChunkReader::Make(
- graph_info, src_type, filter_property, options);
+ graph_info, src_type, filter_pg, options);
REQUIRE(maybe_reader.status().ok());
auto reader = maybe_reader.value();
- auto result = reader->GetChunk();
+ auto result = reader->GetChunk(graphar::GetChunkVersion::V1);
REQUIRE(result.error().IsInvalid());
std::cerr << result.error().message() << std::endl;
}
@@ -233,6 +315,268 @@ TEST_CASE_METHOD(GlobalFixture, "ArrowChunkReader") {
}
}
+ SECTION("VertexPropertyArrowChunkReader through propertyNames") {
+ auto maybe_reader = VertexPropertyArrowChunkReader::Make(
+ graph_info, src_type, vertex_property_names, SelectType::PROPERTIES);
+ REQUIRE(maybe_reader.status().ok());
+ auto reader = maybe_reader.value();
+ REQUIRE(reader->GetChunkNum() == 10);
+
+ SECTION("Basics") {
+ auto result = reader->GetChunk(graphar::GetChunkVersion::V1);
+ REQUIRE(!result.has_error());
+ auto table = result.value();
+ REQUIRE(table->num_rows() == 100);
+ std::cout << table->num_columns() << std::endl;
+ std::cout << vertex_property_names.size() << std::endl;
+ REQUIRE(table->num_columns() == vertex_property_names.size() + 1);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ for (auto pn : vertex_property_names) {
+ REQUIRE(table->GetColumnByName(pn) != nullptr);
+ }
+ // seek
+ REQUIRE(reader->seek(100).ok());
+ result = reader->GetChunk(graphar::GetChunkVersion::V1);
+ REQUIRE(!result.has_error());
+ table = result.value();
+ REQUIRE(table->num_rows() == 100);
+ REQUIRE(table->num_columns() == vertex_property_names.size() + 1);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ for (auto pn : vertex_property_names) {
+ REQUIRE(table->GetColumnByName(pn) != nullptr);
+ }
+ REQUIRE(reader->next_chunk().ok());
+ result = reader->GetChunk(graphar::GetChunkVersion::V1);
+ REQUIRE(!result.has_error());
+ table = result.value();
+ REQUIRE(table->num_rows() == 100);
+ REQUIRE(table->num_columns() == vertex_property_names.size() + 1);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ for (auto pn : vertex_property_names) {
+ REQUIRE(table->GetColumnByName(pn) != nullptr);
+ }
+ REQUIRE(reader->seek(900).ok());
+ result = reader->GetChunk(graphar::GetChunkVersion::V1);
+ REQUIRE(!result.has_error());
+ table = result.value();
+ REQUIRE(table->num_rows() == 3);
+ REQUIRE(table->num_columns() == vertex_property_names.size() + 1);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ for (auto pn : vertex_property_names) {
+ REQUIRE(table->GetColumnByName(pn) != nullptr);
+ }
+ REQUIRE(reader->GetChunkNum() == 10);
+ REQUIRE(reader->next_chunk().IsIndexError());
+
+ REQUIRE(reader->seek(1024).IsIndexError());
+ }
+ SECTION("VertexPropertyArrowChunkReader through propertyNames V2") {
+ auto result = reader->GetChunk(graphar::GetChunkVersion::V2);
+ REQUIRE(!result.has_error());
+ auto table = result.value();
+ REQUIRE(table->num_rows() == 100);
+ std::cout << table->num_columns() << std::endl;
+ std::cout << vertex_property_names.size() << std::endl;
+ REQUIRE(table->num_columns() == vertex_property_names.size() + 1);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ for (auto pn : vertex_property_names) {
+ REQUIRE(table->GetColumnByName(pn) != nullptr);
+ }
+ // seek
+ REQUIRE(reader->seek(100).ok());
+ result = reader->GetChunk(graphar::GetChunkVersion::V2);
+ REQUIRE(!result.has_error());
+ table = result.value();
+ REQUIRE(table->num_rows() == 100);
+ REQUIRE(table->num_columns() == vertex_property_names.size() + 1);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ for (auto pn : vertex_property_names) {
+ REQUIRE(table->GetColumnByName(pn) != nullptr);
+ }
+ REQUIRE(reader->next_chunk().ok());
+ result = reader->GetChunk(graphar::GetChunkVersion::V2);
+ REQUIRE(!result.has_error());
+ table = result.value();
+ REQUIRE(table->num_rows() == 100);
+ REQUIRE(table->num_columns() == vertex_property_names.size() + 1);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ for (auto pn : vertex_property_names) {
+ REQUIRE(table->GetColumnByName(pn) != nullptr);
+ }
+ REQUIRE(reader->seek(900).ok());
+ result = reader->GetChunk(graphar::GetChunkVersion::V2);
+ REQUIRE(!result.has_error());
+ table = result.value();
+ REQUIRE(table->num_rows() == 3);
+ REQUIRE(table->num_columns() == vertex_property_names.size() + 1);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ for (auto pn : vertex_property_names) {
+ REQUIRE(table->GetColumnByName(pn) != nullptr);
+ }
+ REQUIRE(reader->GetChunkNum() == 10);
+ REQUIRE(reader->next_chunk().IsIndexError());
+
+ REQUIRE(reader->seek(1024).IsIndexError());
+ }
+ SECTION("VertexPropertyArrowChunkReader through One propertyName") {
+ std::string vertex_property_name = "firstName";
+ auto maybe_reader = VertexPropertyArrowChunkReader::Make(
+ graph_info, src_type, vertex_property_name);
+ REQUIRE(maybe_reader.status().ok());
+ auto reader = maybe_reader.value();
+ REQUIRE(reader->GetChunkNum() == 10);
+
+ auto result = reader->GetChunk(graphar::GetChunkVersion::V1);
+ REQUIRE(!result.has_error());
+ auto table = result.value();
+ REQUIRE(table->num_rows() == 100);
+ std::cout << table->num_columns() << std::endl;
+ REQUIRE(table->num_columns() == 2);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ REQUIRE(table->GetColumnByName(vertex_property_name) != nullptr);
+
+ // seek
+ REQUIRE(reader->seek(100).ok());
+ result = reader->GetChunk(graphar::GetChunkVersion::V1);
+ REQUIRE(!result.has_error());
+ table = result.value();
+ REQUIRE(table->num_rows() == 100);
+ REQUIRE(table->num_columns() == 2);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ REQUIRE(table->GetColumnByName(vertex_property_name) != nullptr);
+ REQUIRE(reader->next_chunk().ok());
+ result = reader->GetChunk(graphar::GetChunkVersion::V1);
+ REQUIRE(!result.has_error());
+ table = result.value();
+ REQUIRE(table->num_rows() == 100);
+ REQUIRE(table->num_columns() == 2);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ REQUIRE(table->GetColumnByName(vertex_property_name) != nullptr);
+ REQUIRE(reader->seek(900).ok());
+ result = reader->GetChunk(graphar::GetChunkVersion::V1);
+ REQUIRE(!result.has_error());
+ table = result.value();
+ REQUIRE(table->num_rows() == 3);
+ REQUIRE(table->num_columns() == 2);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ REQUIRE(table->GetColumnByName(vertex_property_name) != nullptr);
+ REQUIRE(reader->GetChunkNum() == 10);
+ REQUIRE(reader->next_chunk().IsIndexError());
+
+ REQUIRE(reader->seek(1024).IsIndexError());
+ }
+ SECTION("VertexPropertyArrowChunkReader through One propertyName (V2)") {
+ std::string vertex_property_name = "firstName";
+ auto maybe_reader = VertexPropertyArrowChunkReader::Make(
+ graph_info, src_type, vertex_property_name);
+ REQUIRE(maybe_reader.status().ok());
+ auto reader = maybe_reader.value();
+ REQUIRE(reader->GetChunkNum() == 10);
+
+ auto result = reader->GetChunk(graphar::GetChunkVersion::V2);
+ REQUIRE(!result.has_error());
+ auto table = result.value();
+ REQUIRE(table->num_rows() == 100);
+ std::cout << table->num_columns() << std::endl;
+ REQUIRE(table->num_columns() == 2);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ REQUIRE(table->GetColumnByName(vertex_property_name) != nullptr);
+
+ // seek
+ REQUIRE(reader->seek(100).ok());
+ result = reader->GetChunk(graphar::GetChunkVersion::V2);
+ REQUIRE(!result.has_error());
+ table = result.value();
+ REQUIRE(table->num_rows() == 100);
+ REQUIRE(table->num_columns() == 2);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ REQUIRE(table->GetColumnByName(vertex_property_name) != nullptr);
+ REQUIRE(reader->next_chunk().ok());
+ result = reader->GetChunk(graphar::GetChunkVersion::V2);
+ REQUIRE(!result.has_error());
+ table = result.value();
+ REQUIRE(table->num_rows() == 100);
+ REQUIRE(table->num_columns() == 2);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ REQUIRE(table->GetColumnByName(vertex_property_name) != nullptr);
+ REQUIRE(reader->seek(900).ok());
+ result = reader->GetChunk(graphar::GetChunkVersion::V2);
+ REQUIRE(!result.has_error());
+ table = result.value();
+ REQUIRE(table->num_rows() == 3);
+ REQUIRE(table->num_columns() == 2);
+ REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
+ nullptr);
+ REQUIRE(table->GetColumnByName(vertex_property_name) != nullptr);
+ REQUIRE(reader->GetChunkNum() == 10);
+ REQUIRE(reader->next_chunk().IsIndexError());
+
+ REQUIRE(reader->seek(1024).IsIndexError());
+ }
+ SECTION("properties don't in this same propertyGroup") {
+ std::cout << "properties don't in this same propertyGroup:\n";
+
+ std::vector<std::string> select_col = {"id", "gender"};
+ auto maybe_reader = VertexPropertyArrowChunkReader::Make(
+ graph_info, src_type, select_col, SelectType::PROPERTIES);
+ REQUIRE(maybe_reader.error().IsInvalid());
+ std::cerr << maybe_reader.error().message() << std::endl;
+ }
+
+ SECTION("PropertyPushDown") {
+ std::string filter_property = "gender";
+ auto filter = _Equal(_Property(filter_property), _Literal("female"));
+ std::vector<std::string> expected_cols;
+ expected_cols.push_back("firstName");
+ expected_cols.push_back("lastName");
+
+ SECTION("pushdown column not all in select columns (V1)") {
+ std::vector<std::string> select_col = {"firstName"};
+ std::cout << "pushdown column not all in select columns:\n";
+ std::vector<std::string> expected_cols = {"firstName", "gender"};
+ util::FilterOptions options;
+ options.columns = expected_cols;
+ auto maybe_reader = VertexPropertyArrowChunkReader::Make(
+ graph_info, src_type, select_col, SelectType::PROPERTIES, options);
+ REQUIRE(maybe_reader.status().ok());
+ auto reader = maybe_reader.value();
+ auto result = reader->GetChunk(graphar::GetChunkVersion::V1);
+ REQUIRE(result.error().IsInvalid());
+ std::cerr << result.error().message() << std::endl;
+ }
+
+ SECTION("pushdown column not in select columns (V2)") {
+ std::vector<std::string> select_col = {"lastName", "gender"};
+ std::cout << "pushdown column not in select columns:\n";
+ std::vector<std::string> expected_cols = {"firstName"};
+ util::FilterOptions options;
+ options.columns = expected_cols;
+ auto maybe_reader = VertexPropertyArrowChunkReader::Make(
+ graph_info, src_type, select_col, SelectType::PROPERTIES, options);
+ REQUIRE(maybe_reader.status().ok());
+ auto reader = maybe_reader.value();
+ auto result = reader->GetChunk(graphar::GetChunkVersion::V2);
+ REQUIRE(result.error().IsInvalid());
+ std::cerr << result.error().message() << std::endl;
+ }
+ }
+ }
+
SECTION("AdjListArrowChunkReader") {
auto maybe_reader =
AdjListArrowChunkReader::Make(graph_info, src_type, edge_type,
dst_type,
@@ -511,14 +855,14 @@ TEST_CASE_METHOD(GlobalFixture, "JSON_TEST") {
REQUIRE(e_pg != nullptr);
SECTION("VertexPropertyArrowChunkReader") {
- auto maybe_reader = VertexPropertyArrowChunkReader::Make(
- graph_info, src_type, vertex_property_name);
+ auto maybe_reader =
+ VertexPropertyArrowChunkReader::Make(graph_info, src_type, v_pg);
REQUIRE(maybe_reader.status().ok());
auto reader = maybe_reader.value();
REQUIRE(reader->GetChunkNum() == 10);
SECTION("Basics") {
- auto result = reader->GetChunk();
+ auto result = reader->GetChunk(graphar::GetChunkVersion::V1);
REQUIRE(!result.has_error());
auto table = result.value();
REQUIRE(table->num_rows() == 100);
@@ -527,21 +871,21 @@ TEST_CASE_METHOD(GlobalFixture, "JSON_TEST") {
// seek
REQUIRE(reader->seek(100).ok());
- result = reader->GetChunk();
+ result = reader->GetChunk(graphar::GetChunkVersion::V1);
REQUIRE(!result.has_error());
table = result.value();
REQUIRE(table->num_rows() == 100);
REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
nullptr);
REQUIRE(reader->next_chunk().ok());
- result = reader->GetChunk();
+ result = reader->GetChunk(graphar::GetChunkVersion::V1);
REQUIRE(!result.has_error());
table = result.value();
REQUIRE(table->num_rows() == 100);
REQUIRE(table->GetColumnByName(GeneralParams::kVertexIndexCol) !=
nullptr);
REQUIRE(reader->seek(900).ok());
- result = reader->GetChunk();
+ result = reader->GetChunk(graphar::GetChunkVersion::V1);
REQUIRE(!result.has_error());
table = result.value();
REQUIRE(table->num_rows() == 3);
diff --git a/cpp/test/test_multi_label.cc b/cpp/test/test_multi_label.cc
index 0f02299d..a77b9ff0 100644
--- a/cpp/test/test_multi_label.cc
+++ b/cpp/test/test_multi_label.cc
@@ -24,6 +24,7 @@
#include "arrow/csv/api.h"
#include "arrow/filesystem/api.h"
#include "arrow/io/api.h"
+#include "graphar/fwd.h"
#include "parquet/arrow/writer.h"
#include "./util.h"
@@ -85,8 +86,8 @@ TEST_CASE_METHOD(GlobalFixture, "test_multi_label_builder") {
REQUIRE(writer->WriteVerticesNum(table->num_rows()).ok());
// read label chunk as arrow table
- auto maybe_reader =
- VertexPropertyArrowChunkReader::Make(graph_info, "organisation", labels);
+ auto maybe_reader = VertexPropertyArrowChunkReader::Make(
+ graph_info, "organisation", labels, SelectType::LABELS);
assert(maybe_reader.status().ok());
auto reader = maybe_reader.value();
assert(reader->seek(0).ok());
diff --git a/docs/libraries/cpp/getting-started.md b/docs/libraries/cpp/getting-started.md
index d1458dff..a9b9fa03 100644
--- a/docs/libraries/cpp/getting-started.md
+++ b/docs/libraries/cpp/getting-started.md
@@ -351,7 +351,7 @@ You can export label table to disk in parquet format, and read it back into memo
// read parquet chunk as arrow table
auto maybe_reader =
- VertexPropertyArrowChunkReader::Make(graph_info, "organisation", labels);
+ VertexPropertyArrowChunkReader::MakeForLabels(graph_info, "organisation", labels, SelectType::LABELS);
assert(maybe_reader.status().ok());
auto reader = maybe_reader.value();
assert(reader->seek(0).ok());
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]