This is an automated email from the ASF dual-hosted git repository.
bchapuis pushed a commit to branch 849-benchmarking
in repository https://gitbox.apache.org/repos/asf/incubator-baremaps.git
The following commit(s) were added to refs/heads/849-benchmarking by this push:
new f638b9f8 Compare parallel and sequential spliterator
f638b9f8 is described below
commit f638b9f80db97043fe143bdda95df33c3da23c23
Author: Bertil Chapuis <[email protected]>
AuthorDate: Wed Jun 12 18:03:25 2024 +0200
Compare parallel and sequential spliterator
---
.../baremaps/benchmarking/SmallFileBenchmark.java | 8 +++++++-
.../apache/baremaps/geoparquet/GeoParquetReader.java | 11 ++++++++++-
.../geoparquet/GeoParquetSequentialSpliterator.java | 17 +++++++++--------
3 files changed, 26 insertions(+), 10 deletions(-)
diff --git a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/SmallFileBenchmark.java b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/SmallFileBenchmark.java
index a9038d5f..546eafd2 100644
--- a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/SmallFileBenchmark.java
+++ b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/SmallFileBenchmark.java
@@ -38,7 +38,7 @@ public class SmallFileBenchmark {
private Path source =
Path.of("baremaps-testing/data/samples/example.parquet").toAbsolutePath();
private Path directory =
Path.of("baremaps-benchmarking/small").toAbsolutePath();
- public static void main(String[] args) throws RunnerException, IOException {
+ public static void main(String[] args) throws RunnerException {
Options opt = new OptionsBuilder()
.include(SmallFileBenchmark.class.getSimpleName())
.forks(1)
@@ -59,6 +59,12 @@ public class SmallFileBenchmark {
@Benchmark
public void read() {
+ GeoParquetReader reader = new GeoParquetReader(directory.toUri());
+ reader.read().count();
+ }
+
+ @Benchmark
+ public void readParallel() {
GeoParquetReader reader = new GeoParquetReader(directory.toUri());
reader.readParallel().count();
}
diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java
index 03a07e68..203d5211 100644
--- a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java
+++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java
@@ -124,6 +124,15 @@ public class GeoParquetReader {
return files;
}
+ private FileStatus file() {
+ try {
+ FileSystem fileSystem = FileSystem.get(uri, configuration);
+ return fileSystem.getFileStatus(new Path(uri));
+ } catch (IOException e) {
+ throw new GeoParquetException("IOException while attempting to list files.", e);
+ }
+ }
+
private FileInfo buildFileInfo(FileStatus file) throws IOException {
long recordCount;
MessageType messageType;
@@ -156,7 +165,7 @@ public class GeoParquetReader {
}
public Stream<GeoParquetGroup> read() {
- return retrieveGeoParquetGroups(false);
+ return StreamSupport.stream(new GeoParquetSequentialSpliterator(file(), configuration), false);
}
private static Configuration createConfiguration() {
diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetSequentialSpliterator.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetSequentialSpliterator.java
index 02af24ca..aa7187f1 100644
--- a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetSequentialSpliterator.java
+++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetSequentialSpliterator.java
@@ -71,14 +71,15 @@ public class GeoParquetSequentialSpliterator implements Spliterator<GeoParquetGr
@Override
public Spliterator<GeoParquetGroup> trySplit() {
- List<GeoParquetGroup> batch = new ArrayList<>();
- while (batch.size() < 1_000 && tryAdvance(batch::add)) {
- }
- if (!batch.isEmpty()) {
- return Spliterators.spliterator(batch, characteristics() | SIZED);
- } else {
- return null;
- }
+ return null;
+// List<GeoParquetGroup> batch = new ArrayList<>();
+// while (batch.size() < 1_000 && tryAdvance(batch::add)) {
+// }
+// if (!batch.isEmpty()) {
+// return Spliterators.spliterator(batch, characteristics() | SIZED);
+// } else {
+// return null;
+// }
}
@Override