This is an automated email from the ASF dual-hosted git repository.

bchapuis pushed a commit to branch 849-benchmarking
in repository https://gitbox.apache.org/repos/asf/incubator-baremaps.git


The following commit(s) were added to refs/heads/849-benchmarking by this push:
     new f638b9f8 Compare parallel and sequential spliterator
f638b9f8 is described below

commit f638b9f80db97043fe143bdda95df33c3da23c23
Author: Bertil Chapuis <[email protected]>
AuthorDate: Wed Jun 12 18:03:25 2024 +0200

    Compare parallel and sequential spliterator
---
 .../baremaps/benchmarking/SmallFileBenchmark.java       |  8 +++++++-
 .../apache/baremaps/geoparquet/GeoParquetReader.java    | 11 ++++++++++-
 .../geoparquet/GeoParquetSequentialSpliterator.java     | 17 +++++++++--------
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/SmallFileBenchmark.java b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/SmallFileBenchmark.java
index a9038d5f..546eafd2 100644
--- a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/SmallFileBenchmark.java
+++ b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/SmallFileBenchmark.java
@@ -38,7 +38,7 @@ public class SmallFileBenchmark {
   private Path source = Path.of("baremaps-testing/data/samples/example.parquet").toAbsolutePath();
   private Path directory = Path.of("baremaps-benchmarking/small").toAbsolutePath();
 
-  public static void main(String[] args) throws RunnerException, IOException {
+  public static void main(String[] args) throws RunnerException {
     Options opt = new OptionsBuilder()
         .include(SmallFileBenchmark.class.getSimpleName())
         .forks(1)
@@ -59,6 +59,12 @@ public class SmallFileBenchmark {
 
   @Benchmark
   public void read() {
+    GeoParquetReader reader = new GeoParquetReader(directory.toUri());
+    reader.read().count();
+  }
+
+  @Benchmark
+  public void readParallel() {
     GeoParquetReader reader = new GeoParquetReader(directory.toUri());
     reader.readParallel().count();
   }
diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java
index 03a07e68..203d5211 100644
--- a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java
+++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java
@@ -124,6 +124,15 @@ public class GeoParquetReader {
     return files;
   }
 
+  private FileStatus file() {
+    try {
+      FileSystem fileSystem = FileSystem.get(uri, configuration);
+      return fileSystem.getFileStatus(new Path(uri));
+    } catch (IOException e) {
+      throw new GeoParquetException("IOException while attempting to list files.", e);
+    }
+  }
+
   private FileInfo buildFileInfo(FileStatus file) throws IOException {
     long recordCount;
     MessageType messageType;
@@ -156,7 +165,7 @@ public class GeoParquetReader {
   }
 
   public Stream<GeoParquetGroup> read() {
-    return retrieveGeoParquetGroups(false);
+    return StreamSupport.stream(new GeoParquetSequentialSpliterator(file(), configuration), false);
   }
 
   private static Configuration createConfiguration() {
diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetSequentialSpliterator.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetSequentialSpliterator.java
index 02af24ca..aa7187f1 100644
--- a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetSequentialSpliterator.java
+++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetSequentialSpliterator.java
@@ -71,14 +71,15 @@ public class GeoParquetSequentialSpliterator implements Spliterator<GeoParquetGr
 
   @Override
   public Spliterator<GeoParquetGroup> trySplit() {
-    List<GeoParquetGroup> batch = new ArrayList<>();
-    while (batch.size() < 1_000 && tryAdvance(batch::add)) {
-    }
-    if (!batch.isEmpty()) {
-      return Spliterators.spliterator(batch, characteristics() | SIZED);
-    } else {
-      return null;
-    }
+    return null;
+//    List<GeoParquetGroup> batch = new ArrayList<>();
+//    while (batch.size() < 1_000 && tryAdvance(batch::add)) {
+//    }
+//    if (!batch.isEmpty()) {
+//      return Spliterators.spliterator(batch, characteristics() | SIZED);
+//    } else {
+//      return null;
+//    }
   }
 
   @Override

Reply via email to