This is an automated email from the ASF dual-hosted git repository.

bchapuis pushed a commit to branch 849-benchmarking
in repository https://gitbox.apache.org/repos/asf/incubator-baremaps.git


The following commit(s) were added to refs/heads/849-benchmarking by this push:
     new 8e6ec161 Add sequential spliterator
8e6ec161 is described below

commit 8e6ec161178e1d1fd6f5b6de29b3e4209fd812ec
Author: Bertil Chapuis <[email protected]>
AuthorDate: Wed Jun 12 17:57:05 2024 +0200

    Add sequential spliterator
---
 baremaps-benchmarking/pom.xml                      | 112 ++++++++++-----------
 .../benchmarking/GeoParquetReaderBenchmark.java    |  78 +++++++-------
 .../baremaps/benchmarking/SmallFileBenchmark.java  |  72 +++++++------
 .../GeoParquetSequentialSpliterator.java           |  94 +++++++++++++++++
 pom.xml                                            |   2 +-
 5 files changed, 234 insertions(+), 124 deletions(-)

diff --git a/baremaps-benchmarking/pom.xml b/baremaps-benchmarking/pom.xml
index 251b5ea4..ae6708b1 100644
--- a/baremaps-benchmarking/pom.xml
+++ b/baremaps-benchmarking/pom.xml
@@ -1,61 +1,61 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd">
-    <modelVersion>4.0.0</modelVersion>
-    <parent>
-        <groupId>org.apache.baremaps</groupId>
-        <artifactId>baremaps</artifactId>
-        <version>0.7.4-SNAPSHOT</version>
-    </parent>
-    <artifactId>baremaps-benchmarking</artifactId>
-    <properties>
-        <jmh.version>1.37</jmh.version>
-        <maven.deploy.skip>true</maven.deploy.skip>
-    </properties>
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.baremaps</groupId>
+    <artifactId>baremaps</artifactId>
+    <version>0.7.4-SNAPSHOT</version>
+  </parent>
+  <artifactId>baremaps-benchmarking</artifactId>
+  <properties>
+    <jmh.version>1.37</jmh.version>
+    <maven.deploy.skip>true</maven.deploy.skip>
+  </properties>
 
-    <dependencies>
-        <dependency>
-            <groupId>org.apache.baremaps</groupId>
-            <artifactId>baremaps-geoparquet</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.baremaps</groupId>
-            <artifactId>baremaps-testing</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.openjdk.jmh</groupId>
-            <artifactId>jmh-core</artifactId>
-            <version>${jmh.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.openjdk.jmh</groupId>
-            <artifactId>jmh-generator-annprocess</artifactId>
-            <version>${jmh.version}</version>
-        </dependency>
-    </dependencies>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.baremaps</groupId>
+      <artifactId>baremaps-geoparquet</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.baremaps</groupId>
+      <artifactId>baremaps-testing</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.openjdk.jmh</groupId>
+      <artifactId>jmh-core</artifactId>
+      <version>${jmh.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.openjdk.jmh</groupId>
+      <artifactId>jmh-generator-annprocess</artifactId>
+      <version>${jmh.version}</version>
+    </dependency>
+  </dependencies>
 
-    <build>
-        <plugins>
-            <plugin>
-                <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-shade-plugin</artifactId>
-                <version>3.6.0</version>
-                <executions>
-                    <execution>
-                        <goals>
-                            <goal>shade</goal>
-                        </goals>
-                        <phase>package</phase>
-                        <configuration>
-                            <finalName>benchmarks</finalName>
-                            <transformers>
-                                <transformer 
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
-                                    <mainClass>org.openjdk.jmh.Main</mainClass>
-                                </transformer>
-                            </transformers>
-                        </configuration>
-                    </execution>
-                </executions>
-            </plugin>
-        </plugins>
-    </build>
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>3.6.0</version>
+        <executions>
+          <execution>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <phase>package</phase>
+            <configuration>
+              <finalName>benchmarks</finalName>
+              <transformers>
+                <transformer 
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+                  <mainClass>org.openjdk.jmh.Main</mainClass>
+                </transformer>
+              </transformers>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
 </project>
diff --git 
a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/GeoParquetReaderBenchmark.java
 
b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/GeoParquetReaderBenchmark.java
index cd0f15b9..0cb83166 100644
--- 
a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/GeoParquetReaderBenchmark.java
+++ 
b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/GeoParquetReaderBenchmark.java
@@ -40,49 +40,49 @@ import 
software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
 @Measurement(iterations = 1)
 public class GeoParquetReaderBenchmark {
 
-    private static Path directory = Path.of("baremaps-benchmarks/data");
+  private static Path directory = Path.of("baremaps-benchmarks/data");
 
-    public static void main(String[] args) throws RunnerException {
-        Options opt = new OptionsBuilder()
-                .include(GeoParquetReaderBenchmark.class.getSimpleName())
-                .forks(1)
-                .build();
-        new Runner(opt).run();
-    }
+  public static void main(String[] args) throws RunnerException {
+    Options opt = new OptionsBuilder()
+        .include(GeoParquetReaderBenchmark.class.getSimpleName())
+        .forks(1)
+        .build();
+    new Runner(opt).run();
+  }
 
-    @Setup
-    public void setup() throws IOException {
-        if (!Files.exists(directory)) {
-            try (var client = S3Client.builder()
-                    .region(Region.US_EAST_1)
-                    .credentialsProvider(new AnonymousAWSCredentialsProvider())
-                    .build()) {
-                var listRequest = ListObjectsV2Request.builder()
-                        .bucket("overturemaps-us-west-2")
-                        
.prefix("release/2024-03-12-alpha.0/theme=admins/type=locality_area/")
-                        .build();
-                var objects = client.listObjectsV2(listRequest).contents();
-                for (var object : objects) {
-                    var key = object.key();
-                    var name = key.substring(key.lastIndexOf("/") + 1);
-                    var file = directory.resolve(name);
-                    Files.createDirectories(file.getParent());
-                    if (!Files.exists(file)) {
-                        var getRequest = GetObjectRequest.builder()
-                                .bucket("overturemaps-us-west-2")
-                                .key(key)
-                                .build();
-                        client.getObject(getRequest, file);
-                    }
-                }
-            }
+  @Setup
+  public void setup() throws IOException {
+    if (!Files.exists(directory)) {
+      try (var client = S3Client.builder()
+          .region(Region.US_EAST_1)
+          .credentialsProvider(new AnonymousAWSCredentialsProvider())
+          .build()) {
+        var listRequest = ListObjectsV2Request.builder()
+            .bucket("overturemaps-us-west-2")
+            
.prefix("release/2024-03-12-alpha.0/theme=admins/type=locality_area/")
+            .build();
+        var objects = client.listObjectsV2(listRequest).contents();
+        for (var object : objects) {
+          var key = object.key();
+          var name = key.substring(key.lastIndexOf("/") + 1);
+          var file = directory.resolve(name);
+          Files.createDirectories(file.getParent());
+          if (!Files.exists(file)) {
+            var getRequest = GetObjectRequest.builder()
+                .bucket("overturemaps-us-west-2")
+                .key(key)
+                .build();
+            client.getObject(getRequest, file);
+          }
         }
+      }
     }
+  }
 
-    @Benchmark
-    public void read() {
-        GeoParquetReader reader = new GeoParquetReader(directory.toUri());
-        reader.readParallel().count();
-    }
+  @Benchmark
+  public void read() {
+    GeoParquetReader reader = new GeoParquetReader(directory.toUri());
+    reader.readParallel().count();
+  }
 
 }
diff --git 
a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/SmallFileBenchmark.java
 
b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/SmallFileBenchmark.java
index 91a3ac46..a9038d5f 100644
--- 
a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/SmallFileBenchmark.java
+++ 
b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/SmallFileBenchmark.java
@@ -1,5 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.baremaps.benchmarking;
 
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.concurrent.TimeUnit;
 import org.apache.baremaps.geoparquet.GeoParquetReader;
 import org.openjdk.jmh.annotations.*;
 import org.openjdk.jmh.runner.Runner;
@@ -7,11 +28,6 @@ import org.openjdk.jmh.runner.RunnerException;
 import org.openjdk.jmh.runner.options.Options;
 import org.openjdk.jmh.runner.options.OptionsBuilder;
 
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.concurrent.TimeUnit;
-
 @BenchmarkMode(Mode.AverageTime)
 @OutputTimeUnit(TimeUnit.MILLISECONDS)
 @State(Scope.Benchmark)
@@ -19,31 +35,31 @@ import java.util.concurrent.TimeUnit;
 @Measurement(iterations = 1)
 public class SmallFileBenchmark {
 
-    private  Path source = 
Path.of("baremaps-testing/data/samples/example.parquet").toAbsolutePath();
-    private  Path directory = 
Path.of("baremaps-benchmarking/small").toAbsolutePath();
+  private Path source = 
Path.of("baremaps-testing/data/samples/example.parquet").toAbsolutePath();
+  private Path directory = 
Path.of("baremaps-benchmarking/small").toAbsolutePath();
 
-    public static void main(String[] args) throws RunnerException, IOException 
{
-        Options opt = new OptionsBuilder()
-                .include(SmallFileBenchmark.class.getSimpleName())
-                .forks(1)
-                .build();
-        new Runner(opt).run();
-    }
+  public static void main(String[] args) throws RunnerException, IOException {
+    Options opt = new OptionsBuilder()
+        .include(SmallFileBenchmark.class.getSimpleName())
+        .forks(1)
+        .build();
+    new Runner(opt).run();
+  }
 
-    @Setup
-    public void setup() throws IOException {
-        if (!Files.exists(directory)) {
-            for (int i = 0; i < 1000; i++) {
-                Path target = directory.resolve(i + ".parquet");
-                Files.createDirectories(target.getParent());
-                Files.copy(source, target);
-            }
-        }
+  @Setup
+  public void setup() throws IOException {
+    if (!Files.exists(directory)) {
+      for (int i = 0; i < 1000; i++) {
+        Path target = directory.resolve(i + ".parquet");
+        Files.createDirectories(target.getParent());
+        Files.copy(source, target);
+      }
     }
+  }
 
-    @Benchmark
-    public void read() {
-        GeoParquetReader reader = new GeoParquetReader(directory.toUri());
-        reader.readParallel().count();
-    }
+  @Benchmark
+  public void read() {
+    GeoParquetReader reader = new GeoParquetReader(directory.toUri());
+    reader.readParallel().count();
+  }
 }
diff --git 
a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetSequentialSpliterator.java
 
b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetSequentialSpliterator.java
new file mode 100644
index 00000000..02af24ca
--- /dev/null
+++ 
b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetSequentialSpliterator.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.baremaps.geoparquet;
+
+import java.io.IOException;
+import java.util.*;
+import java.util.function.Consumer;
+import org.apache.baremaps.geoparquet.data.GeoParquetGroup;
+import org.apache.baremaps.geoparquet.hadoop.GeoParquetGroupReadSupport;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.parquet.hadoop.ParquetReader;
+
+public class GeoParquetSequentialSpliterator implements 
Spliterator<GeoParquetGroup> {
+
+  private ParquetReader<GeoParquetGroup> reader;
+
+  GeoParquetSequentialSpliterator(FileStatus fileStatus, Configuration 
configuration) {
+    try {
+      this.reader = ParquetReader
+          .builder(new GeoParquetGroupReadSupport(), fileStatus.getPath())
+          .withConf(configuration)
+          .build();
+    } catch (IOException e) {
+      throw new GeoParquetException("Failed to create reader for " + 
fileStatus, e);
+    }
+  }
+
+  @Override
+  public boolean tryAdvance(Consumer<? super GeoParquetGroup> action) {
+    try {
+      // Read the next group
+      GeoParquetGroup group = reader.read();
+
+      // If the group is null, there is nothing left to read: close the reader
+      if (group == null) {
+        reader.close();
+        return false;
+      }
+
+      // Accept the group
+      action.accept(group);
+
+      // tell the caller that there are more groups to read
+      return true;
+
+    } catch (IOException e) {
+      try {
+        reader.close();
+      } catch (IOException e2) {
+        // Ignore the exception as the original exception is more important
+      }
+      throw new GeoParquetException("IOException caught while trying to read 
the next file.", e);
+    }
+  }
+
+  @Override
+  public Spliterator<GeoParquetGroup> trySplit() {
+    List<GeoParquetGroup> batch = new ArrayList<>();
+    while (batch.size() < 1_000 && tryAdvance(batch::add)) {
+    }
+    if (!batch.isEmpty()) {
+      return Spliterators.spliterator(batch, characteristics() | SIZED);
+    } else {
+      return null;
+    }
+  }
+
+  @Override
+  public long estimateSize() {
+    return Long.MAX_VALUE;
+  }
+
+  @Override
+  public int characteristics() {
+    // The spliterator is not ordered, or sorted
+    return NONNULL | DISTINCT | IMMUTABLE | SUBSIZED;
+  }
+}
diff --git a/pom.xml b/pom.xml
index 4f7233f7..3642444b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -49,6 +49,7 @@ limitations under the License.
   </developers>
 
   <modules>
+    <module>baremaps-benchmarking</module>
     <module>baremaps-cli</module>
     <module>baremaps-core</module>
     <module>baremaps-data</module>
@@ -58,7 +59,6 @@ limitations under the License.
     <module>baremaps-pmtiles</module>
     <module>baremaps-server</module>
     <module>baremaps-testing</module>
-      <module>baremaps-benchmarking</module>
   </modules>
 
   <scm>

Reply via email to