This is an automated email from the ASF dual-hosted git repository.

bchapuis pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-baremaps.git


The following commit(s) were added to refs/heads/main by this push:
     new f2fb634fb Implement a geoparquet writer (#899)
f2fb634fb is described below

commit f2fb634fb0a7f1c5398d6c7df9b0b9f2b938e8f4
Author: Bertil Chapuis <[email protected]>
AuthorDate: Tue Nov 5 21:54:41 2024 +0100

    Implement a geoparquet writer (#899)
---
 .../geoparquet/OvertureMapsBenchmark.java          |   6 +-
 .../geoparquet/SmallFileBenchmark.java             |   7 +-
 .../storage/geoparquet/GeoParquetDataTable.java    |   3 +-
 .../baremaps/geoparquet/GeoParquetGroup.java       |   2 +-
 .../baremaps/geoparquet/GeoParquetReader.java      |  26 ++--
 .../geoparquet/GeoParquetWriteSupport.java         | 138 ++++++++++++++++++
 .../baremaps/geoparquet/GeoParquetWriter.java      |  98 +++++++++++++
 .../baremaps/geoparquet/GeoParquetReaderTest.java  |  12 +-
 .../baremaps/geoparquet/GeoParquetWriterTest.java  | 154 +++++++++++++++++++++
 .../baremaps/geoparquet/OvertureMapsTest.java      |  32 ++---
 10 files changed, 429 insertions(+), 49 deletions(-)

diff --git a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/OvertureMapsBenchmark.java b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/OvertureMapsBenchmark.java
index a230e41da..59da4d153 100644
--- a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/OvertureMapsBenchmark.java
+++ b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/OvertureMapsBenchmark.java
@@ -83,14 +83,16 @@ public class OvertureMapsBenchmark {
   @SuppressWarnings({"squid:S1481", "squid:S2201"})
   @Benchmark
   public void read() {
-    GeoParquetReader reader = new GeoParquetReader(directory.toUri());
+    var path = new org.apache.hadoop.fs.Path(directory.toUri());
+    GeoParquetReader reader = new GeoParquetReader(path);
     reader.read().count();
   }
 
   @SuppressWarnings({"squid:S1481", "squid:S2201"})
   @Benchmark
   public void readParallel() {
-    GeoParquetReader reader = new GeoParquetReader(directory.toUri());
+    var path = new org.apache.hadoop.fs.Path(directory.toUri());
+    GeoParquetReader reader = new GeoParquetReader(path);
     reader.readParallel().count();
   }
 }
diff --git a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/SmallFileBenchmark.java b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/SmallFileBenchmark.java
index 11f468f00..1ae2a7476 100644
--- a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/SmallFileBenchmark.java
+++ b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/SmallFileBenchmark.java
@@ -61,16 +61,17 @@ public class SmallFileBenchmark {
   @SuppressWarnings({"squid:S1481", "squid:S2201"})
   @Benchmark
   public void read() {
-    GeoParquetReader reader =
-        new GeoParquetReader(Path.of("baremaps-benchmarking/data/small/*.parquet").toUri());
+    var path = new org.apache.hadoop.fs.Path("baremaps-benchmarking/data/small/*.parquet");
+    GeoParquetReader reader = new GeoParquetReader(path);
     reader.read().count();
   }
 
   @SuppressWarnings({"squid:S1481", "squid:S2201"})
   @Benchmark
   public void readParallel() {
+    var path = new org.apache.hadoop.fs.Path("baremaps-benchmarking/data/small/*.parquet");
     GeoParquetReader reader =
-        new GeoParquetReader(Path.of("baremaps-benchmarking/data/small/*.parquet").toUri());
+        new GeoParquetReader(path);
     reader.readParallel().count();
   }
 }
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetDataTable.java b/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetDataTable.java
index 90c78d1a7..b3867cfa6 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetDataTable.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetDataTable.java
@@ -24,6 +24,7 @@ import java.util.stream.Stream;
 import org.apache.baremaps.data.storage.*;
 import org.apache.baremaps.geoparquet.GeoParquetException;
 import org.apache.baremaps.geoparquet.GeoParquetReader;
+import org.apache.hadoop.fs.Path;
 
 public class GeoParquetDataTable implements DataTable {
 
@@ -35,7 +36,7 @@ public class GeoParquetDataTable implements DataTable {
 
   public GeoParquetDataTable(URI path) {
     this.path = path;
-    this.reader = new GeoParquetReader(path);
+    this.reader = new GeoParquetReader(new Path(path));
   }
 
   @Override
diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroup.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroup.java
index 38c6dd746..e524a229f 100644
--- a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroup.java
+++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroup.java
@@ -105,7 +105,7 @@ public class GeoParquetGroup {
     }
   }
 
-  private Object getValue(int fieldIndex, int index) {
+  Object getValue(int fieldIndex, int index) {
     Object value = data[fieldIndex];
     if (value instanceof List<?>list) {
       return list.get(index);
diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java
index 8b4697aa1..e965d250a 100644
--- a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java
+++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java
@@ -20,7 +20,6 @@ package org.apache.baremaps.geoparquet;
 import com.fasterxml.jackson.databind.DeserializationFeature;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import java.io.IOException;
-import java.net.URI;
 import java.util.*;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.stream.Collectors;
@@ -53,31 +52,31 @@ public class GeoParquetReader {
   /**
    * Constructs a new {@code GeoParquetReader}.
    *
-   * @param uri the URI to read from
+   * @param path the path to read from
    */
-  public GeoParquetReader(URI uri) {
-    this(uri, null, new Configuration());
+  public GeoParquetReader(Path path) {
+    this(path, null, new Configuration());
   }
 
   /**
    * Constructs a new {@code GeoParquetReader}.
    *
-   * @param uri the URI to read from
+   * @param path the path to read from
    * @param envelope the envelope to filter records
    */
-  public GeoParquetReader(URI uri, Envelope envelope) {
-    this(uri, envelope, new Configuration());
+  public GeoParquetReader(Path path, Envelope envelope) {
+    this(path, envelope, new Configuration());
   }
 
   /**
    * Constructs a new {@code GeoParquetReader}.
    *
-   * @param uri the URI to read from
+   * @param path the path to read from
    * @param configuration the configuration
    */
-  public GeoParquetReader(URI uri, Envelope envelope, Configuration configuration) {
+  public GeoParquetReader(Path path, Envelope envelope, Configuration configuration) {
     this.configuration = configuration;
-    this.files = initializeFiles(uri, configuration);
+    this.files = initializeFiles(path, configuration);
     this.envelope = envelope;
   }
 
@@ -168,11 +167,10 @@ public class GeoParquetReader {
     }
   }
 
-  private static List<FileStatus> initializeFiles(URI uri, Configuration configuration) {
+  private static List<FileStatus> initializeFiles(Path path, Configuration configuration) {
     try {
-      Path globPath = new Path(uri.getPath());
-      FileSystem fileSystem = FileSystem.get(uri, configuration);
-      FileStatus[] fileStatuses = fileSystem.globStatus(globPath);
+      FileSystem fileSystem = FileSystem.get(path.toUri(), configuration);
+      FileStatus[] fileStatuses = fileSystem.globStatus(path);
       if (fileStatuses == null) {
         throw new GeoParquetException("No files found at the specified URI.");
       }
diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriteSupport.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriteSupport.java
new file mode 100644
index 000000000..b6be38165
--- /dev/null
+++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriteSupport.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.baremaps.geoparquet;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.parquet.hadoop.api.WriteSupport;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.RecordConsumer;
+import org.apache.parquet.schema.*;
+
+/**
+ * WriteSupport implementation for writing GeoParquetGroup instances to Parquet.
+ */
+public class GeoParquetWriteSupport extends WriteSupport<GeoParquetGroup> {
+
+  private Configuration configuration;
+  private final MessageType schema;
+  private final GeoParquetMetadata metadata;
+  private RecordConsumer recordConsumer;
+  private final ObjectMapper objectMapper = new ObjectMapper();
+
+  /**
+   * Constructs a new GeoParquetWriteSupport.
+   *
+   * @param schema the Parquet schema
+   * @param metadata the GeoParquet metadata
+   */
+  public GeoParquetWriteSupport(MessageType schema, GeoParquetMetadata metadata) {
+    this.schema = schema;
+    this.metadata = metadata;
+  }
+
+  @Override
+  public WriteContext init(Configuration configuration) {
+    Map<String, String> extraMetadata = new HashMap<>();
+    String geoMetadataJson = serializeMetadata(metadata);
+    extraMetadata.put("geo", geoMetadataJson);
+    return new WriteContext(schema, extraMetadata);
+  }
+
+  @Override
+  public void prepareForWrite(RecordConsumer recordConsumer) {
+    this.recordConsumer = recordConsumer;
+  }
+
+  @Override
+  public void write(GeoParquetGroup group) {
+    recordConsumer.startMessage();
+    writeGroup(group, schema, true);
+    recordConsumer.endMessage();
+  }
+
+  private void writeGroup(GeoParquetGroup group, GroupType groupType, boolean isRoot) {
+    if (!isRoot) {
+      recordConsumer.startGroup();
+    }
+    for (int i = 0; i < groupType.getFieldCount(); i++) {
+      Type fieldType = groupType.getType(i);
+      String fieldName = fieldType.getName();
+      int repetitionCount = group.getFieldRepetitionCount(i);
+      if (repetitionCount == 0) {
+        continue; // Skip if no values are present
+      }
+      for (int j = 0; j < repetitionCount; j++) {
+        recordConsumer.startField(fieldName, i);
+        if (fieldType.isPrimitive()) {
+          Object value = group.getValue(i, j);
+          writePrimitive(value, fieldType.asPrimitiveType());
+        } else {
+          GeoParquetGroup childGroup = group.getGroup(i, j);
+          writeGroup(childGroup, fieldType.asGroupType(), false);
+        }
+        recordConsumer.endField(fieldName, i);
+      }
+    }
+    if (!isRoot) {
+      recordConsumer.endGroup();
+    }
+  }
+
+  private void writePrimitive(Object value, PrimitiveType primitiveType) {
+    if (value == null) {
+      // The Parquet format does not support writing null values directly.
+      // If the field is optional and the value is null, we simply do not write it.
+      return;
+    }
+    switch (primitiveType.getPrimitiveTypeName()) {
+      case INT32:
+        recordConsumer.addInteger((Integer) value);
+        break;
+      case INT64:
+        recordConsumer.addLong((Long) value);
+        break;
+      case FLOAT:
+        recordConsumer.addFloat((Float) value);
+        break;
+      case DOUBLE:
+        recordConsumer.addDouble((Double) value);
+        break;
+      case BOOLEAN:
+        recordConsumer.addBoolean((Boolean) value);
+        break;
+      case BINARY, FIXED_LEN_BYTE_ARRAY:
+        recordConsumer.addBinary((Binary) value);
+        break;
+      default:
+        throw new GeoParquetException(
+            "Unsupported type: " + primitiveType.getPrimitiveTypeName());
+    }
+  }
+
+  private String serializeMetadata(GeoParquetMetadata metadata) {
+    try {
+      return objectMapper.writeValueAsString(metadata);
+    } catch (JsonProcessingException e) {
+      throw new GeoParquetException("Failed to serialize GeoParquet metadata", e);
+    }
+  }
+}
diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriter.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriter.java
new file mode 100644
index 000000000..e2e4292a8
--- /dev/null
+++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriter.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.baremaps.geoparquet;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.conf.ParquetConfiguration;
+import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.parquet.hadoop.api.WriteSupport;
+import org.apache.parquet.schema.MessageType;
+
+/**
+ * A writer for GeoParquet files that writes GeoParquetGroup instances to a Parquet file.
+ */
+public class GeoParquetWriter {
+
+  private GeoParquetWriter() {
+    // Prevent instantiation
+  }
+
+  public static Builder builder(Path file) {
+    return new Builder(file);
+  }
+
+  public static class Builder
+      extends ParquetWriter.Builder<GeoParquetGroup, GeoParquetWriter.Builder> {
+
+    private MessageType type = null;
+
+    private GeoParquetMetadata metadata = null;
+
+    private Builder(Path file) {
+      super(file);
+    }
+
+    /**
+     * Replace the message type with the specified one.
+     *
+     * @param type the message type
+     * @return the builder
+     */
+    public GeoParquetWriter.Builder withType(MessageType type) {
+      this.type = type;
+      return this;
+    }
+
+    /**
+     * Replace the metadata with the specified one.
+     *
+     * @param metadata the metadata
+     * @return the builder
+     */
+    public GeoParquetWriter.Builder withGeoParquetMetadata(GeoParquetMetadata metadata) {
+      this.metadata = metadata;
+      return this;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    protected WriteSupport<GeoParquetGroup> getWriteSupport(Configuration conf) {
+      // We don't need access to the hadoop configuration for now
+      return getWriteSupport((ParquetConfiguration) null);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    protected WriteSupport<GeoParquetGroup> getWriteSupport(ParquetConfiguration conf) {
+      return new GeoParquetWriteSupport(type, metadata);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    protected GeoParquetWriter.Builder self() {
+      return this;
+    }
+  }
+}
diff --git a/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetReaderTest.java b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetReaderTest.java
index 849720a3e..b7b73a9d5 100644
--- a/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetReaderTest.java
+++ b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetReaderTest.java
@@ -19,8 +19,8 @@ package org.apache.baremaps.geoparquet;
 
 import static org.junit.jupiter.api.Assertions.*;
 
-import java.net.URI;
 import org.apache.baremaps.testing.TestFiles;
+import org.apache.hadoop.fs.Path;
 import org.junit.jupiter.api.Test;
 import org.locationtech.jts.geom.Envelope;
 
@@ -28,14 +28,14 @@ class GeoParquetReaderTest {
 
   @Test
   void read() {
-    URI geoParquet = TestFiles.GEOPARQUET.toUri();
+    Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri());
     GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet);
     assertEquals(5, geoParquetReader.read().count());
   }
 
   @Test
   void readFiltered() {
-    URI geoParquet = TestFiles.GEOPARQUET.toUri();
+    Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri());
     GeoParquetReader geoParquetReader =
         new GeoParquetReader(geoParquet, new Envelope(-172, -65, 18, 72));
     assertEquals(1, geoParquetReader.read().count());
@@ -43,21 +43,21 @@ class GeoParquetReaderTest {
 
   @Test
   void size() {
-    URI geoParquet = TestFiles.GEOPARQUET.toUri();
+    Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri());
     GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet);
     assertEquals(5, geoParquetReader.size());
   }
 
   @Test
   void count() {
-    URI geoParquet = TestFiles.GEOPARQUET.toUri();
+    Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri());
     GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet);
     assertEquals(5, geoParquetReader.read().count());
   }
 
   @Test
   void validateSchemas() {
-    URI geoParquet = TestFiles.GEOPARQUET.toUri();
+    Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri());
     GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet);
     assertTrue(geoParquetReader.validateSchemasAreIdentical());
   }
diff --git a/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetWriterTest.java b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetWriterTest.java
new file mode 100644
index 000000000..f1c577fd7
--- /dev/null
+++ b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetWriterTest.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.baremaps.geoparquet;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import org.apache.baremaps.geoparquet.GeoParquetMetadata.Column;
+import org.apache.baremaps.testing.TestFiles;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Types;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.locationtech.jts.geom.Coordinate;
+import org.locationtech.jts.geom.GeometryFactory;
+import org.locationtech.jts.geom.Point;
+
+class GeoParquetWriterTest {
+
+  @Test
+  @Tag("integration")
+  void testWriteAndReadGeoParquet() throws IOException {
+    // Create the output file
+    Configuration conf = new Configuration();
+    Path outputPath = new Path("target/test-output/geoparquet-test.parquet");
+
+    try {
+      // Define the Parquet schema
+      MessageType type = Types.buildMessage()
+          .required(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named("name")
+          .required(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named("city")
+          .optional(PrimitiveTypeName.BINARY).named("geometry")
+          .named("GeoParquetSchema");
+
+      // Create GeoParquet metadata
+      Map<String, Column> columns = new HashMap<>();
+      columns.put("geometry", new GeoParquetMetadata.Column(
+          "WKB",
+          List.of("Point"),
+          null,
+          null,
+          null,
+          null));
+
+      GeoParquetMetadata metadata = new GeoParquetMetadata(
+          "1.0",
+          "geometry",
+          columns,
+          null,
+          null,
+          null,
+          null,
+          null,
+          null,
+          null);
+
+      // Create a Point geometry
+      GeometryFactory geometryFactory = new GeometryFactory();
+      Point point = geometryFactory.createPoint(new Coordinate(1.0, 2.0));
+
+      // Create the GeoParquetWriter
+      try (ParquetWriter<GeoParquetGroup> writer = GeoParquetWriter.builder(outputPath)
+          .withType(type)
+          .withGeoParquetMetadata(metadata)
+          .build()) {
+
+        // Create a GeoParquetGroup and write it
+        GeoParquetSchema geoParquetSchema =
+            GeoParquetGroupFactory.createGeoParquetSchema(type, metadata);
+        GeoParquetGroup group =
+            new GeoParquetGroup(type.asGroupType(), metadata, geoParquetSchema);
+        group.add("name", "Test Point");
+        group.add("city", "Test City");
+        group.add("geometry", point);
+
+        // Write the group
+        writer.write(group);
+      }
+
+      // Now read back the file using GeoParquetReader
+      GeoParquetReader reader = new GeoParquetReader(outputPath, null, conf);
+      GeoParquetGroup readGroup = reader.read().findFirst().orElse(null);
+
+      assertNotNull(readGroup, "Read group should not be null");
+
+      // Verify the data
+      assertEquals("Test Point", readGroup.getStringValue("name"));
+      assertEquals("Test City", readGroup.getStringValue("city"));
+
+      Point readPoint = (Point) readGroup.getGeometryValue("geometry");
+      assertEquals(point.getX(), readPoint.getX(), 0.0001);
+      assertEquals(point.getY(), readPoint.getY(), 0.0001);
+    } finally {
+      outputPath.getFileSystem(conf).delete(outputPath, false);
+    }
+  }
+
+  @Test
+  @Tag("integration")
+  void copyGeoParquetData() throws IOException {
+    Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri());
+
+    Configuration conf = new Configuration();
+    Path outputPath = new Path("target/test-output/geoparquet-copy.parquet");
+
+    try {
+      // Write the GeoParquet file
+      GeoParquetReader reader = new GeoParquetReader(geoParquet, null, conf);
+      GeoParquetWriter.Builder builder = GeoParquetWriter.builder(outputPath);
+      ParquetWriter<GeoParquetGroup> writer = builder.withType(reader.getParquetSchema())
+          .withGeoParquetMetadata(reader.getGeoParquetMetadata()).build();
+      Iterator<GeoParquetGroup> iterator = reader.read().iterator();
+      while (iterator.hasNext()) {
+        writer.write(iterator.next());
+      }
+      writer.close();
+
+      // Read the copied file
+      GeoParquetReader copiedReader = new GeoParquetReader(outputPath, null, conf);
+      assertEquals(5, copiedReader.read().count());
+    } finally {
+      outputPath.getFileSystem(conf).delete(outputPath, false);
+    }
+
+
+
+  }
+
+}
diff --git a/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/OvertureMapsTest.java b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/OvertureMapsTest.java
index 0fb5b9205..206aa9f19 100644
--- a/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/OvertureMapsTest.java
+++ b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/OvertureMapsTest.java
@@ -20,8 +20,7 @@ package org.apache.baremaps.geoparquet;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
-import java.net.URI;
-import java.net.URISyntaxException;
+import org.apache.hadoop.fs.Path;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 import org.locationtech.jts.geom.Envelope;
@@ -30,9 +29,9 @@ class OvertureMapsTest {
 
   @Disabled("Requires access to the Internet")
   @Test
-  void countAddresses() throws URISyntaxException {
-    URI geoParquet = new URI(
-        "s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet");
+  void countAddressesInSwitzerland() {
+    Path geoParquet =
+        new Path("s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet");
     Envelope switzerland = new Envelope(6.02260949059, 10.4427014502, 45.7769477403, 47.8308275417);
     GeoParquetReader geoParquetReader =
         new GeoParquetReader(geoParquet, switzerland, OvertureMaps.configuration());
@@ -41,20 +40,9 @@ class OvertureMapsTest {
 
   @Disabled("Requires access to the Internet")
   @Test
-  void countAddressesInSwitzerland() throws URISyntaxException {
-    URI geoParquet = new URI(
-        "s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet");
-    Envelope switzerland = new Envelope(6.02260949059, 10.4427014502, 45.7769477403, 47.8308275417);
-    GeoParquetReader geoParquetReader =
-        new GeoParquetReader(geoParquet, switzerland, OvertureMaps.configuration());
-    assertEquals(10397434, geoParquetReader.readParallel().count());
-  }
-
-  @Disabled("Requires access to the Internet")
-  @Test
-  void validateSchemas() throws URISyntaxException {
-    URI geoParquet = new URI(
-        "s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet");
+  void validateSchemas() {
+    Path geoParquet =
+        new Path("s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet");
     GeoParquetReader geoParquetReader =
         new GeoParquetReader(geoParquet, null, OvertureMaps.configuration());
     assertTrue(geoParquetReader.validateSchemasAreIdentical(), "Schemas are identical");
@@ -62,9 +50,9 @@ class OvertureMapsTest {
 
   @Disabled("Requires access to the Internet")
   @Test
-  void size() throws URISyntaxException {
-    URI geoParquet = new URI(
-        "s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet");
+  void size() {
+    Path geoParquet =
+        new Path("s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet");
     GeoParquetReader geoParquetReader =
         new GeoParquetReader(geoParquet, null, OvertureMaps.configuration());
     assertEquals(213535887L, geoParquetReader.size());

Reply via email to