This is an automated email from the ASF dual-hosted git repository.
bchapuis pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-baremaps.git
The following commit(s) were added to refs/heads/main by this push:
new f2fb634fb Implement a geoparquet writer (#899)
f2fb634fb is described below
commit f2fb634fb0a7f1c5398d6c7df9b0b9f2b938e8f4
Author: Bertil Chapuis <[email protected]>
AuthorDate: Tue Nov 5 21:54:41 2024 +0100
Implement a geoparquet writer (#899)
---
.../geoparquet/OvertureMapsBenchmark.java | 6 +-
.../geoparquet/SmallFileBenchmark.java | 7 +-
.../storage/geoparquet/GeoParquetDataTable.java | 3 +-
.../baremaps/geoparquet/GeoParquetGroup.java | 2 +-
.../baremaps/geoparquet/GeoParquetReader.java | 26 ++--
.../geoparquet/GeoParquetWriteSupport.java | 138 ++++++++++++++++++
.../baremaps/geoparquet/GeoParquetWriter.java | 98 +++++++++++++
.../baremaps/geoparquet/GeoParquetReaderTest.java | 12 +-
.../baremaps/geoparquet/GeoParquetWriterTest.java | 154 +++++++++++++++++++++
.../baremaps/geoparquet/OvertureMapsTest.java | 32 ++---
10 files changed, 429 insertions(+), 49 deletions(-)
diff --git a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/OvertureMapsBenchmark.java b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/OvertureMapsBenchmark.java
index a230e41da..59da4d153 100644
--- a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/OvertureMapsBenchmark.java
+++ b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/OvertureMapsBenchmark.java
@@ -83,14 +83,16 @@ public class OvertureMapsBenchmark {
@SuppressWarnings({"squid:S1481", "squid:S2201"})
@Benchmark
public void read() {
- GeoParquetReader reader = new GeoParquetReader(directory.toUri());
+ var path = new org.apache.hadoop.fs.Path(directory.toUri());
+ GeoParquetReader reader = new GeoParquetReader(path);
reader.read().count();
}
@SuppressWarnings({"squid:S1481", "squid:S2201"})
@Benchmark
public void readParallel() {
- GeoParquetReader reader = new GeoParquetReader(directory.toUri());
+ var path = new org.apache.hadoop.fs.Path(directory.toUri());
+ GeoParquetReader reader = new GeoParquetReader(path);
reader.readParallel().count();
}
}
diff --git a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/SmallFileBenchmark.java b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/SmallFileBenchmark.java
index 11f468f00..1ae2a7476 100644
--- a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/SmallFileBenchmark.java
+++ b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/SmallFileBenchmark.java
@@ -61,16 +61,17 @@ public class SmallFileBenchmark {
@SuppressWarnings({"squid:S1481", "squid:S2201"})
@Benchmark
public void read() {
- GeoParquetReader reader =
- new GeoParquetReader(Path.of("baremaps-benchmarking/data/small/*.parquet").toUri());
+ var path = new org.apache.hadoop.fs.Path("baremaps-benchmarking/data/small/*.parquet");
+ GeoParquetReader reader = new GeoParquetReader(path);
reader.read().count();
}
@SuppressWarnings({"squid:S1481", "squid:S2201"})
@Benchmark
public void readParallel() {
+ var path = new org.apache.hadoop.fs.Path("baremaps-benchmarking/data/small/*.parquet");
GeoParquetReader reader =
- new GeoParquetReader(Path.of("baremaps-benchmarking/data/small/*.parquet").toUri());
+ new GeoParquetReader(path);
reader.readParallel().count();
}
}
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetDataTable.java b/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetDataTable.java
index 90c78d1a7..b3867cfa6 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetDataTable.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetDataTable.java
@@ -24,6 +24,7 @@ import java.util.stream.Stream;
import org.apache.baremaps.data.storage.*;
import org.apache.baremaps.geoparquet.GeoParquetException;
import org.apache.baremaps.geoparquet.GeoParquetReader;
+import org.apache.hadoop.fs.Path;
public class GeoParquetDataTable implements DataTable {
@@ -35,7 +36,7 @@ public class GeoParquetDataTable implements DataTable {
public GeoParquetDataTable(URI path) {
this.path = path;
- this.reader = new GeoParquetReader(path);
+ this.reader = new GeoParquetReader(new Path(path));
}
@Override
diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroup.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroup.java
index 38c6dd746..e524a229f 100644
--- a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroup.java
+++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroup.java
@@ -105,7 +105,7 @@ public class GeoParquetGroup {
}
}
- private Object getValue(int fieldIndex, int index) {
+ Object getValue(int fieldIndex, int index) {
Object value = data[fieldIndex];
if (value instanceof List<?>list) {
return list.get(index);
diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java
index 8b4697aa1..e965d250a 100644
--- a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java
+++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java
@@ -20,7 +20,6 @@ package org.apache.baremaps.geoparquet;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
-import java.net.URI;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
@@ -53,31 +52,31 @@ public class GeoParquetReader {
/**
* Constructs a new {@code GeoParquetReader}.
*
- * @param uri the URI to read from
+ * @param path the path to read from
*/
- public GeoParquetReader(URI uri) {
- this(uri, null, new Configuration());
+ public GeoParquetReader(Path path) {
+ this(path, null, new Configuration());
}
/**
* Constructs a new {@code GeoParquetReader}.
*
- * @param uri the URI to read from
+ * @param path the path to read from
* @param envelope the envelope to filter records
*/
- public GeoParquetReader(URI uri, Envelope envelope) {
- this(uri, envelope, new Configuration());
+ public GeoParquetReader(Path path, Envelope envelope) {
+ this(path, envelope, new Configuration());
}
/**
* Constructs a new {@code GeoParquetReader}.
*
- * @param uri the URI to read from
+ * @param path the path to read from
* @param configuration the configuration
*/
- public GeoParquetReader(URI uri, Envelope envelope, Configuration configuration) {
+ public GeoParquetReader(Path path, Envelope envelope, Configuration configuration) {
this.configuration = configuration;
- this.files = initializeFiles(uri, configuration);
+ this.files = initializeFiles(path, configuration);
this.envelope = envelope;
}
@@ -168,11 +167,10 @@ public class GeoParquetReader {
}
}
- private static List<FileStatus> initializeFiles(URI uri, Configuration configuration) {
+ private static List<FileStatus> initializeFiles(Path path, Configuration configuration) {
try {
- Path globPath = new Path(uri.getPath());
- FileSystem fileSystem = FileSystem.get(uri, configuration);
- FileStatus[] fileStatuses = fileSystem.globStatus(globPath);
+ FileSystem fileSystem = FileSystem.get(path.toUri(), configuration);
+ FileStatus[] fileStatuses = fileSystem.globStatus(path);
if (fileStatuses == null) {
throw new GeoParquetException("No files found at the specified URI.");
}
diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriteSupport.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriteSupport.java
new file mode 100644
index 000000000..b6be38165
--- /dev/null
+++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriteSupport.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.baremaps.geoparquet;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.parquet.hadoop.api.WriteSupport;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.RecordConsumer;
+import org.apache.parquet.schema.*;
+
+/**
+ * WriteSupport implementation for writing GeoParquetGroup instances to Parquet.
+ */
+public class GeoParquetWriteSupport extends WriteSupport<GeoParquetGroup> {
+
+ private Configuration configuration;
+ private final MessageType schema;
+ private final GeoParquetMetadata metadata;
+ private RecordConsumer recordConsumer;
+ private final ObjectMapper objectMapper = new ObjectMapper();
+
+ /**
+ * Constructs a new GeoParquetWriteSupport.
+ *
+ * @param schema the Parquet schema
+ * @param metadata the GeoParquet metadata
+ */
+ public GeoParquetWriteSupport(MessageType schema, GeoParquetMetadata metadata) {
+ this.schema = schema;
+ this.metadata = metadata;
+ }
+
+ @Override
+ public WriteContext init(Configuration configuration) {
+ Map<String, String> extraMetadata = new HashMap<>();
+ String geoMetadataJson = serializeMetadata(metadata);
+ extraMetadata.put("geo", geoMetadataJson);
+ return new WriteContext(schema, extraMetadata);
+ }
+
+ @Override
+ public void prepareForWrite(RecordConsumer recordConsumer) {
+ this.recordConsumer = recordConsumer;
+ }
+
+ @Override
+ public void write(GeoParquetGroup group) {
+ recordConsumer.startMessage();
+ writeGroup(group, schema, true);
+ recordConsumer.endMessage();
+ }
+
+ private void writeGroup(GeoParquetGroup group, GroupType groupType, boolean isRoot) {
+ if (!isRoot) {
+ recordConsumer.startGroup();
+ }
+ for (int i = 0; i < groupType.getFieldCount(); i++) {
+ Type fieldType = groupType.getType(i);
+ String fieldName = fieldType.getName();
+ int repetitionCount = group.getFieldRepetitionCount(i);
+ if (repetitionCount == 0) {
+ continue; // Skip if no values are present
+ }
+ for (int j = 0; j < repetitionCount; j++) {
+ recordConsumer.startField(fieldName, i);
+ if (fieldType.isPrimitive()) {
+ Object value = group.getValue(i, j);
+ writePrimitive(value, fieldType.asPrimitiveType());
+ } else {
+ GeoParquetGroup childGroup = group.getGroup(i, j);
+ writeGroup(childGroup, fieldType.asGroupType(), false);
+ }
+ recordConsumer.endField(fieldName, i);
+ }
+ }
+ if (!isRoot) {
+ recordConsumer.endGroup();
+ }
+ }
+
+ private void writePrimitive(Object value, PrimitiveType primitiveType) {
+ if (value == null) {
+ // The Parquet format does not support writing null values directly.
+ // If the field is optional and the value is null, we simply do not write it.
+ return;
+ }
+ switch (primitiveType.getPrimitiveTypeName()) {
+ case INT32:
+ recordConsumer.addInteger((Integer) value);
+ break;
+ case INT64:
+ recordConsumer.addLong((Long) value);
+ break;
+ case FLOAT:
+ recordConsumer.addFloat((Float) value);
+ break;
+ case DOUBLE:
+ recordConsumer.addDouble((Double) value);
+ break;
+ case BOOLEAN:
+ recordConsumer.addBoolean((Boolean) value);
+ break;
+ case BINARY, FIXED_LEN_BYTE_ARRAY:
+ recordConsumer.addBinary((Binary) value);
+ break;
+ default:
+ throw new GeoParquetException(
+ "Unsupported type: " + primitiveType.getPrimitiveTypeName());
+ }
+ }
+
+ private String serializeMetadata(GeoParquetMetadata metadata) {
+ try {
+ return objectMapper.writeValueAsString(metadata);
+ } catch (JsonProcessingException e) {
+ throw new GeoParquetException("Failed to serialize GeoParquet metadata", e);
+ }
+ }
+}
diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriter.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriter.java
new file mode 100644
index 000000000..e2e4292a8
--- /dev/null
+++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriter.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.baremaps.geoparquet;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.conf.ParquetConfiguration;
+import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.parquet.hadoop.api.WriteSupport;
+import org.apache.parquet.schema.MessageType;
+
+/**
+ * A writer for GeoParquet files that writes GeoParquetGroup instances to a Parquet file.
+ */
+public class GeoParquetWriter {
+
+ private GeoParquetWriter() {
+ // Prevent instantiation
+ }
+
+ public static Builder builder(Path file) {
+ return new Builder(file);
+ }
+
+ public static class Builder
+ extends ParquetWriter.Builder<GeoParquetGroup, GeoParquetWriter.Builder> {
+
+ private MessageType type = null;
+
+ private GeoParquetMetadata metadata = null;
+
+ private Builder(Path file) {
+ super(file);
+ }
+
+ /**
+ * Replace the message type with the specified one.
+ *
+ * @param type the message type
+ * @return the builder
+ */
+ public GeoParquetWriter.Builder withType(MessageType type) {
+ this.type = type;
+ return this;
+ }
+
+ /**
+ * Replace the metadata with the specified one.
+ *
+ * @param metadata the metadata
+ * @return the builder
+ */
+ public GeoParquetWriter.Builder withGeoParquetMetadata(GeoParquetMetadata metadata) {
+ this.metadata = metadata;
+ return this;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ protected WriteSupport<GeoParquetGroup> getWriteSupport(Configuration conf) {
+ // We don't need access to the hadoop configuration for now
+ return getWriteSupport((ParquetConfiguration) null);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ protected WriteSupport<GeoParquetGroup> getWriteSupport(ParquetConfiguration conf) {
+ return new GeoParquetWriteSupport(type, metadata);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ protected GeoParquetWriter.Builder self() {
+ return this;
+ }
+ }
+}
diff --git a/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetReaderTest.java b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetReaderTest.java
index 849720a3e..b7b73a9d5 100644
--- a/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetReaderTest.java
+++ b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetReaderTest.java
@@ -19,8 +19,8 @@ package org.apache.baremaps.geoparquet;
import static org.junit.jupiter.api.Assertions.*;
-import java.net.URI;
import org.apache.baremaps.testing.TestFiles;
+import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.Test;
import org.locationtech.jts.geom.Envelope;
@@ -28,14 +28,14 @@ class GeoParquetReaderTest {
@Test
void read() {
- URI geoParquet = TestFiles.GEOPARQUET.toUri();
+ Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri());
GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet);
assertEquals(5, geoParquetReader.read().count());
}
@Test
void readFiltered() {
- URI geoParquet = TestFiles.GEOPARQUET.toUri();
+ Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri());
GeoParquetReader geoParquetReader =
new GeoParquetReader(geoParquet, new Envelope(-172, -65, 18, 72));
assertEquals(1, geoParquetReader.read().count());
@@ -43,21 +43,21 @@ class GeoParquetReaderTest {
@Test
void size() {
- URI geoParquet = TestFiles.GEOPARQUET.toUri();
+ Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri());
GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet);
assertEquals(5, geoParquetReader.size());
}
@Test
void count() {
- URI geoParquet = TestFiles.GEOPARQUET.toUri();
+ Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri());
GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet);
assertEquals(5, geoParquetReader.read().count());
}
@Test
void validateSchemas() {
- URI geoParquet = TestFiles.GEOPARQUET.toUri();
+ Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri());
GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet);
assertTrue(geoParquetReader.validateSchemasAreIdentical());
}
diff --git a/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetWriterTest.java b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetWriterTest.java
new file mode 100644
index 000000000..f1c577fd7
--- /dev/null
+++ b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetWriterTest.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.baremaps.geoparquet;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import org.apache.baremaps.geoparquet.GeoParquetMetadata.Column;
+import org.apache.baremaps.testing.TestFiles;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Types;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.locationtech.jts.geom.Coordinate;
+import org.locationtech.jts.geom.GeometryFactory;
+import org.locationtech.jts.geom.Point;
+
+class GeoParquetWriterTest {
+
+ @Test
+ @Tag("integration")
+ void testWriteAndReadGeoParquet() throws IOException {
+ // Create the output file
+ Configuration conf = new Configuration();
+ Path outputPath = new Path("target/test-output/geoparquet-test.parquet");
+
+ try {
+ // Define the Parquet schema
+ MessageType type = Types.buildMessage()
+ .required(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named("name")
+ .required(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named("city")
+ .optional(PrimitiveTypeName.BINARY).named("geometry")
+ .named("GeoParquetSchema");
+
+ // Create GeoParquet metadata
+ Map<String, Column> columns = new HashMap<>();
+ columns.put("geometry", new GeoParquetMetadata.Column(
+ "WKB",
+ List.of("Point"),
+ null,
+ null,
+ null,
+ null));
+
+ GeoParquetMetadata metadata = new GeoParquetMetadata(
+ "1.0",
+ "geometry",
+ columns,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null);
+
+ // Create a Point geometry
+ GeometryFactory geometryFactory = new GeometryFactory();
+ Point point = geometryFactory.createPoint(new Coordinate(1.0, 2.0));
+
+ // Create the GeoParquetWriter
+ try (ParquetWriter<GeoParquetGroup> writer = GeoParquetWriter.builder(outputPath)
+ .withType(type)
+ .withGeoParquetMetadata(metadata)
+ .build()) {
+
+ // Create a GeoParquetGroup and write it
+ GeoParquetSchema geoParquetSchema =
+ GeoParquetGroupFactory.createGeoParquetSchema(type, metadata);
+ GeoParquetGroup group =
+ new GeoParquetGroup(type.asGroupType(), metadata, geoParquetSchema);
+ group.add("name", "Test Point");
+ group.add("city", "Test City");
+ group.add("geometry", point);
+
+ // Write the group
+ writer.write(group);
+ }
+
+ // Now read back the file using GeoParquetReader
+ GeoParquetReader reader = new GeoParquetReader(outputPath, null, conf);
+ GeoParquetGroup readGroup = reader.read().findFirst().orElse(null);
+
+ assertNotNull(readGroup, "Read group should not be null");
+
+ // Verify the data
+ assertEquals("Test Point", readGroup.getStringValue("name"));
+ assertEquals("Test City", readGroup.getStringValue("city"));
+
+ Point readPoint = (Point) readGroup.getGeometryValue("geometry");
+ assertEquals(point.getX(), readPoint.getX(), 0.0001);
+ assertEquals(point.getY(), readPoint.getY(), 0.0001);
+ } finally {
+ outputPath.getFileSystem(conf).delete(outputPath, false);
+ }
+ }
+
+ @Test
+ @Tag("integration")
+ void copyGeoParquetData() throws IOException {
+ Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri());
+
+ Configuration conf = new Configuration();
+ Path outputPath = new Path("target/test-output/geoparquet-copy.parquet");
+
+ try {
+ // Write the GeoParquet file
+ GeoParquetReader reader = new GeoParquetReader(geoParquet, null, conf);
+ GeoParquetWriter.Builder builder = GeoParquetWriter.builder(outputPath);
+ ParquetWriter<GeoParquetGroup> writer = builder.withType(reader.getParquetSchema())
+ .withGeoParquetMetadata(reader.getGeoParquetMetadata()).build();
+ Iterator<GeoParquetGroup> iterator = reader.read().iterator();
+ while (iterator.hasNext()) {
+ writer.write(iterator.next());
+ }
+ writer.close();
+
+ // Read the copied file
+ GeoParquetReader copiedReader = new GeoParquetReader(outputPath, null, conf);
+ assertEquals(5, copiedReader.read().count());
+ } finally {
+ outputPath.getFileSystem(conf).delete(outputPath, false);
+ }
+
+
+
+ }
+
+}
diff --git a/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/OvertureMapsTest.java b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/OvertureMapsTest.java
index 0fb5b9205..206aa9f19 100644
--- a/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/OvertureMapsTest.java
+++ b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/OvertureMapsTest.java
@@ -20,8 +20,7 @@ package org.apache.baremaps.geoparquet;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
-import java.net.URI;
-import java.net.URISyntaxException;
+import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.locationtech.jts.geom.Envelope;
@@ -30,9 +29,9 @@ class OvertureMapsTest {
@Disabled("Requires access to the Internet")
@Test
- void countAddresses() throws URISyntaxException {
- URI geoParquet = new URI(
- "s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet");
+ void countAddressesInSwitzerland() {
+ Path geoParquet =
+ new Path("s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet");
Envelope switzerland = new Envelope(6.02260949059, 10.4427014502, 45.7769477403, 47.8308275417);
GeoParquetReader geoParquetReader =
new GeoParquetReader(geoParquet, switzerland, OvertureMaps.configuration());
@@ -41,20 +40,9 @@ class OvertureMapsTest {
@Disabled("Requires access to the Internet")
@Test
- void countAddressesInSwitzerland() throws URISyntaxException {
- URI geoParquet = new URI(
- "s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet");
- Envelope switzerland = new Envelope(6.02260949059, 10.4427014502, 45.7769477403, 47.8308275417);
- GeoParquetReader geoParquetReader =
- new GeoParquetReader(geoParquet, switzerland, OvertureMaps.configuration());
- assertEquals(10397434, geoParquetReader.readParallel().count());
- }
-
- @Disabled("Requires access to the Internet")
- @Test
- void validateSchemas() throws URISyntaxException {
- URI geoParquet = new URI(
- "s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet");
+ void validateSchemas() {
+ Path geoParquet =
+ new Path("s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet");
GeoParquetReader geoParquetReader =
new GeoParquetReader(geoParquet, null, OvertureMaps.configuration());
assertTrue(geoParquetReader.validateSchemasAreIdentical(), "Schemas are identical");
@@ -62,9 +50,9 @@ class OvertureMapsTest {
@Disabled("Requires access to the Internet")
@Test
- void size() throws URISyntaxException {
- URI geoParquet = new URI(
- "s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet");
+ void size() {
+ Path geoParquet =
+ new Path("s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet");
GeoParquetReader geoParquetReader =
new GeoParquetReader(geoParquet, null, OvertureMaps.configuration());
assertEquals(213535887L, geoParquetReader.size());