This is an automated email from the ASF dual-hosted git repository.

bchapuis pushed a commit to branch csv-datastore
in repository https://gitbox.apache.org/repos/asf/incubator-baremaps.git

commit 31b8bd2b93e64ee5794a5d66e1ff386895722ea1
Author: Bertil Chapuis <[email protected]>
AuthorDate: Wed Oct 30 20:52:52 2024 +0100

    Add a csv datastore
---
 .../apache/baremaps/geocoder/GeonamesReader.java   |   2 +
 .../apache/baremaps/storage/csv/CsvDataStore.java  |  81 +++++++++
 .../apache/baremaps/storage/csv/CsvDataTable.java  | 196 +++++++++++++++++++++
 .../baremaps/storage/csv/CsvDataTableTest.java     | 182 +++++++++++++++++++
 4 files changed, 461 insertions(+)

diff --git 
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesReader.java 
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesReader.java
index b35c4c45f..53a18dc8f 100644
--- 
a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesReader.java
+++ 
b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesReader.java
@@ -22,6 +22,8 @@ package org.apache.baremaps.geocoder;
 import com.fasterxml.jackson.databind.MappingIterator;
 import com.fasterxml.jackson.dataformat.csv.CsvMapper;
 import com.fasterxml.jackson.dataformat.csv.CsvSchema;
+import com.fasterxml.jackson.dataformat.csv.CsvSchema.ColumnType;
+
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
diff --git 
a/baremaps-core/src/main/java/org/apache/baremaps/storage/csv/CsvDataStore.java 
b/baremaps-core/src/main/java/org/apache/baremaps/storage/csv/CsvDataStore.java
new file mode 100644
index 000000000..f79112427
--- /dev/null
+++ 
b/baremaps-core/src/main/java/org/apache/baremaps/storage/csv/CsvDataStore.java
@@ -0,0 +1,81 @@
+package org.apache.baremaps.storage.csv;
+
+import org.apache.baremaps.data.storage.DataSchema;
+import org.apache.baremaps.data.storage.DataStore;
+import org.apache.baremaps.data.storage.DataStoreException;
+import org.apache.baremaps.data.storage.DataTable;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * A DataStore implementation that manages a single CSV file.
+ */
+public class CsvDataStore implements DataStore {
+
+    private final String tableName;
+    private final DataSchema schema;
+    private final CsvDataTable dataTable;
+
+    /**
+     * Constructs a CsvDataStore with the specified table name, schema, and 
CSV file.
+     *
+     * @param tableName the name of the table
+     * @param schema    the data schema defining the structure
+     * @param csvFile   the CSV file to read data from
+     * @param hasHeader whether the CSV file includes a header row
+     * @param separator the character used to separate columns in the CSV file
+     * @throws IOException if an I/O error occurs
+     */
+    public CsvDataStore(String tableName, DataSchema schema, File csvFile, 
boolean hasHeader, char separator) throws IOException {
+        this.tableName = tableName;
+        this.schema = schema;
+        this.dataTable = new CsvDataTable(schema, csvFile, hasHeader, 
separator);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public List<String> list() throws DataStoreException {
+        return Collections.singletonList(tableName);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public DataTable get(String name) throws DataStoreException {
+        if (this.tableName.equals(name)) {
+            return dataTable;
+        } else {
+            throw new DataStoreException("Table '" + name + "' not found.");
+        }
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void add(DataTable table) throws DataStoreException {
+        throw new UnsupportedOperationException("Adding tables is not 
supported in CsvDataStore.");
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void add(String name, DataTable table) throws DataStoreException {
+        throw new UnsupportedOperationException("Adding tables is not 
supported in CsvDataStore.");
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void remove(String name) throws DataStoreException {
+        throw new UnsupportedOperationException("Removing tables is not 
supported in CsvDataStore.");
+    }
+}
diff --git 
a/baremaps-core/src/main/java/org/apache/baremaps/storage/csv/CsvDataTable.java 
b/baremaps-core/src/main/java/org/apache/baremaps/storage/csv/CsvDataTable.java
new file mode 100644
index 000000000..ca26cb70b
--- /dev/null
+++ 
b/baremaps-core/src/main/java/org/apache/baremaps/storage/csv/CsvDataTable.java
@@ -0,0 +1,196 @@
+package org.apache.baremaps.storage.csv;
+
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonToken;
+import com.fasterxml.jackson.dataformat.csv.CsvMapper;
+import com.fasterxml.jackson.dataformat.csv.CsvSchema;
+import org.apache.baremaps.data.storage.DataColumn;
+import org.apache.baremaps.data.storage.DataRow;
+import org.apache.baremaps.data.storage.DataSchema;
+import org.apache.baremaps.data.storage.DataTable;
+import org.locationtech.jts.io.WKTReader;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+/**
+ * A DataTable implementation that reads data from a CSV file using Jackson.
+ */
+public class CsvDataTable implements DataTable {
+
+    private final DataSchema schema;
+    private final File csvFile;
+    private final CsvSchema csvSchema;
+    private final long size;
+
+    /**
+     * Constructs a CsvDataTable with the specified schema, CSV file, header 
presence, and separator.
+     *
+     * @param schema    the data schema defining the structure
+     * @param csvFile   the CSV file to read data from
+     * @param hasHeader whether the CSV file includes a header row
+     * @param separator the character used to separate columns in the CSV file
+     * @throws IOException if an I/O error occurs
+     */
+    public CsvDataTable(DataSchema schema, File csvFile, boolean hasHeader, 
char separator) throws IOException {
+        this.schema = schema;
+        this.csvFile = csvFile;
+        this.csvSchema = buildCsvSchema(schema, hasHeader, separator);
+        this.size = calculateSize();
+    }
+
+    /**
+     * Builds the CsvSchema for Jackson based on the provided DataSchema, 
header presence, and separator.
+     *
+     * @param dataSchema the data schema
+     * @param hasHeader  whether the CSV file includes a header row
+     * @param separator  the character used to separate columns
+     * @return the CsvSchema for Jackson
+     */
+    private CsvSchema buildCsvSchema(DataSchema dataSchema, boolean hasHeader, 
char separator) {
+        CsvSchema.Builder builder = CsvSchema.builder();
+        for (DataColumn column : dataSchema.columns()) {
+            builder.addColumn(column.name());
+        }
+        CsvSchema schema = 
builder.setUseHeader(hasHeader).setColumnSeparator(separator).build();
+        return schema;
+    }
+
+    /**
+     * Calculates the number of rows in the CSV file.
+     *
+     * @return the number of rows
+     * @throws IOException if an I/O error occurs
+     */
+    private long calculateSize() throws IOException {
+        try (var parser = new CsvMapper().readerFor(Map.class)
+                .with(csvSchema)
+                .createParser(csvFile)) {
+            long rowCount = 0;
+            while (parser.nextToken() != null) {
+                if (parser.currentToken() == JsonToken.START_OBJECT) {
+                    rowCount++;
+                }
+            }
+            return rowCount;
+        }
+    }
+
+    @Override
+    public DataSchema schema() {
+        return schema;
+    }
+
+    @Override
+    public boolean add(DataRow row) {
+        throw new UnsupportedOperationException("Adding rows is not 
supported.");
+    }
+
+    @Override
+    public void clear() {
+        throw new UnsupportedOperationException("Clearing rows is not 
supported.");
+    }
+
+    @Override
+    public long size() {
+        return size;
+    }
+
+    @Override
+    public Iterator<DataRow> iterator() {
+        try {
+            CsvMapper csvMapper = new CsvMapper();
+            JsonParser parser = csvMapper.readerFor(Map.class)
+                    .with(csvSchema)
+                    .createParser(csvFile);
+
+            Iterator<Map<String, String>> csvIterator = 
csvMapper.readerFor(Map.class)
+                    .with(csvSchema)
+                    .readValues(parser);
+
+            return new Iterator<>() {
+                @Override
+                public boolean hasNext() {
+                    return csvIterator.hasNext();
+                }
+
+                @Override
+                public DataRow next() {
+                    Map<String, String> csvRow = csvIterator.next();
+                    DataRow dataRow = schema.createRow();
+
+                    for (int i = 0; i < schema.columns().size(); i++) {
+                        DataColumn column = schema.columns().get(i);
+                        String columnName = column.name();
+                        String value = csvRow.get(columnName);
+
+                        if (value != null) {
+                            Object parsedValue = parseValue(column, value);
+                            dataRow.set(i, parsedValue);
+                        } else {
+                            dataRow.set(i, null);
+                        }
+                    }
+                    return dataRow;
+                }
+            };
+
+        } catch (IOException e) {
+            throw new RuntimeException("Error reading CSV file", e);
+        }
+    }
+
+    /**
+     * Parses the string value from the CSV according to the column type.
+     *
+     * @param column the data column
+     * @param value  the string value from the CSV
+     * @return the parsed value
+     */
+    private Object parseValue(DataColumn column, String value) {
+        DataColumn.Type type = column.type();
+        try {
+            switch (type) {
+                case STRING:
+                    return value;
+                case INTEGER:
+                    return Integer.parseInt(value);
+                case LONG:
+                    return Long.parseLong(value);
+                case FLOAT:
+                    return Float.parseFloat(value);
+                case DOUBLE:
+                    return Double.parseDouble(value);
+                case BOOLEAN:
+                    return Boolean.parseBoolean(value);
+                case GEOMETRY:
+                case POINT:
+                case LINESTRING:
+                case POLYGON:
+                case MULTIPOINT:
+                case MULTILINESTRING:
+                case MULTIPOLYGON:
+                case GEOMETRYCOLLECTION:
+                    WKTReader reader = new WKTReader();
+                    return reader.read(value);
+                default:
+                    throw new IllegalArgumentException("Unsupported column 
type: " + type);
+            }
+        } catch (Exception e) {
+            throw new RuntimeException("Error parsing value for column " + 
column.name(), e);
+        }
+    }
+
+    @Override
+    public Spliterator<DataRow> spliterator() {
+        return Spliterators.spliteratorUnknownSize(iterator(), 
Spliterator.ORDERED);
+    }
+
+    @Override
+    public Stream<DataRow> stream() {
+        return StreamSupport.stream(spliterator(), false);
+    }
+}
diff --git 
a/baremaps-core/src/test/java/org/apache/baremaps/storage/csv/CsvDataTableTest.java
 
b/baremaps-core/src/test/java/org/apache/baremaps/storage/csv/CsvDataTableTest.java
new file mode 100644
index 000000000..09747d9ea
--- /dev/null
+++ 
b/baremaps-core/src/test/java/org/apache/baremaps/storage/csv/CsvDataTableTest.java
@@ -0,0 +1,182 @@
+package org.apache.baremaps.storage.csv;
+
+import org.apache.baremaps.data.storage.*;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.locationtech.jts.geom.Geometry;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+class CsvDataTableTest {
+
+    private File tempCsvFile;
+
+    @BeforeEach
+    void setUp() throws IOException {
+        tempCsvFile = File.createTempFile("test", ".csv");
+        tempCsvFile.deleteOnExit();
+    }
+
+    @AfterEach
+    void tearDown() {
+        if (tempCsvFile.exists()) {
+            tempCsvFile.delete();
+        }
+    }
+
+    @Test
+    void testCsvWithHeaderAndCommaSeparator() throws IOException {
+        String csvContent = """
+                id,name,geom
+                1,PointA,"POINT(1 1)"
+                2,PointB,"POINT(2 2)"
+                """;
+        Files.writeString(tempCsvFile.toPath(), csvContent);
+        List<DataColumn> columns = List.of(
+                new DataColumnFixed("id", DataColumn.Cardinality.REQUIRED, 
DataColumn.Type.INTEGER),
+                new DataColumnFixed("name", DataColumn.Cardinality.OPTIONAL, 
DataColumn.Type.STRING),
+                new DataColumnFixed("geom", DataColumn.Cardinality.OPTIONAL, 
DataColumn.Type.GEOMETRY)
+        );
+        DataSchema schema = new DataSchemaImpl("test_table", columns);
+        DataTable dataTable = new CsvDataTable(schema, tempCsvFile, true, ',');
+        assertEquals(2, dataTable.size());
+        int rowCount = 0;
+        for (DataRow row : dataTable) {
+            rowCount++;
+            Integer id = (Integer) row.get("id");
+            String name = (String) row.get("name");
+            Geometry geometry = (Geometry) row.get("geom");
+            assertNotNull(id);
+            assertNotNull(name);
+            assertNotNull(geometry);
+            assertEquals("Point" + (rowCount == 1 ? "A" : "B"), name);
+            assertEquals("POINT (" + rowCount + " " + rowCount + ")", 
geometry.toText());
+        }
+        assertEquals(2, rowCount);
+    }
+
+    @Test
+    void testCsvWithoutHeaderAndSemicolonSeparator() throws IOException {
+        String csvContent = """
+                1;PointA;"POINT(1 1)"
+                2;PointB;"POINT(2 2)"
+                """;
+        Files.writeString(tempCsvFile.toPath(), csvContent);
+        List<DataColumn> columns = List.of(
+                new DataColumnFixed("column1", 
DataColumn.Cardinality.REQUIRED, DataColumn.Type.INTEGER),
+                new DataColumnFixed("column2", 
DataColumn.Cardinality.OPTIONAL, DataColumn.Type.STRING),
+                new DataColumnFixed("column3", 
DataColumn.Cardinality.OPTIONAL, DataColumn.Type.GEOMETRY)
+        );
+        DataSchema schema = new DataSchemaImpl("test_table", columns);
+        DataTable dataTable = new CsvDataTable(schema, tempCsvFile, false, 
';');
+        assertEquals(2, dataTable.size());
+        int rowCount = 0;
+        for (DataRow row : dataTable) {
+            rowCount++;
+            Integer id = (Integer) row.get("column1");
+            String name = (String) row.get("column2");
+            Geometry geometry = (Geometry) row.get("column3");
+
+            // Verify data
+            assertNotNull(id);
+            assertNotNull(name);
+            assertNotNull(geometry);
+
+            assertEquals("Point" + (rowCount == 1 ? "A" : "B"), name);
+            assertEquals("POINT (" + rowCount + " " + rowCount + ")", 
geometry.toText());
+        }
+        assertEquals(2, rowCount);
+    }
+
+    @Test
+    void testCsvWithDifferentDataTypes() throws IOException {
+        String csvContent = """
+                int_col,double_col,bool_col,string_col
+                1,1.1,true,Hello
+                2,2.2,false,World
+                """;
+        Files.writeString(tempCsvFile.toPath(), csvContent);
+        List<DataColumn> columns = List.of(
+                new DataColumnFixed("int_col", 
DataColumn.Cardinality.REQUIRED, DataColumn.Type.INTEGER),
+                new DataColumnFixed("double_col", 
DataColumn.Cardinality.REQUIRED, DataColumn.Type.DOUBLE),
+                new DataColumnFixed("bool_col", 
DataColumn.Cardinality.REQUIRED, DataColumn.Type.BOOLEAN),
+                new DataColumnFixed("string_col", 
DataColumn.Cardinality.REQUIRED, DataColumn.Type.STRING)
+        );
+        DataSchema schema = new DataSchemaImpl("test_table", columns);
+        DataTable dataTable = new CsvDataTable(schema, tempCsvFile, true, ',');
+        assertEquals(2, dataTable.size());
+        int rowCount = 0;
+        for (DataRow row : dataTable) {
+            rowCount++;
+            Integer intCol = (Integer) row.get("int_col");
+            Double doubleCol = (Double) row.get("double_col");
+            Boolean boolCol = (Boolean) row.get("bool_col");
+            String stringCol = (String) row.get("string_col");
+
+            // Verify data
+            assertEquals(rowCount, intCol);
+            assertEquals(rowCount * 1.1, doubleCol);
+            assertEquals(rowCount == 1, boolCol);
+            assertEquals(rowCount == 1 ? "Hello" : "World", stringCol);
+        }
+        assertEquals(2, rowCount);
+    }
+
+    @Test
+    void testCsvWithInvalidData() throws IOException {
+        String csvContent = """
+                id,name
+                abc,TestName
+                """;
+        Files.writeString(tempCsvFile.toPath(), csvContent);
+        List<DataColumn> columns = List.of(
+                new DataColumnFixed("id", DataColumn.Cardinality.REQUIRED, 
DataColumn.Type.INTEGER),
+                new DataColumnFixed("name", DataColumn.Cardinality.OPTIONAL, 
DataColumn.Type.STRING)
+        );
+        DataSchema schema = new DataSchemaImpl("test_table", columns);
+        DataTable dataTable = new CsvDataTable(schema, tempCsvFile, true, ',');
+        assertThrows(RuntimeException.class, () -> {
+            for (DataRow row : dataTable) {
+                // This line should throw an exception because abc is not a 
valid integer
+            }
+        });
+    }
+
+    @Test
+    void testAddAndClearUnsupportedOperations() throws IOException {
+        String csvContent = "";
+        Files.writeString(tempCsvFile.toPath(), csvContent);
+        List<DataColumn> columns = List.of(
+                new DataColumnFixed("id", DataColumn.Cardinality.REQUIRED, 
DataColumn.Type.INTEGER),
+                new DataColumnFixed("name", DataColumn.Cardinality.OPTIONAL, 
DataColumn.Type.STRING)
+        );
+        DataSchema schema = new DataSchemaImpl("test_table", columns);
+        DataTable dataTable = new CsvDataTable(schema, tempCsvFile, true, ',');
+        assertThrows(UnsupportedOperationException.class, () -> 
dataTable.add(null));
+        assertThrows(UnsupportedOperationException.class, dataTable::clear);
+    }
+
+    @Test
+    void testSizeCalculation() throws IOException {
+        String csvContent = """
+                id,name
+                1,Name1
+                2,Name2
+                3,Name3
+                """;
+        Files.writeString(tempCsvFile.toPath(), csvContent);
+        List<DataColumn> columns = List.of(
+                new DataColumnFixed("id", DataColumn.Cardinality.REQUIRED, 
DataColumn.Type.INTEGER),
+                new DataColumnFixed("name", DataColumn.Cardinality.OPTIONAL, 
DataColumn.Type.STRING)
+        );
+        DataSchema schema = new DataSchemaImpl("test_table", columns);
+        DataTable dataTable = new CsvDataTable(schema, tempCsvFile, true, ',');
+        assertEquals(3, dataTable.size());
+    }
+}
\ No newline at end of file

Reply via email to