This is an automated email from the ASF dual-hosted git repository. sgoeschl pushed a commit to branch FREEMARKER-144 in repository https://gitbox.apache.org/repos/asf/freemarker-generator.git
commit b17b86821f1ae1a17562026f91d6efeb43ffeb09 Author: Siegfried Goeschl <[email protected]> AuthorDate: Sun May 31 01:19:25 2020 +0200 FREEMARKER-144 Proof Of Concept for providing DataFrames --- .../site/sample/csv/data_join_a.csv | 5 ++ .../site/sample/csv/data_join_b.csv | 5 ++ .../src/main/config/freemarker-cli.properties | 5 +- .../src/main/resources/freemarker-cli.properties | 1 + .../freemarker/generator/cli/ManualTest.java | 2 +- .../templates/dataframe/html/print.ftl | 65 ++++++++++++++++++ freemarker-generator-tools/pom.xml | 6 ++ .../generator/tools/dataframe/DataFrameTool.java | 71 +++++++++++++++++++ .../src/test/data/csv/data_join_a.csv | 5 ++ .../src/test/data/csv/data_join_b.csv | 5 ++ .../tools/dataframe/DataFrameToolTest.java | 80 ++++++++++++++++++++++ 11 files changed, 247 insertions(+), 3 deletions(-) diff --git a/freemarker-generator-cli/site/sample/csv/data_join_a.csv b/freemarker-generator-cli/site/sample/csv/data_join_a.csv new file mode 100644 index 0000000..cc05775 --- /dev/null +++ b/freemarker-generator-cli/site/sample/csv/data_join_a.csv @@ -0,0 +1,5 @@ +GENE_ID;FPKM;CHR +A;5;1 +B;4;2 +C;6;3 +D;6;1 \ No newline at end of file diff --git a/freemarker-generator-cli/site/sample/csv/data_join_b.csv b/freemarker-generator-cli/site/sample/csv/data_join_b.csv new file mode 100644 index 0000000..c84d1a0 --- /dev/null +++ b/freemarker-generator-cli/site/sample/csv/data_join_b.csv @@ -0,0 +1,5 @@ +TRANSCRIPT_ID;GENE_ID;FPKM;TRANSCRIPT_NUMBER +TA;A;7;1 +TB;A;3;2 +TC;B;6;1 +TD;E;4;1 \ No newline at end of file diff --git a/freemarker-generator-cli/src/main/config/freemarker-cli.properties b/freemarker-generator-cli/src/main/config/freemarker-cli.properties index fa08255..bd9c917 100644 --- a/freemarker-generator-cli/src/main/config/freemarker-cli.properties +++ b/freemarker-generator-cli/src/main/config/freemarker-cli.properties @@ -25,15 +25,16 @@ # Configure FreeMarker Tools (name -> implementation class) 
############################################################################# freemarker.tools.CSVTool=org.apache.freemarker.generator.tools.commonscsv.CommonsCSVTool -freemarker.tools.ExecTool=org.apache.freemarker.generator.tools.commonsexec.CommonsExecTool +freemarker.tools.DataFrameTool=org.apache.freemarker.generator.tools.dataframe.DataFrameTool freemarker.tools.ExcelTool=org.apache.freemarker.generator.tools.excel.ExcelTool +freemarker.tools.ExecTool=org.apache.freemarker.generator.tools.commonsexec.CommonsExecTool freemarker.tools.FreeMarkerTool=org.apache.freemarker.generator.tools.freemarker.FreeMarkerTool freemarker.tools.GrokTool=org.apache.freemarker.generator.tools.grok.GrokTool freemarker.tools.GsonTool=org.apache.freemarker.generator.tools.gson.GsonTool freemarker.tools.JsonPathTool=org.apache.freemarker.generator.tools.jsonpath.JsonPathTool freemarker.tools.JsoupTool=org.apache.freemarker.generator.tools.jsoup.JsoupTool freemarker.tools.PropertiesTool=org.apache.freemarker.generator.tools.properties.PropertiesTool -freemarker.tools.YamlTool=org.apache.freemarker.generator.tools.snakeyaml.SnakeYamlTool freemarker.tools.SystemTool=org.apache.freemarker.generator.tools.system.SystemTool freemarker.tools.UUIDTool=org.apache.freemarker.generator.tools.uuid.UUIDTool freemarker.tools.XmlTool=org.apache.freemarker.generator.tools.xml.XmlTool +freemarker.tools.YamlTool=org.apache.freemarker.generator.tools.snakeyaml.SnakeYamlTool diff --git a/freemarker-generator-cli/src/main/resources/freemarker-cli.properties b/freemarker-generator-cli/src/main/resources/freemarker-cli.properties index 0a0542a..bd9c917 100644 --- a/freemarker-generator-cli/src/main/resources/freemarker-cli.properties +++ b/freemarker-generator-cli/src/main/resources/freemarker-cli.properties @@ -25,6 +25,7 @@ # Configure FreeMarker Tools (name -> implementation class) ############################################################################# 
freemarker.tools.CSVTool=org.apache.freemarker.generator.tools.commonscsv.CommonsCSVTool +freemarker.tools.DataFrameTool=org.apache.freemarker.generator.tools.dataframe.DataFrameTool freemarker.tools.ExcelTool=org.apache.freemarker.generator.tools.excel.ExcelTool freemarker.tools.ExecTool=org.apache.freemarker.generator.tools.commonsexec.CommonsExecTool freemarker.tools.FreeMarkerTool=org.apache.freemarker.generator.tools.freemarker.FreeMarkerTool diff --git a/freemarker-generator-cli/src/test/java/org/apache/freemarker/generator/cli/ManualTest.java b/freemarker-generator-cli/src/test/java/org/apache/freemarker/generator/cli/ManualTest.java index 210f32f..4a47b1f 100644 --- a/freemarker-generator-cli/src/test/java/org/apache/freemarker/generator/cli/ManualTest.java +++ b/freemarker-generator-cli/src/test/java/org/apache/freemarker/generator/cli/ManualTest.java @@ -48,7 +48,7 @@ public class ManualTest { // private static final String CMD = "-b ./src/test -t templates/demo.ftl -m env=./site/sample/properties/user_0001/user.properties"; // private static final String CMD = "-b ./src/test -t templates/demo.ftl -m ./site/sample/properties/user_0001/user.properties"; // private static final String CMD = "-b ./src/test --data-model post=https://jsonplaceholder.typicode.com/posts/2 -t templates/info.ftl"; - private static final String CMD = "-b ./src/test -t templates/info.ftl -P name=value"; + private static final String CMD = "-b ./src/test -t templates/dataframe/html/print.ftl ./site/sample/csv/data_join_a.csv ./site/sample/csv/data_join_b.csv"; public static void main(String[] args) { diff --git a/freemarker-generator-cli/templates/dataframe/html/print.ftl b/freemarker-generator-cli/templates/dataframe/html/print.ftl new file mode 100644 index 0000000..f34e745 --- /dev/null +++ b/freemarker-generator-cli/templates/dataframe/html/print.ftl @@ -0,0 +1,65 @@ +<#ftl output_format="HTML" > +<#-- + Licensed to the Apache Software Foundation (ASF) under one + or more 
contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<#assign dataSource = DataSources.get(0)> +<#assign name = dataSource.name> +<#assign date = .now?iso_utc> +<#assign dataFrame = DataFrameTool.parse(dataSource, csvReader())> +<#---------------------------------------------------------------------------> +<!DOCTYPE html> +<html> +<head> + <title>${name}</title> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css"> +</head> +<body> +<div class="container-fluid"> + <h1>DataFrame Test <small>${name}, ${date}</small></h1> + <table class="table table-striped"> + <@writeHeader dataFrame/> + <@writeRows dataFrame/> + </table> +</div> +</body> +</html> + +<#---------------------------------------------------------------------------> +<#function csvReader> + <#return DataFrameTool.csvReaderBuilder.containsHeader(true).withSeparator(CSV_IN_DELIMITER!';').build()> +</#function> + +<#---------------------------------------------------------------------------> +<#macro writeHeader dataFrame> + <tr> + <#list dataFrame.columns as column> + <th>${column.name}</th> + </#list> + </tr> +</#macro> + +<#---------------------------------------------------------------------------> +<#macro writeRows 
dataFrame> + <#list dataFrame.iterator() as row> + <tr> + <#list 0..row.size()-1 as idx> + <td>${row.getString(idx)}</td> + </#list> + </tr> + </#list> +</#macro> \ No newline at end of file diff --git a/freemarker-generator-tools/pom.xml b/freemarker-generator-tools/pom.xml index 95e7d4c..57c6b59 100644 --- a/freemarker-generator-tools/pom.xml +++ b/freemarker-generator-tools/pom.xml @@ -54,6 +54,12 @@ <artifactId>commons-csv</artifactId> <version>1.8</version> </dependency> + <!-- DataFrame --> + <dependency> + <groupId>de.unknownreality</groupId> + <artifactId>dataframe</artifactId> + <version>0.7.6</version> + </dependency> <!-- ExcelTool --> <dependency> <groupId>org.apache.poi</groupId> diff --git a/freemarker-generator-tools/src/main/java/org/apache/freemarker/generator/tools/dataframe/DataFrameTool.java b/freemarker-generator-tools/src/main/java/org/apache/freemarker/generator/tools/dataframe/DataFrameTool.java new file mode 100644 index 0000000..749e005 --- /dev/null +++ b/freemarker-generator-tools/src/main/java/org/apache/freemarker/generator/tools/dataframe/DataFrameTool.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.freemarker.generator.tools.dataframe; + +import de.unknownreality.dataframe.DataFrame; +import de.unknownreality.dataframe.csv.CSVReader; +import de.unknownreality.dataframe.csv.CSVReaderBuilder; +import de.unknownreality.dataframe.io.FileFormat; +import org.apache.freemarker.generator.base.datasource.DataSource; + +import java.io.IOException; +import java.io.InputStream; + +/** Tool bridging to the nRo/DataFrame library: loads a DataSource into a named DataFrame and hands out CSV reader builders. */ public class DataFrameTool { + + /** + * Create a data frame by loading the data source's input stream with {@code FileFormat.CSV}. The frame is named after the data source, the stream is closed before returning, and an {@code IOException} is rethrown wrapped in a {@code RuntimeException}. + * + * @param dataSource data source whose input stream is read + * @return data frame carrying the data source's name + */ + public DataFrame parse(DataSource dataSource) { + try (InputStream is = dataSource.getUnsafeInputStream()) { + final DataFrame dataFrame = DataFrame.load(is, FileFormat.CSV); + dataFrame.setName(dataSource.getName()); + return dataFrame; + } catch (IOException e) { + throw new RuntimeException("Failed to parse data source: " + dataSource, e); + } + } + + /** + * Create a data frame by loading the data source's input stream with the given CSV reader. The frame is named after the data source, the stream is closed before returning, and an {@code IOException} is rethrown wrapped in a {@code RuntimeException}. + * + * @param dataSource data source whose input stream is read + * @param csvReader CSV format specification to use, e.g. one built via {@link #getCsvReaderBuilder()} + * @return data frame carrying the data source's name + */ + public DataFrame parse(DataSource dataSource, CSVReader csvReader) { + try (InputStream is = dataSource.getUnsafeInputStream()) { + final DataFrame dataFrame = DataFrame.load(is, csvReader); + dataFrame.setName(dataSource.getName()); + return dataFrame; + } catch (IOException e) { + throw new RuntimeException("Failed to parse data source: " + dataSource, e); + } + } + + /** Returns the result of {@code CSVReaderBuilder.create()}, evaluated anew on every call. */ public CSVReaderBuilder getCsvReaderBuilder() { + return CSVReaderBuilder.create(); + } + + /** Returns a fixed, human-readable description of this tool rather than any instance state. */ @Override + public String toString() { + return "Bridge to nRo/DataFrame (see https://github.com/nRo/DataFrame)"; + } +} diff --git a/freemarker-generator-tools/src/test/data/csv/data_join_a.csv b/freemarker-generator-tools/src/test/data/csv/data_join_a.csv new file mode 100644 index 0000000..cc05775 --- /dev/null +++ b/freemarker-generator-tools/src/test/data/csv/data_join_a.csv @@ -0,0 +1,5 @@ +GENE_ID;FPKM;CHR +A;5;1 +B;4;2 +C;6;3 +D;6;1 \ No newline at end of file diff --git 
a/freemarker-generator-tools/src/test/data/csv/data_join_b.csv b/freemarker-generator-tools/src/test/data/csv/data_join_b.csv new file mode 100644 index 0000000..c84d1a0 --- /dev/null +++ b/freemarker-generator-tools/src/test/data/csv/data_join_b.csv @@ -0,0 +1,5 @@ +TRANSCRIPT_ID;GENE_ID;FPKM;TRANSCRIPT_NUMBER +TA;A;7;1 +TB;A;3;2 +TC;B;6;1 +TD;E;4;1 \ No newline at end of file diff --git a/freemarker-generator-tools/src/test/java/org/apache/freemarker/generator/tools/dataframe/DataFrameToolTest.java b/freemarker-generator-tools/src/test/java/org/apache/freemarker/generator/tools/dataframe/DataFrameToolTest.java new file mode 100644 index 0000000..96d6ee3 --- /dev/null +++ b/freemarker-generator-tools/src/test/java/org/apache/freemarker/generator/tools/dataframe/DataFrameToolTest.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.freemarker.generator.tools.dataframe; + +import de.unknownreality.dataframe.DataFrame; +import de.unknownreality.dataframe.csv.CSVReader; +import org.apache.freemarker.generator.base.datasource.DataSource; +import org.apache.freemarker.generator.base.datasource.DataSourceFactory; +import org.junit.Test; + +import java.io.File; + +import static de.unknownreality.dataframe.sort.SortColumn.Direction.Descending; +import static java.nio.charset.StandardCharsets.UTF_8; +import static junit.framework.Assert.assertEquals; + +/** Tests DataFrameTool against CSV fixtures read from ./src/test/data/csv. NOTE(review): assertEquals is imported from the legacy junit.framework.Assert although the tests use org.junit.Test; consider org.junit.Assert. */ public class DataFrameToolTest { + + /* NOTE(review): contract.csv is not added by this patch; presumably an existing fixture, verify it is present. */ private static final File CONTRACT_CSV = new File("./src/test/data/csv/contract.csv"); + private static final File DATA_JOIN_A = new File("./src/test/data/csv/data_join_a.csv"); + private static final File DATA_JOIN_B = new File("./src/test/data/csv/data_join_b.csv"); + + /** Parses data_join_a.csv through parse(DataSource) and checks the frame name, 3 columns, 4 rows and the first GENE_ID value. */ @Test + public void shouldParseCsvFile() { + final DataFrame dataFrame = dataFrameTool().parse(dataSource(DATA_JOIN_A)); + + assertEquals("data_join_a.csv", dataFrame.getName()); + assertEquals(3, dataFrame.getColumns().size()); + assertEquals(4, dataFrame.getRows().size()); + assertEquals("A", dataFrame.getColumn("GENE_ID").get(0)); + } + + /** Parses contract.csv with a reader built for a header row and a comma separator, then checks the frame name, 32 columns, 22 rows and the first contract_id value. */ @Test + public void shouldParseCsvFileUsingCSVReader() { + final DataFrameTool dataFrameTool = dataFrameTool(); + final CSVReader csvReader = dataFrameTool.getCsvReaderBuilder().containsHeader(true).withSeparator(',').build(); + final DataFrame dataFrame = dataFrameTool.parse(dataSource(CONTRACT_CSV), csvReader); + + assertEquals("contract.csv", dataFrame.getName()); + assertEquals(32, dataFrame.getColumns().size()); + assertEquals(22, dataFrame.getRows().size()); + assertEquals("C71", dataFrame.getColumn("contract_id").get(0)); + } + + /** Inner-joins the two join fixtures on GENE_ID, sorts the result descending by that column, and expects 6 columns, 3 rows and "B" as the first GENE_ID. */ @Test + public void shouldJoinDataFrames() { + final String columnName = "GENE_ID"; + final DataFrame dataFrameA = dataFrameTool().parse(dataSource(DATA_JOIN_A)); + final DataFrame dataFrameB = dataFrameTool().parse(dataSource(DATA_JOIN_B)); + final 
DataFrame dataFrame = dataFrameA.joinInner(dataFrameB, columnName).sort(columnName, Descending); + + assertEquals(6, dataFrame.getColumns().size()); + assertEquals(3, dataFrame.getRows().size()); + assertEquals("B", dataFrame.getColumn(columnName).get(0)); + + /* NOTE(review): the print() output is not asserted on; presumably a debugging aid, confirm it should stay. */ dataFrame.print(); + } + + /** Creates a fresh DataFrameTool for each call. */ private DataFrameTool dataFrameTool() { + return new DataFrameTool(); + } + + /** Wraps the given file as a DataSource via DataSourceFactory.fromFile, passing UTF_8 as the charset. */ private DataSource dataSource(File file) { + return DataSourceFactory.fromFile(file, UTF_8); + } +}
