This is an automated email from the ASF dual-hosted git repository. sgoeschl pushed a commit to branch FREEMARKER-144 in repository https://gitbox.apache.org/repos/asf/freemarker-generator.git
commit 6a7556113975e408b4ff84a4cefe62f6832ce2fa Author: Siegfried Goeschl <[email protected]> AuthorDate: Mon Jun 1 10:08:11 2020 +0200 FREEMARKER-144 Proof Of Concept for providing DataFrames --- CHANGELOG.md | 4 +- .../src/site/markdown/cli/tools/dataframe.md | 130 +++++++++++++++++++++ .../src/site/markdown/index.md | 7 +- .../templates/dataframe/example.ftl | 2 +- .../generator/tools/dataframe/DataFrameTool.java | 17 ++- 5 files changed, 155 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2846d0a..d7f6023 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ All notable changes to this project will be documented in this file. We try to a ## 0.1.0-SNAPSHOT ### Added +* [FREEMARKER-144] Proof Of Concept for providing DataFrames * [FREEMARKER-139] freemarker-cli: Provide GsonTool to align with Maven plugin * An environment variable can bes passed as `DataSource` * [FREEMARKER-135] Support user-supplied names for `DataSource` on the command line @@ -36,4 +37,5 @@ All notable changes to this project will be documented in this file. 
We try to a [FREEMARKER-135]: https://issues.apache.org/jira/browse/FREEMARKER-135 [FREEMARKER-136]: https://issues.apache.org/jira/browse/FREEMARKER-136 [FREEMARKER-138]: https://issues.apache.org/jira/browse/FREEMARKER-138 -[FREEMARKER-139]: https://issues.apache.org/jira/browse/FREEMARKER-139 \ No newline at end of file +[FREEMARKER-139]: https://issues.apache.org/jira/browse/FREEMARKER-139 +[FREEMARKER-144]: https://issues.apache.org/jira/browse/FREEMARKER-144 \ No newline at end of file diff --git a/freemarker-generator-cli/src/site/markdown/cli/tools/dataframe.md b/freemarker-generator-cli/src/site/markdown/cli/tools/dataframe.md new file mode 100644 index 0000000..e2f04ba --- /dev/null +++ b/freemarker-generator-cli/src/site/markdown/cli/tools/dataframe.md @@ -0,0 +1,130 @@ +# DataFrameTool + +The `DataFrameTool` uses [nRo/DataFrame](https://github.com/nRo/DataFrame) to convert tabular data into a `DataFrame`. + +A `DataFrame` allows declarative filtering and transformation of tabular data, i.e. little code to write. 
+ +Currently the following sources are supported + +* Apache Commons CSV Parser +* JSON arrays +* Excel sheets (to be done) + +## Examples + +[nRo/DataFrame](https://raw.githubusercontent.com/nRo/DataFrame/master/src/test/resources/users.csv) provides the following CSV file + +``` +┌────────────┬────────────┬────────────┐ +│#name │#age │#country │ +├────────────┼────────────┼────────────┤ +│Schmitt │24 │Germany │ +├────────────┼────────────┼────────────┤ +│Parker │45 │USA │ +├────────────┼────────────┼────────────┤ +│Meier │20 │Germany │ +├────────────┼────────────┼────────────┤ +│Schmitt │30 │France │ +├────────────┼────────────┼────────────┤ +│Peter │44 │Germany │ +├────────────┼────────────┼────────────┤ +│Meier │24 │Germany │ +├────────────┼────────────┼────────────┤ +│Green │33 │UK │ +├────────────┼────────────┼────────────┤ +│Schmitt │30 │Germany │ +├────────────┼────────────┼────────────┤ +│Meier │30 │Germany │ +└────────────┴────────────┴────────────┘ +``` + +and create a `DataFrame` using the following code + +``` +<#assign cvsFormat = CSVTool.formats["DEFAULT"].withHeader().withDelimiter(';')> +<#assign csvParser = CSVTool.parse(DataSources.get(0), cvsFormat)> +<#assign users = DataFrameTool.toDataFrame(csvParser)> +``` + +### Select & Sort + +Now we want to create a new `DataFrame` by selecting `name` and `country` + +``` +<#assign country = "Germany"> +${DataFrameTool.print(users + .select("(name == 'Schmitt' || name == 'Meier') && country == '${country}'") + .sort("name", DataFrameTool.sortOrder["ASCENDING"]))} +``` + +which shows + +``` +┌────────────┬────────────┬────────────┐ +│#name │#age │#country │ +├────────────┼────────────┼────────────┤ +│Meier │20 │Germany │ +├────────────┼────────────┼────────────┤ +│Meier │24 │Germany │ +├────────────┼────────────┼────────────┤ +│Meier │30 │Germany │ +├────────────┼────────────┼────────────┤ +│Schmitt │24 │Germany │ +├────────────┼────────────┼────────────┤ +│Schmitt │30 │Germany │ 
+└────────────┴────────────┴────────────┘ +``` + +### Count Column Values + +Let's assume we want to count the records for each `country` + +``` +${DataFrameTool.print(users.getColumn("country").transform(DataFrameTool.transformer["COUNT"]))} +``` + +returns the following `DataFrame` + +``` +┌────────────┬────────────┐ +│#country │#counts │ +├────────────┼────────────┤ +│Germany │6 │ +├────────────┼────────────┤ +│USA │1 │ +├────────────┼────────────┤ +│France │1 │ +├────────────┼────────────┤ +│UK │1 │ +└────────────┴────────────┘ +``` + +### Group By Age And Country + +Let's assume that we want to group the `DataFrame` by `age` and `country` + +``` +${DataFrameTool.print(users.groupBy("age", "country").sort("age"))} +``` + +which results in + +``` +┌────────────┬────────────┐ +│#age │#country │ +├────────────┼────────────┤ +│20 │Germany │ +├────────────┼────────────┤ +│24 │Germany │ +├────────────┼────────────┤ +│30 │France │ +├────────────┼────────────┤ +│30 │Germany │ +├────────────┼────────────┤ +│33 │UK │ +├────────────┼────────────┤ +│44 │Germany │ +├────────────┼────────────┤ +│45 │USA │ +└────────────┴────────────┘ +``` \ No newline at end of file diff --git a/freemarker-generator-cli/src/site/markdown/index.md b/freemarker-generator-cli/src/site/markdown/index.md index 791ab12..c52e1b4 100644 --- a/freemarker-generator-cli/src/site/markdown/index.md +++ b/freemarker-generator-cli/src/site/markdown/index.md @@ -4,4 +4,9 @@ * [User-Supplied Parameters](cli/concepts/user-parameters.html) * [Named URIs](cli/concepts/named-uris.html) -* [Data Models](cli/concepts/data-models.html) \ No newline at end of file +* [Data Models](cli/concepts/data-models.html) + +### Tools + +* [DataFrameTool](cli/tools/dataframe.html) + diff --git a/freemarker-generator-cli/templates/dataframe/example.ftl b/freemarker-generator-cli/templates/dataframe/example.ftl index a351297..7983b53 100644 --- a/freemarker-generator-cli/templates/dataframe/example.ftl +++ 
b/freemarker-generator-cli/templates/dataframe/example.ftl @@ -36,7 +36,7 @@ ${DataFrameTool.print(users.head(2))} Count Column Values ============================================================================= -${DataFrameTool.print(users.getColumn("country").transform(DataFrameTool.countTransformer(false)))} +${DataFrameTool.print(users.getColumn("country").transform(DataFrameTool.transformer["COUNT"]))} Group By Age & Country ============================================================================= diff --git a/freemarker-generator-tools/src/main/java/org/apache/freemarker/generator/tools/dataframe/DataFrameTool.java b/freemarker-generator-tools/src/main/java/org/apache/freemarker/generator/tools/dataframe/DataFrameTool.java index 18db73d..d34e705 100644 --- a/freemarker-generator-tools/src/main/java/org/apache/freemarker/generator/tools/dataframe/DataFrameTool.java +++ b/freemarker-generator-tools/src/main/java/org/apache/freemarker/generator/tools/dataframe/DataFrameTool.java @@ -20,6 +20,7 @@ import de.unknownreality.dataframe.DataFrame; import de.unknownreality.dataframe.DataFrameBuilder; import de.unknownreality.dataframe.DataFrameWriter; import de.unknownreality.dataframe.sort.SortColumn.Direction; +import de.unknownreality.dataframe.transform.ColumnDataFrameTransform; import de.unknownreality.dataframe.transform.CountTransformer; import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVRecord; @@ -121,8 +122,15 @@ public class DataFrameTool { return result; } - public CountTransformer countTransformer(boolean ignoreNA) { - return new CountTransformer(ignoreNA); + /** + * Provide a map with predefined transformers. 
+ * + * @return available transformers + */ + public Map<String, ColumnDataFrameTransform> getTransformer() { + final Map<String, ColumnDataFrameTransform> result = new HashMap<>(); + result.put("COUNT", countTransformer(false)); + return result; } /** @@ -139,4 +147,9 @@ public class DataFrameTool { public String toString() { return "Bridge to nRo/DataFrame (see https://github.com/nRo/DataFrame)"; } + + private static CountTransformer countTransformer(boolean ignoreNA) { + return new CountTransformer(ignoreNA); + } + }
