This is an automated email from the ASF dual-hosted git repository.
mthomsen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nifi.git
The following commit(s) were added to refs/heads/master by this push:
new e5fa18d NIFI-6088: Widen type inference for BIGINT and DOUBLE
e5fa18d is described below
commit e5fa18d63cda60f0b10a8a1a2abff1cf69e976bd
Author: Matthew Burgess <[email protected]>
AuthorDate: Wed Feb 27 20:25:25 2019 -0500
NIFI-6088: Widen type inference for BIGINT and DOUBLE
This closes #3342
Signed-off-by: Mike Thomsen <[email protected]>
---
.../nifi-record-serialization-services/pom.xml | 1 +
.../org/apache/nifi/json/JsonSchemaInference.java | 6 +-
.../apache/nifi/json/TestJsonSchemaInference.java | 84 ++++++++++++++++++++++
.../src/test/resources/json/data-types.json | 24 +++++++
4 files changed, 112 insertions(+), 3 deletions(-)
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
index d65f471..27d4da7 100755
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
@@ -152,6 +152,7 @@
<exclude>src/test/resources/json/bank-account-multiarray.json</exclude>
<exclude>src/test/resources/json/bank-account-multiline.json</exclude>
<exclude>src/test/resources/json/bank-account-oneline.json</exclude>
+ <exclude>src/test/resources/json/data-types.json</exclude>
<exclude>src/test/resources/json/json-with-unicode.json</exclude>
<exclude>src/test/resources/json/primitive-type-array.json</exclude>
<exclude>src/test/resources/json/single-bank-account.json</exclude>
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonSchemaInference.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonSchemaInference.java
index b09c79f..02587cc 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonSchemaInference.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonSchemaInference.java
@@ -56,13 +56,13 @@ public class JsonSchemaInference extends HierarchicalSchemaInference<JsonNode> {
}
if (jsonNode.isIntegralNumber()) {
+ if (jsonNode.isBigInteger()) {
+ return RecordFieldType.BIGINT.getDataType();
+ }
return RecordFieldType.LONG.getDataType();
}
if (jsonNode.isFloatingPointNumber()) {
- return RecordFieldType.FLOAT.getDataType();
- }
- if (jsonNode.isDouble()) {
return RecordFieldType.DOUBLE.getDataType();
}
if (jsonNode.isBinary()) {
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/json/TestJsonSchemaInference.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/json/TestJsonSchemaInference.java
new file mode 100644
index 0000000..0e50764
--- /dev/null
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/json/TestJsonSchemaInference.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.json;
+
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.schema.inference.InferSchemaAccessStrategy;
+import org.apache.nifi.schema.inference.TimeValueInference;
+import org.apache.nifi.serialization.record.RecordFieldType;
+import org.apache.nifi.serialization.record.RecordSchema;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertSame;
+
+public class TestJsonSchemaInference {
+
+ private final TimeValueInference timestampInference = new TimeValueInference("yyyy-MM-dd", "HH:mm:ss", "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
+
+ @Test
+ public void testInferenceIncludesAllRecords() throws IOException {
+ final File file = new File("src/test/resources/json/data-types.json");
+
+ final RecordSchema schema;
+ try (final InputStream in = new FileInputStream(file);
+ final InputStream bufferedIn = new BufferedInputStream(in)) {
+
+ final InferSchemaAccessStrategy<?> accessStrategy = new InferSchemaAccessStrategy<>(
+ (var, content) -> new JsonRecordSource(content),
+ new JsonSchemaInference(timestampInference), Mockito.mock(ComponentLog.class));
+ schema = accessStrategy.getSchema(null, bufferedIn, null);
+ }
+
+ assertSame(RecordFieldType.STRING, schema.getDataType("varcharc").get().getFieldType());
+ assertSame(RecordFieldType.LONG, schema.getDataType("uuid").get().getFieldType());
+ assertSame(RecordFieldType.LONG, schema.getDataType("tinyintc").get().getFieldType());
+ assertSame(RecordFieldType.STRING, schema.getDataType("textc").get().getFieldType());
+ assertEquals(RecordFieldType.DATE.getDataType("yyyy-MM-dd"), schema.getDataType("datec").get());
+ assertSame(RecordFieldType.LONG, schema.getDataType("smallintc").get().getFieldType());
+ assertSame(RecordFieldType.LONG, schema.getDataType("mediumintc").get().getFieldType());
+ assertSame(RecordFieldType.LONG, schema.getDataType("intc").get().getFieldType());
+ assertSame(RecordFieldType.BIGINT, schema.getDataType("bigintc").get().getFieldType());
+ assertSame(RecordFieldType.DOUBLE, schema.getDataType("floatc").get().getFieldType());
+ assertSame(RecordFieldType.DOUBLE, schema.getDataType("doublec").get().getFieldType());
+ assertSame(RecordFieldType.DOUBLE, schema.getDataType("decimalc").get().getFieldType());
+ assertEquals(RecordFieldType.TIMESTAMP.getDataType("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"), schema.getDataType("timestampc").get());
+ assertEquals(RecordFieldType.TIME.getDataType("HH:mm:ss"), schema.getDataType("timec").get());
+ assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("charc").get());
+ assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("tinytextc").get());
+ assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("blobc").get());
+ assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("mediumtextc").get());
+ assertSame(RecordFieldType.LONG, schema.getDataType("enumc").get().getFieldType());
+ assertSame(RecordFieldType.LONG, schema.getDataType("setc").get().getFieldType());
+ assertSame(RecordFieldType.LONG, schema.getDataType("boolc").get().getFieldType());
+ assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("binaryc").get());
+
+ final List<String> fieldNames = schema.getFieldNames();
+ assertEquals(Arrays.asList("varcharc", "uuid", "tinyintc", "textc", "datec", "smallintc", "mediumintc", "intc", "bigintc",
+ "floatc", "doublec", "decimalc", "timestampc", "timec", "charc", "tinytextc", "blobc", "mediumtextc", "enumc", "setc", "boolc", "binaryc"), fieldNames);
+ }
+
+}
\ No newline at end of file
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/json/data-types.json b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/json/data-types.json
new file mode 100644
index 0000000..343288e
--- /dev/null
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/json/data-types.json
@@ -0,0 +1,24 @@
+[{
+ "varcharc": "Nam penatibus in neque.",
+ "uuid": 1,
+ "tinyintc": -81,
+ "textc": "A faucibus volutpat placerat euismod mollis, quis semper quis
ultrices aliquam massa vestibulum a lacus hendrerit turpis nullam, tincidunt
ullamcorper ad ridiculus habitasse tristique vivamus elit. Ac id montes erat
accumsan rhoncus consectetur leo condimentum.\n\nConubia lectus et viverra
taciti, mollis molestie phasellus, fermentum accumsan sem nisi sit dapibus
interdum ridiculus blandit blandit. Volutpat nullam orci cras. Justo nullam
penatibus non fusce vivamus integer [...]
+ "datec": "2019-02-27",
+ "smallintc": -8423,
+ "mediumintc": 6008538,
+ "intc": -1130599020,
+ "bigintc": 171234567890123456789,
+ "floatc": 182.33,
+ "doublec": 149.67382865705562,
+ "decimalc": 109.88,
+ "timestampc": "2019-02-27T20:40:53.000Z",
+ "timec": "20:40:53",
+ "charc": "DBDDGpPz",
+ "tinytextc": "hgFuypClmWWMNsDXEFJJOhdsljdBP",
+ "blobc": "Wc5YvvF8fUsOgejKPsOa",
+ "mediumtextc": "Torquent aliquet malesuada adipiscing, eget himenaeos
facilisi ridiculus eros netus, nisi semper eleifend dolor nisi sapien phasellus
luctus libero aenean suscipit pulvinar, lacus posuere id hendrerit feugiat
vitae purus ac blandit euismod pharetra. Adipiscing lectus primis eros
pellentesque porta blandit dictum fermentum lectus tortor nam, fusce est dis
class ornare neque est enim quisque a.\n\nScelerisque aptent etiam non
imperdiet volutpat. Quisque est fusce purus [...]
+ "enumc": 1,
+ "setc": 4,
+ "boolc": 0,
+ "binaryc": "ehynfnybBfxmxgkMVrVt"
+}]
\ No newline at end of file