This is an automated email from the ASF dual-hosted git repository. volodymyr pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git
commit cc51f2458c49ab786634a0a739652a4b7d454b44 Author: Paul Rogers <[email protected]> AuthorDate: Mon Mar 9 11:34:46 2020 -0700 DRILL-7631: Updates to the Json Structure Parser Revised how "look-ahead" works. Added support for unknown types. closes #2016 --- .../easy/json/parser/AbstractElementParser.java | 3 + .../exec/store/easy/json/parser/ArrayListener.java | 89 ++----- .../exec/store/easy/json/parser/ArrayParser.java | 63 ++++- .../store/easy/json/parser/DummyValueParser.java | 2 - .../easy/json/parser/JsonStructureParser.java | 12 + .../exec/store/easy/json/parser/JsonType.java | 25 -- .../store/easy/json/parser/JsonValueParser.java | 116 ++++++++ .../store/easy/json/parser/ObjectListener.java | 110 ++++---- .../exec/store/easy/json/parser/ObjectParser.java | 60 ++++- .../exec/store/easy/json/parser/TokenIterator.java | 16 ++ .../exec/store/easy/json/parser/ValueDef.java | 85 ++++++ .../store/easy/json/parser/ValueDefFactory.java | 99 +++++++ .../exec/store/easy/json/parser/ValueFactory.java | 222 ---------------- .../exec/store/easy/json/parser/ValueListener.java | 37 +-- .../exec/store/easy/json/parser/ValueParser.java | 55 ++-- .../{ => easy}/json/parser/BaseTestJsonParser.java | 128 ++++----- .../json/parser/TestJsonParserArrays.java | 86 +++--- .../json/parser/TestJsonParserBasics.java | 92 +++++-- .../json/parser/TestJsonParserErrors.java | 5 +- .../json/parser/TestJsonParserObjects.java | 9 +- .../easy/json/parser/TestJsonParserUnknowns.java | 294 +++++++++++++++++++++ 21 files changed, 1051 insertions(+), 557 deletions(-) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/AbstractElementParser.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/AbstractElementParser.java index ed811d5..a99dcec 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/AbstractElementParser.java +++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/AbstractElementParser.java @@ -18,6 +18,9 @@ package org.apache.drill.exec.store.easy.json.parser; +/** + * Abstract base class for all JSON element parsers. + */ public abstract class AbstractElementParser implements ElementParser { final JsonStructureParser structParser; private final ElementParser parent; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ArrayListener.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ArrayListener.java index c5d588d..06a67aa 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ArrayListener.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ArrayListener.java @@ -18,8 +18,8 @@ package org.apache.drill.exec.store.easy.json.parser; /** - * Represents one level within an array. The first time the parser sees - * the array element, it will call one of the "Element" methods with the + * Represents one level within an array. The first time the parser sees the array element, + * it will call the {@link #element(ValueDef)} method with the * look-ahead values visible to the parser. Since JSON is flexible, later * data shapes may not necessarily follow the first shape. The implementation * must handle this or throw an error if not supported. @@ -44,15 +44,16 @@ package org.apache.drill.exec.store.easy.json.parser; * Three JSON-specific cases warrant attention: * <ol> * <li>The first occurrence of the array is empty: {@code [ ]}. In this case, - * the structure parser will ask for an element child by providing the - * {@link JsonType#EMPTY} type, which is not very useful, but is all that - * the parser knows. 
The listener is responsible for implementing some kind of - * "deferred type" logic to wait and see what kind of element appears - * later..</li> + * the structure parser will defer asking for an element parser (and listener) + * until an actual value appears. The array listener is responsible for + * implementing some kind of "deferred type" logic to wait and see what + * kind of element appears later.</li> * <li>The first occurrence of the array has, as its first element, a * {@code null} value. The structure parser will ask this listener to create * an array child for the {@code null} value, but the listener has no type - * information. Again, the listener is responsible for type-deferal.</li> + * information. Since null values must be recorded (so we know how many + * appear in each array), the listener is forced to choose a type. Choose + * wisely as there is no way to know what type will appear in the future.</li> * <li>A generalized form of the above is that the structure parser only * knows what it sees on the first element when it asks for an element * child. In a well-formed file, that first token will predict the type @@ -81,8 +82,19 @@ package org.apache.drill.exec.store.easy.json.parser; public interface ArrayListener { /** - * Called at the start of a set of values for an array. That is, called - * when the structure parser accepts the {@code [} token. + * Provide an element listener for the first non-empty value + * seen for the array. + * + * @param valueDef description of the element (without the array + * dimensions) + * @return a listener to consume values of the array element + */ + ValueListener element(ValueDef valueDef); + + /** + * Called at the entrance to each level (dimension) of an array. + * That is, called when the structure parser accepts the {@code [} + * token. 
*/ void onStart(); @@ -91,61 +103,16 @@ public interface ArrayListener { * by its own listener which receives the value of the element (if * scalar) or element events (if structured.) */ - void onElement(); - - /** - * Called at the end of a set of values for an array. That is, called - * when the structure parser accepts the {@code ]} token. - */ - void onEnd(); + void onElementStart(); /** - * The first element seen is a scalar, {@code null} or empty. That is, - * {@code [ <scalar>}, {@code [ null} or {@code [ ]}. - * - * @param type the JSON type of the object as given by the token - * which the Jackson parser returned for the value. The type can - * be {@code null}, which means that the parser does not know what - * actual type might occur later - * @return a value listener for the scalar type, or if {@code null}, - * perhaps waiting for more information to commit to a type + * Called after each element of the array. */ - ValueListener scalarElement(JsonType type); + void onElementEnd(); /** - * The first element an array or scalars (or {@code null}.That is, - * {@code [ [+ <scalar>}. - * - * @param arrayDims the number of dimensions observed during the - * first-element parse, not including the surrounding array - * itself. As in all cases, there is no guarantee that - * that this number will remain valid later, and may be wrong if the - * first-seen element was empty: {@code []}. - * @return a listener for the value of the top-level element (which - * the listener can assume will turn out to be an array.) - */ - ValueListener arrayElement(int arrayDims, JsonType type); - - /** - * The first element seen for an array is an object. That is, - * <code>[ {</code>. - * - * @return a listener for the value of the top-level element (which - * the listener can assume will turn out to be an object.) - */ - ValueListener objectElement(); - - /** - * The first element seen is an object array.That is, - * <code>[ [* {</code>. 
- * @param arrayDims the number of dimensions observed during the - * first-element parse, not including the surrounding array - * itself. As in all cases, there is no guarantee that - * that this number will remain valid later, and may be wrong if the - * first-seen element was empty: {@code []}. - * @return a listener for the value of the top-level element (which - * the listener can assume will turn out to be an array.) + * Called at the end of a set of values for an array. That is, called + * when the structure parser accepts the {@code ]} token. */ - ValueListener objectArrayElement(int arrayDims); + void onEnd(); } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ArrayParser.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ArrayParser.java index 2d4431a..963b8c5 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ArrayParser.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ArrayParser.java @@ -17,6 +17,8 @@ */ package org.apache.drill.exec.store.easy.json.parser; +import org.apache.drill.exec.store.easy.json.parser.ObjectListener.FieldType; + import com.fasterxml.jackson.core.JsonToken; /** @@ -24,6 +26,10 @@ import com.fasterxml.jackson.core.JsonToken; * represented by a {@code ValueListener}. There is a single listener * for all the elements, which are presumed to be of the same type. * <p> + * The element is created when first encountered, either as part of field + * creation (<code>{a: [10]}</code>) or when later encountered in parsing + * (<code>{a: []} {a: [10]}</code>). + * <p> * This parser <i>does not</i> attempt to parse an array as a poor-man's * tuple: {@code [ 101, "fred", 23.45 ]}. The listener could handle this * case. 
But, if we need to handle such a case, it would be better to @@ -32,13 +38,12 @@ import com.fasterxml.jackson.core.JsonToken; */ public class ArrayParser extends AbstractElementParser { - private final ArrayListener arrayListener; - private final ValueParser elementParser; + private ValueParser elementParser; + private ArrayListener arrayListener; - public ArrayParser(ValueParser parent, ArrayListener arrayListener, ValueListener elementListener) { + public ArrayParser(ValueParser parent, ArrayListener arrayListener) { super(parent); this.arrayListener = arrayListener; - this.elementParser = new ValueParser(this, "[]", elementListener); } public ValueParser elementParser() { return elementParser; } @@ -51,18 +56,58 @@ public class ArrayParser extends AbstractElementParser { arrayListener.onStart(); top: for (;;) { // Position: [ (value, )* ^ ? - JsonToken token = tokenizer.requireNext(); + JsonToken token = tokenizer.requireNext(); switch (token) { case END_ARRAY: break top; - default: tokenizer.unget(token); - arrayListener.onElement(); - elementParser.parse(tokenizer); - break; + parseElement(tokenizer); } } arrayListener.onEnd(); } + + private void parseElement(TokenIterator tokenizer) { + if (elementParser == null) { + detectElement(tokenizer); + } + arrayListener.onElementStart(); + elementParser.parse(tokenizer); + arrayListener.onElementEnd(); + } + + private void detectElement(TokenIterator tokenizer) { + addElement(ValueDefFactory.lookAhead(tokenizer)); + } + + public void addElement(ValueDef valueDef) { + bindElement(arrayListener.element(valueDef)); + } + + public void bindElement(ValueListener elementListener) { + elementParser = new ValueParser(this, "[]", FieldType.TYPED); + elementParser.bindListener(elementListener); + } + + public void bindListener(ArrayListener newListener) { + arrayListener = newListener; + if (elementParser != null) { + elementParser.bindListener(arrayListener.element(ValueDef.UNKNOWN)); + } + } + + /** + * Expand the structure 
of this array given a description of the + * look-ahead value. Skip if this is a 1D array of unknown type. + * If 2D or greater, then we must create the child array of one + * less dimension. + */ + public void expandStructure(ValueDef valueDef) { + if (valueDef.dimensions() > 1 || !valueDef.type().isUnknown()) { + ValueDef elementDef = new ValueDef(valueDef.type(), valueDef.dimensions() - 1); + addElement(elementDef); + elementParser.expandStructure(elementDef); + } + } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/DummyValueParser.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/DummyValueParser.java index 86b345c..7d5131b 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/DummyValueParser.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/DummyValueParser.java @@ -55,14 +55,12 @@ class DummyValueParser extends AbstractElementParser { public void parseTail(TokenIterator tokenizer) { // Parse (field: value)* } - for (;;) { JsonToken token = tokenizer.requireNext(); switch (token) { // Not exactly precise, but the JSON parser handles the // details. 
- case END_OBJECT: case END_ARRAY: return; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonStructureParser.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonStructureParser.java index 23693ca..14016a8 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonStructureParser.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonStructureParser.java @@ -204,6 +204,18 @@ public class JsonStructureParser { public int recoverableErrorCount() { return errorRecoveryCount; } + public int lineNumber() { + return tokenizer.lineNumber(); + } + + public int columnNumber() { + return tokenizer.columnNumber(); + } + + public String token() { + return tokenizer.token(); + } + public void close() { if (errorRecoveryCount > 0) { logger.warn("Read JSON input with {} recoverable error(s).", diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonType.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonType.java deleted file mode 100644 index 9481a4a..0000000 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonType.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.exec.store.easy.json.parser; - -/** - * Description of JSON types as derived from JSON tokens. - */ -public enum JsonType { - ARRAY, OBJECT, NULL, EMPTY, BOOLEAN, INTEGER, FLOAT, STRING -} diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonValueParser.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonValueParser.java new file mode 100644 index 0000000..15fc128 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonValueParser.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.easy.json.parser; + +import com.fasterxml.jackson.core.JsonToken; + +/** + * Parses an arbitrary JSON value (which can be a subtree of any + * complexity) into a JSON string. That is, converts the parsed + * JSON tokens back into the original JSON text. 
+ */ +public class JsonValueParser extends AbstractElementParser { + + private final ValueListener listener; + private final StringBuilder json = new StringBuilder(); + + protected JsonValueParser(ElementParser parent, String key, + ValueListener listener) { + super(parent); + this.listener = listener; + } + + @Override + public void parse(TokenIterator tokenizer) { + JsonToken token = tokenizer.requireNext(); + json.setLength(0); + parseValue(tokenizer, token); + listener.onString(json.toString()); + json.setLength(0); + } + + private void parseValue(TokenIterator tokenizer, JsonToken token) { + String textValue = tokenizer.textValue(); + switch (token) { + case START_ARRAY: + json.append(textValue); + parseArrayTail(tokenizer); + break; + + case START_OBJECT: + json.append(textValue); + parseObjectTail(tokenizer); + break; + + case VALUE_STRING: + json.append("\""); + json.append(textValue); + json.append("\""); + break; + + default: + json.append(textValue); + break; + } + } + + public void parseArrayTail(TokenIterator tokenizer) { + + // Accept value* ] + + boolean first = true; + for (;;) { + JsonToken token = tokenizer.requireNext(); + if (token == JsonToken.END_ARRAY) { + json.append(tokenizer.textValue()); + return; + } + if (! first) { + json.append(", "); + } + first = false; + parseValue(tokenizer, token); + } + } + + public void parseObjectTail(TokenIterator tokenizer) { + + // Accept (field: value)* } + + boolean first = true; + for (;;) { + JsonToken token = tokenizer.requireNext(); + if (token == JsonToken.END_OBJECT) { + json.append(tokenizer.textValue()); + return; + } + if (! 
first) { + json.append(", "); + } + first = false; + if (token != JsonToken.FIELD_NAME) { + throw errorFactory().syntaxError(token); + } + + json.append("\""); + json.append(tokenizer.textValue()); + json.append("\": "); + parseValue(tokenizer, tokenizer.requireNext()); + } + } +} diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ObjectListener.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ObjectListener.java index ec40b0e..40b8617 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ObjectListener.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ObjectListener.java @@ -17,6 +17,8 @@ */ package org.apache.drill.exec.store.easy.json.parser; +import org.apache.calcite.model.JsonType; + /** * Represents events on a object value. The object value may be a top-level * field or may be the element of an array. The listener gets an event when @@ -52,6 +54,35 @@ package org.apache.drill.exec.store.easy.json.parser; */ public interface ObjectListener { + enum FieldType { + + /** + * The field is unprojected, ignore its content. No value listener + * is created. + */ + IGNORE, + + /** + * Parse the JSON object according to its type. + */ + TYPED, + + /** + * The field is to be treated as "all-text". Used when the parser-level + * setting for {@code allTextMode} is {@code false}; allows per-field + * overrides to, perhaps, ride over inconsistent scalar types for a + * single field. The listener will receive only strings. + */ + TEXT, + + /** + * Parse the value, and all its children, as JSON. + * That is, converts the parsed JSON back into a + * JSON string. The listener will receive only strings. + */ + JSON + } + /** * Called at the start of a set of values for an object. That is, called * when the structure parser accepts the <code>{</code> token. 
@@ -59,73 +90,46 @@ public interface ObjectListener { void onStart(); /** - * Called at the end of a set of values for an object. That is, called - * when the structure parser accepts the <code>}</code> token. - */ - void onEnd(); - - /** * Called by the structure parser when it first sees a new field for - * and object to determine if that field is to be projected (is needed - * by the listener.) If not projected, the structure parser will not + * an object to determine how to parse the field. + * If not projected, the structure parser will not * ask for a value listener and will insert a "dummy" parser that will * free-wheel over any value of that field. As a result, unprojected * fields can not cause type errors: they are invisible as long as * they are syntactically valid. + * <p> + * The {@link FieldType#JSON} type says to parse the entire field, and + * its children, as a JSON string. The parser will ask for a value + * listener to accept the JSON text. * * @param key the object field name - * @return {@code true} if this listener wants to provide a listener - * for the field, {@code false} if the field should be ignored + * @return how the field should be parsed */ - boolean isProjected(String key); + FieldType fieldType(String key); /** - * A new field has appeared with a scalar (or {@code null}) value. - * That is: {@code key: <scalar>}. + * The structure parser has just encountered a new field for this + * object. The {@link #fieldType(String)} indicated that the field is + * to be projected. This method performs any setup needed to handle the + * field, then returns a value listener to receive events for the + * field value. The value listener may be asked to create additional + * structure, such as arrays or nested objects. 
* * @param key the field name - * @param type the type as given by the JSON token for the value - * @return a value listener for the scalar value + * @param valueDef a description of the field as inferred by looking + * ahead some number of tokens in the input JSON. Provides both a data + * type and array depth (dimensions.) If the type is + * {@link ValueDef.JsonType#EMPTY EMPTY}, then the field is an empty array. + * If the type is {@link ValueDef.JsonType#NULL NULL}, then the value is null. In these + * cases, the listener can replace itself when an actual value appears + * later + * @return a listener to receive events for the newly-created field */ - ValueListener addScalar(String key, JsonType type); + ValueListener addField(String key, ValueDef valueDef); /** - * A new field has appeared with a scalar, {@code null} or empty array - * value. That is, one of: - * <ul> - * <li><code>key: [+ <scalar></code></li> - * <li><code>key: [+ null</code></li> - * <li><code>key: [+ ]</code></li> - * </ul> - * Where "[+" means one or more opening array elements. - * - * @param key the field name - * @param arrayDims number of dimensions observed in the first appearance - * of the array (more may appear later) - * @param type the observed type of the first element of the array, or - * {@link JsonType.NULL} if {@code null} was see, or - * {@link JsonType.EMPTY} if an empty array was seen - * @return a listener for the field itself which is prepared to - * return an array listener - */ - ValueListener addArray(String key, int arrayDims, JsonType type); - - /** - * A new field has appeared with an object value. - * That is: {@code key: <scalar>}. - * - * @param key the field name - * @return a value listener which assumes the value is an object - */ - ValueListener addObject(String key); - - /** - * A new field has appeared with an object array value. - * That is: <code>key: ]+ {</code>. 
- * - * @param key the field name - * @return a value listener which assumes the value is an object - * array + * Called at the end of a set of values for an object. That is, called + * when the structure parser accepts the <code>}</code> token. */ - ValueListener addObjectArray(String key, int dims); + void onEnd(); } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ObjectParser.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ObjectParser.java index cfc0c76..226cc29 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ObjectParser.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ObjectParser.java @@ -20,6 +20,8 @@ package org.apache.drill.exec.store.easy.json.parser; import java.util.Map; import org.apache.drill.common.map.CaseInsensitiveMap; +import org.apache.drill.exec.store.easy.json.parser.ObjectListener.FieldType; +import org.apache.drill.exec.store.easy.json.parser.ValueDef.JsonType; import com.fasterxml.jackson.core.JsonToken; @@ -159,9 +161,61 @@ public class ObjectParser extends AbstractElementParser { throw errorFactory().structureError( "Drill does not allow empty keys in JSON key/value pairs"); } - if (!listener.isProjected(key)) { - return new DummyValueParser(this); + FieldType type = listener.fieldType(key); + switch (type) { + case IGNORE: + return new DummyValueParser(this); + case JSON: + return new JsonValueParser(this, key, + listener.addField(key, new ValueDef(JsonType.STRING, 0))); + default: + return createFieldParser(key, type, tokenizer); } - return ValueFactory.createFieldParser(this, key, tokenizer); + } + + /** + * Parse position: <code>{ ... field : ^ ?</code> for a newly-seen field. + * Constructs a value parser and its listeners by looking ahead + * some number of tokens to "sniff" the type of the value. 
For + * example: + * <ul> + * <li>{@code foo: <value>} - Field value</li> + * <li>{@code foo: [ <value> ]} - 1D array value</li> + * <li>{@code foo: [ [<value> ] ]} - 2D array value</li> + * <li>Etc.</li> + * </ul> + * <p> + * There are two cases in which no type estimation is possible: + * <ul> + * <li>The value is {@code null}, indicated by + * {@link JsonType#NULL}.</li> + * <li>The value is an array, and the array is empty, indicated + * by {@link JsonType#EMPTY}.</li> + * </ul> + * {@link ValueDefFactory} handles syntactic type inference. The associated + * listener enforces semantic rules. For example, if a schema is + * available, and we know that field "x" must be an Integer, but + * this class reports that it is an object, then the listener should + * raise an exception. + * <p> + * Also, the parser cannot enforce type consistency. This method + * looks only at the first appearance of a value: a sample size of + * one. JSON allows anything. + * The listener must enforce semantic rules that say whether a different + * type is allowed for later values. 
+ * + * @param key the name of the field + * @param type the kind of field parser to create + * @param tokenizer the token parser + * @return the value parser for the element, which may contain additional + * structure for objects or arrays + */ + public ElementParser createFieldParser(String key, FieldType type, + TokenIterator tokenizer) { + ValueParser fp = new ValueParser(this, key, type); + ValueDef valueDef = ValueDefFactory.lookAhead(tokenizer); + fp.bindListener(listener.addField(key, valueDef)); + fp.expandStructure(valueDef); + return fp; } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/TokenIterator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/TokenIterator.java index cfb7440..ecd5b29 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/TokenIterator.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/TokenIterator.java @@ -87,6 +87,22 @@ public class TokenIterator { .toString(); } + public int lineNumber() { + return parser.getCurrentLocation().getLineNr(); + } + + public int columnNumber() { + return parser.getCurrentLocation().getColumnNr(); + } + + public String token() { + try { + return parser.getText(); + } catch (IOException e) { + return null; + } + } + public JsonToken requireNext() { JsonToken token = next(); if (token == null) { diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueDef.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueDef.java new file mode 100644 index 0000000..8896129 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueDef.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.easy.json.parser; + +/** + * Description of a JSON value as inferred from looking ahead in + * the JSON stream. Includes a type (which can be empty for an empty + * array, or null), and an array size (which is 0 for simple values.) + * <p> + * To be clear, this is the JSON parser's best guess at a field type + * from the input token stream. This is <i>not</i> a description of the + * desired data type as JSON can only react to what it sees on input. + */ +public class ValueDef { + + /** + * Description of JSON types as derived from JSON tokens. + */ + public enum JsonType { + OBJECT, NULL, BOOLEAN, + INTEGER, FLOAT, STRING, EMBEDDED_OBJECT, + + /** + * Indicates an empty array. + */ + EMPTY, + + /** + * Indicates an unknown array, appears when replacing the + * value listener for an array. 
+ */ + UNKNOWN; + + public boolean isObject() { return this == OBJECT; } + + public boolean isUnknown() { + return this == NULL || this == EMPTY || + this == UNKNOWN; + } + + public boolean isScalar() { + return !isObject() && !isUnknown(); + } + } + + public static final ValueDef UNKNOWN_ARRAY = new ValueDef(JsonType.UNKNOWN, 1); + public static final ValueDef UNKNOWN = new ValueDef(JsonType.UNKNOWN, 0); + + private final int arrayDims; + private final JsonType type; + + public ValueDef(JsonType type, int dims) { + this.type = type; + this.arrayDims = dims; + } + + public JsonType type() { return type; } + public int dimensions() { return arrayDims; } + public boolean isArray() { return arrayDims > 0; } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder() + .append(type.name()); + for (int i = 0; i < arrayDims; i++) { + buf.append("[]"); + } + return buf.toString(); + } +} diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueDefFactory.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueDefFactory.java new file mode 100644 index 0000000..9013dd2 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueDefFactory.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.easy.json.parser; + +import org.apache.drill.exec.store.easy.json.parser.ValueDef.JsonType; + +import com.fasterxml.jackson.core.JsonToken; + +/** + * Constructs a {@link ValueDef} by looking ahead on the input stream. + * Looking ahead is safe because this class only looks at syntactic + * tokens such as <code>{</code>, {@code [} or the first value token. + * The underlying JSON parser is left with the first value token + * as its current token. Pushes other tokens back on the token stack + * so they can be re-consumed by the actual parser. 
+ */ +public class ValueDefFactory { + + private int arrayDims; + private JsonType jsonType = JsonType.EMPTY; + + public ValueDefFactory(TokenIterator tokenizer) { + inferValueType(tokenizer); + } + + public static ValueDef lookAhead(TokenIterator tokenizer) { + ValueDefFactory factory = new ValueDefFactory(tokenizer); + return new ValueDef(factory.jsonType, factory.arrayDims); + } + + public static ValueDef arrayLookAhead(TokenIterator tokenizer) { + ValueDefFactory factory = new ValueDefFactory(tokenizer); + // Already in an array (saw [), so add one to dimensions + return new ValueDef(factory.jsonType, factory.arrayDims + 1); + } + + private void inferValueType(TokenIterator tokenizer) { + JsonToken token = tokenizer.requireNext(); + switch (token) { + case START_ARRAY: + // Position: key: [ ^ + arrayDims++; + inferValueType(tokenizer); + break; + + case END_ARRAY: + break; + + case START_OBJECT: + // Position: key: { ^ + jsonType = JsonType.OBJECT; + break; + + case VALUE_NULL: + + // Position: key: null ^ + jsonType = JsonType.NULL; + break; + + case VALUE_FALSE: + case VALUE_TRUE: + jsonType = JsonType.BOOLEAN; + break; + + case VALUE_NUMBER_INT: + jsonType = JsonType.INTEGER; + break; + + case VALUE_NUMBER_FLOAT: + jsonType = JsonType.FLOAT; + break; + + case VALUE_STRING: + jsonType = JsonType.STRING; + break; + + default: + // Won't get here: the Jackson parser catches + // errors. + throw tokenizer.errorFactory().syntaxError(token); + } + tokenizer.unget(token); + } +} diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueFactory.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueFactory.java deleted file mode 100644 index 97ca0ca..0000000 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueFactory.java +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.exec.store.easy.json.parser; - -import com.fasterxml.jackson.core.JsonToken; - -/** - * Constructs a value parser and its listeners by looking ahead - * some number of tokens to "sniff" the type of the value. For - * example: - * <ul> - * <li>{@code foo: <value>} - Field value</li> - * <li>{@code foo: [ <value> ]} - 1D array value</li> - * <li>{@code foo: [ [<value> ] ]} - 2D array value</li> - * <li>Etc.</li> - * </ul> - * <p> - * There are two cases in which no type estimation is possible: - * <ul> - * <li>The value is {@code null}, indicated by - * {@link JsonType#NULL}.</code> - * <li>The value is an array, and the array is empty, indicated - * by {@link JsonType#EMPTY}.</li> - * </ul> - * This class handles syntactic type inference. The associated - * listener enforces semantic rules. For example, if a schema is - * available, and we know that field "x" must be an Integer, but - * this class reports that it is an object, then the listener should - * raise an exception. - * <p> - * Also, the parser cannot enforce type consistency. This class - * looks only at the first appearance of a value. JSON allows anything. - * The listener must enforce semantic rules that say whether a different - * type is allowed for later values. 
- */ -public class ValueFactory { - - public static class FieldDescrip { - protected int arrayDims; - protected JsonType type; - - public boolean isArray() { return arrayDims > 0; } - - public boolean isObject() { return type == JsonType.OBJECT; } - } - - private ValueFactory() { } - - /** - * Parse position: <code>{ ... field : ^ ?</code> for a newly-seen field. - * Look ahead to guess the field type, then declare the field. - * - * @param parent the object parser declaring the field - * @param key the name of the field - * @param tokenizer the token parser - * @return the value parser for the element, which may contain additional - * structure for objects or arrays - */ - public static ElementParser createFieldParser(ObjectParser parent, String key, TokenIterator tokenizer) { - FieldDescrip descrip = new FieldDescrip(); - inferFieldType(descrip, tokenizer); - ObjectListener objListener = parent.listener(); - ValueListener fieldListener; - if (descrip.isObject()) { - if (descrip.isArray()) { - // Object array field - fieldListener = objListener.addObjectArray(key, descrip.arrayDims); - } else { - // Object field - fieldListener = objListener.addObject(key); - } - } else { - if (descrip.isArray()) { - // Scalar (or unknown) array field - fieldListener = objListener.addArray(key, descrip.arrayDims, descrip.type); - } else { - // Scalar field - fieldListener = objListener.addScalar(key, descrip.type); - } - } - ValueParser fp = new ValueParser(parent, key, fieldListener); - createStructureParser(fp, descrip); - return fp; - } - - /** - * Add the object or array parser, if the structured type is known. - */ - private static void createStructureParser(ValueParser valueParser, - FieldDescrip descrip) { - if (descrip.isArray()) { - valueParser.bindArrayParser(createArrayParser(valueParser, descrip)); - } else if (descrip.isObject()) { - valueParser.bindObjectParser(objectParser(valueParser)); - } - } - - /** - * Parse position: <code>... 
[ ?</code> for a field or array element not previously - * known to be an array. Look ahead to determine if the array is nested and its - * element types. - * - * @param parent the parser for the value that has been found to contain an - * array - * @param tokenizer the JSON token parser - * @return an array parser to bind to the parent value parser to parse the - * array - */ - public static ArrayParser createArrayParser(ValueParser parent, TokenIterator tokenizer) { - FieldDescrip descrip = new FieldDescrip(); - // Already in an array, so add the outer dimension. - descrip.arrayDims++; - inferFieldType(descrip, tokenizer); - return createArrayParser(parent, descrip); - } - - public static ArrayParser createArrayParser(ValueParser parent, FieldDescrip descrip) { - ValueListener fieldListener = parent.listener(); - ArrayListener arrayListener; - if (descrip.isObject()) { - // Object array elements - arrayListener = fieldListener.objectArray(descrip.arrayDims); - } else { - arrayListener = fieldListener.array(descrip.arrayDims, descrip.type); - } - descrip.arrayDims--; - ValueListener elementListener; - if (descrip.isObject()) { - if (descrip.isArray()) { - // Object array elements - elementListener = arrayListener.objectArrayElement(descrip.arrayDims); - } else { - // Object elements - elementListener = arrayListener.objectElement(); - } - } else { - if (descrip.isArray()) { - // Scalar (or unknown) array elements - elementListener = arrayListener.arrayElement(descrip.arrayDims, descrip.type); - } else { - // Scalar elements - elementListener = arrayListener.scalarElement(descrip.type); - } - } - ArrayParser arrayParser = new ArrayParser(parent, arrayListener, elementListener); - createStructureParser(arrayParser.elementParser(), descrip); - return arrayParser; - } - - public static ObjectParser objectParser(ValueParser parent) { - ValueListener valueListener = parent.listener(); - ObjectListener objListener = valueListener.object(); - return new 
ObjectParser(parent, objListener); - } - - protected static void inferFieldType(FieldDescrip descrip, TokenIterator tokenizer) { - JsonToken token = tokenizer.requireNext(); - switch (token) { - case START_ARRAY: - // Position: key: [ ^ - descrip.arrayDims++; - inferFieldType(descrip, tokenizer); - break; - - case END_ARRAY: - if (descrip.arrayDims == 0) { - throw tokenizer.errorFactory().syntaxError(token); - } - descrip.type = JsonType.EMPTY; - break; - - case START_OBJECT: - // Position: key: { ^ - descrip.type = JsonType.OBJECT; - break; - - case VALUE_NULL: - - // Position: key: null ^ - descrip.type = JsonType.NULL; - break; - - case VALUE_FALSE: - case VALUE_TRUE: - descrip.type = JsonType.BOOLEAN; - break; - - case VALUE_NUMBER_INT: - descrip.type = JsonType.INTEGER; - break; - - case VALUE_NUMBER_FLOAT: - descrip.type = JsonType.FLOAT; - break; - - case VALUE_STRING: - descrip.type = JsonType.STRING; - break; - - default: - // Won't get here: the Jackson parser catches - // errors. - throw tokenizer.errorFactory().syntaxError(token); - } - tokenizer.unget(token); - } -} diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueListener.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueListener.java index 9625f55..6037069 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueListener.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueListener.java @@ -62,16 +62,15 @@ package org.apache.drill.exec.store.easy.json.parser; public interface ValueListener { /** - * The field is to be treated as "all-text". Used when the parser-level - * setting for {@code allTextMode} is {@code false}; allows per-field - * overrides to, perhaps, ride over inconsistent scalar types for a - * single field. 
- * - * @return {@code true} if the field is to be read in "all-text mode" even - * if the global setting is off, {@code false} to read the field as - * typed values. + * Allows the object listener to revise the listener for a field, + * such as when a field starts null and resolves to some concrete + * type. */ - boolean isText(); + interface ValueHost { + void bindListener(ValueListener listener); + } + + void bind(ValueHost host); /** * Called on parsing a {@code null} value for the field. Called whether @@ -125,28 +124,18 @@ public interface ValueListener { /** * The parser has encountered a object value for the field for the first - * time. That is: {@code foo: {</code}. + * time. That is: <code>foo: {</code>. * * @return an object listener for the object */ ObjectListener object(); /** - * The parser has encountered a array value for the first - * time, and that array is scalar, null or empty. - * - * @param arrayDims the number of observed array dimensions - * @param type the observed JSON token type for the array element - * @return an array listener for the array - */ - ArrayListener array(int arrayDims, JsonType type); - - /** - * The parser has encountered a array value for the first - * time, and that array contains an object. + * The parser has encountered an array value for the first time.
* - * @param arrayDims the number of observed array dimensions + * @param valueDef description of the array dimensions (if + * a multi-dimensional array) and type (if known) * @return an array listener for the array */ - ArrayListener objectArray(int arrayDims); + ArrayListener array(ValueDef valueDef); } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueParser.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueParser.java index ac7ee31..8a7fd77 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueParser.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/ValueParser.java @@ -17,6 +17,9 @@ */ package org.apache.drill.exec.store.easy.json.parser; +import org.apache.drill.exec.store.easy.json.parser.ObjectListener.FieldType; +import org.apache.drill.exec.store.easy.json.parser.ValueListener.ValueHost; + import com.fasterxml.jackson.core.JsonToken; /** @@ -36,7 +39,7 @@ import com.fasterxml.jackson.core.JsonToken; * Listeners can enforce one type only, or can be more flexible and * allow multiple types. 
*/ -public class ValueParser extends AbstractElementParser { +public class ValueParser extends AbstractElementParser implements ValueHost { private interface ValueHandler { void accept(TokenIterator tokenizer, JsonToken token); @@ -107,34 +110,34 @@ public class ValueParser extends AbstractElementParser { } private final String key; - private final ValueListener listener; - private final ValueHandler valueHandler; + protected final ValueHandler valueHandler; + private ValueListener listener; private ObjectParser objectParser; private ArrayParser arrayParser; - public ValueParser(ElementParser parent, String key, ValueListener listener) { + public ValueParser(ElementParser parent, String key, FieldType type) { super(parent); this.key = key; - this.listener = listener; - if (listener.isText() || structParser().options().allTextMode) { + if (type == FieldType.TEXT || structParser().options().allTextMode) { valueHandler = new TextValueHandler(); } else { valueHandler = new TypedValueHandler(); } } + @Override + public void bindListener(ValueListener listener) { + this.listener = listener; + listener.bind(this); + if (arrayParser != null) { + arrayParser.bindListener(listener.array(ValueDef.UNKNOWN_ARRAY)); + } + } + public String key() { return key; } public ValueListener listener() { return listener; } - public void bindObjectParser(ObjectParser parser) { - objectParser = parser; - } - - public void bindArrayParser(ArrayParser parser) { - arrayParser = parser; - } - /** * Parses <code>true | false | null | integer | float | string| * embedded-object | { ... } | [ ... ]</code> @@ -148,17 +151,17 @@ public class ValueParser extends AbstractElementParser { if (objectParser == null) { // No object parser yet. May be that the value was null, // or may be that it changed types. 
- objectParser = ValueFactory.objectParser(this); + addObjectParser(); } objectParser.parse(tokenizer); break; case START_ARRAY: - // Position: { ^ + // Position: [ ^ if (arrayParser == null) { // No array parser yet. May be that the value was null, // or may be that it changed types. - arrayParser = ValueFactory.createArrayParser(this, tokenizer); + addArrayParser(ValueDefFactory.arrayLookAhead(tokenizer)); } arrayParser.parse(tokenizer); break; @@ -171,4 +174,22 @@ public class ValueParser extends AbstractElementParser { valueHandler.accept(tokenizer, token); } } + + public void addObjectParser() { + objectParser = new ObjectParser(this, listener().object()); + } + + private void addArrayParser(ValueDef valueDef) { + ArrayListener arrayListener = listener().array(valueDef); + arrayParser = new ArrayParser(this, arrayListener); + arrayParser.expandStructure(valueDef); + } + + public void expandStructure(ValueDef valueDef) { + if (valueDef.isArray()) { + addArrayParser(valueDef); + } else if (valueDef.type().isObject()) { + addObjectParser(); + } + } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/parser/BaseTestJsonParser.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/BaseTestJsonParser.java similarity index 71% rename from exec/java-exec/src/test/java/org/apache/drill/exec/store/json/parser/BaseTestJsonParser.java rename to exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/BaseTestJsonParser.java index 3f9b715..a3c3e08 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/parser/BaseTestJsonParser.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/BaseTestJsonParser.java @@ -15,12 +15,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.drill.exec.store.json.parser; +package org.apache.drill.exec.store.easy.json.parser; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.IOException; @@ -31,13 +32,6 @@ import java.util.Map; import java.util.Set; import org.apache.commons.io.input.ReaderInputStream; -import org.apache.drill.exec.store.easy.json.parser.ArrayListener; -import org.apache.drill.exec.store.easy.json.parser.ErrorFactory; -import org.apache.drill.exec.store.easy.json.parser.JsonStructureOptions; -import org.apache.drill.exec.store.easy.json.parser.JsonStructureParser; -import org.apache.drill.exec.store.easy.json.parser.JsonType; -import org.apache.drill.exec.store.easy.json.parser.ObjectListener; -import org.apache.drill.exec.store.easy.json.parser.ValueListener; import org.apache.drill.exec.vector.accessor.UnsupportedConversionError; import com.fasterxml.jackson.core.JsonParseException; @@ -105,23 +99,19 @@ public class BaseTestJsonParser { protected static class ValueListenerFixture implements ValueListener { - final int dimCount; - final JsonType type; + final ValueDef valueDef; int nullCount; int valueCount; Object value; + ValueHost host; ObjectListenerFixture objectValue; ArrayListenerFixture arrayValue; - public ValueListenerFixture(int dimCount, JsonType type) { - this.dimCount = dimCount; - this.type = type; + public ValueListenerFixture(ValueDef valueDef) { + this.valueDef = valueDef; } @Override - public boolean isText() { return false; } - - @Override public void onNull() { nullCount++; } @@ -164,32 +154,29 @@ public class BaseTestJsonParser { } @Override - public ArrayListener array(int arrayDims, JsonType type) { - assertNull(arrayValue); - arrayValue = new ArrayListenerFixture(arrayDims, type); + public ArrayListener array(ValueDef 
valueDef) { + if (arrayValue == null) { + arrayValue = new ArrayListenerFixture(valueDef); + } return arrayValue; } @Override - public ArrayListener objectArray(int arrayDims) { - assertNull(arrayValue); - arrayValue = new ArrayListenerFixture(arrayDims, JsonType.OBJECT); - return arrayValue; + public void bind(ValueHost host) { + this.host = host; } } protected static class ArrayListenerFixture implements ArrayListener { - final int dimCount; - final JsonType type; + final ValueDef valueDef; int startCount; int endCount; int elementCount; ValueListenerFixture element; - public ArrayListenerFixture(int dimCount, JsonType type) { - this.dimCount = dimCount; - this.type = type; + public ArrayListenerFixture(ValueDef valueDef) { + this.valueDef = valueDef; } @Override @@ -198,38 +185,23 @@ public class BaseTestJsonParser { } @Override - public void onElement() { + public void onElementStart() { elementCount++; } @Override - public void onEnd() { - endCount++; - } - - @Override - public ValueListener objectArrayElement(int arrayDims) { - return element(arrayDims, JsonType.OBJECT); - } + public void onElementEnd() { } @Override - public ValueListener objectElement() { - return element(0, JsonType.OBJECT); - } - - @Override - public ValueListener arrayElement(int arrayDims, JsonType type) { - return element(arrayDims, type); + public void onEnd() { + endCount++; } @Override - public ValueListener scalarElement(JsonType type) { - return element(0, type); - } - - private ValueListener element(int arrayDims, JsonType type) { - assertNull(element); - element = new ValueListenerFixture(arrayDims, type); + public ValueListener element(ValueDef valueDef) { + if (element == null) { + element = new ValueListenerFixture(valueDef); + } return element; } } @@ -238,6 +210,7 @@ public class BaseTestJsonParser { final Map<String, ValueListenerFixture> fields = new HashMap<>(); Set<String> projectFilter; + FieldType fieldType = FieldType.TYPED; int startCount; int endCount; @@ -252,37 
+225,25 @@ public class BaseTestJsonParser { } @Override - public boolean isProjected(String key) { - return projectFilter == null || projectFilter.contains(key); - } - - @Override - public ValueListener addScalar(String key, JsonType type) { - return field(key, 0, type); - } - - @Override - public ValueListener addArray(String key, int dims, JsonType type) { - return field(key, dims, type); - } - - @Override - public ValueListener addObject(String key) { - return field(key, 0, JsonType.OBJECT); + public FieldType fieldType(String key) { + if (projectFilter != null && !projectFilter.contains(key)) { + return FieldType.IGNORE; + } + return fieldType; } @Override - public ValueListener addObjectArray(String key, int dims) { - return field(key, dims, JsonType.OBJECT); - } - - private ValueListener field(String key, int dims, JsonType type) { + public ValueListener addField(String key, ValueDef valueDef) { assertFalse(fields.containsKey(key)); - ValueListenerFixture field = new ValueListenerFixture(dims, type); + ValueListenerFixture field = makeField(key, valueDef); fields.put(key, field); return field; } + public ValueListenerFixture makeField(String key, ValueDef valueDef) { + return new ValueListenerFixture(valueDef); + } + public ValueListenerFixture field(String key) { ValueListenerFixture field = fields.get(key); assertNotNull(field); @@ -320,6 +281,25 @@ public class BaseTestJsonParser { return rootObject.field(key); } + public void expect(String key, Object[] values) { + ValueListenerFixture valueListener = null; + int expectedNullCount = 0; + for (int i = 0; i < values.length; i++) { + assertTrue(next()); + if (valueListener == null) { + valueListener = field(key); + expectedNullCount = valueListener.nullCount; + } + Object value = values[i]; + if (value == null) { + expectedNullCount++; + } else { + assertEquals(value, valueListener.value); + } + assertEquals(expectedNullCount, valueListener.nullCount); + } + } + public void close() { if (parser != null) { 
parser.close(); diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/parser/TestJsonParserArrays.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/TestJsonParserArrays.java similarity index 82% rename from exec/java-exec/src/test/java/org/apache/drill/exec/store/json/parser/TestJsonParserArrays.java rename to exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/TestJsonParserArrays.java index 97ccfc8..7275733 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/parser/TestJsonParserArrays.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/TestJsonParserArrays.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.drill.exec.store.json.parser; +package org.apache.drill.exec.store.easy.json.parser; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -23,13 +23,16 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import org.apache.drill.exec.store.easy.json.parser.JsonType; +import org.apache.drill.categories.RowSetTests; +import org.apache.drill.exec.store.easy.json.parser.ValueDef.JsonType; import org.junit.Test; +import org.junit.experimental.categories.Category; /** * Tests array (including multi-dimensional and object) support * for the JSON structure parser. 
*/ +@Category(RowSetTests.class) public class TestJsonParserArrays extends BaseTestJsonParser { @Test @@ -45,21 +48,21 @@ public class TestJsonParserArrays extends BaseTestJsonParser { // Value of object.a ValueListenerFixture a = fixture.field("a"); - assertEquals(JsonType.INTEGER, a.type); - assertEquals(1, a.dimCount); + assertEquals(JsonType.INTEGER, a.valueDef.type()); + assertEquals(1, a.valueDef.dimensions()); // Array aspect of a assertNotNull(a.arrayValue); ArrayListenerFixture aArray = a.arrayValue; assertEquals(1, aArray.startCount); assertEquals(aArray.startCount, aArray.endCount); - assertEquals(1, aArray.dimCount); + assertEquals(1, aArray.valueDef.dimensions()); // Value of each element of array aspect of a assertNotNull(aArray.element); ValueListenerFixture aElement = aArray.element; - assertEquals(JsonType.INTEGER, aElement.type); - assertEquals(0, aElement.dimCount); + assertEquals(JsonType.INTEGER, aElement.valueDef.type()); + assertEquals(0, aElement.valueDef.dimensions()); assertNull(aElement.arrayValue); assertEquals(2, aElement.valueCount); assertEquals(100L, aElement.value); @@ -105,8 +108,8 @@ public class TestJsonParserArrays extends BaseTestJsonParser { // {a: null} assertTrue(fixture.next()); ValueListenerFixture a = fixture.field("a"); - assertEquals(JsonType.NULL, a.type); - assertEquals(0, a.dimCount); + assertEquals(JsonType.NULL, a.valueDef.type()); + assertEquals(0, a.valueDef.dimensions()); assertNull(a.arrayValue); // See an array, can revise estimate of field type @@ -114,7 +117,7 @@ public class TestJsonParserArrays extends BaseTestJsonParser { assertTrue(fixture.next()); assertNotNull(a.arrayValue); ArrayListenerFixture aArray = a.arrayValue; - assertEquals(1, aArray.dimCount); + assertEquals(1, aArray.valueDef.dimensions()); ValueListenerFixture aElement = aArray.element; assertEquals(2, aElement.valueCount); assertEquals(100L, aElement.value); @@ -130,21 +133,22 @@ public class TestJsonParserArrays extends BaseTestJsonParser 
{ JsonParserFixture fixture = new JsonParserFixture(); fixture.open(json); - // Can't predict the future, all we know is a is an array. + // Can't predict the future, all we know is `a` is an array. // "{a: []} assertTrue(fixture.next()); ValueListenerFixture a = fixture.field("a"); - assertEquals(JsonType.EMPTY, a.type); - assertEquals(1, a.dimCount); + assertEquals(JsonType.EMPTY, a.valueDef.type()); + assertEquals(1, a.valueDef.dimensions()); assertNotNull(a.arrayValue); ArrayListenerFixture aArray = a.arrayValue; - assertEquals(1, aArray.dimCount); - ValueListenerFixture aElement = aArray.element; - assertEquals(JsonType.EMPTY, aElement.type); + assertEquals(1, aArray.valueDef.dimensions()); + assertNull(aArray.element); // Saw no element yet // See elements, can revise estimate of element type // {a: [1, 100]} assertTrue(fixture.next()); + ValueListenerFixture aElement = aArray.element; + assertNotNull(aElement); assertEquals(2, aElement.valueCount); assertEquals(100L, aElement.value); @@ -155,43 +159,43 @@ public class TestJsonParserArrays extends BaseTestJsonParser { @Test public void test2DArray() { final String json = - "{a: [ [10, 1], [20, 2]]}\n" + + "{a: [[10, 1], [20, 2]]}\n" + "{a: [[null]]} {a: [[]]} {a: [null]} {a: null}"; JsonParserFixture fixture = new JsonParserFixture(); fixture.open(json); - // {a: [ [10, 1], [20, 2]]} + // {a: [[10, 1], [20, 2]]} assertTrue(fixture.next()); // Value of a ValueListenerFixture a = fixture.field("a"); - assertEquals(JsonType.INTEGER, a.type); - assertEquals(2, a.dimCount); + assertEquals(JsonType.INTEGER, a.valueDef.type()); + assertEquals(2, a.valueDef.dimensions()); // Array for a[] assertNotNull(a.arrayValue); ArrayListenerFixture outerArr = a.arrayValue; - assertEquals(2, outerArr.dimCount); + assertEquals(2, outerArr.valueDef.dimensions()); assertEquals(1, outerArr.startCount); assertEquals(outerArr.startCount, outerArr.endCount); // Value of a[] elements ValueListenerFixture outerElement = outerArr.element; 
- assertEquals(JsonType.INTEGER, outerElement.type); - assertEquals(1, outerElement.dimCount); + assertEquals(JsonType.INTEGER, outerElement.valueDef.type()); + assertEquals(1, outerElement.valueDef.dimensions()); assertNotNull(outerElement.arrayValue); // Array for a[][] assertNotNull(outerElement.arrayValue); ArrayListenerFixture innerArr = outerElement.arrayValue; - assertEquals(1, innerArr.dimCount); + assertEquals(1, innerArr.valueDef.dimensions()); assertEquals(2, innerArr.startCount); assertEquals(innerArr.startCount, innerArr.endCount); // Value of a[][] elements ValueListenerFixture innerElement = innerArr.element; - assertEquals(JsonType.INTEGER, innerElement.type); - assertEquals(0, innerElement.dimCount); + assertEquals(JsonType.INTEGER, innerElement.valueDef.type()); + assertEquals(0, innerElement.valueDef.dimensions()); assertEquals(4, innerElement.valueCount); assertEquals(0, innerElement.nullCount); assertEquals(2L, innerElement.value); @@ -271,21 +275,21 @@ public class TestJsonParserArrays extends BaseTestJsonParser { // Value of object.a ValueListenerFixture a = fixture.field("a"); - assertEquals(JsonType.OBJECT, a.type); - assertEquals(1, a.dimCount); + assertEquals(JsonType.OBJECT, a.valueDef.type()); + assertEquals(1, a.valueDef.dimensions()); // a[] assertNotNull(a.arrayValue); ArrayListenerFixture aArray = a.arrayValue; assertEquals(1, aArray.startCount); assertEquals(aArray.startCount, aArray.endCount); - assertEquals(1, aArray.dimCount); + assertEquals(1, aArray.valueDef.dimensions()); // Value of each element of a[] assertNotNull(aArray.element); ValueListenerFixture aElement = aArray.element; - assertEquals(JsonType.OBJECT, aElement.type); - assertEquals(0, aElement.dimCount); + assertEquals(JsonType.OBJECT, aElement.valueDef.type()); + assertEquals(0, aElement.valueDef.dimensions()); assertNull(aElement.arrayValue); assertEquals(0, aElement.valueCount); assertEquals(0, aElement.nullCount); @@ -317,21 +321,21 @@ public class 
TestJsonParserArrays extends BaseTestJsonParser { // Value of object.a ValueListenerFixture a = fixture.field("a"); - assertEquals(JsonType.OBJECT, a.type); - assertEquals(2, a.dimCount); + assertEquals(JsonType.OBJECT, a.valueDef.type()); + assertEquals(2, a.valueDef.dimensions()); // a[] assertNotNull(a.arrayValue); ArrayListenerFixture outerArray = a.arrayValue; assertEquals(1, outerArray.startCount); assertEquals(outerArray.startCount, outerArray.endCount); - assertEquals(2, outerArray.dimCount); + assertEquals(2, outerArray.valueDef.dimensions()); // Value of each element of a[] assertNotNull(outerArray.element); ValueListenerFixture outerElement = outerArray.element; - assertEquals(JsonType.OBJECT, outerElement.type); - assertEquals(1, outerElement.dimCount); + assertEquals(JsonType.OBJECT, outerElement.valueDef.type()); + assertEquals(1, outerElement.valueDef.dimensions()); assertEquals(0, outerElement.valueCount); assertEquals(0, outerElement.nullCount); @@ -340,13 +344,13 @@ public class TestJsonParserArrays extends BaseTestJsonParser { ArrayListenerFixture innerArray = outerElement.arrayValue; assertEquals(2, innerArray.startCount); assertEquals(innerArray.startCount, innerArray.endCount); - assertEquals(1, innerArray.dimCount); + assertEquals(1, innerArray.valueDef.dimensions()); // Value of each element of a[][] assertNotNull(innerArray.element); ValueListenerFixture innerElement = innerArray.element; - assertEquals(JsonType.OBJECT, innerElement.type); - assertEquals(0, innerElement.dimCount); + assertEquals(JsonType.OBJECT, innerElement.valueDef.type()); + assertEquals(0, innerElement.valueDef.dimensions()); assertEquals(0, innerElement.valueCount); assertEquals(0, innerElement.nullCount); @@ -380,8 +384,8 @@ public class TestJsonParserArrays extends BaseTestJsonParser { assertEquals(4, fixture.read()); ValueListenerFixture a = fixture.field("a"); // Type first seen - assertEquals(JsonType.INTEGER, a.type); - assertEquals(1, a.dimCount); + 
assertEquals(JsonType.INTEGER, a.valueDef.type()); + assertEquals(1, a.valueDef.dimensions()); // Everything populated diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/parser/TestJsonParserBasics.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/TestJsonParserBasics.java similarity index 74% rename from exec/java-exec/src/test/java/org/apache/drill/exec/store/json/parser/TestJsonParserBasics.java rename to exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/TestJsonParserBasics.java index 4215ff5..d51e1a8 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/parser/TestJsonParserBasics.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/TestJsonParserBasics.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.drill.exec.store.json.parser; +package org.apache.drill.exec.store.easy.json.parser; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -23,13 +23,17 @@ import static org.junit.Assert.assertTrue; import java.util.HashSet; -import org.apache.drill.exec.store.easy.json.parser.JsonType; +import org.apache.drill.categories.RowSetTests; +import org.apache.drill.exec.store.easy.json.parser.ObjectListener.FieldType; +import org.apache.drill.exec.store.easy.json.parser.ValueDef.JsonType; import org.junit.Test; +import org.junit.experimental.categories.Category; /** * Tests JSON structure parser functionality excluding nested objects * and arrays. Tests the "happy path." 
*/ +@Category(RowSetTests.class) public class TestJsonParserBasics extends BaseTestJsonParser { @Test @@ -63,8 +67,8 @@ public class TestJsonParserBasics extends BaseTestJsonParser { assertEquals(1, fixture.rootObject.startCount); assertEquals(1, fixture.rootObject.fields.size()); ValueListenerFixture a = fixture.field("a"); - assertEquals(JsonType.BOOLEAN, a.type); - assertEquals(0, a.dimCount); + assertEquals(JsonType.BOOLEAN, a.valueDef.type()); + assertEquals(0, a.valueDef.dimensions()); assertEquals(0, a.nullCount); assertEquals(Boolean.TRUE, a.value); assertEquals(2, fixture.read()); @@ -80,7 +84,7 @@ public class TestJsonParserBasics extends BaseTestJsonParser { fixture.open(json); assertTrue(fixture.next()); ValueListenerFixture a = fixture.field("a"); - assertEquals(JsonType.INTEGER, a.type); + assertEquals(JsonType.INTEGER, a.valueDef.type()); assertEquals(2, fixture.read()); assertEquals(1, a.nullCount); assertEquals(100L, a.value); @@ -94,7 +98,7 @@ public class TestJsonParserBasics extends BaseTestJsonParser { fixture.open(json); assertTrue(fixture.next()); ValueListenerFixture a = fixture.field("a"); - assertEquals(JsonType.FLOAT, a.type); + assertEquals(JsonType.FLOAT, a.valueDef.type()); assertEquals(2, fixture.read()); assertEquals(1, a.nullCount); assertEquals(2, a.valueCount); @@ -111,7 +115,7 @@ public class TestJsonParserBasics extends BaseTestJsonParser { fixture.open(json); assertTrue(fixture.next()); ValueListenerFixture a = fixture.field("a"); - assertEquals(JsonType.FLOAT, a.type); + assertEquals(JsonType.FLOAT, a.valueDef.type()); assertEquals(2, fixture.read()); assertEquals(3, a.valueCount); assertEquals(Double.NEGATIVE_INFINITY, a.value); @@ -125,7 +129,7 @@ public class TestJsonParserBasics extends BaseTestJsonParser { fixture.open(json); assertTrue(fixture.next()); ValueListenerFixture a = fixture.field("a"); - assertEquals(JsonType.STRING, a.type); + assertEquals(JsonType.STRING, a.valueDef.type()); assertEquals(2, fixture.read()); 
assertEquals(1, a.nullCount); assertEquals(2, a.valueCount); @@ -140,7 +144,7 @@ public class TestJsonParserBasics extends BaseTestJsonParser { fixture.open(json); assertTrue(fixture.next()); ValueListenerFixture a = fixture.field("a"); - assertEquals(JsonType.STRING, a.type); + assertEquals(JsonType.STRING, a.valueDef.type()); assertEquals("hi", a.value); assertTrue(fixture.next()); assertEquals(10L, a.value); @@ -255,24 +259,68 @@ public class TestJsonParserBasics extends BaseTestJsonParser { fixture.options.allTextMode = true; fixture.open(json); - assertTrue(fixture.next()); - ValueListenerFixture a = fixture.field("a"); - assertEquals("1", a.value); + fixture.expect("a", + new Object[] {"1", "foo", "true", "20.5", null}); + assertFalse(fixture.next()); + fixture.close(); + } - assertTrue(fixture.next()); - assertEquals("foo", a.value); + @Test + public void testColumnTextMode() { + final String json = + "{a: 1} {a: \"foo\"} {a: true} {a: 20.5} {a: null}"; + JsonParserFixture fixture = new JsonParserFixture(); + fixture.rootObject.fieldType = FieldType.TEXT; + fixture.open(json); - assertTrue(fixture.next()); - assertEquals("true", a.value); + fixture.expect("a", + new Object[] {"1", "foo", "true", "20.5", null}); + assertFalse(fixture.next()); + fixture.close(); + } - assertTrue(fixture.next()); - assertEquals("20.5", a.value); - assertEquals(0, a.nullCount); + @Test + public void testJsonModeScalars() { + final String json = + "{a: 1} {a: \"foo\"} {a: true} {a: 20.5} {a: null}"; + JsonParserFixture fixture = new JsonParserFixture(); + fixture.rootObject.fieldType = FieldType.JSON; + fixture.open(json); - assertTrue(fixture.next()); - assertEquals("20.5", a.value); - assertEquals(1, a.nullCount); + fixture.expect("a", + new Object[] {"1", "\"foo\"", "true", "20.5", "null"}); + assertFalse(fixture.next()); + fixture.close(); + } + + @Test + public void testJsonModeArrays() { + final String json = + "{a: []} {a: [null]} {a: [null, null]} {a: [[]]}\n" + + "{a: 
[1, \"foo\", true]} {a: [[1, 2], [3, 4]]}\n"; + JsonParserFixture fixture = new JsonParserFixture(); + fixture.rootObject.fieldType = FieldType.JSON; + fixture.open(json); + + fixture.expect("a", + new Object[] {"[]", "[null]", "[null, null]", "[[]]", + "[1, \"foo\", true]", "[[1, 2], [3, 4]]"}); + assertFalse(fixture.next()); + fixture.close(); + } + + @Test + public void testJsonModeObjects() { + final String json = + "{a: {}} {a: {b: null}} {a: {b: null, b: null}}\n" + + "{a: {b: {c: {d: [{e: 10}, null, 20], f: \"foo\"}, g:30}, h: 40}}\n"; + JsonParserFixture fixture = new JsonParserFixture(); + fixture.rootObject.fieldType = FieldType.JSON; + fixture.open(json); + fixture.expect("a", + new Object[] {"{}", "{\"b\": null}", "{\"b\": null, \"b\": null}", + "{\"b\": {\"c\": {\"d\": [{\"e\": 10}, null, 20], \"f\": \"foo\"}, \"g\": 30}, \"h\": 40}"}); assertFalse(fixture.next()); fixture.close(); } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/parser/TestJsonParserErrors.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/TestJsonParserErrors.java similarity index 94% rename from exec/java-exec/src/test/java/org/apache/drill/exec/store/json/parser/TestJsonParserErrors.java rename to exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/TestJsonParserErrors.java index 1b99ca2..2693635 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/parser/TestJsonParserErrors.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/TestJsonParserErrors.java @@ -15,19 +15,22 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.drill.exec.store.json.parser; +package org.apache.drill.exec.store.easy.json.parser; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import org.apache.drill.categories.RowSetTests; import org.junit.Test; +import org.junit.experimental.categories.Category; /** * Tests the un-happy path cases in the JSON structure parser. Some * error cases can't occur because the Jackson parser catches them * first. */ +@Category(RowSetTests.class) public class TestJsonParserErrors extends BaseTestJsonParser { @Test diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/parser/TestJsonParserObjects.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/TestJsonParserObjects.java similarity index 94% rename from exec/java-exec/src/test/java/org/apache/drill/exec/store/json/parser/TestJsonParserObjects.java rename to exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/TestJsonParserObjects.java index 9a350b6..d2cca9c 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/parser/TestJsonParserObjects.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/TestJsonParserObjects.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.drill.exec.store.json.parser; +package org.apache.drill.exec.store.easy.json.parser; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -23,12 +23,15 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import org.apache.drill.exec.store.easy.json.parser.JsonType; +import org.apache.drill.categories.RowSetTests; +import org.apache.drill.exec.store.easy.json.parser.ValueDef.JsonType; import org.junit.Test; +import org.junit.experimental.categories.Category; /** * Tests nested object support in the JSON structure parser. */ +@Category(RowSetTests.class) public class TestJsonParserObjects extends BaseTestJsonParser { @Test @@ -48,7 +51,7 @@ public class TestJsonParserObjects extends BaseTestJsonParser { assertEquals(1, custObj.startCount); assertEquals(custObj.startCount, custObj.endCount); ValueListenerFixture name = custObj.field("name"); - assertEquals(JsonType.STRING, name.type); + assertEquals(JsonType.STRING, name.valueDef.type()); assertEquals("fred", name.value); assertTrue(fixture.next()); diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/TestJsonParserUnknowns.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/TestJsonParserUnknowns.java new file mode 100644 index 0000000..8fa37b8 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/json/parser/TestJsonParserUnknowns.java @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.easy.json.parser; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; + +import org.apache.drill.categories.RowSetTests; +import org.apache.drill.exec.store.easy.json.parser.ValueDef.JsonType; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(RowSetTests.class) +public class TestJsonParserUnknowns extends BaseTestJsonParser { + + /** + * Test replacing a value listener "in-flight". Handles the + * (simulated) case where the initial value is ambiguous (here, null), + * and a later token resolves the value to something concrete. 
+ */ + @Test + public void testReplaceListener() { + final String json = "{a: null} {a: true} {a: false}"; + JsonParserFixture fixture = new JsonParserFixture(); + fixture.open(json); + assertTrue(fixture.next()); + assertEquals(1, fixture.rootObject.startCount); + assertEquals(1, fixture.rootObject.fields.size()); + ValueListenerFixture a = fixture.field("a"); + assertEquals(JsonType.NULL, a.valueDef.type()); + assertEquals(0, a.valueDef.dimensions()); + assertEquals(1, a.nullCount); + + // Replace the listener with a new one + ValueListenerFixture a2 = new ValueListenerFixture(new ValueDef(JsonType.BOOLEAN, a.valueDef.dimensions())); + a.host.bindListener(a2); + assertSame(a.host, a2.host); + + assertTrue(fixture.next()); + assertEquals(Boolean.TRUE, a2.value); + assertTrue(fixture.next()); + assertEquals(0, a2.nullCount); + assertEquals(Boolean.FALSE, a2.value); + fixture.close(); + } + + private static class SwapperElementFixture extends ValueListenerFixture { + + private final SwapperListenerFixture parent; + + public SwapperElementFixture(SwapperListenerFixture parent, ValueDef valueDef) { + super(valueDef); + this.parent = parent; + } + + @Override + public void onInt(long value) { + ValueListenerFixture newParent = parent.replace(); + newParent.array(ValueDef.UNKNOWN_ARRAY); + newParent.arrayValue.element(ValueDef.UNKNOWN); + newParent.arrayValue.element.onInt(value); + } + + } + private static class SwapperListenerFixture extends ValueListenerFixture { + + private final ObjectListenerFixture parent; + private final String key; + + public SwapperListenerFixture(ObjectListenerFixture parent, + String key, ValueDef valueDef) { + super(valueDef); + this.parent = parent; + this.key = key; + } + + @Override + public void onInt(long value) { + replace().onInt(value); + } + + @Override + public ObjectListener object() { + return replace().object(); + } + + @Override + public ArrayListener array(ValueDef valueDef) { + if (valueDef.type() == JsonType.EMPTY) { + 
super.array(valueDef); + arrayValue.element = new SwapperElementFixture(this, ValueDef.UNKNOWN); + return arrayValue; + } else { + return replace().array(valueDef); + } + } + + private ValueListenerFixture replace() { + return replaceWith(new ValueListenerFixture(valueDef)); + } + + private ValueListenerFixture replaceWith(ValueListenerFixture newListener) { + parent.fields.put(key, newListener); + host.bindListener(newListener); + return newListener; + } + } + + @Test + public void testNullToScalar() { + final String json = + "{a: null} {a: 2} {a: 3}"; + JsonParserFixture fixture = new JsonParserFixture(); + fixture.rootObject = new ObjectListenerFixture() { + @Override + public ValueListenerFixture makeField(String key, ValueDef valueDef) { + return new SwapperListenerFixture(this, key, valueDef); + } + }; + fixture.open(json); + + // {a: null} + assertTrue(fixture.next()); + ValueListenerFixture a = fixture.field("a"); + assertEquals(JsonType.NULL, a.valueDef.type()); + assertEquals(0, a.valueDef.dimensions()); + assertNull(a.arrayValue); + + // See a scalar, can revise estimate of field type + // {a: 2} + assertTrue(fixture.next()); + ValueListenerFixture a2 = fixture.field("a"); + assertNotSame(a, a2); + assertEquals(2L, a2.value); + assertEquals(0, a2.nullCount); + assertEquals(1, a2.valueCount); + + assertTrue(fixture.next()); + assertEquals(3L, a2.value); + + fixture.close(); + } + + @Test + public void testNullToObject() { + final String json = + "{a: null} {a: {}} {a: {b: 3}}"; + JsonParserFixture fixture = new JsonParserFixture(); + fixture.rootObject = new ObjectListenerFixture() { + @Override + public ValueListenerFixture makeField(String key, ValueDef valueDef) { + return new SwapperListenerFixture(this, key, valueDef); + } + }; + fixture.open(json); + + // {a: null} + assertTrue(fixture.next()); + ValueListenerFixture a = fixture.field("a"); + assertEquals(1, a.nullCount); + + // See an object, can revise estimate of field type + // {a: {}} + 
assertTrue(fixture.next()); + ValueListenerFixture a2 = fixture.field("a"); + assertNotSame(a, a2); + assertNotNull(a2.objectValue); + assertTrue(a2.objectValue.fields.isEmpty()); + + assertTrue(fixture.next()); + ValueListenerFixture b = a2.objectValue.field("b"); + assertEquals(3L, b.value); + + fixture.close(); + } + + @Test + public void testNullToEmptyArray() { + final String json = + "{a: null} {a: []} {a: []} {a: [10, 20]} {a: [30, 40]}"; + JsonParserFixture fixture = new JsonParserFixture(); + fixture.rootObject = new ObjectListenerFixture() { + @Override + public ValueListenerFixture makeField(String key, ValueDef valueDef) { + return new SwapperListenerFixture(this, key, valueDef); + } + }; + fixture.open(json); + + // {a: null} + assertTrue(fixture.next()); + ValueListenerFixture a = fixture.field("a"); + assertEquals(1, a.nullCount); + + // See an empty array, can revise estimate of field type + // {a: []} + assertTrue(fixture.next()); + assertSame(a, fixture.field("a")); + assertEquals(1, a.arrayValue.startCount); + + // Ensure things are stable + // {a: []} + assertTrue(fixture.next()); + assertSame(a, fixture.field("a")); + assertEquals(2, a.arrayValue.startCount); + + // Revise again once we see element type + // {a: [10, 20]} + assertTrue(fixture.next()); + ValueListenerFixture a2 = fixture.field("a"); + assertNotSame(a, a2); + assertEquals(3, a.arrayValue.startCount); // Start on old listener + assertEquals(0, a2.arrayValue.startCount); + assertEquals(1, a2.arrayValue.endCount); // End on new one + assertEquals(2, a2.arrayValue.element.valueCount); + assertEquals(20L, a2.arrayValue.element.value); + + // Check stability again + // {a: [30, 40]} + assertTrue(fixture.next()); + assertSame(a2, fixture.field("a")); + assertEquals(1, a2.arrayValue.startCount); + assertEquals(4, a2.arrayValue.element.valueCount); + assertEquals(40L, a2.arrayValue.element.value); + + fixture.close(); + } + + /** + * As above, but skips the intermediate empty array. 
+ * The array, when it appears, has type info. + */ + @Test + public void testNullToArray() { + final String json = + "{a: null} {a: [10, 20]} {a: [30, 40]}"; + JsonParserFixture fixture = new JsonParserFixture(); + fixture.rootObject = new ObjectListenerFixture() { + @Override + public ValueListenerFixture makeField(String key, ValueDef valueDef) { + return new SwapperListenerFixture(this, key, valueDef); + } + }; + fixture.open(json); + + // {a: null} + assertTrue(fixture.next()); + ValueListenerFixture a = fixture.field("a"); + assertEquals(1, a.nullCount); + + // See a typed array, can revise estimate of field type + // {a: [10, 20]} + assertTrue(fixture.next()); + ValueListenerFixture a2 = fixture.field("a"); + assertNotSame(a, a2); + assertEquals(1, a2.arrayValue.startCount); + assertEquals(1, a2.arrayValue.startCount); + assertEquals(JsonType.INTEGER, a2.arrayValue.valueDef.type()); + assertEquals(1, a2.arrayValue.valueDef.dimensions()); + assertEquals(JsonType.INTEGER, a2.arrayValue.element.valueDef.type()); + assertEquals(0, a2.arrayValue.element.valueDef.dimensions()); + assertEquals(2, a2.arrayValue.element.valueCount); + assertEquals(20L, a2.arrayValue.element.value); + + // Ensure things are stable + // {a: [30, 40]} + assertTrue(fixture.next()); + assertSame(a2, fixture.field("a")); + assertEquals(2, a2.arrayValue.startCount); + assertEquals(4, a2.arrayValue.element.valueCount); + assertEquals(40L, a2.arrayValue.element.value); + + fixture.close(); + } +}
