Repository: asterixdb Updated Branches: refs/heads/master ab36948a6 -> 733fba8e4
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/json-parser/unsupported-type-point3d/unsupported-type-point3d.2.query.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/json-parser/unsupported-type-point3d/unsupported-type-point3d.2.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/json-parser/unsupported-type-point3d/unsupported-type-point3d.2.query.sqlpp new file mode 100644 index 0000000..8e43190 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/json-parser/unsupported-type-point3d/unsupported-type-point3d.2.query.sqlpp @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +SELECT VALUE j +FROM Json as j; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/json-parser/unsupported-type-rectangle/unsupported-type-rectangle.1.ddl.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/json-parser/unsupported-type-rectangle/unsupported-type-rectangle.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/json-parser/unsupported-type-rectangle/unsupported-type-rectangle.1.ddl.sqlpp new file mode 100644 index 0000000..159bc0c --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/json-parser/unsupported-type-rectangle/unsupported-type-rectangle.1.ddl.sqlpp @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE test IF EXISTS; +CREATE DATAVERSE test; + +USE test; + +CREATE TYPE JsonType AS +{ + int_value: int, + rectangle_value: rectangle? 
+}; + +CREATE EXTERNAL DATASET Json(JsonType) USING localfs +( + ("path"="asterix_nc1://data/json/int-1000.json"), + ("format"="json") +) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/json-parser/unsupported-type-rectangle/unsupported-type-rectangle.2.query.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/json-parser/unsupported-type-rectangle/unsupported-type-rectangle.2.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/json-parser/unsupported-type-rectangle/unsupported-type-rectangle.2.query.sqlpp new file mode 100644 index 0000000..8e43190 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/json-parser/unsupported-type-rectangle/unsupported-type-rectangle.2.query.sqlpp @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +SELECT VALUE j +FROM Json as j; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/null-missing/null-missing.1.json ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/null-missing/null-missing.1.json b/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/null-missing/null-missing.1.json new file mode 100644 index 0000000..6955a79 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/null-missing/null-missing.1.json @@ -0,0 +1 @@ +{ "int_value": 1000, "null_value": null } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/numeric-demote-double-bigint/numeric-demote-double-bigint.1.json ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/numeric-demote-double-bigint/numeric-demote-double-bigint.1.json b/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/numeric-demote-double-bigint/numeric-demote-double-bigint.1.json new file mode 100644 index 0000000..4701cc7 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/numeric-demote-double-bigint/numeric-demote-double-bigint.1.json @@ -0,0 +1 @@ +150 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/numeric-float/numeric-float.1.json ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/numeric-float/numeric-float.1.json 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/numeric-float/numeric-float.1.json new file mode 100644 index 0000000..0cda3b0 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/numeric-float/numeric-float.1.json @@ -0,0 +1 @@ +150.11 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/numeric-promote-bigint-double/numeric-demote-double-bigint.1.json ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/numeric-promote-bigint-double/numeric-demote-double-bigint.1.json b/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/numeric-promote-bigint-double/numeric-demote-double-bigint.1.json new file mode 100644 index 0000000..3bac2cd --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/numeric-promote-bigint-double/numeric-demote-double-bigint.1.json @@ -0,0 +1 @@ +1000.0 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/numeric-tinyint/numeric-tinyint.1.json ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/numeric-tinyint/numeric-tinyint.1.json b/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/numeric-tinyint/numeric-tinyint.1.json new file mode 100644 index 0000000..105d7d9 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/numeric-tinyint/numeric-tinyint.1.json @@ -0,0 +1 @@ +100 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/spatial/spatial.1.json 
---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/spatial/spatial.1.json b/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/spatial/spatial.1.json new file mode 100644 index 0000000..94ace91 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/spatial/spatial.1.json @@ -0,0 +1 @@ +{ "point_value": point("10.0,10.0"), "line_value": line("10.0,10.0 11.0,11.0"), "polygon_value": polygon("10.0,10.0 11.0,11.0 10.0,10.0") } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/temporal/temporal.1.json ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/temporal/temporal.1.json b/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/temporal/temporal.1.json new file mode 100644 index 0000000..cdfc51d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/json-parser/temporal/temporal.1.json @@ -0,0 +1 @@ +{ "datefield": date("-2012-12-12"), "dateFieldPlus": date("0990-01-01"), "timeField": time("23:49:12.390Z"), "timeFieldPlus": time("03:23:12.200Z"), "datetimeField": datetime("2012-12-12T00:00:00.001Z"), "datetimeFieldPlus": datetime("-0013-08-10T22:10:15.398Z") } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml index 71f1e15..bc919db 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml +++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml @@ -9180,4 +9180,137 @@ </compilation-unit> </test-case> </test-group> + <test-group name="json-parser"> + <test-case FilePath="json-parser"> + <compilation-unit name="numeric-tinyint"> + <output-dir compare="Text">numeric-tinyint</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="numeric-tinyint-overflow"> + <output-dir compare="Text">numeric-tinyint</output-dir> + <expected-error>Numeric value (1000) out of range of Java byte</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="numeric-float"> + <output-dir compare="Text">numeric-float</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="numeric-demote-double-bigint"> + <output-dir compare="Text">numeric-demote-double-bigint</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="numeric-promote-bigint-double"> + <output-dir compare="Text">numeric-promote-bigint-double</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="duplicate-fields"> + <output-dir compare="Text">duplicate-fields</output-dir> + <expected-error>Duplicate field 'field'</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="malformed-json"> + <output-dir compare="Text">malformed-json</output-dir> + <expected-error>Unexpected character ('}' (code 125)): was expecting double-quote to start field name</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="null-missing"> + <output-dir compare="Text">null-missing</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="nonoptional-missing"> + <output-dir 
compare="Text">nonoptional-missing</output-dir> + <expected-error>ASX3075: Closed field missing_value has null value</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="nonoptional-null"> + <output-dir compare="Text">nonoptional-null</output-dir> + <expected-error>ASX3075: Closed field null_value has null value</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="spatial"> + <output-dir compare="Text">spatial</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="spatial-line-3-points"> + <output-dir compare="Text">spatial-line-3-points</output-dir> + <expected-error>Line must have 4 coordinates</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="spatial-polygon-unclosed"> + <output-dir compare="Text">spatial-polygon-unclosed</output-dir> + <expected-error>Unclosed polygon is not supported</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="spatial-polygon-with-hole"> + <output-dir compare="Text">spatial-polygon-with-hole</output-dir> + <expected-error>Only simple geometries are supported (Point, LineString and Polygon without holes)</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="temporal"> + <output-dir compare="Text">temporal</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="type-mismatch"> + <output-dir compare="Text">type-mismatch</output-dir> + <expected-error>ASX3054: Mismatch Type, expecting a value of type string</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="unsupported-type-circle"> + <output-dir 
compare="Text">unsupported-type-circle</output-dir> + <expected-error>ASX0004: Unsupported type: org.apache.asterix.external.parser.factory.JSONDataParserFactory cannot process input type circle</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="unsupported-type-daytimeduration"> + <output-dir compare="Text">unsupported-type-daytimeduration</output-dir> + <expected-error>ASX0004: Unsupported type: org.apache.asterix.external.parser.factory.JSONDataParserFactory cannot process input type daytimeduration</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="unsupported-type-duration"> + <output-dir compare="Text">unsupported-type-duration</output-dir> + <expected-error>ASX0004: Unsupported type: org.apache.asterix.external.parser.factory.JSONDataParserFactory cannot process input type duration</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="unsupported-type-interval"> + <output-dir compare="Text">unsupported-type-interval</output-dir> + <expected-error>ASX0004: Unsupported type: org.apache.asterix.external.parser.factory.JSONDataParserFactory cannot process input type interval</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="unsupported-type-multiset"> + <output-dir compare="Text">unsupported-type-multiset</output-dir> + <expected-error>ASX0004: Unsupported type: org.apache.asterix.external.parser.factory.JSONDataParserFactory cannot process input type multiset</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="unsupported-type-point3d"> + <output-dir compare="Text">unsupported-type-point3d</output-dir> + <expected-error>ASX0004: Unsupported type: org.apache.asterix.external.parser.factory.JSONDataParserFactory cannot process input type 
point3d</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="json-parser"> + <compilation-unit name="unsupported-type-rectangle"> + <output-dir compare="Text">unsupported-type-rectangle</output-dir> + <expected-error>ASX0004: Unsupported type: org.apache.asterix.external.parser.factory.JSONDataParserFactory cannot process input type rectangle</expected-error> + </compilation-unit> + </test-case> + </test-group> </test-suite> http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java index 1c85ff9..7f0c303 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java @@ -247,6 +247,7 @@ public class ErrorCode { public static final int METADATA_DROP_FUCTION_IN_USE = 3109; public static final int FEED_FAILED_WHILE_GETTING_A_NEW_RECORD = 3110; public static final int FEED_START_FEED_WITHOUT_CONNECTION = 3111; + public static final int PARSER_COLLECTION_ITEM_CANNOT_BE_NULL = 3112; // Lifecycle management errors public static final int DUPLICATE_PARTITION_ID = 4000; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties index 5a7cbc3..7384d14 100644 --- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties +++ 
b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties @@ -236,6 +236,7 @@ 3109 = Function %1$s is being used. It cannot be dropped 3110 = Feed failed while reading a new record 3111 = Feed %1$s is not connected to any dataset +3112 = Array/Multiset item cannot be null # Lifecycle management errors 4000 = Partition id %1$d for node %2$s already in use by node %3$s http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-external-data/pom.xml ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-external-data/pom.xml b/asterixdb/asterix-external-data/pom.xml index 7cf6334..6eca5c2 100644 --- a/asterixdb/asterix-external-data/pom.xml +++ b/asterixdb/asterix-external-data/pom.xml @@ -407,5 +407,14 @@ <groupId>com.fasterxml.jackson.core</groupId> <artifactId>jackson-databind</artifactId> </dependency> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-core</artifactId> + </dependency> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-collections4</artifactId> + <version>4.1</version> + </dependency> </dependencies> </project> http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java index b1c6428..3ac2770 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java @@ -22,6 +22,7 @@ import java.io.Serializable; import java.util.List; import java.util.Map; +import 
org.apache.asterix.common.exceptions.AsterixException; import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType; import org.apache.asterix.om.types.ARecordType; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; @@ -49,8 +50,10 @@ public interface IDataParserFactory extends Serializable { * Set the record type expected to be produced by parsers created by this factory * * @param recordType + * @throws AsterixException + * if the parser does not support certain types defined in {@code recordType}. */ - public void setRecordType(ARecordType recordType); + public void setRecordType(ARecordType recordType) throws AsterixException; /** * Set the meta record type expected to be produced by parsers created by this factory http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractNestedDataParser.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractNestedDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractNestedDataParser.java new file mode 100644 index 0000000..c6f605d --- /dev/null +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractNestedDataParser.java @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.external.parser; + +import java.io.DataOutput; +import java.io.IOException; +import java.util.BitSet; + +import org.apache.asterix.common.exceptions.ErrorCode; +import org.apache.asterix.common.exceptions.RuntimeDataException; +import org.apache.asterix.om.base.AUnorderedList; +import org.apache.asterix.om.types.AOrderedListType; +import org.apache.asterix.om.types.ARecordType; +import org.apache.asterix.om.types.ATypeTag; +import org.apache.asterix.om.types.AUnionType; +import org.apache.asterix.om.types.AUnorderedListType; +import org.apache.asterix.om.types.BuiltinType; +import org.apache.asterix.om.types.IAType; +import org.apache.asterix.om.types.hierachy.ATypeHierarchy; +import org.apache.asterix.om.utils.RecordUtil; +import org.apache.hyracks.api.exceptions.HyracksDataException; + +/** + * Abstract class for nested formats (ADM, JSON, XML ... etc) + * TODO(wyk): remove extends AbstractDataParser and only take what's needed from it. + * TODO(wyk): find a way to support ADM constructors for ADMDataParser + */ +public abstract class AbstractNestedDataParser<T> extends AbstractDataParser { + + private T currentParsedToken; + + /** + * Parse object using the defined recordType. + * + * @param recordType + * {@link RecordUtil#FULLY_OPEN_RECORD_TYPE} if parsing open object + * @param out + * @throws HyracksDataException + */ + protected abstract void parseObject(ARecordType recordType, DataOutput out) throws IOException; + + /** + * Parse array using the defined listType. 
+ * + * NOTE: currently AsterixDB only supports null values for open collection types. + * + * @param listType + * {@link AOrderedListType#FULL_OPEN_ORDEREDLIST_TYPE} if parsing open array + * @param out + * @throws HyracksDataException + */ + protected abstract void parseArray(AOrderedListType listType, DataOutput out) throws IOException; + + /** + * Parse multiset using the defined listType. + * + * NOTE: currently AsterixDB only supports null values for open collection types. + * + * @param listType + * {@link AUnorderedListType#FULLY_OPEN_UNORDEREDLIST_TYPE} if parsing open multiset + * @param out + * @throws HyracksDataException + */ + protected abstract void parseMultiset(AUnorderedList listType, DataOutput out) throws IOException; + + /** + * Map the third-party parser's token to {@code T} + * This method is called by nextToken to set {@link AbstractNestedDataParser#currentParsedToken} + * + * @return the corresponding token + * @throws IOException + */ + protected abstract T advanceToNextToken() throws IOException; + + public final T nextToken() throws IOException { + currentParsedToken = advanceToNextToken(); + return currentParsedToken; + } + + public final T currentToken() { + return currentParsedToken; + } + + protected boolean isNullableType(IAType definedType) { + if (definedType.getTypeTag() != ATypeTag.UNION) { + return false; + } + + return ((AUnionType) definedType).isNullableType(); + } + + protected boolean isMissableType(IAType definedType) { + if (definedType.getTypeTag() != ATypeTag.UNION) { + return false; + } + + return ((AUnionType) definedType).isMissableType(); + } + + protected void checkOptionalConstraints(ARecordType recordType, BitSet nullBitmap) throws RuntimeDataException { + for (int i = 0; i < recordType.getFieldTypes().length; i++) { + if (!nullBitmap.get(i) && !isMissableType(recordType.getFieldTypes()[i])) { + throw new RuntimeDataException(ErrorCode.PARSER_TWEET_PARSER_CLOSED_FIELD_NULL, + 
recordType.getFieldNames()[i]); + } + } + } + + /** + * Parser is not expecting definedType to be null. + * + * @param definedType + * type defined by the user. + * @param parsedTypeTag + * parsed type. + * @return + * definedType is nullable && parsedTypeTag == ATypeTag.NULL => return ANULL + * definedType == ANY && isComplexType => fully_open_complex_type + * definedType == ANY && isAtomicType => ANY + * definedType == parsedTypeTag | canBeConverted => return definedType + * @throws RuntimeDataException + * type mismatch + */ + protected IAType checkAndGetType(IAType definedType, ATypeTag parsedTypeTag) throws RuntimeDataException { + //Cannot be missing + if (parsedTypeTag == ATypeTag.NULL && isNullableType(definedType)) { + return BuiltinType.ANULL; + } + + final IAType actualDefinedType = getActualType(definedType); + if (actualDefinedType.getTypeTag() == ATypeTag.ANY) { + switch (parsedTypeTag) { + case OBJECT: + return RecordUtil.FULLY_OPEN_RECORD_TYPE; + case ARRAY: + return AOrderedListType.FULL_OPEN_ORDEREDLIST_TYPE; + case MULTISET: + return AUnorderedListType.FULLY_OPEN_UNORDEREDLIST_TYPE; + default: + return BuiltinType.ANY; + } + } else if (actualDefinedType.getTypeTag() == parsedTypeTag + || isConvertable(parsedTypeTag, actualDefinedType.getTypeTag())) { + return actualDefinedType; + } + + throw new RuntimeDataException(ErrorCode.PARSER_ADM_DATA_PARSER_TYPE_MISMATCH, definedType.getTypeName()); + } + + private IAType getActualType(IAType definedType) { + if (definedType.getTypeTag() == ATypeTag.UNION) { + return ((AUnionType) definedType).getActualType(); + } + return definedType; + } + + /** + * Check promote/demote rules for mismatched types. 
+ * String type is a special case as it can be parsed as date/time/datetime/UUID + * + * @param parsedTypeTag + * @param definedTypeTag + * @return + * true if it can be converted + * false otherwise + */ + protected boolean isConvertable(ATypeTag parsedTypeTag, ATypeTag definedTypeTag) { + boolean convertable = parsedTypeTag == ATypeTag.STRING; + + convertable &= definedTypeTag == ATypeTag.UUID || definedTypeTag == ATypeTag.DATE + || definedTypeTag == ATypeTag.TIME || definedTypeTag == ATypeTag.DATETIME; + + return convertable || ATypeHierarchy.canPromote(parsedTypeTag, definedTypeTag) + || ATypeHierarchy.canDemote(parsedTypeTag, definedTypeTag); + } + +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java new file mode 100644 index 0000000..bbe4a60 --- /dev/null +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java @@ -0,0 +1,439 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.external.parser; + +import java.io.DataOutput; +import java.io.IOException; +import java.io.InputStream; +import java.util.BitSet; + +import org.apache.asterix.builders.IARecordBuilder; +import org.apache.asterix.builders.IAsterixListBuilder; +import org.apache.asterix.common.exceptions.ErrorCode; +import org.apache.asterix.common.exceptions.RuntimeDataException; +import org.apache.asterix.external.api.IRawRecord; +import org.apache.asterix.external.api.IRecordDataParser; +import org.apache.asterix.external.api.IStreamDataParser; +import org.apache.asterix.external.parser.jackson.ADMToken; +import org.apache.asterix.external.parser.jackson.GeometryCoParser; +import org.apache.asterix.external.parser.jackson.ParserContext; +import org.apache.asterix.om.base.ABoolean; +import org.apache.asterix.om.base.ANull; +import org.apache.asterix.om.base.AUnorderedList; +import org.apache.asterix.om.types.AOrderedListType; +import org.apache.asterix.om.types.ARecordType; +import org.apache.asterix.om.types.ATypeTag; +import org.apache.asterix.om.types.AbstractCollectionType; +import org.apache.asterix.om.types.BuiltinType; +import org.apache.asterix.om.types.IAType; +import org.apache.asterix.om.utils.RecordUtil; +import org.apache.asterix.runtime.exceptions.UnsupportedTypeException; +import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.apache.hyracks.data.std.api.IMutableValueStorage; + +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; +import 
com.fasterxml.jackson.core.JsonToken; + +/** + * JSON format parser using Jakson parser. + */ +public class JSONDataParser extends AbstractNestedDataParser<ADMToken> + implements IStreamDataParser, IRecordDataParser<char[]> { + + protected final ParserContext parserContext; + protected final JsonFactory jsonFactory; + protected final ARecordType rootType; + protected final GeometryCoParser geometryCoParser; + + protected JsonParser jsonParser; + + /** + * Initialize JSONDataParser with GeometryCoParser + * + * @param recordType + * defined type. + * @param jsonFactory + * Jackson JSON parser factory. + */ + public JSONDataParser(ARecordType recordType, JsonFactory jsonFactory) { + // recordType currently cannot be null, however this is to guarantee for any future changes. + this.rootType = recordType != null ? recordType : RecordUtil.FULLY_OPEN_RECORD_TYPE; + this.jsonFactory = jsonFactory; + //GeometyCoParser to parse GeoJSON objects to AsterixDB internal spatial types. + geometryCoParser = new GeometryCoParser(jsonParser); + parserContext = new ParserContext(); + } + + /* + **************************************************** + * Public methods + **************************************************** + */ + + @Override + public final void parse(IRawRecord<? extends char[]> record, DataOutput out) throws HyracksDataException { + try { + //TODO(wyk): find a way to reset byte[] instead of creating a new parser for each record. 
+ jsonParser = jsonFactory.createParser(record.get(), 0, record.size()); + geometryCoParser.reset(jsonParser); + nextToken(); + parseObject(rootType, out); + } catch (IOException e) { + throw new RuntimeDataException(ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM, e); + } + } + + @Override + public void setInputStream(InputStream in) throws IOException { + jsonParser = jsonFactory.createParser(in); + geometryCoParser.reset(jsonParser); + } + + @Override + public boolean parse(DataOutput out) throws HyracksDataException { + try { + if (nextToken() == ADMToken.EOF) { + return false; + } + parseObject(rootType, out); + return true; + } catch (IOException e) { + throw new RuntimeDataException(ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM, e); + } + } + + @Override + public boolean reset(InputStream in) throws IOException { + setInputStream(in); + return true; + } + + /* + **************************************************** + * Abstract method implementation + **************************************************** + */ + + /** + * Jackson token to ADM token mapper + */ + @Override + protected final ADMToken advanceToNextToken() throws IOException { + final JsonToken jsonToken = jsonParser.nextToken(); + if (jsonToken == null) { + return ADMToken.EOF; + } + ADMToken token; + switch (jsonToken) { + case VALUE_FALSE: + token = ADMToken.FALSE; + break; + case VALUE_TRUE: + token = ADMToken.TRUE; + break; + case VALUE_STRING: + token = ADMToken.STRING; + break; + case VALUE_NULL: + token = ADMToken.NULL; + break; + case VALUE_NUMBER_FLOAT: + token = ADMToken.DOUBLE; + break; + case VALUE_NUMBER_INT: + token = ADMToken.INT; + break; + case START_OBJECT: + token = ADMToken.OBJECT_START; + break; + case END_OBJECT: + token = ADMToken.OBJECT_END; + break; + case START_ARRAY: + token = ADMToken.ARRAY_START; + break; + case END_ARRAY: + token = ADMToken.ARRAY_END; + break; + case FIELD_NAME: + token = ADMToken.FIELD_NAME; + break; + default: + throw new 
RuntimeDataException(ErrorCode.TYPE_UNSUPPORTED, jsonParser.currentToken().toString()); + } + + return token; + } + /* + **************************************************** + * Overridden methods + **************************************************** + */ + + /** + * In the case of JSON, we can parse GeoJSON objects as internal AsterixDB spatial types. + */ + @Override + protected boolean isConvertable(ATypeTag parsedTypeTag, ATypeTag definedTypeTag) { + if (parsedTypeTag == ATypeTag.OBJECT && (definedTypeTag == ATypeTag.POINT || definedTypeTag == ATypeTag.LINE + || definedTypeTag == ATypeTag.POLYGON)) { + return true; + } + return super.isConvertable(parsedTypeTag, definedTypeTag); + } + + /* + **************************************************** + * Complex types parsers + **************************************************** + */ + + @Override + protected final void parseObject(ARecordType recordType, DataOutput out) throws IOException { + final IMutableValueStorage valueBuffer = parserContext.enterObject(); + final IARecordBuilder objectBuilder = parserContext.getObjectBuilder(recordType); + final BitSet nullBitMap = parserContext.getNullBitmap(recordType.getFieldTypes().length); + while (nextToken() != ADMToken.OBJECT_END) { + /* + * Jackson parser calls String.intern() for field names (if enabled). + * Calling getCurrentName() will not create multiple objects. 
+ */ + final String fieldName = jsonParser.getCurrentName(); + final int fieldIndex = recordType.getFieldIndex(fieldName); + + if (!recordType.isOpen() && fieldIndex < 0) { + throw new RuntimeDataException(ErrorCode.PARSER_ADM_DATA_PARSER_EXTRA_FIELD_IN_CLOSED_RECORD, + fieldName); + } + valueBuffer.reset(); + nextToken(); + + if (fieldIndex < 0) { + //field is not defined and the type is open + parseValue(BuiltinType.ANY, valueBuffer.getDataOutput()); + objectBuilder.addField(parserContext.getSerializedFieldName(fieldName), valueBuffer); + } else { + //field is defined + final IAType fieldType = recordType.getFieldType(fieldName); + + //fail fast if the current field is not nullable + if (currentToken() == ADMToken.NULL && !isNullableType(fieldType)) { + throw new RuntimeDataException(ErrorCode.PARSER_TWEET_PARSER_CLOSED_FIELD_NULL, fieldName); + } + + nullBitMap.set(fieldIndex); + parseValue(fieldType, valueBuffer.getDataOutput()); + objectBuilder.addField(fieldIndex, valueBuffer); + } + } + + /* + * Check for any possible missed values for a defined (non-nullable) type. 
+ * Throws exception if there is a violation + */ + if (nullBitMap != null) { + checkOptionalConstraints(recordType, nullBitMap); + } + parserContext.exitObject(valueBuffer, nullBitMap, objectBuilder); + objectBuilder.write(out, true); + } + + /** + * Geometry in GeoJSON is an object + * + * @param typeTag + * geometry typeTag + * @param out + * @throws IOException + */ + private void parseGeometry(ATypeTag typeTag, DataOutput out) throws IOException { + //Start the co-parser + geometryCoParser.starGeometry(); + while (nextToken() != ADMToken.OBJECT_END) { + if (currentToken() == ADMToken.FIELD_NAME) { + geometryCoParser.checkFieldName(jsonParser.getCurrentName()); + } else if (!geometryCoParser.checkValue(currentToken())) { + throw new IOException(geometryCoParser.getErrorMessage()); + } + } + + geometryCoParser.serialize(typeTag, out); + } + + @Override + protected final void parseArray(AOrderedListType listType, DataOutput out) throws IOException { + parseCollection(listType, ADMToken.ARRAY_END, out); + } + + @Override + protected void parseMultiset(AUnorderedList listType, DataOutput out) throws IOException { + throw new UnsupportedTypeException("JSON parser", ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG); + } + + protected final void parseCollection(AbstractCollectionType collectionType, ADMToken endToken, DataOutput out) + throws IOException { + final IMutableValueStorage valueBuffer = parserContext.enterCollection(); + final IAsterixListBuilder arrayBuilder = parserContext.getCollectionBuilder(collectionType); + final boolean isOpen = collectionType.getItemType().getTypeTag() == ATypeTag.ANY; + while (nextToken() != endToken) { + valueBuffer.reset(); + if (isOpen) { + parseValue(BuiltinType.ANY, valueBuffer.getDataOutput()); + } else { + //fail fast if current value is null + if (currentToken() == ADMToken.NULL) { + throw new RuntimeDataException(ErrorCode.PARSER_COLLECTION_ITEM_CANNOT_BE_NULL); + } + parseValue(collectionType.getItemType(), 
valueBuffer.getDataOutput()); + } + arrayBuilder.addItem(valueBuffer); + } + parserContext.exitCollection(valueBuffer, arrayBuilder); + arrayBuilder.write(out, true); + } + + /* + **************************************************** + * Value parsers and serializers + **************************************************** + */ + + /** + * Parse JSON object or GeoJSON object. + * + * @param actualType + * @param out + * @throws IOException + */ + private void parseObject(IAType actualType, DataOutput out) throws IOException { + if (actualType.getTypeTag() == ATypeTag.OBJECT) { + parseObject((ARecordType) actualType, out); + } else { + parseGeometry(actualType.getTypeTag(), out); + } + } + + protected void parseValue(IAType definedType, DataOutput out) throws IOException { + final ATypeTag currentTypeTag = currentToken().getTypeTag(); + /* + * In case of type mismatch, checkAndGetType will throw an exception. + */ + final IAType actualType = checkAndGetType(definedType, currentTypeTag); + + switch (currentToken()) { + case NULL: + nullSerde.serialize(ANull.NULL, out); + break; + case FALSE: + booleanSerde.serialize(ABoolean.FALSE, out); + break; + case TRUE: + booleanSerde.serialize(ABoolean.TRUE, out); + break; + case INT: + case DOUBLE: + serailizeNumeric(actualType.getTypeTag(), out); + break; + case STRING: + serializeString(actualType.getTypeTag(), out); + break; + case OBJECT_START: + parseObject(actualType, out); + break; + case ARRAY_START: + parseArray((AOrderedListType) actualType, out); + break; + default: + throw new RuntimeDataException(ErrorCode.PARSE_ERROR, jsonParser.currentToken().toString()); + } + } + + /** + * Given that numeric values may underflow or overflow, an exception will be thrown. + * + * @param numericType + * @param out + * @throws IOException + */ + private void serailizeNumeric(ATypeTag numericType, DataOutput out) throws IOException { + final ATypeTag typeToUse = numericType == ATypeTag.ANY ? 
currentToken().getTypeTag() : numericType; + + switch (typeToUse) { + case BIGINT: + aInt64.setValue(jsonParser.getLongValue()); + int64Serde.serialize(aInt64, out); + break; + case INTEGER: + aInt32.setValue(jsonParser.getIntValue()); + int32Serde.serialize(aInt32, out); + break; + case SMALLINT: + aInt16.setValue(jsonParser.getShortValue()); + int16Serde.serialize(aInt16, out); + break; + case TINYINT: + aInt8.setValue(jsonParser.getByteValue()); + int8Serde.serialize(aInt8, out); + break; + case DOUBLE: + aDouble.setValue(jsonParser.getDoubleValue()); + doubleSerde.serialize(aDouble, out); + break; + case FLOAT: + aFloat.setValue(jsonParser.getFloatValue()); + floatSerde.serialize(aFloat, out); + break; + default: + throw new RuntimeDataException(ErrorCode.TYPE_UNSUPPORTED, jsonParser.currentToken().toString()); + } + } + + /** + * Serialize the string value. + * TODO(wyk) avoid String objects for type STRING + * + * @param stringVariantType + * @param out + * @throws IOException + */ + private void serializeString(ATypeTag stringVariantType, DataOutput out) throws IOException { + final String stringValue = jsonParser.getText(); + final ATypeTag typeToUse = stringVariantType == ATypeTag.ANY ? 
currentToken().getTypeTag() : stringVariantType; + + switch (typeToUse) { + case STRING: + aString.setValue(stringValue); + stringSerde.serialize(aString, out); + break; + case DATE: + parseDate(stringValue, out); + break; + case DATETIME: + parseDateTime(stringValue, out); + break; + case TIME: + parseTime(stringValue, out); + break; + default: + throw new RuntimeDataException(ErrorCode.TYPE_UNSUPPORTED, jsonParser.currentToken().toString()); + + } + } +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java index 489cf77..394fcb3 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java @@ -18,24 +18,24 @@ */ package org.apache.asterix.external.parser.factory; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType; import org.apache.asterix.external.api.IRecordDataParser; import org.apache.asterix.external.api.IStreamDataParser; -import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType; import org.apache.asterix.external.parser.ADMDataParser; import org.apache.asterix.external.util.ExternalDataUtils; import org.apache.asterix.om.types.ARecordType; import org.apache.hyracks.api.context.IHyracksTaskContext; import org.apache.hyracks.api.exceptions.HyracksDataException; -import java.util.Arrays; -import 
java.util.Collections; -import java.util.List; - public class ADMDataParserFactory extends AbstractRecordStreamParserFactory<char[]> { private static final long serialVersionUID = 1L; - private static final List<String> parserFormats = Collections - .unmodifiableList(Arrays.asList("adm", "json", "semi-structured")); + private static final List<String> parserFormats = + Collections.unmodifiableList(Arrays.asList("adm", "semi-structured")); @Override public IRecordDataParser<char[]> createRecordParser(IHyracksTaskContext ctx) { http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/AbstractRecordStreamParserFactory.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/AbstractRecordStreamParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/AbstractRecordStreamParserFactory.java index 8fd02dd..9352298 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/AbstractRecordStreamParserFactory.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/AbstractRecordStreamParserFactory.java @@ -20,6 +20,7 @@ package org.apache.asterix.external.parser.factory; import java.util.Map; +import org.apache.asterix.common.exceptions.AsterixException; import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType; import org.apache.asterix.external.api.IRecordDataParserFactory; import org.apache.asterix.external.api.IStreamDataParserFactory; @@ -44,7 +45,7 @@ public abstract class AbstractRecordStreamParserFactory<T> } @Override - public void setRecordType(ARecordType recordType) { + public void setRecordType(ARecordType recordType) throws AsterixException { this.recordType = recordType; } } 
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/JSONDataParserFactory.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/JSONDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/JSONDataParserFactory.java new file mode 100644 index 0000000..501aea0 --- /dev/null +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/JSONDataParserFactory.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.asterix.external.parser.factory; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.asterix.common.exceptions.AsterixException; +import org.apache.asterix.common.exceptions.ErrorCode; +import org.apache.asterix.external.api.IRecordDataParser; +import org.apache.asterix.external.api.IStreamDataParser; +import org.apache.asterix.external.parser.JSONDataParser; +import org.apache.asterix.om.types.AOrderedListType; +import org.apache.asterix.om.types.ARecordType; +import org.apache.asterix.om.types.ATypeTag; +import org.apache.asterix.om.types.AUnionType; +import org.apache.asterix.om.types.IAType; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.api.exceptions.HyracksDataException; + +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; + +public class JSONDataParserFactory extends AbstractRecordStreamParserFactory<char[]> { + + private static final long serialVersionUID = 1L; + private static final List<String> PARSER_FORMAT = Collections.unmodifiableList(Arrays.asList("json")); + private static final List<ATypeTag> UNSUPPORTED_TYPES = Collections + .unmodifiableList(Arrays.asList(ATypeTag.MULTISET, ATypeTag.POINT3D, ATypeTag.CIRCLE, ATypeTag.RECTANGLE, + ATypeTag.INTERVAL, ATypeTag.DAYTIMEDURATION, ATypeTag.DURATION, ATypeTag.BINARY)); + + private final JsonFactory jsonFactory; + + public JSONDataParserFactory() { + jsonFactory = new JsonFactory(); + jsonFactory.configure(JsonParser.Feature.STRICT_DUPLICATE_DETECTION, true); + jsonFactory.configure(JsonFactory.Feature.CANONICALIZE_FIELD_NAMES, true); + jsonFactory.configure(JsonFactory.Feature.INTERN_FIELD_NAMES, true); + } + + @Override + public IStreamDataParser createInputStreamParser(IHyracksTaskContext ctx, int partition) + throws HyracksDataException { + return createParser(); + } + + @Override + public void setMetaType(ARecordType metaType) { + // no 
MetaType to set. + } + + @Override + public List<String> getParserFormats() { + return PARSER_FORMAT; + } + + @Override + public IRecordDataParser<char[]> createRecordParser(IHyracksTaskContext ctx) throws HyracksDataException { + return createParser(); + } + + @Override + public Class<?> getRecordClass() { + return char[].class; + } + + private JSONDataParser createParser() throws HyracksDataException { + return new JSONDataParser(recordType, jsonFactory); + } + + /* + * check type compatibility before creating the parser. + */ + @Override + public void setRecordType(ARecordType recordType) throws AsterixException { + checkRecordTypeCompatibility(recordType); + super.setRecordType(recordType); + } + + /** + * Check if the defined type contains ADM special types. + * if it contains unsupported types. + * + * @param recordType + * @throws AsterixException + */ + private void checkRecordTypeCompatibility(ARecordType recordType) throws AsterixException { + final IAType[] fieldTypes = recordType.getFieldTypes(); + for (IAType type : fieldTypes) { + checkTypeCompatibility(type); + } + } + + private void checkTypeCompatibility(IAType type) throws AsterixException { + if (UNSUPPORTED_TYPES.contains(type.getTypeTag())) { + throw new AsterixException(ErrorCode.TYPE_UNSUPPORTED, JSONDataParserFactory.class.getName(), + type.getTypeTag().toString()); + } else if (type.getTypeTag() == ATypeTag.ARRAY) { + checkTypeCompatibility(((AOrderedListType) type).getItemType()); + } else if (type.getTypeTag() == ATypeTag.OBJECT) { + checkRecordTypeCompatibility((ARecordType) type); + } else if (type.getTypeTag() == ATypeTag.UNION) { + checkTypeCompatibility(((AUnionType) type).getActualType()); + } + //Compatible type + } + +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/jackson/ADMToken.java ---------------------------------------------------------------------- diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/jackson/ADMToken.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/jackson/ADMToken.java new file mode 100644 index 0000000..2c80288 --- /dev/null +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/jackson/ADMToken.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.asterix.external.parser.jackson; + +import org.apache.asterix.om.types.ATypeTag; + +public enum ADMToken { + /* + **************************************************** + * JSON format values + **************************************************** + */ + + NULL(ATypeTag.NULL), + FALSE(ATypeTag.BOOLEAN), + TRUE(ATypeTag.BOOLEAN), + INT(ATypeTag.BIGINT), //Default type of integers + DOUBLE(ATypeTag.DOUBLE), //Default type of floating-points + STRING(ATypeTag.STRING), + OBJECT_START(ATypeTag.OBJECT), + OBJECT_END, + ARRAY_START(ATypeTag.ARRAY), + ARRAY_END, + + //field name + FIELD_NAME, + + /* + **************************************************** + * ADM - Atomic value constructors + **************************************************** + */ + + //numeric constructors + TINYINT_ADM(ATypeTag.TINYINT), + INT_ADM(ATypeTag.INTEGER), + BIGINT_ADM(ATypeTag.BIGINT), + FLOAT_ADM(ATypeTag.FLOAT), + DOUBLE_ADM(ATypeTag.DOUBLE), + + //spatial + POINT_ADM(ATypeTag.POINT), + LINE_ADM(ATypeTag.LINE), + CIRCLE_ADM(ATypeTag.CIRCLE), + RECTANGLE_ADM(ATypeTag.RECTANGLE), + POLYGON_ADM(ATypeTag.POLYGON), + + //temporal + TIME_ADM(ATypeTag.TIME), + DATE_ADM(ATypeTag.DATE), + DATETIME_ADM(ATypeTag.DATETIME), + DURATION_ADM(ATypeTag.DURATION), + YEAR_MONTH_DURATION_ADM(ATypeTag.YEARMONTHDURATION), + DAY_TIME_DURATION_ADM(ATypeTag.DAYTIMEDURATION), + INTERVAL_ADM(ATypeTag.INTERVAL), + + //other + UUID_ADM(ATypeTag.UUID), + BINARY_ADM(ATypeTag.BINARY), + + /* + **************************************************** + * Parser control tokens + **************************************************** + */ + + PROCEED, + SKIP, + EOF; + + private final ATypeTag tokenMappedType; + + private ADMToken() { + tokenMappedType = null; + } + + private ADMToken(ATypeTag typeTag) { + this.tokenMappedType = typeTag; + } + + public ATypeTag getTypeTag() { + return tokenMappedType; + } +} 
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/jackson/GeometryCoParser.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/jackson/GeometryCoParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/jackson/GeometryCoParser.java new file mode 100644 index 0000000..8f36b5a --- /dev/null +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/jackson/GeometryCoParser.java @@ -0,0 +1,315 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.asterix.external.parser.jackson; + +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.asterix.common.exceptions.ErrorCode; +import org.apache.asterix.common.exceptions.RuntimeDataException; +import org.apache.asterix.dataflow.data.nontagged.serde.APointSerializerDeserializer; +import org.apache.asterix.om.types.ATypeTag; + +import com.fasterxml.jackson.core.JsonParser; + +/** + * Co-parser for JsonDataParser to help parse GeoJSON objects. + * It only supports POINT, LINE (with only two points) and Polygon. + * The co-parser can parse GeoJSON geometry object as in {@link https://tools.ietf.org/html/rfc7946} + * and converts it into a supported AsterixDB geometry type. + * + * Example: + * { ..., "geometry":{"type":"Point", "coordinates":[1.0, 1.0]}, ...} + * will be: + * { ..., "geometry":point("1.0, 1.0"), ...} + */ +public class GeometryCoParser { + + //Geometry object fields + private static final String DEFAULT_GEOMETERY_KEY = "geometry"; + private static final String COORDINATES_KEY = "coordinates"; + private static final String TYPE_KEY = "type"; + + //Supported geometry + private static final List<String> SUPPORTED_GEOMETRY = + Collections.unmodifiableList(Arrays.asList("Point", "LineString", "Polygon")); + private static final List<ATypeTag> SUPPORTED_GEOMETRY_TYPETAG = + Collections.unmodifiableList(Arrays.asList(ATypeTag.POINT, ATypeTag.LINE, ATypeTag.POLYGON)); + + //Error messages + private static final String INVALID_GEOMETRY = "Invalid GeoJSON geometry object"; + private static final String UNSUPPORTED_GEOMETRY = "Unsupported geometry type "; + + private static final int POINT = 0; + private static final int LINE = 1; + private static final int POLYGON = 2; + + private final List<Double> coordinates; + private JsonParser jsonParser; + private String currentField; + private int geometryType; + 
private String geometryTypeString; + + private int currentCoordinateLevel; + private int coordinateValueLevel; + private int coordinatesCounter; + private String errorMsg; + + /** + * @param jsonParser + * @param geometryFieldName + * override the default geometry + * field name {@value GeometryCoParser#DEFAULT_GEOMETERY_KEY} + */ + public GeometryCoParser(JsonParser jsonParser) { + this.jsonParser = jsonParser; + coordinates = new ArrayList<>(); + currentField = null; + geometryTypeString = null; + geometryType = -1; + currentCoordinateLevel = 0; + coordinateValueLevel = 0; + coordinatesCounter = 0; + errorMsg = null; + } + + /* + **************************************************** + * Public methods + **************************************************** + */ + + /** + * Check the field name if it's one of GeoJSON know fields + * + * @param fieldName + * @return + * returns true if the field name equals {@value GeometryCoParser#geometryFieldName}, + * which marks the start of a geometry object. + */ + public void checkFieldName(String fieldName) { + if (COORDINATES_KEY.equals(fieldName)) { + currentField = COORDINATES_KEY; + } else if (TYPE_KEY.equals(fieldName)) { + currentField = TYPE_KEY; + } else { + reset("Invalid geometry object"); + } + } + + /** + * Given the state of the current token in a geometry object, parse the potential values depends + * on the current field name. + * In the case of failure, the co-parser will stop parsing without throwing exception. + * + * @param token + * @throws IOException + * @return + * true: if it's an expected value. + * false: otherwise. 
+ */ + public boolean checkValue(ADMToken token) throws IOException { + if (currentField == null) { + return false; + } + if (currentField == DEFAULT_GEOMETERY_KEY && token != ADMToken.OBJECT_START) { + reset(INVALID_GEOMETRY); + } else if (currentField == COORDINATES_KEY) { + parseCoordinates(token); + } else if (currentField == TYPE_KEY) { + if (token != ADMToken.STRING) { + //unexpected token + reset(INVALID_GEOMETRY); + } else { + geometryTypeString = jsonParser.getValueAsString(); + geometryType = SUPPORTED_GEOMETRY.indexOf(geometryTypeString); + if (geometryType < 0) { + reset(UNSUPPORTED_GEOMETRY + geometryTypeString); + } + } + } + return currentField != null; + } + + /** + * To begin parsing a defined geometry object. + */ + public void starGeometry() { + reset(""); + currentField = DEFAULT_GEOMETERY_KEY; + } + + /** + * Serialize the parsed geometry + * + * @param typeTag + * The expected typeTag of the geometry type. + * @param out + * @throws IOException + * an exception will be thrown in case of failure or type mismatch. + */ + public void serialize(ATypeTag typeTag, DataOutput out) throws IOException { + if (!isValidGeometry()) { + throw new IOException(errorMsg); + } else if (typeTag != SUPPORTED_GEOMETRY_TYPETAG.get(geometryType)) { + throw new RuntimeDataException(ErrorCode.PARSER_ADM_DATA_PARSER_TYPE_MISMATCH, typeTag); + } + + switch (geometryType) { + case POINT: + serializePoint(out); + break; + case LINE: + serializeLine(out); + break; + case POLYGON: + serializePolygon(out); + break; + default: + break; + } + } + + public String getErrorMessage() { + return errorMsg; + } + + /** + * Reset for a new record to be parsed. + * + * @param jsonParser + */ + public void reset(JsonParser jsonParser) { + reset(""); + this.jsonParser = jsonParser; + } + + /* + **************************************************** + * Helper methods + **************************************************** + */ + + /** + * Parse coordinates values. 
+ * + * @param token + * @throws IOException + */ + private void parseCoordinates(ADMToken token) throws IOException { + if (token == ADMToken.DOUBLE) { + if (++coordinatesCounter > 2) { + //A point must have 2 coordinates + reset(INVALID_GEOMETRY); + } + + coordinates.add(jsonParser.getDoubleValue()); + + if (coordinateValueLevel == 0) { + coordinateValueLevel = currentCoordinateLevel; + } + } else if (token == ADMToken.ARRAY_START) { + currentCoordinateLevel++; + if (coordinateValueLevel - (currentCoordinateLevel - 1) > 1) { + reset("Only simple geometries are supported (Point, LineString and Polygon without holes)"); + } + } else if (token == ADMToken.ARRAY_END) { + currentCoordinateLevel--; + coordinatesCounter = 0; + } else { + //unexpected token + reset(INVALID_GEOMETRY); + } + } + + private void reset(String errorMsg) { + coordinates.clear(); + geometryType = -1; + currentField = null; + coordinatesCounter = 0; + coordinateValueLevel = 0; + currentCoordinateLevel = 0; + this.errorMsg = errorMsg; + } + + private boolean isValidGeometry() { + boolean valid; + switch (geometryType) { + case POINT: + valid = coordinateValueLevel == 1 && coordinates.size() == 2; + errorMsg = valid ? null : "Point must have 2 coordinates"; + break; + case LINE: + valid = coordinateValueLevel == 2 && coordinates.size() == 4; + errorMsg = valid ? null : "Line must have 4 coordinates"; + break; + case POLYGON: + valid = isValidPolygon(); + break; + default: + valid = false; + errorMsg = UNSUPPORTED_GEOMETRY + geometryTypeString; + } + + return valid; + } + + private boolean isValidPolygon() { + /* + * A valid polygon should have at least 3 points and should start and end at the same point. 
+ */ + final int size = coordinates.size(); + if (size < 5) { + errorMsg = "Polygon must consists of at least 3 points (6 coordinates)"; + return false; + } else if (coordinateValueLevel != 3) { + errorMsg = "MultiPolygon is not supported"; + } else if (!(coordinates.get(0).equals(coordinates.get(size - 2)) + && coordinates.get(1).equals(coordinates.get(size - 1)))) { + errorMsg = "Unclosed polygon is not supported"; + return false; + } + return true; + } + + private void serializePoint(DataOutput out) throws IOException { + out.writeByte(ATypeTag.SERIALIZED_POINT_TYPE_TAG); + APointSerializerDeserializer.serialize(coordinates.get(0), coordinates.get(1), out); + } + + private void serializeLine(DataOutput out) throws IOException { + out.writeByte(ATypeTag.SERIALIZED_LINE_TYPE_TAG); + APointSerializerDeserializer.serialize(coordinates.get(0), coordinates.get(1), out); + APointSerializerDeserializer.serialize(coordinates.get(2), coordinates.get(3), out); + } + + private void serializePolygon(DataOutput out) throws IOException { + out.writeByte(ATypeTag.SERIALIZED_POLYGON_TYPE_TAG); + out.writeShort(coordinates.size() / 2); + for (int i = 0; i < coordinates.size(); i += 2) { + APointSerializerDeserializer.serialize(coordinates.get(i), coordinates.get(i + 1), out); + } + + } + +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/jackson/ObjectPool.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/jackson/ObjectPool.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/jackson/ObjectPool.java new file mode 100644 index 0000000..5ccd9e9 --- /dev/null +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/jackson/ObjectPool.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.external.parser.jackson; + +import java.util.ArrayDeque; +import java.util.Queue; + +import org.apache.asterix.om.util.container.IObjectFactory; + +/** + * Object pool for DFS traversal mode, which allows to recycle objects + * as soon as it is not needed. 
+ */ +public class ObjectPool<E, T> { + private final IObjectFactory<E, T> objectFactory; + private final Queue<E> recycledObjects; + private final T element; + + public ObjectPool() { + this(null, null); + } + + public ObjectPool(IObjectFactory<E, T> objectFactory) { + this(objectFactory, null); + } + + public ObjectPool(IObjectFactory<E, T> objectFactory, T element) { + this.objectFactory = objectFactory; + recycledObjects = new ArrayDeque<>(); + this.element = element; + } + + public E getInstance() { + E instance = recycledObjects.poll(); + if (objectFactory != null && instance == null) { + instance = objectFactory.create(element); + } + return instance; + } + + public void recycle(E object) { + if (object != null) { + recycledObjects.add(object); + } + } +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/jackson/ParserContext.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/jackson/ParserContext.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/jackson/ParserContext.java new file mode 100644 index 0000000..387ba7f --- /dev/null +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/jackson/ParserContext.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.external.parser.jackson; + +import java.io.IOException; +import java.util.BitSet; +import java.util.Map; + +import org.apache.asterix.builders.AbvsBuilderFactory; +import org.apache.asterix.builders.IARecordBuilder; +import org.apache.asterix.builders.IAsterixListBuilder; +import org.apache.asterix.builders.ListBuilderFactory; +import org.apache.asterix.builders.RecordBuilderFactory; +import org.apache.asterix.external.parser.AbstractNestedDataParser; +import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider; +import org.apache.asterix.om.base.AMutableString; +import org.apache.asterix.om.base.AString; +import org.apache.asterix.om.types.ARecordType; +import org.apache.asterix.om.types.ATypeTag; +import org.apache.asterix.om.types.AbstractCollectionType; +import org.apache.commons.collections4.map.LRUMap; +import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer; +import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.apache.hyracks.data.std.api.IMutableValueStorage; +import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; + +/** + * A state class that helps parsers of class {@link AbstractNestedDataParser} to maintain + * its temporary buffers. 
+ */ +public class ParserContext { + private static final int SERIALIZED_FIELDNAME_MAP_MAX_SIZE = 128; + + private final ObjectPool<IARecordBuilder, ATypeTag> objectBuilderPool; + private final ObjectPool<IAsterixListBuilder, ATypeTag> arrayBuilderPool; + + /** + * Parsing nested structure using temporary buffers is expensive. + * Example: + * {"a":{"b":{"c":{"d":5}}}} + * + * Scalar value 5 is written 4 times in tempBuffer("d") then tempBuffer("c") ... tempBuffer("a") + */ + private final ObjectPool<IMutableValueStorage, ATypeTag> tempBufferPool; + private final ObjectPool<BitSet, Void> nullBitmapPool; + private final Map<String, IMutableValueStorage> serializedFieldNames; + private final ISerializerDeserializer<AString> stringSerDe; + private final AMutableString aString; + + @SuppressWarnings("unchecked") + public ParserContext() { + objectBuilderPool = new ObjectPool<>(new RecordBuilderFactory()); + arrayBuilderPool = new ObjectPool<>(new ListBuilderFactory(), ATypeTag.ARRAY); + tempBufferPool = new ObjectPool<>(new AbvsBuilderFactory()); + nullBitmapPool = new ObjectPool<>(); + serializedFieldNames = new LRUMap<>(SERIALIZED_FIELDNAME_MAP_MAX_SIZE); + stringSerDe = SerializerDeserializerProvider.INSTANCE.getAStringSerializerDeserializer(); + aString = new AMutableString(""); + } + + public IMutableValueStorage enterObject() { + return tempBufferPool.getInstance(); + } + + public BitSet getNullBitmap(int size) { + if (size < 1) { + return null; + } + + BitSet nullBitMap = nullBitmapPool.getInstance(); + if (nullBitMap == null) { + nullBitMap = new BitSet(size); + } + return nullBitMap; + } + + public IARecordBuilder getObjectBuilder(ARecordType recordType) { + IARecordBuilder builder = objectBuilderPool.getInstance(); + builder.reset(recordType); + return builder; + } + + /** + * Experimental. + * Check if too many serialization for the same field names can be expensive or not. 
+ * + * @param fieldName + * @return + * @throws HyracksDataException + */ + public IMutableValueStorage getSerializedFieldName(String fieldName) throws IOException { + IMutableValueStorage serializedFieldName = serializedFieldNames.get(fieldName); + if (serializedFieldName == null) { + serializedFieldName = new ArrayBackedValueStorage(); + serializedFieldName.reset(); + aString.setValue(fieldName); + stringSerDe.serialize(aString, serializedFieldName.getDataOutput()); + serializedFieldNames.put(fieldName, serializedFieldName); + } + return serializedFieldName; + } + + public void exitObject(IMutableValueStorage tempBuffer, BitSet nullBitmap, IARecordBuilder builder) { + tempBufferPool.recycle(tempBuffer); + objectBuilderPool.recycle(builder); + if (nullBitmap != null) { + nullBitmap.clear(); + nullBitmapPool.recycle(nullBitmap); + } + } + + public IMutableValueStorage enterCollection() { + return tempBufferPool.getInstance(); + } + + public IAsterixListBuilder getCollectionBuilder(AbstractCollectionType collectionType) { + IAsterixListBuilder builder = arrayBuilderPool.getInstance(); + builder.reset(collectionType); + return builder; + } + + public void exitCollection(IMutableValueStorage tempBuffer, IAsterixListBuilder builder) { + tempBufferPool.recycle(tempBuffer); + arrayBuilderPool.recycle(builder); + } + +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory b/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory index 79289b0..7ce2048 100644 --- 
a/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory +++ b/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory @@ -16,6 +16,7 @@ # under the License. # org.apache.asterix.external.parser.factory.ADMDataParserFactory +org.apache.asterix.external.parser.factory.JSONDataParserFactory org.apache.asterix.external.parser.factory.DelimitedDataParserFactory org.apache.asterix.external.parser.factory.HiveDataParserFactory org.apache.asterix.external.parser.factory.RecordWithMetadataParserFactory http://git-wip-us.apache.org/repos/asf/asterixdb/blob/733fba8e/asterixdb/asterix-om/src/main/java/org/apache/asterix/builders/IAsterixListBuilder.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/builders/IAsterixListBuilder.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/builders/IAsterixListBuilder.java index 5aa95ef..9a7d0c4ee 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/builders/IAsterixListBuilder.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/builders/IAsterixListBuilder.java @@ -29,7 +29,7 @@ public interface IAsterixListBuilder { * @param listType * Type of the list: AUnorderedListType or AOrderedListType. */ - public void reset(AbstractCollectionType listType) throws HyracksDataException; + public void reset(AbstractCollectionType listType); /** * @param item
