[GitHub] spark pull request #22745: [SPARK-21402][SQL][FOLLOW-UP] Fix java map of str...
Github user vofque commented on a diff in the pull request: https://github.com/apache/spark/pull/22745#discussion_r226187880 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala --- @@ -509,3 +509,24 @@ case class UnresolvedOrdinal(ordinal: Int) override def nullable: Boolean = throw new UnresolvedException(this, "nullable") override lazy val resolved = false } + +/** + * When constructing `Invoke`, the data type must be given, which may not be possible to define + * before analysis. This class acts like a placeholder for `Invoke`, and will be replaced by + * `Invoke` during analysis after the input data is resolved. The data type passed to `Invoke` + * will be defined by applying `dataTypeFunction` to the data type of the input data. + */ +case class UnresolvedInvoke( --- End diff -- I had such doubts too. OK. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #22745: [SPARK-21402][SQL][FOLLOW-UP] Fix java map of str...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/22745#discussion_r226149644 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala --- @@ -509,3 +509,24 @@ case class UnresolvedOrdinal(ordinal: Int) override def nullable: Boolean = throw new UnresolvedException(this, "nullable") override lazy val resolved = false } + +/** + * When constructing `Invoke`, the data type must be given, which may not be possible to define + * before analysis. This class acts like a placeholder for `Invoke`, and will be replaced by + * `Invoke` during analysis after the input data is resolved. The data type passed to `Invoke` + * will be defined by applying `dataTypeFunction` to the data type of the input data. + */ +case class UnresolvedInvoke( --- End diff -- I feel this is too general. Maybe we should just create a new expression `GetArrayFromMap` and resolve it to `Invoke` later. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #22745: [SPARK-21402][SQL][FOLLOW-UP] Fix java map of str...
Github user vofque commented on a diff in the pull request: https://github.com/apache/spark/pull/22745#discussion_r225810882 --- Diff: sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanWithMapSuite.java --- @@ -0,0 +1,257 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package test.org.apache.spark.sql; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoder; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.test.TestSparkSession; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.MapType; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +public class JavaBeanWithMapSuite { + +private static final List RECORDS = new ArrayList<>(); + +static { +RECORDS.add(new Record(1, +toMap( +Arrays.asList("a", "b"), +Arrays.asList(new Interval(111, 211), new Interval(121, 221)) +), +toMap(Arrays.asList("a", "b", "c"), Arrays.asList(11, 21, 31)) +)); +RECORDS.add(new Record(2, +toMap( +Arrays.asList("a", "b"), +Arrays.asList(new Interval(112, 212), new Interval(122, 222)) +), +toMap(Arrays.asList("a", "b", "c"), Arrays.asList(12, 22, 32)) +)); +RECORDS.add(new Record(3, +toMap( +Arrays.asList("a", "b"), +Arrays.asList(new Interval(113, 213), new Interval(123, 223)) +), +toMap(Arrays.asList("a", "b", "c"), Arrays.asList(13, 23, 33)) +)); +} + +private static Map toMap(Collection keys, Collection values) { +Map map = new HashMap<>(); +Iterator keyI = keys.iterator(); +Iterator valueI = values.iterator(); +while (keyI.hasNext() && valueI.hasNext()) { +map.put(keyI.next(), valueI.next()); +} +return map; +} + +private TestSparkSession spark; + +@Before +public void setUp() { +spark = new TestSparkSession(); +} + +@After +public void tearDown() { +spark.stop(); +spark = null; +} + +@Test +public void testBeanWithMapFieldsDeserialization() { + 
+StructType schema = createSchema(); +Encoder encoder = Encoders.bean(Record.class); + +Dataset dataset = spark +.read() +.format("json") +.schema(schema) +.load("src/test/resources/test-data/with-map-fields") +.as(encoder); + +List records = dataset.collectAsList(); + +Assert.assertTrue(Util.equals(records, RECORDS)); +} + +private static StructType createSchema() { +StructField[] intervalFields = { +new StructField("startTime", DataTypes.LongType, true, Metadata.empty()), +new StructField("endTime", DataTypes.LongType, true, Metadata.empty()) +}; +DataType intervalType = new StructType(intervalFields); + +DataType intervalsType = new MapType(DataTypes.StringType, intervalType, true); + +DataType valuesType = new MapType(DataTypes.StringType, DataTypes.IntegerT
[GitHub] spark pull request #22745: [SPARK-21402][SQL][FOLLOW-UP] Fix java map of str...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/22745#discussion_r225768707 --- Diff: sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanWithMapSuite.java --- @@ -0,0 +1,257 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package test.org.apache.spark.sql; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoder; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.test.TestSparkSession; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.MapType; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +public class JavaBeanWithMapSuite { + +private static final List RECORDS = new ArrayList<>(); + +static { +RECORDS.add(new Record(1, +toMap( +Arrays.asList("a", "b"), +Arrays.asList(new Interval(111, 211), new Interval(121, 221)) +), +toMap(Arrays.asList("a", "b", "c"), Arrays.asList(11, 21, 31)) +)); +RECORDS.add(new Record(2, +toMap( +Arrays.asList("a", "b"), +Arrays.asList(new Interval(112, 212), new Interval(122, 222)) +), +toMap(Arrays.asList("a", "b", "c"), Arrays.asList(12, 22, 32)) +)); +RECORDS.add(new Record(3, +toMap( +Arrays.asList("a", "b"), +Arrays.asList(new Interval(113, 213), new Interval(123, 223)) +), +toMap(Arrays.asList("a", "b", "c"), Arrays.asList(13, 23, 33)) +)); +} + +private static Map toMap(Collection keys, Collection values) { +Map map = new HashMap<>(); +Iterator keyI = keys.iterator(); +Iterator valueI = values.iterator(); +while (keyI.hasNext() && valueI.hasNext()) { +map.put(keyI.next(), valueI.next()); +} +return map; +} + +private TestSparkSession spark; + +@Before +public void setUp() { +spark = new TestSparkSession(); +} + +@After +public void tearDown() { +spark.stop(); +spark = null; +} + +@Test +public void testBeanWithMapFieldsDeserialization() { + 
+StructType schema = createSchema(); +Encoder encoder = Encoders.bean(Record.class); + +Dataset dataset = spark +.read() +.format("json") +.schema(schema) +.load("src/test/resources/test-data/with-map-fields") +.as(encoder); + +List records = dataset.collectAsList(); + +Assert.assertTrue(Util.equals(records, RECORDS)); +} + +private static StructType createSchema() { +StructField[] intervalFields = { +new StructField("startTime", DataTypes.LongType, true, Metadata.empty()), +new StructField("endTime", DataTypes.LongType, true, Metadata.empty()) +}; +DataType intervalType = new StructType(intervalFields); + +DataType intervalsType = new MapType(DataTypes.StringType, intervalType, true); + +DataType valuesType = new MapType(DataTypes.StringType, DataTypes.IntegerT