[GitHub] spark pull request #22745: [SPARK-21402][SQL][FOLLOW-UP] Fix java map of str...

2018-10-18 Thread vofque
Github user vofque commented on a diff in the pull request:

https://github.com/apache/spark/pull/22745#discussion_r226187880
  
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala ---
@@ -509,3 +509,24 @@ case class UnresolvedOrdinal(ordinal: Int)
   override def nullable: Boolean = throw new UnresolvedException(this, "nullable")
   override lazy val resolved = false
 }
+
+/**
+ * When constructing `Invoke`, the data type must be given, which may not be possible to define
+ * before analysis. This class acts as a placeholder for `Invoke` and will be replaced by
+ * `Invoke` during analysis, once the input data is resolved. The data type passed to `Invoke`
+ * will be defined by applying `dataTypeFunction` to the data type of the input data.
+ */
+case class UnresolvedInvoke(
--- End diff --

I had the same doubts. OK.
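
For reference, the placeholder described in the quoted doc comment amounts to roughly the following minimal sketch, mirroring the `UnresolvedOrdinal` pattern visible in the same hunk. The constructor fields and supertraits here are illustrative assumptions, not the PR's final code:

import org.apache.spark.sql.catalyst.analysis.UnresolvedException
import org.apache.spark.sql.catalyst.expressions.{Expression, Unevaluable}
import org.apache.spark.sql.types.DataType

// Hypothetical shape of the placeholder: it carries everything needed to
// build an Invoke later, but stays unresolved until analysis fills in types.
case class UnresolvedInvoke(
    target: Expression,                      // input whose type is not known yet
    functionName: String,                    // method to invoke once resolved
    dataTypeFunction: DataType => DataType)  // maps the input's type to Invoke's return type
  extends Expression with Unevaluable {

  override def children: Seq[Expression] = target :: Nil
  override def dataType: DataType = throw new UnresolvedException(this, "dataType")
  override def nullable: Boolean = throw new UnresolvedException(this, "nullable")
  override lazy val resolved = false
}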


---




[GitHub] spark pull request #22745: [SPARK-21402][SQL][FOLLOW-UP] Fix java map of str...

2018-10-17 Thread cloud-fan
Github user cloud-fan commented on a diff in the pull request:

https://github.com/apache/spark/pull/22745#discussion_r226149644
  
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala ---
@@ -509,3 +509,24 @@ case class UnresolvedOrdinal(ordinal: Int)
   override def nullable: Boolean = throw new UnresolvedException(this, "nullable")
   override lazy val resolved = false
 }
+
+/**
+ * When constructing `Invoke`, the data type must be given, which may not be possible to define
+ * before analysis. This class acts as a placeholder for `Invoke` and will be replaced by
+ * `Invoke` during analysis, once the input data is resolved. The data type passed to `Invoke`
+ * will be defined by applying `dataTypeFunction` to the data type of the input data.
+ */
+case class UnresolvedInvoke(
--- End diff --

I feel this is too general. Maybe we should just create a new expression `GetArrayFromMap` and resolve it to `Invoke` later.
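
Sketched out, the suggestion would trade the general-purpose placeholder for a narrow one that only covers the map-to-array case, leaving the analyzer to rewrite it to `Invoke` once the child is resolved. The names and the rewrite rule below are assumptions for illustration, not code from this PR:

import org.apache.spark.sql.catalyst.analysis.UnresolvedException
import org.apache.spark.sql.catalyst.expressions.{Expression, Unevaluable}
import org.apache.spark.sql.types.DataType

// Narrow placeholder: no dataTypeFunction, just the method name to call
// on the map value ("keyArray" or "valueArray" on MapData).
case class GetArrayFromMap(
    child: Expression,
    functionName: String)
  extends Expression with Unevaluable {

  override def children: Seq[Expression] = child :: Nil
  override def dataType: DataType = throw new UnresolvedException(this, "dataType")
  override def nullable: Boolean = throw new UnresolvedException(this, "nullable")
  override lazy val resolved = false
}

An analysis rule could then pattern-match and substitute, roughly:

  case GetArrayFromMap(child, name) if child.resolved =>
    val MapType(keyType, valueType, valueContainsNull) = child.dataType
    val (elemType, hasNull) =
      if (name == "keyArray") (keyType, false) else (valueType, valueContainsNull)
    Invoke(child, name, ArrayType(elemType, hasNull))

Being this specific keeps the analyzer rule trivial to reason about, at the cost of one extra expression per use case.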


---




[GitHub] spark pull request #22745: [SPARK-21402][SQL][FOLLOW-UP] Fix java map of str...

2018-10-17 Thread vofque
Github user vofque commented on a diff in the pull request:

https://github.com/apache/spark/pull/22745#discussion_r225810882
  
--- Diff: sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanWithMapSuite.java ---
@@ -0,0 +1,257 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package test.org.apache.spark.sql;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoder;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.test.TestSparkSession;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.MapType;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+public class JavaBeanWithMapSuite {
+
+  private static final List<Record> RECORDS = new ArrayList<>();
+
+  static {
+    RECORDS.add(new Record(1,
+      toMap(
+        Arrays.asList("a", "b"),
+        Arrays.asList(new Interval(111, 211), new Interval(121, 221))
+      ),
+      toMap(Arrays.asList("a", "b", "c"), Arrays.asList(11, 21, 31))
+    ));
+    RECORDS.add(new Record(2,
+      toMap(
+        Arrays.asList("a", "b"),
+        Arrays.asList(new Interval(112, 212), new Interval(122, 222))
+      ),
+      toMap(Arrays.asList("a", "b", "c"), Arrays.asList(12, 22, 32))
+    ));
+    RECORDS.add(new Record(3,
+      toMap(
+        Arrays.asList("a", "b"),
+        Arrays.asList(new Interval(113, 213), new Interval(123, 223))
+      ),
+      toMap(Arrays.asList("a", "b", "c"), Arrays.asList(13, 23, 33))
+    ));
+  }
+
+  private static <K, V> Map<K, V> toMap(Collection<K> keys, Collection<V> values) {
+    Map<K, V> map = new HashMap<>();
+    Iterator<K> keyI = keys.iterator();
+    Iterator<V> valueI = values.iterator();
+    while (keyI.hasNext() && valueI.hasNext()) {
+      map.put(keyI.next(), valueI.next());
+    }
+    return map;
+  }
+
+  private TestSparkSession spark;
+
+  @Before
+  public void setUp() {
+    spark = new TestSparkSession();
+  }
+
+  @After
+  public void tearDown() {
+    spark.stop();
+    spark = null;
+  }
+
+  @Test
+  public void testBeanWithMapFieldsDeserialization() {
+
+    StructType schema = createSchema();
+    Encoder<Record> encoder = Encoders.bean(Record.class);
+
+    Dataset<Record> dataset = spark
+      .read()
+      .format("json")
+      .schema(schema)
+      .load("src/test/resources/test-data/with-map-fields")
+      .as(encoder);
+
+    List<Record> records = dataset.collectAsList();
+
+    Assert.assertTrue(Util.equals(records, RECORDS));
+  }
+
+  private static StructType createSchema() {
+    StructField[] intervalFields = {
+      new StructField("startTime", DataTypes.LongType, true, Metadata.empty()),
+      new StructField("endTime", DataTypes.LongType, true, Metadata.empty())
+    };
+    DataType intervalType = new StructType(intervalFields);
+
+    DataType intervalsType = new MapType(DataTypes.StringType, intervalType, true);
+
+    DataType valuesType = new MapType(DataTypes.StringType, DataTypes.IntegerType, true);

[GitHub] spark pull request #22745: [SPARK-21402][SQL][FOLLOW-UP] Fix java map of str...

2018-10-16 Thread viirya
Github user viirya commented on a diff in the pull request:

https://github.com/apache/spark/pull/22745#discussion_r225768707
  
--- Diff: sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanWithMapSuite.java ---
@@ -0,0 +1,257 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package test.org.apache.spark.sql;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoder;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.test.TestSparkSession;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.MapType;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+public class JavaBeanWithMapSuite {
+
+  private static final List<Record> RECORDS = new ArrayList<>();
+
+  static {
+    RECORDS.add(new Record(1,
+      toMap(
+        Arrays.asList("a", "b"),
+        Arrays.asList(new Interval(111, 211), new Interval(121, 221))
+      ),
+      toMap(Arrays.asList("a", "b", "c"), Arrays.asList(11, 21, 31))
+    ));
+    RECORDS.add(new Record(2,
+      toMap(
+        Arrays.asList("a", "b"),
+        Arrays.asList(new Interval(112, 212), new Interval(122, 222))
+      ),
+      toMap(Arrays.asList("a", "b", "c"), Arrays.asList(12, 22, 32))
+    ));
+    RECORDS.add(new Record(3,
+      toMap(
+        Arrays.asList("a", "b"),
+        Arrays.asList(new Interval(113, 213), new Interval(123, 223))
+      ),
+      toMap(Arrays.asList("a", "b", "c"), Arrays.asList(13, 23, 33))
+    ));
+  }
+
+  private static <K, V> Map<K, V> toMap(Collection<K> keys, Collection<V> values) {
+    Map<K, V> map = new HashMap<>();
+    Iterator<K> keyI = keys.iterator();
+    Iterator<V> valueI = values.iterator();
+    while (keyI.hasNext() && valueI.hasNext()) {
+      map.put(keyI.next(), valueI.next());
+    }
+    return map;
+  }
+
+  private TestSparkSession spark;
+
+  @Before
+  public void setUp() {
+    spark = new TestSparkSession();
+  }
+
+  @After
+  public void tearDown() {
+    spark.stop();
+    spark = null;
+  }
+
+  @Test
+  public void testBeanWithMapFieldsDeserialization() {
+
+    StructType schema = createSchema();
+    Encoder<Record> encoder = Encoders.bean(Record.class);
+
+    Dataset<Record> dataset = spark
+      .read()
+      .format("json")
+      .schema(schema)
+      .load("src/test/resources/test-data/with-map-fields")
+      .as(encoder);
+
+    List<Record> records = dataset.collectAsList();
+
+    Assert.assertTrue(Util.equals(records, RECORDS));
+  }
+
+  private static StructType createSchema() {
+    StructField[] intervalFields = {
+      new StructField("startTime", DataTypes.LongType, true, Metadata.empty()),
+      new StructField("endTime", DataTypes.LongType, true, Metadata.empty())
+    };
+    DataType intervalType = new StructType(intervalFields);
+
+    DataType intervalsType = new MapType(DataTypes.StringType, intervalType, true);
+
+    DataType valuesType = new MapType(DataTypes.StringType, DataTypes.IntegerType, true);