MaxGekk commented on code in PR #48397:
URL: https://github.com/apache/spark/pull/48397#discussion_r1799554487
##########
connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala:
##########
@@ -65,9 +65,139 @@ class PostgresIntegrationSuite extends
DockerJDBCIntegrationV2Suite with V2JDBCT
|)
""".stripMargin
).executeUpdate()
- connection.prepareStatement(
- "CREATE TABLE datetime (name VARCHAR(32), date1 DATE, time1 TIMESTAMP)")
+
+ connection.prepareStatement("CREATE TABLE array_test_table (int_array
int[]," +
+ "float_array FLOAT8[], timestamp_array TIMESTAMP[], string_array
TEXT[]," +
+ "datetime_array TIMESTAMPTZ[], array_of_int_arrays
INT[][])").executeUpdate()
+
+ val query =
+ """
+ INSERT INTO array_test_table
+ (int_array, float_array, timestamp_array, string_array,
+ datetime_array, array_of_int_arrays)
+ VALUES
+ (
+ ARRAY[1, 2, 3], -- Array of integers
+ ARRAY[1.1, 2.2, 3.3], -- Array of floats
+ ARRAY['2023-01-01 12:00'::timestamp, '2023-06-01
08:30'::timestamp],
+ ARRAY['hello', 'world'], -- Array of strings
+ ARRAY['2023-10-04 12:00:00+00'::timestamptz,
+ '2023-12-01 14:15:00+00'::timestamptz],
+ ARRAY[ARRAY[1, 2]] -- Array of arrays of integers
+ ),
+ (
+ ARRAY[10, 20, 30], -- Another set of data
+ ARRAY[10.5, 20.5, 30.5],
+ ARRAY['2022-01-01 09:15'::timestamp, '2022-03-15
07:45'::timestamp],
+ ARRAY['postgres', 'arrays'],
+ ARRAY['2022-11-22 09:00:00+00'::timestamptz,
+ '2022-12-31 23:59:59+00'::timestamptz],
+ ARRAY[ARRAY[10, 20]]
+ );
+ """
+ connection.prepareStatement(query).executeUpdate()
+
+ connection.prepareStatement("CREATE TABLE array_int (col
int[])").executeUpdate()
+ connection.prepareStatement("CREATE TABLE array_bigint(col
bigint[])").executeUpdate()
+ connection.prepareStatement("CREATE TABLE array_smallint (col
smallint[])").executeUpdate()
+ connection.prepareStatement("CREATE TABLE array_boolean (col
boolean[])").executeUpdate()
+ connection.prepareStatement("CREATE TABLE array_float (col
real[])").executeUpdate()
+ connection.prepareStatement("CREATE TABLE array_double (col
float8[])").executeUpdate()
+ connection.prepareStatement("CREATE TABLE array_timestamp (col
timestamp[])").executeUpdate()
+ connection.prepareStatement("CREATE TABLE array_timestamptz (col
timestamptz[])")
+ .executeUpdate()
+
+ connection.prepareStatement("INSERT INTO array_int VALUES
(array[array[10]])").executeUpdate()
+ connection.prepareStatement("INSERT INTO array_bigint VALUES
(array[array[10]])")
+ .executeUpdate()
+ connection.prepareStatement("INSERT INTO array_smallint VALUES
(array[array[10]])")
+ .executeUpdate()
+ connection.prepareStatement("INSERT INTO array_boolean VALUES
(array[array[true]])")
.executeUpdate()
+ connection.prepareStatement("INSERT INTO array_float VALUES
(array[array[10.5]])")
+ .executeUpdate()
+ connection.prepareStatement("INSERT INTO array_double VALUES
(array[array[10.1]])")
+ .executeUpdate()
+ connection.prepareStatement("INSERT INTO array_timestamp VALUES (" +
+ "array[array['2022-01-01 09:15'::timestamp]])").executeUpdate()
+ connection.prepareStatement("INSERT INTO array_timestamptz VALUES " +
+ "(array[array['2022-01-01 09:15'::timestamptz]])").executeUpdate()
+ connection.prepareStatement(
+ "CREATE TABLE datetime (name VARCHAR(32), date1 DATE, time1 TIMESTAMP)")
+ .executeUpdate()
+ }
+
+ test("Test multi-dimensional column types") {
+ val df = spark.read.format("jdbc")
+ .option("url", jdbcUrl)
+ .option("dbtable", "array_test_table")
+ .load()
+ df.collect()
+
+
+ intercept[SparkSQLException] {
+ val df = spark.read.format("jdbc")
+ .option("url", jdbcUrl)
+ .option("dbtable", "array_int")
+ .load()
+ df.collect()
+ }
+
+ intercept[SparkSQLException] {
Review Comment:
Could you deduplicate the code like:
```scala
Seq("array_int", "array_bigint").foreach { dbtable =>
intercept[SparkSQLException] {
spark.read.format("jdbc")
.option("url", jdbcUrl)
.option("dbtable", dbtable)
.load()
.collect()
}
}
```
##########
common/utils/src/main/resources/error/error-conditions.json:
##########
@@ -606,6 +606,12 @@
],
"sqlState" : "42711"
},
+ "COLUMN_ARRAY_ELEMENT_TYPE_MISMATCH" : {
+ "message" : [
+ "Some values in field <pos> are incompatible with the column array type.
Expected type <type>."
+ ],
+ "sqlState" : "0A000"
Review Comment:
Why did you select this error class:
```
"0A": "feature not supported",
```
Please consider another one — for instance, a sqlState in class `42`
("syntax error or access rule violation"), which better fits a type mismatch.
##########
connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala:
##########
@@ -65,9 +65,139 @@ class PostgresIntegrationSuite extends
DockerJDBCIntegrationV2Suite with V2JDBCT
|)
""".stripMargin
).executeUpdate()
- connection.prepareStatement(
- "CREATE TABLE datetime (name VARCHAR(32), date1 DATE, time1 TIMESTAMP)")
+
+ connection.prepareStatement("CREATE TABLE array_test_table (int_array
int[]," +
+ "float_array FLOAT8[], timestamp_array TIMESTAMP[], string_array
TEXT[]," +
+ "datetime_array TIMESTAMPTZ[], array_of_int_arrays
INT[][])").executeUpdate()
+
+ val query =
+ """
+ INSERT INTO array_test_table
+ (int_array, float_array, timestamp_array, string_array,
+ datetime_array, array_of_int_arrays)
+ VALUES
+ (
+ ARRAY[1, 2, 3], -- Array of integers
+ ARRAY[1.1, 2.2, 3.3], -- Array of floats
+ ARRAY['2023-01-01 12:00'::timestamp, '2023-06-01
08:30'::timestamp],
+ ARRAY['hello', 'world'], -- Array of strings
+ ARRAY['2023-10-04 12:00:00+00'::timestamptz,
+ '2023-12-01 14:15:00+00'::timestamptz],
+ ARRAY[ARRAY[1, 2]] -- Array of arrays of integers
+ ),
+ (
+ ARRAY[10, 20, 30], -- Another set of data
+ ARRAY[10.5, 20.5, 30.5],
+ ARRAY['2022-01-01 09:15'::timestamp, '2022-03-15
07:45'::timestamp],
+ ARRAY['postgres', 'arrays'],
+ ARRAY['2022-11-22 09:00:00+00'::timestamptz,
+ '2022-12-31 23:59:59+00'::timestamptz],
+ ARRAY[ARRAY[10, 20]]
+ );
+ """
+ connection.prepareStatement(query).executeUpdate()
+
+ connection.prepareStatement("CREATE TABLE array_int (col
int[])").executeUpdate()
+ connection.prepareStatement("CREATE TABLE array_bigint(col
bigint[])").executeUpdate()
+ connection.prepareStatement("CREATE TABLE array_smallint (col
smallint[])").executeUpdate()
+ connection.prepareStatement("CREATE TABLE array_boolean (col
boolean[])").executeUpdate()
+ connection.prepareStatement("CREATE TABLE array_float (col
real[])").executeUpdate()
+ connection.prepareStatement("CREATE TABLE array_double (col
float8[])").executeUpdate()
+ connection.prepareStatement("CREATE TABLE array_timestamp (col
timestamp[])").executeUpdate()
+ connection.prepareStatement("CREATE TABLE array_timestamptz (col
timestamptz[])")
+ .executeUpdate()
+
+ connection.prepareStatement("INSERT INTO array_int VALUES
(array[array[10]])").executeUpdate()
+ connection.prepareStatement("INSERT INTO array_bigint VALUES
(array[array[10]])")
+ .executeUpdate()
+ connection.prepareStatement("INSERT INTO array_smallint VALUES
(array[array[10]])")
+ .executeUpdate()
+ connection.prepareStatement("INSERT INTO array_boolean VALUES
(array[array[true]])")
.executeUpdate()
+ connection.prepareStatement("INSERT INTO array_float VALUES
(array[array[10.5]])")
+ .executeUpdate()
+ connection.prepareStatement("INSERT INTO array_double VALUES
(array[array[10.1]])")
+ .executeUpdate()
+ connection.prepareStatement("INSERT INTO array_timestamp VALUES (" +
+ "array[array['2022-01-01 09:15'::timestamp]])").executeUpdate()
+ connection.prepareStatement("INSERT INTO array_timestamptz VALUES " +
+ "(array[array['2022-01-01 09:15'::timestamptz]])").executeUpdate()
+ connection.prepareStatement(
+ "CREATE TABLE datetime (name VARCHAR(32), date1 DATE, time1 TIMESTAMP)")
+ .executeUpdate()
+ }
+
+ test("Test multi-dimensional column types") {
+ val df = spark.read.format("jdbc")
+ .option("url", jdbcUrl)
+ .option("dbtable", "array_test_table")
+ .load()
+ df.collect()
Review Comment:
What does this test actually verify? `df.collect()` alone only checks that the read does not throw. Please assert on the collected result (e.g. compare with `checkAnswer` or an expected row set).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]