gustavoatt commented on issue #2976:
URL: https://github.com/apache/iceberg/issues/2976#issuecomment-1006716342


   That's correct, Iceberg `0.11` doesn't have support for Spark `3.1.x`. I've 
tested Iceberg `0.11` with Spark `3.0.x` and it works correctly.
   
   I also tested with plain Spark and there are no issues. I can only reproduce 
the error with Spark `3.1.x` from Iceberg `>= 0.12`.
   
   Here is a test I ran from within the Iceberg repo that reproduces the issue:
   
   ```diff
   diff --git 
a/spark/v3.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestIcebergExpressions.java
 b/spark/v3.1/spark-extensions/src/test/java/org/apache/iceberg/spar
   index ce88814c..e2f37f59 100644
   --- 
a/spark/v3.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestIcebergExpressions.java
   +++ 
b/spark/v3.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestIcebergExpressions.java
   @@ -28,6 +28,7 @@ import org.apache.spark.sql.Row;
    import org.apache.spark.sql.catalyst.expressions.IcebergTruncateTransform;
    import org.junit.After;
    import org.junit.Test;
   +import org.junit.Assert;
   
    public class TestIcebergExpressions extends SparkExtensionsTestBase {
   
   @@ -67,4 +68,64 @@ public class TestIcebergExpressions extends 
SparkExtensionsTestBase {
            ImmutableList.of(row(100, 10000L, new BigDecimal("10.50"), "10", 
"12")),
            sql("SELECT int_c, long_c, dec_c, str_c, CAST(binary_c AS STRING) 
FROM v"));
      }
   +
   +  @Test
   +  public void testSelectAndFilterWithManyRows() {
   +    sql("DROP TABLE IF EXISTS %s", tableName);
   +    sql("CREATE TABLE %s (\n" +
   +            "  `meta_data` STRUCT<`event_schema`: STRING, `id`: STRING, 
`tiers`: ARRAY<STRUCT<`id`: STRING, `type`: STRING, `hostname`: STRING, 
`ip_address`: STRING, `timestamp`: BIGINT
   +            "  `schema_version` STRING,\n" +
   +            "  `context_user_id` BIGINT,\n" +
   +            "  `context_visitor_id` STRING,\n" +
   +            "  `schema` STRING,\n" +
   +            "  `event_name` STRING,\n" +
   +            "  `context` STRUCT<`timestamp`: BIGINT, `source`: STRING, 
`platform`: STRING, `user_agent`: STRING, `version`: STRING, `bev`: STRING, 
`user_id`: BIGINT, `hash_user_id`: STR
   +            "  `experiment` STRING,\n" +
   +            "  `treatment` STRING,\n" +
   +            "  `subject_type` STRING,\n" +
   +            "  `subject_id` STRING,\n" +
   +            "  `client` STRING,\n" +
   +            "  `client_version` STRING,\n" +
   +            "  `user_id` STRING,\n" +
   +            "  `visitor_id` STRING,\n" +
   +            "  `misa_id` STRING,\n" +
   +            "  `listing_id` STRING,\n" +
   +            "  `extra_data` MAP<STRING, STRING>,\n" +
   +            "  `ds` STRING,\n" +
   +            "  `hr` STRING)\n" +
   +            "USING iceberg\n" +
   +            "PARTITIONED BY (ds, hr)\n", tableName);
   +
   +    sql(
   +        "INSERT INTO %s (\n"
   +            + "    meta_data,\n"
   +            + "    schema_version,\n"
   +            + "    context_user_id,\n"
   +            + "    context_visitor_id,\n"
   +            + "    schema,\n"
   +            + "    event_name,\n"
   +            + "    context,\n"
   +            + "    experiment,\n"
   +            + "    treatment,\n"
   +            + "    subject_type,\n"
   +            + "    subject_id,\n"
   +            + "    client,\n"
   +            + "    client_version,\n"
   +            + "    user_id,\n"
   +            + "    visitor_id,\n"
   +            + "    misa_id,\n"
   +            + "    listing_id,\n"
   +            + "    extra_data,\n"
   +            + "    ds,\n"
   +            + "    hr\n"
   +            + ")\n"
   +            + "VALUES (\n"
   +            + "    null, null, null, null, null, null, null, null, null, 
null, null, null, null, null, null, null, null, null, '2022-01-01', '10'        
 \n"
   +            + ")",
   +        tableName);
   +
   +    Assert.assertEquals(1, sql("SELECT * FROM %s", tableName).size());
   +
   +    Assert.assertEquals(1, sql("SELECT * FROM %s WHERE hr = '10'", 
tableName).size());
   +  }
    }
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to