Github user liancheng commented on a diff in the pull request:

    https://github.com/apache/spark/pull/8063#discussion_r36648631
  
    --- Diff: 
sql/core/src/test/scala/org/apache/spark/sql/parquet/ProtoParquetTypesConverterTest.scala
 ---
    @@ -0,0 +1,91 @@
    +package org.apache.spark.sql.parquet
    +
    +import java.net.URL
    +
    +import org.apache.parquet.schema.{GroupType, PrimitiveType, MessageType}
    +import org.apache.spark.SparkFunSuite
    +import org.apache.spark.sql.{Row, QueryTest, DataFrame, SQLContext}
    +import org.apache.spark.sql.catalyst.expressions.{GenericRow, Attribute}
    +import org.apache.spark.sql.test.TestSQLContext
    +import org.scalatest.FunSuite
    +
    +
    +import scala.collection.mutable
    +import scala.collection.mutable.ArrayBuffer
    +
    +class ProtoParquetTypesConverterTest extends QueryTest with ParquetTest {
    +  override val sqlContext: SQLContext = TestSQLContext
    +
    +  def fetchRows(file: String, table: String ): Array[Row] = {
    +    val resource: URL = getClass.getResource(file)
    +    val pf: DataFrame = sqlContext.read.parquet(resource.toURI.toString)
    +    pf.registerTempTable(table)
    +    sqlContext.sql("select * from "  + table).collect()
    +  }
    +
    +  test("should work with repeated primitive") {
    +    val rows: Array[Row] = fetchRows("/old-repeated-int.parquet", 
"repeated_int")
    +    assert(rows(0) == Row(Seq(1,2,3)))
    +  }
    +
    +  test("should work with repeated complex") {
    +    val rows: Array[Row] = fetchRows("/old-repeated-message.parquet", 
"repeated_struct")
    +    val array: mutable.WrappedArray[GenericRow] = 
rows(0)(0).asInstanceOf[mutable.WrappedArray[GenericRow]]
    +    assert(array.length === 3)
    +    assert(array(0)=== Row("First inner",null,null))
    +    assert(array(1) === Row(null,"Second inner",null))
    +    assert(array(2) === Row(null, null,"Third inner"))
    +  }
    +
    +
    +  test("should work with repeated complex with more than one item in 
array") {
    +    val rows: Array[Row] = fetchRows("/proto-repeated-struct.parquet", 
"my_complex_table")
    +    assert(rows.length === 1)
    +    val array: mutable.WrappedArray[GenericRow] = 
rows(0)(0).asInstanceOf[mutable.WrappedArray[GenericRow]]
    +    assert(array.length === 2)
    +    assert(array(0) === Row("0 - 1", "0 - 2", "0 - 3"))
    +    assert(array(1) === Row("1 - 1", "1 - 2", "1 - 3"))
    +  }
    +
    +  test("should work with repeated complex with many rows") {
    +    val rows: Array[Row] = 
fetchRows("/proto-struct-with-array-many.parquet", "many_complex_rows")
    +    assert(rows.length === 3)
    +    val row0: mutable.WrappedArray[GenericRow] = 
rows(0)(0).asInstanceOf[mutable.WrappedArray[GenericRow]]
    +    val row1: mutable.WrappedArray[GenericRow] = 
rows(1)(0).asInstanceOf[mutable.WrappedArray[GenericRow]]
    +    val row2: mutable.WrappedArray[GenericRow] = 
rows(2)(0).asInstanceOf[mutable.WrappedArray[GenericRow]]
    +    assert(row0(0) === Row("0 - 0 - 1", "0 - 0 - 2","0 - 0 - 3"))
    +    assert(row0(1)=== Row("0 - 1 - 1", "0 - 1 - 2", "0 - 1 - 3"))
    +    assert(row1(0) === Row("1 - 0 - 1", "1 - 0 - 2","1 - 0 - 3"))
    +    assert(row1(1) === Row("1 - 1 - 1", "1 - 1 - 2", "1 - 1 - 3"))
    +    assert(row2(0) === Row("2 - 0 - 1", "2 - 0 - 2","2 - 0 - 3"))
    +    assert(row2(1) === Row("2 - 1 - 1", "2 - 1 - 2", "2 - 1 - 3"))
    +  }
    +
    +  test("should work with complex type containing array") {
    +    val rows: Array[Row] = fetchRows("/proto-struct-with-array.parquet", 
"struct_containing_array")
    +    assert(rows.length === 1)
    +    val theRow: GenericRow = rows(0).asInstanceOf[GenericRow]
    +    val expected = Row(10,9,null,null,Row(9),Seq(Row(9),Row(10)))
    --- End diff --
    
    The first `null` here should be an empty `Seq`. The schema of the test 
Parquet data file is:
    
    ```
    message TestProtobuf.SchemaConverterRepetition {
      optional int32 optionalPrimitive;
      required int32 requiredPrimitive;
      repeated int32 repeatedPrimitive;
      optional group optionalMessage {
        optional int32 someId;
      }
      required group requiredMessage {
        optional int32 someId;
      }
      repeated group repeatedMessage {
        optional int32 someId;
      }
    }
    ```
    
    As stated by the parquet-format spec, `repeatedPrimitive` should be 
interpreted as a required list of required elements, so it should never be 
null; a record with no values for it yields an empty `Seq` instead.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to