Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22418#discussion_r217952724
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala ---
    @@ -50,6 +55,66 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll {
           .createOrReplaceTempView("orc_temp_table")
       }
     
     +  protected def testBloomFilterCreation(bloomFilterKind: Kind): Unit = {
    +    val tableName = "bloomFilter"
    +
    +    withTempDir { dir =>
    +      withTable(tableName) {
    +        val sqlStatement = orcImp match {
    +          case "native" =>
    +            s"""
    +               |CREATE TABLE $tableName (a INT, b STRING)
    +               |USING ORC
    +               |OPTIONS (
    +               |  path '${dir.toURI}',
    +               |  orc.bloom.filter.columns '*',
    +               |  orc.bloom.filter.fpp 0.1
    +               |)
    +            """.stripMargin
    +          case "hive" =>
    +            s"""
    +               |CREATE TABLE $tableName (a INT, b STRING)
    +               |STORED AS ORC
    +               |LOCATION '${dir.toURI}'
    +               |TBLPROPERTIES (
    +               |  orc.bloom.filter.columns='*',
    +               |  orc.bloom.filter.fpp=0.1
    +               |)
    +            """.stripMargin
    +          case impl =>
     +            throw new UnsupportedOperationException(s"Unknown ORC implementation: $impl")
    +        }
    +
    +        sql(sqlStatement)
    +        sql(s"INSERT INTO $tableName VALUES (1, 'str')")
    +
    +        val partFiles = dir.listFiles()
     +          .filter(f => f.isFile && !f.getName.startsWith(".") && !f.getName.startsWith("_"))
    +        assert(partFiles.length === 1)
    +
    +        val orcFilePath = new Path(partFiles.head.getAbsolutePath)
    +        val readerOptions = OrcFile.readerOptions(new Configuration())
    +        val reader = OrcFile.createReader(orcFilePath, readerOptions)
    +        var recordReader: RecordReaderImpl = null
    +        try {
    +          recordReader = reader.rows.asInstanceOf[RecordReaderImpl]
    +
     +          // A BloomFilter entry exists for every column: the root `struct`, int (`a`), and string (`b`)
    +          val sargColumns = Array(true, true, true)
    +          val orcIndex = recordReader.readRowIndex(0, null, sargColumns)
    +
    +          // Check the types and counts of bloom filters
     +          assert(orcIndex.getBloomFilterKinds.forall(_ === bloomFilterKind))
    --- End diff --
    
    Do you mean how we would extend this test case? If so, I think it's fine since what we need to test within Spark is whether the specified bloom filter works or not. It's rather all-or-nothing, so one test case should be okay.
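    
    For reference, a minimal sketch of how the concrete suites might call this helper. The test names here are hypothetical, and the expected `Kind` values (from `org.apache.orc.OrcProto.Stream.Kind`) are assumptions based on which ORC writer backs each implementation:
    
    ```scala
    import org.apache.orc.OrcProto.Stream.Kind
    
    // In the suite backed by the `native` implementation: recent ORC
    // writers emit UTF-8 bloom filters.
    test("bloom filter kind (native, hypothetical)") {
      testBloomFilterCreation(Kind.BLOOM_FILTER_UTF8)
    }
    
    // In the suite backed by the `hive` implementation: Hive 1.2's older
    // ORC writer emits the original bloom filter kind.
    test("bloom filter kind (hive, hypothetical)") {
      testBloomFilterCreation(Kind.BLOOM_FILTER)
    }
    ```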


---
