Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/22418#discussion_r217617525
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
---
@@ -50,6 +54,60 @@ abstract class OrcSuite extends OrcTest with
BeforeAndAfterAll {
.createOrReplaceTempView("orc_temp_table")
}
+ protected def testBloomFilterCreation(bloomFilterKind: Kind) {
+ val tableName = "bloomFilter"
+
+ withTempDir { dir =>
+ withTable(tableName) {
+ val sqlStatement = orcImp match {
+ case "native" =>
+ s"""
+ |CREATE TABLE $tableName (a INT, b STRING)
+ |USING ORC
+ |OPTIONS (
+ | path '${dir.toURI}',
+ | orc.bloom.filter.columns '*',
+ | orc.bloom.filter.fpp 0.1
+ |)
+ """.stripMargin
+ case "hive" =>
+ s"""
+ |CREATE TABLE $tableName (a INT, b STRING)
+ |STORED AS ORC
+ |LOCATION '${dir.toURI}'
+ |TBLPROPERTIES (
+ | orc.bloom.filter.columns='*',
+ | orc.bloom.filter.fpp=0.1
+ |)
+ """.stripMargin
+ case impl =>
+ throw new UnsupportedOperationException(s"Unknown ORC
implementation: $impl")
+ }
+
+ sql(sqlStatement)
+ sql(s"INSERT INTO $tableName VALUES (1, 'str')")
+
+ val partFiles = dir.listFiles()
+ .filter(f => f.isFile && !f.getName.startsWith(".") &&
!f.getName.startsWith("_"))
+ assert(partFiles.length === 1)
+
+ val orcFilePath = new Path(partFiles.head.getAbsolutePath)
+ val readerOptions = OrcFile.readerOptions(new Configuration())
+ val reader = OrcFile.createReader(orcFilePath, readerOptions)
+ val recordReader =
reader.rows.asInstanceOf[org.apache.orc.impl.RecordReaderImpl]
+
+ // BloomFilter array is created for all types; `struct`, int
(`a`), string (`b`)
+ val sargColumns = Array(true, true, true)
+ val orcIndex = recordReader.readRowIndex(0, null, sargColumns)
+ recordReader.close() // Close first to prevent resource leakage at
test failures
--- End diff --
nit: would it be better to move this `recordReader.close()` call into a `finally` block?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]