Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/22622#discussion_r222855990 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala --- @@ -115,6 +116,69 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { } } + protected def testSelectiveDictionaryEncoding(isSelective: Boolean) { + val tableName = "orcTable" + + withTempDir { dir => + withTable(tableName) { + val sqlStatement = orcImp match { + case "native" => + s""" + |CREATE TABLE $tableName (zipcode STRING, uniqColumn STRING, value DOUBLE) + |USING ORC + |OPTIONS ( + | path '${dir.toURI}', + | orc.dictionary.key.threshold '1.0', + | orc.column.encoding.direct 'uniqColumn' + |) + """.stripMargin + case "hive" => + s""" + |CREATE TABLE $tableName (zipcode STRING, uniqColumn STRING, value DOUBLE) + |STORED AS ORC + |LOCATION '${dir.toURI}' + |TBLPROPERTIES ( + | orc.dictionary.key.threshold '1.0', + | hive.exec.orc.dictionary.key.size.threshold '1.0', + | orc.column.encoding.direct 'uniqColumn' + |) + """.stripMargin + case impl => + throw new UnsupportedOperationException(s"Unknown ORC implementation: $impl") + } + + sql(sqlStatement) + sql(s"INSERT INTO $tableName VALUES ('94086', 'random-uuid-string', 0.0)") + + val partFiles = dir.listFiles() + .filter(f => f.isFile && !f.getName.startsWith(".") && !f.getName.startsWith("_")) + assert(partFiles.length === 1) + + val orcFilePath = new Path(partFiles.head.getAbsolutePath) + val readerOptions = OrcFile.readerOptions(new Configuration()) + val reader = OrcFile.createReader(orcFilePath, readerOptions) + var recordReader: RecordReaderImpl = null + try { + recordReader = reader.rows.asInstanceOf[RecordReaderImpl] + + // Check the kind + val stripe = recordReader.readStripeFooter(reader.getStripes.get(0)) + assert(stripe.getColumns(1).getKind === DICTIONARY_V2) --- End diff -- Could you write some comments to explain what `DICTIONARY_V2`, `DIRECT_V2`
and `DIRECT` are?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org