codope commented on code in PR #10947:
URL: https://github.com/apache/hudi/pull/10947#discussion_r1549935529
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/RecordLevelIndexSupport.scala:
##########
@@ -180,9 +157,40 @@ class RecordLevelIndexSupport(spark: SparkSession,
}
/**
- * Return true if metadata table is enabled and record index metadata
partition is available.
+ * Returns the attribute and literal pair given the operands of a binary
operator. The pair is returned only if one of
+ * the operand is an attribute and other is literal. In other cases it
returns an empty Option.
+ * @param expression1 - Left operand of the binary operator
+ * @param expression2 - Right operand of the binary operator
+ * @return Attribute and literal pair
*/
- def isIndexAvailable: Boolean = {
- metadataConfig.enabled &&
metaClient.getTableConfig.getMetadataPartitions.contains(HoodieTableMetadataUtil.PARTITION_NAME_RECORD_INDEX)
+ private def getAttributeLiteralTuple(expression1: Expression, expression2:
Expression): Option[(AttributeReference, Literal)] = {
+ expression1 match {
+ case attr: AttributeReference => expression2 match {
+ case literal: Literal =>
+ Option.apply(attr, literal)
+ case _ =>
+ Option.empty
+ }
+ case literal: Literal => expression2 match {
+ case attr: AttributeReference =>
+ Option.apply(attr, literal)
+ case _ =>
+ Option.empty
+ }
+ case _ => Option.empty
+ }
+ }
+
+ /**
+ * Matches the configured simple record key with the input attribute name.
+ * @param attributeName The attribute name provided in the query
+ * @return true if input attribute name matches the configured simple record
key
+ */
+ private def attributeMatchesRecordKey(attributeName: String, recordKeyOpt:
Option[String]): Boolean = {
Review Comment:
What was wrong with prev implementation which did call `getRecordKeyConfig`
inside this method? Or is it just refactoring?
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/RecordLevelIndexSupport.scala:
##########
@@ -180,9 +157,40 @@ class RecordLevelIndexSupport(spark: SparkSession,
}
/**
- * Return true if metadata table is enabled and record index metadata
partition is available.
+ * Returns the attribute and literal pair given the operands of a binary
operator. The pair is returned only if one of
+ * the operand is an attribute and other is literal. In other cases it
returns an empty Option.
+ * @param expression1 - Left operand of the binary operator
+ * @param expression2 - Right operand of the binary operator
+ * @return Attribute and literal pair
*/
- def isIndexAvailable: Boolean = {
- metadataConfig.enabled &&
metaClient.getTableConfig.getMetadataPartitions.contains(HoodieTableMetadataUtil.PARTITION_NAME_RECORD_INDEX)
+ private def getAttributeLiteralTuple(expression1: Expression, expression2:
Expression): Option[(AttributeReference, Literal)] = {
Review Comment:
can we unti test this method?
##########
hudi-spark-datasource/hudi-spark-common/src/test/scala/org/apache/hudi/TestRecordLevelIndexSupport.scala:
##########
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi
+
+import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo,
FromUnixTime, Literal}
+import org.apache.spark.sql.types.StringType
+import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
+import org.junit.jupiter.api.Test
+
+import java.util.TimeZone
+
+class TestRecordLevelIndexSupport {
+ @Test
+ def testFilterQueryWithRecordKey(): Unit = {
Review Comment:
Good that we have the test for equalTo. Can we also some for In and Not In?
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/RecordLevelIndexSupport.scala:
##########
@@ -143,22 +104,38 @@ class RecordLevelIndexSupport(spark: SparkSession,
}
}
+ /**
+ * Return true if metadata table is enabled and record index metadata
partition is available.
+ */
+ def isIndexAvailable: Boolean = {
+ metadataConfig.enabled &&
metaClient.getTableConfig.getMetadataPartitions.contains(HoodieTableMetadataUtil.PARTITION_NAME_RECORD_INDEX)
+ }
+}
+
+object RecordLevelIndexSupport {
/**
* If the input query is an EqualTo or IN query on simple record key
columns, the function returns a tuple of
* list of the query and list of record key literals present in the query
otherwise returns an empty option.
*
* @param queryFilter The query that need to be filtered.
* @return Tuple of filtered query and list of record key literals that need
to be matched
*/
- private def filterQueryWithRecordKey(queryFilter: Expression):
Option[(Expression, List[String])] = {
+ def filterQueryWithRecordKey(queryFilter: Expression, recordKeyOpt:
Option[String]): Option[(Expression, List[String])] = {
queryFilter match {
case equalToQuery: EqualTo =>
- val (attribute, literal) = getAttributeLiteralTuple(equalToQuery.left,
equalToQuery.right).orNull
- if (attribute != null && attribute.name != null &&
attributeMatchesRecordKey(attribute.name)) {
- Option.apply(equalToQuery, List.apply(literal.value.toString))
+ val attributeLiteralTuple =
getAttributeLiteralTuple(equalToQuery.left, equalToQuery.right).orNull
+ if (attributeLiteralTuple != null) {
Review Comment:
In what cases does `getAttributeLiteralTuple` return empty Option? Is it
when one of the operands is null?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]