bvaradar commented on code in PR #8452:
URL: https://github.com/apache/hudi/pull/8452#discussion_r1186777672


##########
hudi-common/src/main/java/org/apache/hudi/expression/Expression.java:
##########
@@ -40,14 +51,19 @@ public enum Operator {
     }
   }
 
-  private final List<Expression> children;
+  List<Expression> getChildren();

Review Comment:
   Make this and getDataType protected 



##########
hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java:
##########
@@ -112,6 +164,17 @@ private List<String> getPartitionPathWithPathPrefix(String 
relativePathPrefix) t
       }, listingParallelism);
       pathsToList.clear();
 
+      Expression boundedExpr;

Review Comment:
   Please add descriptive comment for this block. 



##########
hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java:
##########
@@ -95,11 +120,38 @@ public List<String> 
getPartitionPathWithPathPrefixes(List<String> relativePathPr
     }).collect(Collectors.toList());
   }
 
+  private int getRelativePathPartitionLevel(Types.RecordType partitionFields, 
String relativePathPrefix) {
+    if (StringUtils.isNullOrEmpty(relativePathPrefix) || partitionFields == 
null || partitionFields.fields().size() == 1) {
+      return 0;
+    }
+
+    int level = 0;
+    for (int i = 1; i < relativePathPrefix.length() - 1; i++) {

Review Comment:
   Can we use partitionFields.size to find the level ? 



##########
hudi-common/src/main/java/org/apache/hudi/expression/BindVisitor.java:
##########
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.expression;
+
+import org.apache.hudi.internal.schema.Types;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class BindVisitor implements ExpressionVisitor<Expression>  {
+
+  protected final Types.RecordType recordType;
+  protected final boolean caseSensitive;
+
+  public BindVisitor(Types.RecordType recordType, boolean caseSensitive) {
+    this.recordType = recordType;
+    this.caseSensitive = caseSensitive;
+  }
+
+  @Override
+  public Expression alwaysTrue() {
+    return Predicates.True.get();
+  }
+
+  @Override
+  public Expression alwaysFalse() {
+    return Predicates.False.get();
+  }
+
+  @Override
+  public Expression visitAnd(Predicates.And and) {
+    if (and.getLeft() instanceof Predicates.False
+        || and.getRight() instanceof Predicates.False) {
+      return alwaysFalse();
+    }
+
+    Expression left = and.getLeft().accept(this);
+    Expression right = and.getRight().accept(this);
+    if (left instanceof Predicates.False
+        || right instanceof Predicates.False) {
+      return alwaysFalse();
+    }
+
+    if (left instanceof Predicates.True
+        && right instanceof Predicates.True) {
+      return alwaysTrue();
+    }
+
+    if (left instanceof Predicates.True) {
+      return right;
+    }
+
+    if (right instanceof Predicates.True) {
+      return left;
+    }
+
+    return Predicates.and(left, right);
+  }
+
+  @Override
+  public Expression visitOr(Predicates.Or or) {
+    if (or.getLeft() instanceof Predicates.True
+        || or.getRight() instanceof Predicates.True) {
+      return alwaysTrue();
+    }
+
+    Expression left = or.getLeft().accept(this);
+    Expression right = or.getRight().accept(this);
+    if (left instanceof Predicates.True
+        || right instanceof Predicates.True) {
+      return alwaysTrue();
+    }
+
+    if (left instanceof Predicates.False
+        && right instanceof Predicates.False) {
+      return alwaysFalse();
+    }
+
+    if (left instanceof Predicates.False) {
+      return right;
+    }
+
+    if (right instanceof Predicates.False) {
+      return left;
+    }
+
+    return Predicates.or(left, right);
+  }
+
+  @Override
+  public Expression visitLiteral(Literal literal) {
+    return literal;
+  }
+
+  @Override
+  public Expression visitNameReference(NameReference attribute) {
+    // TODO Should consider caseSensitive?

Review Comment:
   Yes, case insensitive by default would make it consistent with spark sql.  
For this , I think it would be ok to introduce a config for case sensitivity 
and align it with spark.sql.caseSensitive config in the hudi-spark integration. 



##########
hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java:
##########
@@ -121,14 +184,24 @@ private List<String> 
getPartitionPathWithPathPrefix(String relativePathPrefix) t
         List<Pair<Option<String>, Option<Path>>> result = 
engineContext.map(dirToFileListing, fileStatus -> {
           FileSystem fileSystem = 
fileStatus.getPath().getFileSystem(hadoopConf.get());
           if (fileStatus.isDirectory()) {
+            String relativePartitionPath = 
FSUtils.getRelativePartitionPath(new Path(datasetBasePath), 
fileStatus.getPath());
             if (HoodiePartitionMetadata.hasPartitionMetadata(fileSystem, 
fileStatus.getPath())) {
-              return Pair.of(Option.of(FSUtils.getRelativePartitionPath(new 
Path(datasetBasePath), fileStatus.getPath())), Option.empty());
+              if (boundedExpr instanceof Predicates.TrueExpression || 
(Boolean) boundedExpr.eval(HoodieTableMetadata
+                  .extractPartitionValues(partitionFields, 
relativePartitionPath, hiveStylePartitioningEnabled, 
urlEncodePartitioningEnabled))) {
+                return Pair.of(Option.of(relativePartitionPath), 
Option.empty());
+              }
             } else if 
(!fileStatus.getPath().getName().equals(HoodieTableMetaClient.METAFOLDER_NAME)) 
{
-              return Pair.of(Option.empty(), Option.of(fileStatus.getPath()));
+              if (boundedExpr instanceof Predicates.TrueExpression || 
(Boolean) boundedExpr.eval(HoodieTableMetadata
+                  .extractPartitionValues(partitionFields, 
relativePartitionPath, hiveStylePartitioningEnabled, 
urlEncodePartitioningEnabled))) {
+                return Pair.of(Option.empty(), 
Option.of(fileStatus.getPath()));
+              }
             }
           } else if 
(fileStatus.getPath().getName().startsWith(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX))
 {

Review Comment:
   Why is this else-if needed to check if this is a file - 
.hoodie_partition_metadata. Wouldn't this be covered in the previous iteration 
? Is this needed for non-partitioned datasets ? 



##########
hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java:
##########
@@ -95,11 +120,38 @@ public List<String> 
getPartitionPathWithPathPrefixes(List<String> relativePathPr
     }).collect(Collectors.toList());
   }
 
+  private int getRelativePathPartitionLevel(Types.RecordType partitionFields, 
String relativePathPrefix) {
+    if (StringUtils.isNullOrEmpty(relativePathPrefix) || partitionFields == 
null || partitionFields.fields().size() == 1) {
+      return 0;
+    }
+
+    int level = 0;
+    for (int i = 1; i < relativePathPrefix.length() - 1; i++) {
+      if (relativePathPrefix.charAt(i) == '/') {
+        level++;
+      }
+    }
+    if (relativePathPrefix.startsWith("/")) {
+      level--;
+    }
+    if (relativePathPrefix.endsWith("/")) {
+      level--;
+    }
+    return level;
+  }
+
   private List<String> getPartitionPathWithPathPrefix(String 
relativePathPrefix) throws IOException {
+    return getPartitionPathWithPathPrefix(relativePathPrefix, null, null);
+  }
+
+  private List<String> getPartitionPathWithPathPrefix(String 
relativePathPrefix,

Review Comment:
   Lets use descriptive method names. 
getPartitionPathWithPathPrefixUsingFilterExpression. Its confusing to follow 
otherwise. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to