alexeykudinkin commented on code in PR #6725:
URL: https://github.com/apache/hudi/pull/6725#discussion_r1019618090
##########
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java:
##########
@@ -65,6 +65,17 @@ public class HiveSyncConfig extends HoodieSyncConfig {
public static final ConfigProperty<String> HIVE_SYNC_BUCKET_SYNC_SPEC =
HiveSyncConfigHolder.HIVE_SYNC_BUCKET_SYNC_SPEC;
public static final ConfigProperty<String> HIVE_SYNC_COMMENT =
HiveSyncConfigHolder.HIVE_SYNC_COMMENT;
+ public static final ConfigProperty<Boolean> HIVE_SYNC_FILTER_PUSHDOWN_ENABLE
= ConfigProperty
Review Comment:
nit: "enabled"
##########
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java:
##########
@@ -65,6 +65,17 @@ public class HiveSyncConfig extends HoodieSyncConfig {
public static final ConfigProperty<String> HIVE_SYNC_BUCKET_SYNC_SPEC =
HiveSyncConfigHolder.HIVE_SYNC_BUCKET_SYNC_SPEC;
public static final ConfigProperty<String> HIVE_SYNC_COMMENT =
HiveSyncConfigHolder.HIVE_SYNC_COMMENT;
+ public static final ConfigProperty<Boolean> HIVE_SYNC_FILTER_PUSHDOWN_ENABLE
= ConfigProperty
+ .key("hoodie.datasource.hive_sync.filter_pushdown_enable")
Review Comment:
Let's stick to the convention of using dots "." and hyphens "-" as delimiters
##########
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java:
##########
@@ -65,6 +65,17 @@ public class HiveSyncConfig extends HoodieSyncConfig {
public static final ConfigProperty<String> HIVE_SYNC_BUCKET_SYNC_SPEC =
HiveSyncConfigHolder.HIVE_SYNC_BUCKET_SYNC_SPEC;
public static final ConfigProperty<String> HIVE_SYNC_COMMENT =
HiveSyncConfigHolder.HIVE_SYNC_COMMENT;
+ public static final ConfigProperty<Boolean> HIVE_SYNC_FILTER_PUSHDOWN_ENABLE
= ConfigProperty
+ .key("hoodie.datasource.hive_sync.filter_pushdown_enable")
+ .defaultValue(false)
+ .withDocumentation("Whether to enable push down partitions by filter");
+
+ public static final ConfigProperty<Integer>
HIVE_SYNC_FILTER_PUSHDOWN_MAX_SIZE = ConfigProperty
+ .key("hoodie.datasource.hive_sync.filter_pushdown_max_size")
Review Comment:
Same comment as above
##########
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/expression/LeafExpression.java:
##########
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.hive.expression;
+
+/**
+ * Expression that without any child expressions.
+ */
+public abstract class LeafExpression extends Expression {
+
+ public LeafExpression() {
+ super(null);
+ }
+
+ public static class Literal extends LeafExpression {
+
+ private final String value;
+ private final String type;
+
+ public Literal(String value, String type) {
+ this.value = value;
+ this.type = type;
+ }
+
+ public String getValue() {
+ return value;
+ }
+
+ public String getType() {
+ return type;
+ }
+ }
+
+ public static class NameExpression extends LeafExpression {
Review Comment:
This is rather `AttributeReferenceExpression`
##########
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/PartitionFilterGenerator.java:
##########
@@ -0,0 +1,288 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.hive.util;
+
+import org.apache.hudi.common.util.ReflectionUtils;
+import org.apache.hudi.hive.HiveSyncConfig;
+import org.apache.hudi.hive.HoodieHiveSyncException;
+import org.apache.hudi.hive.expression.BinaryOperator;
+import org.apache.hudi.hive.expression.Expression;
+import org.apache.hudi.hive.expression.LeafExpression;
+import org.apache.hudi.sync.common.model.FieldSchema;
+import org.apache.hudi.sync.common.model.Partition;
+import org.apache.hudi.sync.common.model.PartitionValueExtractor;
+
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static
org.apache.hudi.hive.HiveSyncConfig.HIVE_SYNC_FILTER_PUSHDOWN_MAX_SIZE;
+import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS;
+
+public class PartitionFilterGenerator {
+
+ private interface ExpressionBuilder<T> {
Review Comment:
I think the idea is actually a little bit different: we need visitors to
translate the Expression tree into a Filter (string), but we don't need them to
generate an Expression from a Partition (for that we can just have a simple method).
To exemplify, we should have something like the following:
```
class Expression {
abstract T accept(ExpressionVisitor<T> v);
}
class BinaryExpression {
T accept(ExpressionVisitor<T> v) {
v.visitBinaryExpr(this);
}
}
class ExpressionVisitor<T> {
T visitBinaryExpr(BinaryExpression binExpr);
// ...
}
class FilterGenVisitor extends ExpressionVisitor<String> { /* ... */ }
```
##########
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java:
##########
@@ -309,14 +311,36 @@ private boolean syncSchema(String tableName, boolean
tableExists, boolean useRea
return schemaChanged;
}
+ /**
+ * Fetch partitions from meta service, will try to push down more filters to
avoid fetching
+ * too many unnecessary partitions.
+ */
+ private List<Partition> getTablePartitions(String tableName, List<String>
writtenPartitionsSince) {
Review Comment:
Can you elaborate on what `writtenPartitionsSince` refers to?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]