prodeezy commented on a change in pull request #123: Add support for struct
field based filtering
URL: https://github.com/apache/incubator-iceberg/pull/123#discussion_r283073853
##########
File path: api/src/main/java/org/apache/iceberg/expressions/BoundReference.java
##########
@@ -19,57 +19,238 @@
package org.apache.iceberg.expressions;
+import java.io.Serializable;
import java.util.List;
+import java.util.Map;
+
+import com.google.common.collect.Maps;
+import org.apache.iceberg.Schema;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.types.Types;
public class BoundReference<T> implements Reference {
private final int fieldId;
- private final Type type;
+ private final Accessor<StructLike> accessor;
private final int pos;
- BoundReference(Types.StructType struct, int fieldId) {
+ BoundReference(Schema schema, int fieldId) {
this.fieldId = fieldId;
- this.pos = find(fieldId, struct);
- this.type = struct.fields().get(pos).type();
+
+ Map<Integer, Accessor<StructLike>> accessors = lazyIdToAccessor(schema);
+
+ this.accessor = accessors.get(fieldId);
+
+ // only look for top level field position
+ this.pos = findTopFieldPos(fieldId, schema.asStruct());
+
}
- private int find(int fieldId, Types.StructType struct) {
+
+ private int findTopFieldPos(int fieldId, Types.StructType struct) {
List<Types.NestedField> fields = struct.fields();
for (int i = 0; i < fields.size(); i += 1) {
if (fields.get(i).fieldId() == fieldId) {
return i;
}
}
- throw new ValidationException(
- "Cannot find top-level field id %d in struct: %s", fieldId, struct);
+ return -1;
}
public Type type() {
- return type;
+ return accessor.type();
}
public int fieldId() {
return fieldId;
}
public int pos() {
+ if (pos == -1) {
+ throw new ValidationException("Cannot find position for non-primitive
field id %d", fieldId);
+ }
return pos;
}
public T get(StructLike struct) {
- return struct.get(pos, javaType());
+ return (T) accessor.get(struct);
}
@Override
public String toString() {
- return String.format("ref(id=%d, pos=%d, type=%s)", fieldId, pos, type);
+ return String.format("ref(id=%d, accessor=%s)", fieldId, accessor);
+ }
+
+ private interface Accessor<T> extends Serializable {
+ Object get(T container);
+
+ Type type();
+ }
+
+ private static class PositionAccessor implements Accessor<StructLike> {
+ private int p;
+ private final Type type;
+ private final Class<?> javaClass;
+
+ private PositionAccessor(int p, Type type) {
+ this.p = p;
+ this.type = type;
+ this.javaClass = type.typeId().javaClass();
+ }
+
+ @Override
+ public Object get(StructLike row) {
+ return row.get(p, javaClass);
+ }
+
+ @Override
+ public Type type() {
+ return type;
+ }
+ }
+
+ private static class Position2Accessor implements Accessor<StructLike> {
+ private final int p0;
+ private final int p1;
+ private final Type type;
+ private final Class<?> javaClass;
+
+ private Position2Accessor(int p, PositionAccessor wrapped) {
+ this.p0 = p;
+ this.p1 = wrapped.p;
+ this.type = wrapped.type;
+ this.javaClass = wrapped.javaClass;
+ }
+
+ @Override
+ public Object get(StructLike row) {
+ return row.get(p0, StructLike.class).get(p1, javaClass);
+ }
+
+ @Override
+ public Type type() {
+ return type;
+ }
+ }
+
+ private static class Position3Accessor implements Accessor<StructLike> {
+ private final int p0;
+ private final int p1;
+ private final int p2;
+ private final Type type;
+ private final Class<?> javaClass;
+
+ private Position3Accessor(int p, Position2Accessor wrapped) {
+ this.p0 = p;
+ this.p1 = wrapped.p0;
+ this.p2 = wrapped.p1;
+ this.type = wrapped.type;
+ this.javaClass = wrapped.javaClass;
+ }
+
+ @Override
+ public Object get(StructLike row) {
+ return row.get(p0, StructLike.class).get(p1, StructLike.class).get(p2,
javaClass);
+ }
+
+ @Override
+ public Type type() {
+ return type;
+ }
+ }
+
+ private static class WrappedPositionAccessor implements Accessor<StructLike>
{
+ private final int p;
+ private final Accessor<StructLike> accessor;
+
+ private WrappedPositionAccessor(int p, Accessor<StructLike> accessor) {
+ this.p = p;
+ this.accessor = accessor;
+ }
+
+ @Override
+ public Object get(StructLike row) {
+ StructLike inner = row.get(p, StructLike.class);
+ if (inner != null) {
+ return accessor.get(inner);
+ }
+ return null;
+ }
+
+ @Override
+ public Type type() {
+ return accessor.type();
+ }
+ }
+
+ private transient Map<Integer, Accessor<StructLike>> idToAccessor = null;
+
+ private Map<Integer, Accessor<StructLike>> lazyIdToAccessor(Schema schema) {
Review comment:
So I tried putting this into `Binder`, but it seems that is not the only place
where predicates are bound: `UnboundPredicate.bind()` is called from
`Projections`, `ResidualEvaluator`, and `Binder`. I also tried putting it in
`UnboundPredicate`, but that is not useful because one is created for every
predicate.
It seems like `Schema` is a good place for it, so I'm putting it there. Let me
know if this looks any better to you.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]