jackye1995 commented on a change in pull request #2354: URL: https://github.com/apache/iceberg/pull/2354#discussion_r601879489
########## File path: api/src/main/java/org/apache/iceberg/RowKey.java ########## @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.List; +import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.types.Types; + +/** + * Row key of a table. + * <p> + * Row key is a definition of table row uniqueness, + * similar to the concept of primary key in a relational database system. + * A row should be unique in a table based on the values of each {@link RowKeyField}. + * Iceberg itself does not enforce row uniqueness based on this key. + * It is leveraged by operations such as streaming upsert. + */ +public class RowKey implements Serializable { + + private static final RowKey NOT_IDENTIFIED = new RowKey(null, 0, ImmutableList.of()); + + private final Schema schema; + private final int keyId; + private final RowKeyField[] fields; + + private transient volatile List<RowKeyField> fieldList; + + private RowKey(Schema schema, int keyId, List<RowKeyField> fields) { + this.schema = schema; + this.keyId = keyId; + this.fields = fields.toArray(new RowKeyField[0]); + } + + /** + * Returns the {@link Schema} referenced by the row key + */ + public Schema schema() { + return schema; + } + + /** + * Returns the ID of the row key + */ + public int keyId() { + return keyId; + } + + /** + * Return the list of {@link RowKeyField} in the row key + * <p> + * Notice that the order of each field matters. + * 2 keys with the same set of fields but different order are viewed as different. + * The fields of the key should ideally be ordered based on the importance of each field + * to be leveraged by features like secondary index. + * + * @return the list of fields in the row key + */ + public List<RowKeyField> fields() { + return lazyFieldList(); + } + + private List<RowKeyField> lazyFieldList() { + if (fieldList == null) { + synchronized (this) { + if (fieldList == null) { + this.fieldList = ImmutableList.copyOf(fields); + } + } + } + + return fieldList; + } + + /** + * Checks whether this row key is equivalent to another ignoring the key ID. + * + * @param another a different row key + * @return true if this row key is equivalent to the given one + */ + public boolean sameRowKey(RowKey another) { + return Arrays.equals(fields, another.fields); + } + + /** + * Returns the initial default row key that has no field + */ + public static RowKey notIdentified() { + return NOT_IDENTIFIED; + } + + /** + * Returns true if the row key is the default one with no field + */ + public boolean isNotIdentified() { + return fields.length < 1; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } else if (other == null || getClass() != other.getClass()) { + return false; + } + + RowKey that = (RowKey) other; + return this.keyId == that.keyId && sameRowKey(that); + } + + @Override + public int hashCode() { + return 31 * Integer.hashCode(keyId) + Arrays.hashCode(fields); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("["); + for (RowKeyField field : fields) { + sb.append("\n"); + sb.append(" ").append(field); + } + if (fields.length > 0) { + sb.append("\n"); + } + sb.append("]"); + return sb.toString(); + } + + /** + * Creates a new {@link Builder row key builder} for the given {@link Schema}. + * + * @param schema a schema + * @return a row key builder for the given schema. + */ + public static Builder builderFor(Schema schema) { + return new Builder(schema); + } + + /** + * A builder to create valid {@link RowKey row key}. + * <p> + * Call {@link #builderFor(Schema)} to create a new builder. + */ + public static class Builder { + private final Schema schema; + private final List<RowKeyField> fields = Lists.newArrayList(); + // Default key ID is 1 because 0 is reserved for default + private int keyId = 1; + + private Builder(Schema schema) { + this.schema = schema; + } + + public Builder withKeyId(int id) { + ValidationException.check(id >= 0, "Row key id must not be less than 0"); + this.keyId = id; + return this; + } + + public Builder addField(String name) { + Types.NestedField column = schema.findField(name); + ValidationException.check(column != null, "Cannot find column with name %s in schema", name); Review comment: sounds good to me -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
