aokolnychyi commented on a change in pull request #2403: URL: https://github.com/apache/iceberg/pull/2403#discussion_r606746022
########## File path: core/src/main/java/org/apache/iceberg/SerializableTableFactory.java ########## @@ -0,0 +1,383 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg; + +import java.io.Serializable; +import java.util.List; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.encryption.EncryptionManager; +import org.apache.iceberg.hadoop.HadoopFileIO; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.io.LocationProvider; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.util.SerializableMap; + +/** + * A factory to create serializable tables. + */ +public class SerializableTableFactory { + + private SerializableTableFactory() { + } + + /** + * Creates a read-only serializable table that can be sent to other nodes in a cluster. + * <p> + * The created table will copy the original table state in a serializable manner and will not + * reflect any subsequent changes to the original table. + * <p> + * While this class captures the metadata file location that can be used to load the complete + * table metadata, it directly persists the current schema, spec, sort order, table properties + * to avoid reading the metadata file from other nodes to access frequently needed metadata. + * <p> + * This implementation assumes the passed instances of {@link FileIO}, {@link EncryptionManager}, + * {@link LocationProvider} are serializable. If you are serializing the table using a custom + * serialization framework like Kryo, those instances of {@link FileIO}, {@link EncryptionManager}, + * {@link LocationProvider} must be supported by that particular serialization framework. + * <p> + * <em>Note:</em> loading the complete metadata from a large number of nodes can overwhelm the storage. + * + * @param table the original table to copy the state from + * @return a read-only serializable table reflecting the current state of the original table + */ + public static Table copyOf(Table table) { Review comment: Query engines that need Kryo support will call this method manually. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
