aokolnychyi commented on a change in pull request #2564: URL: https://github.com/apache/iceberg/pull/2564#discussion_r630685453
########## File path: core/src/main/java/org/apache/iceberg/ReachableFileUtils.java ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.iceberg.io.FileIO; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ReachableFileUtils { + + private static final Logger LOG = LoggerFactory.getLogger(ReachableFileUtils.class); + + private ReachableFileUtils() { + } + + /** + * Returns the location of version.text file + * @param table table whose version.text path needs to be retrieved + * @return the path to version.text Review comment: nit: `return the location of the version hint file` ########## File path: core/src/main/java/org/apache/iceberg/ReachableFileUtils.java ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.iceberg.io.FileIO; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ReachableFileUtils { + + private static final Logger LOG = LoggerFactory.getLogger(ReachableFileUtils.class); + + private ReachableFileUtils() { + } + + /** + * Returns the location of version.text file + * @param table table whose version.text path needs to be retrieved + * @return the path to version.text + */ + public static String versionHintLocation(Table table) { + TableOperations ops = ((HasTableOperations) table).operations(); + return ops.metadataFileLocation("version-hint.text"); + } + + /** + * Returns the metadata.json files associated with {@code table} + * @param table table to get the metadata json files from + * @param recursive + * <p>When true, recursively retrieves all the reachable metadata.json files. + * <p>when false, gets the all the metadata.json files only from the current metadata. 
+ * @return a list of paths to metadata files + */ + public static Set<String> metadataFileLocations(Table table, boolean recursive) { + Set<String> metadataFileLocations = new HashSet<>(); + TableOperations ops = ((HasTableOperations) table).operations(); + TableMetadata tableMetadata = ops.current(); + metadataFileLocations.add(tableMetadata.metadataFileLocation()); + metadataFileLocations(tableMetadata, metadataFileLocations, ops.io(), recursive); + return metadataFileLocations; + } + + private static void metadataFileLocations(TableMetadata metadata, Set<String> metaFiles, + FileIO io, boolean isRecursive) { + List<TableMetadata.MetadataLogEntry> metadataLogEntries = metadata.previousFiles(); + List<String> previousMetadataFiles = + metadataLogEntries.stream().map(TableMetadata.MetadataLogEntry::file) + .collect(Collectors.toList()); + if (previousMetadataFiles.size() > 0) { + metaFiles.addAll(previousMetadataFiles); + // Find the first existent metadata json file and recurse + if (isRecursive) { + for (String metadataFileLocation : previousMetadataFiles) { + try { + TableMetadata newMetadata = TableMetadataParser.read(io, metadataFileLocation); + metadataFileLocations(newMetadata, metaFiles, io, isRecursive); + break; + } catch (Exception e) { + LOG.error("Failed to load {}", metadataFileLocation, e); + } + } + } + } + } + + /** + * Returns all the path locations of all Manifest Lists for a given table Review comment: nit: `Returns locations of manifest lists in a table.` Let's add an empty line before params too. ########## File path: core/src/main/java/org/apache/iceberg/ReachableFileUtils.java ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.iceberg.io.FileIO; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ReachableFileUtils { Review comment: nit: Iceberg mostly names utility classes `xxxUtil`, not `xxxUtils`. ########## File path: core/src/main/java/org/apache/iceberg/ReachableFileUtils.java ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.iceberg.io.FileIO; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ReachableFileUtils { + + private static final Logger LOG = LoggerFactory.getLogger(ReachableFileUtils.class); + + private ReachableFileUtils() { + } + + /** + * Returns the location of version.text file Review comment: nit: `... of the version hint file.` Let's also add an empty line before the params for readability. ########## File path: core/src/main/java/org/apache/iceberg/ReachableFileUtils.java ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.iceberg.io.FileIO; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ReachableFileUtils { + + private static final Logger LOG = LoggerFactory.getLogger(ReachableFileUtils.class); + + private ReachableFileUtils() { + } + + /** + * Returns the location of version.text file + * @param table table whose version.text path needs to be retrieved + * @return the path to version.text + */ + public static String versionHintLocation(Table table) { + TableOperations ops = ((HasTableOperations) table).operations(); + return ops.metadataFileLocation("version-hint.text"); + } + + /** + * Returns the metadata.json files associated with {@code table} + * @param table table to get the metadata json files from + * @param recursive + * <p>When true, recursively retrieves all the reachable metadata.json files. + * <p>when false, gets the all the metadata.json files only from the current metadata. + * @return a list of paths to metadata files + */ + public static Set<String> metadataFileLocations(Table table, boolean recursive) { + Set<String> metadataFileLocations = new HashSet<>(); + TableOperations ops = ((HasTableOperations) table).operations(); + TableMetadata tableMetadata = ops.current(); + metadataFileLocations.add(tableMetadata.metadataFileLocation()); + metadataFileLocations(tableMetadata, metadataFileLocations, ops.io(), recursive); + return metadataFileLocations; + } + + private static void metadataFileLocations(TableMetadata metadata, Set<String> metaFiles, + FileIO io, boolean isRecursive) { + List<TableMetadata.MetadataLogEntry> metadataLogEntries = metadata.previousFiles(); + List<String> previousMetadataFiles = Review comment: This creates a temp list that gets added to the set immediately. Let's not add the overhead. 
########## File path: core/src/main/java/org/apache/iceberg/ReachableFileUtils.java ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.iceberg.io.FileIO; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ReachableFileUtils { + + private static final Logger LOG = LoggerFactory.getLogger(ReachableFileUtils.class); + + private ReachableFileUtils() { + } + + /** + * Returns the location of version.text file + * @param table table whose version.text path needs to be retrieved + * @return the path to version.text + */ + public static String versionHintLocation(Table table) { + TableOperations ops = ((HasTableOperations) table).operations(); + return ops.metadataFileLocation("version-hint.text"); + } + + /** + * Returns the metadata.json files associated with {@code table} + * @param table table to get the metadata json files from + * @param recursive + * <p>When true, recursively retrieves all the reachable metadata.json files. 
+ * <p>when false, gets the all the metadata.json files only from the current metadata. + * @return a list of paths to metadata files + */ + public static Set<String> metadataFileLocations(Table table, boolean recursive) { + Set<String> metadataFileLocations = new HashSet<>(); + TableOperations ops = ((HasTableOperations) table).operations(); + TableMetadata tableMetadata = ops.current(); + metadataFileLocations.add(tableMetadata.metadataFileLocation()); + metadataFileLocations(tableMetadata, metadataFileLocations, ops.io(), recursive); + return metadataFileLocations; + } + + private static void metadataFileLocations(TableMetadata metadata, Set<String> metaFiles, + FileIO io, boolean isRecursive) { + List<TableMetadata.MetadataLogEntry> metadataLogEntries = metadata.previousFiles(); + List<String> previousMetadataFiles = + metadataLogEntries.stream().map(TableMetadata.MetadataLogEntry::file) + .collect(Collectors.toList()); + if (previousMetadataFiles.size() > 0) { + metaFiles.addAll(previousMetadataFiles); + // Find the first existent metadata json file and recurse Review comment: I think we'd better give up if the oldest one is not reachable. Trying metadata files one by one will send a request each time. If the lineage is broken, that's not our fault and it will not impact correctness. ########## File path: core/src/main/java/org/apache/iceberg/ReachableFileUtils.java ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.iceberg.io.FileIO; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ReachableFileUtils { + + private static final Logger LOG = LoggerFactory.getLogger(ReachableFileUtils.class); + + private ReachableFileUtils() { + } + + /** + * Returns the location of version.text file + * @param table table whose version.text path needs to be retrieved + * @return the path to version.text + */ + public static String versionHintLocation(Table table) { + TableOperations ops = ((HasTableOperations) table).operations(); + return ops.metadataFileLocation("version-hint.text"); + } + + /** + * Returns the metadata.json files associated with {@code table} + * @param table table to get the metadata json files from + * @param recursive + * <p>When true, recursively retrieves all the reachable metadata.json files. Review comment: Can we move this description into the main Javadoc? ########## File path: core/src/main/java/org/apache/iceberg/ReachableFileUtils.java ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.iceberg.io.FileIO; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ReachableFileUtils { + + private static final Logger LOG = LoggerFactory.getLogger(ReachableFileUtils.class); + + private ReachableFileUtils() { + } + + /** + * Returns the location of version.text file + * @param table table whose version.text path needs to be retrieved + * @return the path to version.text + */ + public static String versionHintLocation(Table table) { + TableOperations ops = ((HasTableOperations) table).operations(); + return ops.metadataFileLocation("version-hint.text"); + } + + /** + * Returns the metadata.json files associated with {@code table} Review comment: nit: `Returns locations of JSON metadata files in a table.` Let's also add an empty line before params. ########## File path: core/src/main/java/org/apache/iceberg/ReachableFileUtils.java ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.iceberg.io.FileIO; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ReachableFileUtils { + + private static final Logger LOG = LoggerFactory.getLogger(ReachableFileUtils.class); + + private ReachableFileUtils() { + } + + /** + * Returns the location of version.text file + * @param table table whose version.text path needs to be retrieved + * @return the path to version.text + */ + public static String versionHintLocation(Table table) { + TableOperations ops = ((HasTableOperations) table).operations(); + return ops.metadataFileLocation("version-hint.text"); + } + + /** + * Returns the metadata.json files associated with {@code table} + * @param table table to get the metadata json files from + * @param recursive + * <p>When true, recursively retrieves all the reachable metadata.json files. + * <p>when false, gets the all the metadata.json files only from the current metadata. 
+ * @return a list of paths to metadata files + */ + public static Set<String> metadataFileLocations(Table table, boolean recursive) { + Set<String> metadataFileLocations = new HashSet<>(); + TableOperations ops = ((HasTableOperations) table).operations(); + TableMetadata tableMetadata = ops.current(); + metadataFileLocations.add(tableMetadata.metadataFileLocation()); + metadataFileLocations(tableMetadata, metadataFileLocations, ops.io(), recursive); + return metadataFileLocations; + } + + private static void metadataFileLocations(TableMetadata metadata, Set<String> metaFiles, Review comment: I think I still prefer something like [this](https://github.com/apache/iceberg/pull/2564#discussion_r629842136) without a separate method. ########## File path: core/src/main/java/org/apache/iceberg/ReachableFileUtils.java ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.iceberg.io.FileIO; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ReachableFileUtils { + + private static final Logger LOG = LoggerFactory.getLogger(ReachableFileUtils.class); + + private ReachableFileUtils() { + } + + /** + * Returns the location of version.text file + * @param table table whose version.text path needs to be retrieved + * @return the path to version.text + */ + public static String versionHintLocation(Table table) { + TableOperations ops = ((HasTableOperations) table).operations(); + return ops.metadataFileLocation("version-hint.text"); + } + + /** + * Returns the metadata.json files associated with {@code table} + * @param table table to get the metadata json files from + * @param recursive + * <p>When true, recursively retrieves all the reachable metadata.json files. + * <p>when false, gets the all the metadata.json files only from the current metadata. + * @return a list of paths to metadata files Review comment: nit: `locations of JSON metadata files` ########## File path: core/src/main/java/org/apache/iceberg/ReachableFileUtils.java ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.iceberg.io.FileIO; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ReachableFileUtils { + + private static final Logger LOG = LoggerFactory.getLogger(ReachableFileUtils.class); + + private ReachableFileUtils() { + } + + /** + * Returns the location of version.text file + * @param table table whose version.text path needs to be retrieved + * @return the path to version.text + */ + public static String versionHintLocation(Table table) { + TableOperations ops = ((HasTableOperations) table).operations(); + return ops.metadataFileLocation("version-hint.text"); + } + + /** + * Returns the metadata.json files associated with {@code table} + * @param table table to get the metadata json files from + * @param recursive + * <p>When true, recursively retrieves all the reachable metadata.json files. + * <p>when false, gets the all the metadata.json files only from the current metadata. + * @return a list of paths to metadata files + */ + public static Set<String> metadataFileLocations(Table table, boolean recursive) { + Set<String> metadataFileLocations = new HashSet<>(); Review comment: nit: `Sets.newHashSet()` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
