Github user arina-ielchiieva commented on a diff in the pull request: https://github.com/apache/drill/pull/877#discussion_r133177759 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/Metadata.java --- @@ -543,78 +584,72 @@ private void readBlockMeta(String path, mapper.registerModule(serialModule); mapper.registerModule(module); mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - FSDataInputStream is = fs.open(p); - - boolean alreadyCheckedModification = false; - boolean newMetadata = false; - - if (metaContext != null) { - alreadyCheckedModification = metaContext.getStatus(parentDirString); - } - - if (dirsOnly) { - parquetTableMetadataDirs = mapper.readValue(is, ParquetTableMetadataDirs.class); - logger.info("Took {} ms to read directories from directory cache file", timer.elapsed(TimeUnit.MILLISECONDS)); - timer.stop(); - parquetTableMetadataDirs.updateRelativePaths(parentDirString); - if (!alreadyCheckedModification && tableModified(parquetTableMetadataDirs.getDirectories(), p, parentDir, metaContext)) { - parquetTableMetadataDirs = - (createMetaFilesRecursively(Path.getPathWithoutSchemeAndAuthority(p.getParent()).toString())).getRight(); - newMetadata = true; - } - } else { - parquetTableMetadata = mapper.readValue(is, ParquetTableMetadataBase.class); - logger.info("Took {} ms to read metadata from cache file", timer.elapsed(TimeUnit.MILLISECONDS)); - timer.stop(); - if (parquetTableMetadata instanceof ParquetTableMetadata_v3) { - ((ParquetTableMetadata_v3) parquetTableMetadata).updateRelativePaths(parentDirString); - } - if (!alreadyCheckedModification && tableModified(parquetTableMetadata.getDirectories(), p, parentDir, metaContext)) { - parquetTableMetadata = - (createMetaFilesRecursively(Path.getPathWithoutSchemeAndAuthority(p.getParent()).toString())).getLeft(); - newMetadata = true; - } + try (FSDataInputStream is = fs.open(path)) { + boolean alreadyCheckedModification = false; + boolean newMetadata = 
false; + alreadyCheckedModification = metaContext.getStatus(metadataParentDirPath); + + if (dirsOnly) { + parquetTableMetadataDirs = mapper.readValue(is, ParquetTableMetadataDirs.class); + logger.info("Took {} ms to read directories from directory cache file", timer.elapsed(TimeUnit.MILLISECONDS)); + timer.stop(); + parquetTableMetadataDirs.updateRelativePaths(metadataParentDirPath); + if (!alreadyCheckedModification && tableModified(parquetTableMetadataDirs.getDirectories(), path, metadataParentDir, metaContext)) { + parquetTableMetadataDirs = + (createMetaFilesRecursively(Path.getPathWithoutSchemeAndAuthority(path.getParent()).toString())).getRight(); + newMetadata = true; + } + } else { + parquetTableMetadata = mapper.readValue(is, ParquetTableMetadataBase.class); + logger.info("Took {} ms to read metadata from cache file", timer.elapsed(TimeUnit.MILLISECONDS)); + timer.stop(); + if (new MetadataVersion(parquetTableMetadata.getMetadataVersion()).compareTo(new MetadataVersion(3, 0)) >= 0) { + ((ParquetTableMetadata_v3) parquetTableMetadata).updateRelativePaths(metadataParentDirPath); + } + if (!alreadyCheckedModification && tableModified(parquetTableMetadata.getDirectories(), path, metadataParentDir, metaContext)) { + parquetTableMetadata = + (createMetaFilesRecursively(Path.getPathWithoutSchemeAndAuthority(path.getParent()).toString())).getLeft(); + newMetadata = true; + } - // DRILL-5009: Remove the RowGroup if it is empty - List<? extends ParquetFileMetadata> files = parquetTableMetadata.getFiles(); - for (ParquetFileMetadata file : files) { - List<? extends RowGroupMetadata> rowGroups = file.getRowGroups(); - for (Iterator<? extends RowGroupMetadata> iter = rowGroups.iterator(); iter.hasNext(); ) { - RowGroupMetadata r = iter.next(); - if (r.getRowCount() == 0) { - iter.remove(); + // DRILL-5009: Remove the RowGroup if it is empty + List<? extends ParquetFileMetadata> files = parquetTableMetadata.getFiles(); + for (ParquetFileMetadata file : files) { + List<? 
extends RowGroupMetadata> rowGroups = file.getRowGroups(); + for (Iterator<? extends RowGroupMetadata> iter = rowGroups.iterator(); iter.hasNext(); ) { + RowGroupMetadata r = iter.next(); + if (r.getRowCount() == 0) { + iter.remove(); + } } } - } - - } - if (newMetadata && metaContext != null) { - // if new metadata files were created, invalidate the existing metadata context - metaContext.clear(); + } + if (newMetadata) { + // if new metadata files were created, invalidate the existing metadata context + metaContext.clear(); + } + } catch (IOException e) { + logger.error("Failed to read '{}' metadata file", path, e); + metaContext.setMetadataCacheCorrupted(true); } - } /** * Check if the parquet metadata needs to be updated by comparing the modification time of the directories with * the modification time of the metadata file * - * @param directories - * @param metaFilePath - * @return - * @throws IOException + * @param directories List of directories + * @param metaFilePath path of parquet metadata cache file + * @return true if metadata needs to be updated, false otherwise + * @throws IOException if some resources are not accessible */ - private boolean tableModified(List<String> directories, Path metaFilePath, - Path parentDir, - MetadataContext metaContext) + private boolean tableModified(List<String> directories, Path metaFilePath, Path parentDir, MetadataContext metaContext) throws IOException { Stopwatch timer = Stopwatch.createStarted(); - if (metaContext != null) { - metaContext.setStatus(parentDir.toString()); - } + metaContext.setStatus(parentDir.toString()); --- End diff -- `parentDir.toUri().getPath()`?
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and you would like it to be, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---