Github user arina-ielchiieva commented on a diff in the pull request:

    https://github.com/apache/drill/pull/877#discussion_r133177759
  
    --- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/Metadata.java 
---
    @@ -543,78 +584,72 @@ private void readBlockMeta(String path,
         mapper.registerModule(serialModule);
         mapper.registerModule(module);
         mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, 
false);
    -    FSDataInputStream is = fs.open(p);
    -
    -    boolean alreadyCheckedModification = false;
    -    boolean newMetadata = false;
    -
    -    if (metaContext != null) {
    -      alreadyCheckedModification = metaContext.getStatus(parentDirString);
    -    }
    -
    -    if (dirsOnly) {
    -      parquetTableMetadataDirs = mapper.readValue(is, 
ParquetTableMetadataDirs.class);
    -      logger.info("Took {} ms to read directories from directory cache 
file", timer.elapsed(TimeUnit.MILLISECONDS));
    -      timer.stop();
    -      parquetTableMetadataDirs.updateRelativePaths(parentDirString);
    -      if (!alreadyCheckedModification && 
tableModified(parquetTableMetadataDirs.getDirectories(), p, parentDir, 
metaContext)) {
    -        parquetTableMetadataDirs =
    -            
(createMetaFilesRecursively(Path.getPathWithoutSchemeAndAuthority(p.getParent()).toString())).getRight();
    -        newMetadata = true;
    -      }
    -    } else {
    -      parquetTableMetadata = mapper.readValue(is, 
ParquetTableMetadataBase.class);
    -      logger.info("Took {} ms to read metadata from cache file", 
timer.elapsed(TimeUnit.MILLISECONDS));
    -      timer.stop();
    -      if (parquetTableMetadata instanceof ParquetTableMetadata_v3) {
    -        ((ParquetTableMetadata_v3) 
parquetTableMetadata).updateRelativePaths(parentDirString);
    -      }
    -      if (!alreadyCheckedModification && 
tableModified(parquetTableMetadata.getDirectories(), p, parentDir, 
metaContext)) {
    -        parquetTableMetadata =
    -            
(createMetaFilesRecursively(Path.getPathWithoutSchemeAndAuthority(p.getParent()).toString())).getLeft();
    -        newMetadata = true;
    -      }
    +    try (FSDataInputStream is = fs.open(path)) {
    +      boolean alreadyCheckedModification = false;
    +      boolean newMetadata = false;
    +        alreadyCheckedModification = 
metaContext.getStatus(metadataParentDirPath);
    +
    +      if (dirsOnly) {
    +        parquetTableMetadataDirs = mapper.readValue(is, 
ParquetTableMetadataDirs.class);
    +        logger.info("Took {} ms to read directories from directory cache 
file", timer.elapsed(TimeUnit.MILLISECONDS));
    +        timer.stop();
    +        
parquetTableMetadataDirs.updateRelativePaths(metadataParentDirPath);
    +        if (!alreadyCheckedModification && 
tableModified(parquetTableMetadataDirs.getDirectories(), path, 
metadataParentDir, metaContext)) {
    +          parquetTableMetadataDirs =
    +              
(createMetaFilesRecursively(Path.getPathWithoutSchemeAndAuthority(path.getParent()).toString())).getRight();
    +          newMetadata = true;
    +        }
    +      } else {
    +        parquetTableMetadata = mapper.readValue(is, 
ParquetTableMetadataBase.class);
    +        logger.info("Took {} ms to read metadata from cache file", 
timer.elapsed(TimeUnit.MILLISECONDS));
    +        timer.stop();
    +        if (new 
MetadataVersion(parquetTableMetadata.getMetadataVersion()).compareTo(new 
MetadataVersion(3, 0)) >= 0) {
    +          ((ParquetTableMetadata_v3) 
parquetTableMetadata).updateRelativePaths(metadataParentDirPath);
    +        }
    +        if (!alreadyCheckedModification && 
tableModified(parquetTableMetadata.getDirectories(), path, metadataParentDir, 
metaContext)) {
    +          parquetTableMetadata =
    +              
(createMetaFilesRecursively(Path.getPathWithoutSchemeAndAuthority(path.getParent()).toString())).getLeft();
    +          newMetadata = true;
    +        }
     
    -      // DRILL-5009: Remove the RowGroup if it is empty
    -      List<? extends ParquetFileMetadata> files = 
parquetTableMetadata.getFiles();
    -      for (ParquetFileMetadata file : files) {
    -        List<? extends RowGroupMetadata> rowGroups = file.getRowGroups();
    -        for (Iterator<? extends RowGroupMetadata> iter = 
rowGroups.iterator(); iter.hasNext(); ) {
    -          RowGroupMetadata r = iter.next();
    -          if (r.getRowCount() == 0) {
    -            iter.remove();
    +        // DRILL-5009: Remove the RowGroup if it is empty
    +        List<? extends ParquetFileMetadata> files = 
parquetTableMetadata.getFiles();
    +        for (ParquetFileMetadata file : files) {
    +          List<? extends RowGroupMetadata> rowGroups = file.getRowGroups();
    +          for (Iterator<? extends RowGroupMetadata> iter = 
rowGroups.iterator(); iter.hasNext(); ) {
    +            RowGroupMetadata r = iter.next();
    +            if (r.getRowCount() == 0) {
    +              iter.remove();
    +            }
               }
             }
    -      }
    -
    -    }
     
    -    if (newMetadata && metaContext != null) {
    -      // if new metadata files were created, invalidate the existing 
metadata context
    -      metaContext.clear();
    +      }
    +      if (newMetadata) {
    +        // if new metadata files were created, invalidate the existing 
metadata context
    +        metaContext.clear();
    +      }
    +    } catch (IOException e) {
    +      logger.error("Failed to read '{}' metadata file", path, e);
    +      metaContext.setMetadataCacheCorrupted(true);
         }
    -
       }
     
       /**
        * Check if the parquet metadata needs to be updated by comparing the 
modification time of the directories with
        * the modification time of the metadata file
        *
    -   * @param directories
    -   * @param metaFilePath
    -   * @return
    -   * @throws IOException
    +   * @param directories List of directories
    +   * @param metaFilePath path of parquet metadata cache file
    +   * @return true if metadata needs to be updated, false otherwise
    +   * @throws IOException if some resources are not accessible
        */
    -  private boolean tableModified(List<String> directories, Path 
metaFilePath,
    -      Path parentDir,
    -      MetadataContext metaContext)
    +  private boolean tableModified(List<String> directories, Path 
metaFilePath, Path parentDir, MetadataContext metaContext)
           throws IOException {
     
         Stopwatch timer = Stopwatch.createStarted();
     
    -    if (metaContext != null) {
    -      metaContext.setStatus(parentDir.toString());
    -    }
    +    metaContext.setStatus(parentDir.toString());
    --- End diff --
    
    `parentDir.toUri().getPath()`?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

Reply via email to