pvargacl commented on a change in pull request #1779: URL: https://github.com/apache/hive/pull/1779#discussion_r546924170
########## File path: ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ########## @@ -994,18 +857,76 @@ public long getVisibilityTxnId() { public Path getBaseDirPath() { return baseDirPath; } - public static ParsedBase parseBase(Path path) { + + + + public static ParsedBaseLight parseBase(Path path) { String filename = path.getName(); if(!filename.startsWith(BASE_PREFIX)) { throw new IllegalArgumentException(filename + " does not start with " + BASE_PREFIX); } int idxOfv = filename.indexOf(VISIBILITY_PREFIX); if(idxOfv < 0) { - return new ParsedBase(Long.parseLong(filename.substring(BASE_PREFIX.length())), path); + return new ParsedBaseLight(Long.parseLong(filename.substring(BASE_PREFIX.length())), path); } - return new ParsedBase(Long.parseLong(filename.substring(BASE_PREFIX.length(), idxOfv)), + return new ParsedBaseLight(Long.parseLong(filename.substring(BASE_PREFIX.length(), idxOfv)), Long.parseLong(filename.substring(idxOfv + VISIBILITY_PREFIX.length())), path); } + + @Override + public String toString() { + return "Path: " + baseDirPath + "; writeId: " + + writeId + "; visibilityTxnId: " + visibilityTxnId; + } + } + /** + * In addition to {@link ParsedBaseLight} this knows if the data is in raw format, i.e. doesn't + * have acid metadata columns embedded in the files. To determine this in some cases + * requires looking at the footer of the data file which can be expensive so if this info is + * not needed {@link ParsedBaseLight} should be used. + */ + public static final class ParsedBase extends ParsedBaseLight { Review comment: ParsedBase represents a base directory with many files. The name AcidBaseFileInfo is rather confusing to me, but it represents any data file that could be in an acid table (original, bucket file in base, bucket file in delta). These are the "base" files for orc splits. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org