VladRodionov commented on a change in pull request #623: HBASE-22749: 
Distributed MOB compactions
URL: https://github.com/apache/hbase/pull/623#discussion_r338216694
 
 

 ##########
 File path: 
hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java
 ##########
 @@ -362,11 +508,375 @@ protected boolean performCompaction(FileDetails fd, 
InternalScanner scanner, Cel
         abortWriter(mobFileWriter);
       }
     }
+    // Commit or abort generational writers
+    if (mobWriters != null) {
+      for (StoreFileWriter w: mobWriters.getOutputWriters()) {
+        Long mobs = mobWriters.getMobCountForOutputWriter(w);
+        if (mobs != null && mobs > 0) {
+          mobRefSet.get().add(w.getPath().getName());
+          w.appendMetadata(fd.maxSeqId, major, mobs);
+          w.close();
+          mobStore.commitFile(w.getPath(), path);
+        } else {
+          abortWriter(w);
+        }
+      }
+    }
     mobStore.updateCellsCountCompactedFromMob(cellsCountCompactedFromMob);
     mobStore.updateCellsCountCompactedToMob(cellsCountCompactedToMob);
     mobStore.updateCellsSizeCompactedFromMob(cellsSizeCompactedFromMob);
     mobStore.updateCellsSizeCompactedToMob(cellsSizeCompactedToMob);
     progress.complete();
     return true;
   }
+
+  /**
+   * Builds a composite key of the form {@code <table>_<encoded>_<family>}.
+   *
+   * @param tableName table whose string name is the first key component
+   * @param encodedName second key component
+   * @param columnFamilyName third key component
+   * @return the three components joined with underscores
+   */
+  protected static String createKey(TableName tableName, String encodedName,
+      String columnFamilyName) {
+    return String.join("_", tableName.getNameAsString(), encodedName,
+      columnFamilyName);
+  }
+
+  @Override
+  protected List<Path> commitWriter(StoreFileWriter writer, FileDetails fd,
+      CompactionRequestImpl request) throws IOException {
+    List<Path> newFiles = Lists.newArrayList(writer.getPath());
+    writer.appendMetadata(fd.maxSeqId, request.isAllFiles(), 
request.getFiles());
+    // Append MOB references
+    Set<String> refSet = mobRefSet.get();
+    writer.appendMobMetadata(refSet);
+    writer.close();
+    return newFiles;
+  }
+
+  /**
+   * Collects the distinct MOB files referenced by the given store files.
+   * <p>
+   * Each store file may carry a {@code HStoreFile.MOB_FILE_REFS} metadata
+   * value holding a comma-separated list of MOB file names. Store files
+   * without that value, or with an empty one, contribute nothing.
+   *
+   * @param storeFiles store files whose metadata is inspected
+   * @return paths of the referenced MOB files under the MOB family directory,
+   *         without duplicates and in no particular order
+   */
+  private List<Path> getReferencedMobFiles(Collection<HStoreFile> storeFiles) {
+    Path mobDir = MobUtils.getMobFamilyPath(conf, store.getTableName(),
+      store.getColumnFamilyName());
+    Set<String> mobSet = new HashSet<String>();
+    for (HStoreFile sf : storeFiles) {
+      byte[] value = sf.getMetadataValue(HStoreFile.MOB_FILE_REFS);
+      if (value == null || value.length == 0) {
+        continue;
+      }
+      // Bytes.toString decodes as UTF-8, independent of the JVM's default
+      // charset (new String(byte[]) is platform dependent).
+      for (String name : Bytes.toString(value).split(",")) {
+        // Splitting an empty or oddly delimited value yields empty tokens;
+        // an empty child name cannot be turned into a Path.
+        if (!name.isEmpty()) {
+          mobSet.add(name);
+        }
+      }
+    }
+    List<Path> retList = new ArrayList<Path>(mobSet.size());
+    for (String name : mobSet) {
+      retList.add(new Path(mobDir, name));
+    }
+    return retList;
+  }
+}
+
+/**
+ * A candidate file together with the earliest-put timestamp read from its
+ * metadata. Instances order themselves by that timestamp, ascending.
+ */
+class FileSelection implements Comparable<FileSelection> {
+
+  public final static String NULL_REGION = "";
+  private final Path path;
+  private final Configuration conf;
+  /** Stays 0 when the file carries no EARLIEST_PUT_TS metadata. */
+  private long earliestTs;
+
+  /**
+   * Opens the file once to read its earliest-put timestamp.
+   *
+   * @param path file to describe
+   * @param conf configuration used to resolve the file system
+   * @throws IOException if the file cannot be opened or read
+   */
+  public FileSelection(Path path, Configuration conf) throws IOException {
+    this.path = path;
+    this.conf = conf;
+    readEarliestTimestamp();
+  }
+
+  /**
+   * Extracts the second component of the file name when the name consists of
+   * exactly two {@code _}-separated parts.
+   *
+   * @return the part after the underscore, or {@link #NULL_REGION} when the
+   *         name does not have exactly two parts
+   */
+  public String getEncodedRegionName() {
+    String[] parts = path.getName().split("_");
+    return parts.length == 2 ? parts[1] : NULL_REGION;
+  }
+
+  /** @return the path this selection describes */
+  public Path getPath() {
+    return path;
+  }
+
+  /** @return the earliest-put timestamp, or 0 if the file did not record one */
+  public long getEarliestTimestamp() {
+    return earliestTs;
+  }
+
+  /**
+   * Reads {@code HStoreFile.EARLIEST_PUT_TS} from the file metadata into
+   * {@link #earliestTs}.
+   *
+   * @throws IOException if opening, reading or closing the file fails
+   */
+  private void readEarliestTimestamp() throws IOException {
+    FileSystem fs = path.getFileSystem(conf);
+    HStoreFile sf = new HStoreFile(fs, path, conf, CacheConfig.DISABLED,
+      BloomType.NONE, true);
+    try {
+      sf.initReader();
+      byte[] tsData = sf.getMetadataValue(HStoreFile.EARLIEST_PUT_TS);
+      if (tsData != null) {
+        this.earliestTs = Bytes.toLong(tsData);
+      }
+    } finally {
+      // Close even when reading fails so the opened reader is not leaked.
+      sf.closeStoreFile(true);
+    }
+  }
+
+  /** Orders selections by ascending earliest-put timestamp. */
+  @Override
+  public int compareTo(FileSelection o) {
+    return Long.compare(this.earliestTs, o.earliestTs);
+  }
+
+}
+
+class Generations {
+
+  private List<Generation> generations;
+  private Configuration conf;
+
+  /**
+   * Private constructor; stores the given references as-is (no copy).
+   *
+   * @param gens generations to hold
+   * @param conf configuration consulted when selections are computed
+   */
+  private Generations(List<Generation> gens, Configuration conf) {
+    this.conf = conf;
+    this.generations = gens;
+  }
+
+  List<CompactionSelection> getCompactionSelections() throws IOException {
+    int maxTotalFiles = 
this.conf.getInt(MobConstants.MOB_COMPACTION_MAX_TOTAL_FILES_KEY,
+                                         
MobConstants.DEFAULT_MOB_COMPACTION_MAX_TOTAL_FILES);
+    int currentTotal = 0;
+    List<CompactionSelection> list = new ArrayList<CompactionSelection>();
+
+    for (Generation g: generations) {
+      List<CompactionSelection> sel = g.getCompactionSelections(conf);
+      int size = getSize(sel);
+      if ((currentTotal + size > maxTotalFiles) && currentTotal > 0) {
+        break;
+      } else {
+        currentTotal += size;
+        list.addAll(sel);
+      }
+    }
+    return list;
+  }
+
+  /**
+   * Adds up {@code size()} over every selection in the list.
+   *
+   * @param sel selections to total
+   * @return the sum of the individual sizes; 0 for an empty list
+   */
+  private int getSize(List<CompactionSelection> sel) {
+    int total = 0;
+    for (CompactionSelection selection : sel) {
+      total += selection.size();
+    }
+    return total;
+  }
+
+  static Generations build(List<Path> files, Configuration conf) throws 
IOException {
+    Map <String, ArrayList<FileSelection>> map = new HashMap<String, 
ArrayList<FileSelection>>();
+    for(Path p: files) {
+      String key = getRegionNameFromFileName(p.getName());
+      ArrayList<FileSelection> list = map.get(key);
+      if (list == null) {
+        list = new ArrayList<FileSelection>();
+        map.put(key, list);
+      }
+      list.add(new FileSelection(p, conf));
+    }
+
+    List<Generation> gens = new ArrayList<Generation>();
+    for (String key: map.keySet()) {
+      Generation g = new Generation(key);
+      List<FileSelection> selFiles = map.get(key);
+      for(FileSelection fs: selFiles) {
+        g.addFile(fs);
+      }
+      gens.add(g);
+    }
+    // Sort all generation files one-by-one
+    for(Generation gg: gens) {
+      gg.sortFiles();
+    }
+    // Sort generations
+    Collections.sort(gens);
 
 Review comment:
   GEN0 files will, by default, have the oldest timestamps, so they will come 
first. The one major assumption in the new compaction is that nobody touches 
MOB files except the HBase system. If I understood you correctly, you meant 
that somebody could touch MOB files and change their modification time? In 
this case the system will, of course, fail in several places, especially when 
cleaning MOB files, where we rely on correct timestamps. 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to