VladRodionov commented on a change in pull request #623: HBASE-22749: 
Distributed MOB compactions
URL: https://github.com/apache/hbase/pull/623#discussion_r338214674
 
 

 ##########
 File path: hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java
 ##########
 @@ -362,11 +508,375 @@ protected boolean performCompaction(FileDetails fd, 
InternalScanner scanner, Cel
         abortWriter(mobFileWriter);
       }
     }
+    // Commit or abort generational writers
+    if (mobWriters != null) {
+      for (StoreFileWriter w: mobWriters.getOutputWriters()) {
+        Long mobs = mobWriters.getMobCountForOutputWriter(w);
+        if (mobs != null && mobs > 0) {
+          mobRefSet.get().add(w.getPath().getName());
+          w.appendMetadata(fd.maxSeqId, major, mobs);
+          w.close();
+          mobStore.commitFile(w.getPath(), path);
+        } else {
+          abortWriter(w);
+        }
+      }
+    }
     mobStore.updateCellsCountCompactedFromMob(cellsCountCompactedFromMob);
     mobStore.updateCellsCountCompactedToMob(cellsCountCompactedToMob);
     mobStore.updateCellsSizeCompactedFromMob(cellsSizeCompactedFromMob);
     mobStore.updateCellsSizeCompactedToMob(cellsSizeCompactedToMob);
     progress.complete();
     return true;
   }
+
+  /**
+   * Builds a composite key of the form {@code <table>_<encodedName>_<family>}.
+   *
+   * @param tableName table whose {@code getNameAsString()} value forms the first part
+   * @param encodedName second part of the key
+   * @param columnFamilyName third part of the key
+   * @return the three parts joined with underscores
+   */
+  protected static String createKey(TableName tableName, String encodedName,
+      String columnFamilyName) {
+    StringBuilder key = new StringBuilder();
+    key.append(tableName.getNameAsString());
+    key.append("_").append(encodedName);
+    key.append("_").append(columnFamilyName);
+    return key.toString();
+  }
+
+  /**
+   * Finishes the compaction output writer: appends the regular compaction metadata,
+   * then the set of MOB file names held in {@code mobRefSet}, and closes the writer.
+   *
+   * @param writer writer of the compacted store file
+   * @param fd file details supplying the maximum sequence id
+   * @param request compaction request supplying the "all files" flag and the input files
+   * @return a list containing only the path of the file written by {@code writer}
+   * @throws IOException if appending metadata or closing the writer fails
+   */
+  @Override
+  protected List<Path> commitWriter(StoreFileWriter writer, FileDetails fd,
+      CompactionRequestImpl request) throws IOException {
+    List<Path> committed = Lists.newArrayList(writer.getPath());
+    writer.appendMetadata(
+      fd.maxSeqId,
+      request.isAllFiles(),
+      request.getFiles());
+    // MOB references are written after the regular metadata and before close().
+    writer.appendMobMetadata(mobRefSet.get());
+    writer.close();
+    return committed;
+  }
+
+  /**
+   * Collects the paths of the MOB files referenced by the given store files.
+   * <p>
+   * Each store file may carry a {@code HStoreFile.MOB_FILE_REFS} metadata value holding a
+   * comma-separated list of MOB file names. The names of all store files are merged
+   * (duplicates removed) and resolved against the MOB family directory returned by
+   * {@code MobUtils.getMobFamilyPath}.
+   *
+   * @param storeFiles store files whose MOB references are read
+   * @return one path per distinct referenced MOB file name; empty if there are none
+   */
+  private List<Path> getReferencedMobFiles(Collection<HStoreFile> storeFiles) {
+    Path mobDir = MobUtils.getMobFamilyPath(conf, store.getTableName(),
+      store.getColumnFamilyName());
+    Set<String> mobSet = new HashSet<String>();
+    for (HStoreFile sf : storeFiles) {
+      byte[] value = sf.getMetadataValue(HStoreFile.MOB_FILE_REFS);
+      if (value == null) {
+        continue;
+      }
+      // Bytes.toString decodes as UTF-8; new String(byte[]) would depend on the
+      // platform default charset.
+      for (String name : Bytes.toString(value).split(",")) {
+        // An empty metadata value splits into a single empty name, which cannot be
+        // turned into a Path; skip it.
+        if (!name.isEmpty()) {
+          mobSet.add(name);
+        }
+      }
+    }
+    List<Path> retList = new ArrayList<Path>(mobSet.size());
+    for (String name : mobSet) {
+      retList.add(new Path(mobDir, name));
+    }
+    return retList;
+  }
+}
+
+/**
+ * A MOB file considered for compaction, together with the earliest put timestamp read
+ * from its store file metadata. Instances order by that timestamp, ascending.
+ */
+class FileSelection implements Comparable<FileSelection> {
+
+  public final static String NULL_REGION = "";
+  private final Path path;
+  private final long earliestTs;
+  private final Configuration conf;
+
+  /**
+   * Creates the selection and immediately reads the earliest put timestamp from the file.
+   *
+   * @param path path of the MOB file
+   * @param conf configuration used to resolve the file system and open the file
+   * @throws IOException if the file cannot be opened or read
+   */
+  public FileSelection(Path path, Configuration conf) throws IOException {
+    this.path = path;
+    this.conf = conf;
+    this.earliestTs = readEarliestTimestamp();
+  }
+
+  /**
+   * Extracts the part after the underscore from a file name of the form
+   * {@code <prefix>_<suffix>}.
+   * <p>
+   * NOTE(review): names with no underscore or with more than one underscore yield
+   * {@link #NULL_REGION}, whereas {@code Generations.getRegionNameFromFileName} takes the
+   * text after the last underscore and falls back to {@code Generation.GEN0}. Confirm
+   * which rule is intended.
+   *
+   * @return the suffix after the single underscore, or {@link #NULL_REGION}
+   */
+  public String getEncodedRegionName() {
+    String fileName = path.getName();
+    String[] parts = fileName.split("_");
+    if (parts.length == 2) {
+      return parts[1];
+    } else {
+      return NULL_REGION;
+    }
+  }
+
+  public Path getPath() {
+    return path;
+  }
+
+  public long getEarliestTimestamp() {
+    return earliestTs;
+  }
+
+  /**
+   * Opens the file as a store file and reads its {@code HStoreFile.EARLIEST_PUT_TS}
+   * metadata value.
+   *
+   * @return the stored timestamp, or 0 when the file has no such metadata
+   * @throws IOException if the file cannot be opened or read
+   */
+  private long readEarliestTimestamp() throws IOException {
+    FileSystem fs = path.getFileSystem(conf);
+    HStoreFile sf = new HStoreFile(fs, path, conf, CacheConfig.DISABLED,
+      BloomType.NONE, true);
+    sf.initReader();
+    try {
+      byte[] tsData = sf.getMetadataValue(HStoreFile.EARLIEST_PUT_TS);
+      return tsData != null ? Bytes.toLong(tsData) : 0L;
+    } finally {
+      // Close the reader even when reading the metadata fails, so it is not leaked.
+      sf.closeStoreFile(true);
+    }
+  }
+
+  @Override
+  public int compareTo(FileSelection o) {
+    return Long.compare(this.earliestTs, o.earliestTs);
+  }
+
+}
+
+/**
+ * Groups MOB files into {@link Generation}s keyed by the region-name suffix of the file
+ * name, and produces compaction selections across those generations.
+ */
+class Generations {
+
+  private List<Generation> generations;
+  private Configuration conf;
+
+  private Generations(List<Generation> gens, Configuration conf) {
+    this.generations = gens;
+    this.conf = conf;
+  }
+
+  /**
+   * Gathers the compaction selections of the generations in their sorted order, stopping
+   * before the generation that would push the total number of selected files above
+   * {@code MobConstants.MOB_COMPACTION_MAX_TOTAL_FILES_KEY}. The first generation that
+   * contributes is always taken, even if it alone exceeds the limit.
+   *
+   * @return the accumulated selections, possibly empty
+   * @throws IOException if a generation fails to compute its selections
+   */
+  List<CompactionSelection> getCompactionSelections() throws IOException {
+    int maxTotalFiles = this.conf.getInt(MobConstants.MOB_COMPACTION_MAX_TOTAL_FILES_KEY,
+      MobConstants.DEFAULT_MOB_COMPACTION_MAX_TOTAL_FILES);
+    int currentTotal = 0;
+    List<CompactionSelection> list = new ArrayList<CompactionSelection>();
+
+    for (Generation g : generations) {
+      List<CompactionSelection> sel = g.getCompactionSelections(conf);
+      int size = getSize(sel);
+      if ((currentTotal + size > maxTotalFiles) && currentTotal > 0) {
+        break;
+      }
+      currentTotal += size;
+      list.addAll(sel);
+    }
+    return list;
+  }
+
+  /** Returns the total number of files over all given selections. */
+  private int getSize(List<CompactionSelection> sel) {
+    int size = 0;
+    for (CompactionSelection cs : sel) {
+      size += cs.size();
+    }
+    return size;
+  }
+
+  /**
+   * Builds the generations for the given MOB files. Files are grouped by
+   * {@link #getRegionNameFromFileName(String)}; the files of each generation and then the
+   * generations themselves are sorted by their natural order.
+   *
+   * @param files MOB files to group
+   * @param conf configuration used to open the files
+   * @return the sorted generations
+   * @throws IOException if a file cannot be opened to read its metadata
+   */
+  static Generations build(List<Path> files, Configuration conf) throws IOException {
+    Map<String, List<FileSelection>> byRegion = new HashMap<String, List<FileSelection>>();
+    for (Path p : files) {
+      String key = getRegionNameFromFileName(p.getName());
+      List<FileSelection> list = byRegion.get(key);
+      if (list == null) {
+        list = new ArrayList<FileSelection>();
+        byRegion.put(key, list);
+      }
+      list.add(new FileSelection(p, conf));
+    }
+
+    List<Generation> gens = new ArrayList<Generation>(byRegion.size());
+    for (Map.Entry<String, List<FileSelection>> entry : byRegion.entrySet()) {
+      Generation g = new Generation(entry.getKey());
+      for (FileSelection fs : entry.getValue()) {
+        g.addFile(fs);
+      }
+      // Generation.addFile rejects files whose FileSelection.getEncodedRegionName()
+      // differs from the generation name. A generation left without files has nothing
+      // to compact and would make Generation.getCompactionSelections fail, so drop it.
+      // NOTE(review): the grouping key here and FileSelection.getEncodedRegionName()
+      // use different parsing rules (last underscore / GEN0 vs. exactly one underscore /
+      // empty string); confirm which one is intended.
+      if (g.getFiles().isEmpty()) {
+        continue;
+      }
+      g.sortFiles();
+      gens.add(g);
+    }
+    // Sort generations
+    Collections.sort(gens);
+    return new Generations(gens, conf);
+  }
+
+  /**
+   * Returns the text after the last underscore of {@code name}, or
+   * {@code Generation.GEN0} when the name contains no underscore.
+   */
+  static String getRegionNameFromFileName(String name) {
+    int index = name.lastIndexOf("_");
+    if (index < 0) {
+      return Generation.GEN0;
+    }
+    return name.substring(index + 1);
+  }
+}
+
+class Generation implements Comparable<Generation> {
+
+  static final String GEN0 ="GEN0";
+  private String regionName;
+  private long earliestTs = Long.MAX_VALUE;
+  private List<FileSelection> files = new ArrayList<>();
+  List<CompactionSelection> compSelections;
+
+  public Generation(String name) {
+    this.regionName = name;
+  }
+
+  @SuppressWarnings("deprecation")
+  public List<CompactionSelection> getCompactionSelections(Configuration conf) 
throws IOException {
+
+
+    int minFiles = conf.getInt(MobConstants.MOB_COMPACTION_MIN_FILES_KEY,
+                                MobConstants.DEFAULT_MOB_COMPACTION_MIN_FILES);
+    int maxFiles = conf.getInt(MobConstants.MOB_COMPACTION_MAX_FILES_KEY,
+                                MobConstants.DEFAULT_MOB_COMPACTION_MAX_FILES);
+    long maxSelectionSize = 
conf.getLong(MobConstants.MOB_COMPACTION_MAX_SELECTION_SIZE_KEY,
+                    MobConstants.DEFAULT_MOB_COMPACTION_MAX_SELECTION_SIZE);
+    // Now it is ordered from oldest to newest ones
+    List<FileSelection> rfiles = Lists.reverse(files);
+    List<CompactionSelection> retList = new ArrayList<CompactionSelection>();
+    FileSystem fs = rfiles.get(0).getPath().getFileSystem(conf);
+    int off = 0;
+    while (off < rfiles.size()) {
+      if (fs.getLength(rfiles.get(off).getPath()) >= maxSelectionSize) {
+        off++; continue;
+      }
+      long selSize = 0;
+      int limit = Math.min(off + maxFiles, rfiles.size());
+      int start = off;
+      List<FileSelection> sel = new ArrayList<FileSelection>();
+      for (; off < limit; off++) {
+        Path p = rfiles.get(off).getPath();
+        long fSize = fs.getLength(p);
+        if (selSize + fSize < maxSelectionSize) {
+          selSize+= fSize;
+          sel.add(new FileSelection(p, conf));
+        } else {
+          if (sel.size() < minFiles) {
+            // discard
+            sel.clear();
+            // advance by 1
+            off = start +1;
+          } else {
+            // we have new selection
+            CompactionSelection cs = new CompactionSelection(sel);
+            retList.add(cs);
+            off++;
+          }
+          break; // continue outer loop
+        }
+      }
+    }
+    return retList;
+  }
+
+  public boolean addFile(FileSelection f) {
+    if (f.getEncodedRegionName().equals(regionName)) {
+      files.add(f);
+      if (f.getEarliestTimestamp() < earliestTs) {
+        earliestTs = f.getEarliestTimestamp();
+      }
+      return true;
+    } else {
+       return false;
+    }
+  }
+
+  public void sortFiles() {
+    Collections.sort(files);
+  }
+
+  public List<FileSelection> getFiles() {
+    return files;
+  }
+
+  public String getEncodedRegionName() {
+    return regionName;
+  }
+
+  public long getEarliestTimestamp() {
+    return earliestTs;
+  }
+
+  @Override
+  public int compareTo(Generation o) {
+    if (this.earliestTs > o.earliestTs) {
+      return +1;
+    } else if (this.earliestTs == o.earliestTs) {
+      return 0;
+    } else {
+      return -1;
+    }
+  }
+}
+
+/**
+ * A list of files selected to be compacted together, tagged with an id taken from a
+ * counter shared by all instances.
+ */
+class CompactionSelection {
+  /** Source of ids; each new selection takes the current value and increments it. */
+  private static final AtomicLong ID_SOURCE = new AtomicLong();
+
+  private final long id;
+  private final List<FileSelection> files;
+
+  /**
+   * @param files the selected files; the list is stored as passed, not copied
+   */
+  public CompactionSelection(List<FileSelection> files) {
+    this.files = files;
+    this.id = ID_SOURCE.getAndIncrement();
+  }
+
+  /** Returns the id assigned to this selection at construction. */
+  public long getId() {
+    return this.id;
+  }
+
+  /** Returns the list of files given at construction. */
+  public List<FileSelection> getFiles() {
+    return this.files;
+  }
+
+  /** Returns the number of files in this selection. */
+  int size() {
+    return this.files.size();
+  }
+
+}
+
+class OutputMobWriters {
+
+  /*
+   * Input MOB file name -> output file writer
+   */
+  private Map<String, StoreFileWriter> writerMap = new HashMap<String, 
StoreFileWriter>();
+  /*
+   * Output file name -> MOB counter
+   */
+  private Map<String, Long> mapMobCounts = new HashMap<String, Long>();
 
 Review comment:
   Fixed.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to