lorinlee commented on code in PR #5545:
URL: https://github.com/apache/hbase/pull/5545#discussion_r1761489274
##########
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java:
##########
@@ -256,156 +251,560 @@ public void appendMobMetadata(SetMultimap<TableName,
String> mobRefSet) throws I
public void appendTrackedTimestampsToMetadata() throws IOException {
// TODO: The StoreFileReader always converts the byte[] to TimeRange
// via TimeRangeTracker, so we should write the serialization data of
TimeRange directly.
- appendFileInfo(TIMERANGE_KEY,
TimeRangeTracker.toByteArray(timeRangeTracker));
- appendFileInfo(EARLIEST_PUT_TS, Bytes.toBytes(earliestPutTs));
+ liveFileWriter.appendTrackedTimestampsToMetadata();
+ if (historicalFileWriter != null) {
+ historicalFileWriter.appendTrackedTimestampsToMetadata();
+ }
}
- /**
- * Record the earlest Put timestamp. If the timeRangeTracker is not set,
update TimeRangeTracker
- * to include the timestamp of this key
- */
- public void trackTimestamps(final Cell cell) {
- if (KeyValue.Type.Put.getCode() == cell.getTypeByte()) {
- earliestPutTs = Math.min(earliestPutTs, cell.getTimestamp());
+ @Override
+ public void beforeShipped() throws IOException {
+ liveFileWriter.beforeShipped();
+ if (historicalFileWriter != null) {
+ historicalFileWriter.beforeShipped();
}
- timeRangeTracker.includeTimestamp(cell);
}
- private void appendGeneralBloomfilter(final Cell cell) throws IOException {
- if (this.generalBloomFilterWriter != null) {
- /*
- *
http://2.bp.blogspot.com/_Cib_A77V54U/StZMrzaKufI/AAAAAAAAADo/ZhK7bGoJdMQ/s400/KeyValue.png
- * Key = RowLen + Row + FamilyLen + Column [Family + Qualifier] +
Timestamp 3 Types of
- * Filtering: 1. Row = Row 2. RowCol = Row + Qualifier 3.
RowPrefixFixedLength = Fixed Length
- * Row Prefix
- */
- bloomContext.writeBloom(cell);
+ public Path getPath() {
+ return liveFileWriter.getPath();
+ }
+
+ public List<Path> getPaths() {
+ if (historicalFileWriter == null) {
+ return Lists.newArrayList(liveFileWriter.getPath());
}
+ return Lists.newArrayList(liveFileWriter.getPath(),
historicalFileWriter.getPath());
}
- private void appendDeleteFamilyBloomFilter(final Cell cell) throws
IOException {
- if (!PrivateCellUtil.isDeleteFamily(cell) &&
!PrivateCellUtil.isDeleteFamilyVersion(cell)) {
- return;
+ public boolean hasGeneralBloom() {
+ return liveFileWriter.hasGeneralBloom();
+ }
+
+ /**
+ * For unit testing only.
+ * @return the Bloom filter used by this writer.
+ */
+ BloomFilterWriter getGeneralBloomWriter() {
+ return liveFileWriter.generalBloomFilterWriter;
+ }
+
+ public void close() throws IOException {
+ liveFileWriter.appendFileInfo(HISTORICAL_KEY, Bytes.toBytes(false));
+ liveFileWriter.close();
+ if (historicalFileWriter != null) {
+ historicalFileWriter.appendFileInfo(HISTORICAL_KEY, Bytes.toBytes(true));
+ historicalFileWriter.close();
}
+ }
- // increase the number of delete family in the store file
- deleteFamilyCnt++;
- if (this.deleteFamilyBloomFilterWriter != null) {
- deleteFamilyBloomContext.writeBloom(cell);
+ public void appendFileInfo(byte[] key, byte[] value) throws IOException {
+ liveFileWriter.appendFileInfo(key, value);
+ if (historicalFileWriter != null) {
+ historicalFileWriter.appendFileInfo(key, value);
}
}
- @Override
- public void append(final Cell cell) throws IOException {
- appendGeneralBloomfilter(cell);
- appendDeleteFamilyBloomFilter(cell);
- writer.append(cell);
- trackTimestamps(cell);
+ /**
+ * For use in testing.
+ */
+ HFile.Writer getLiveFileWriter() {
+ return liveFileWriter.getHFileWriter();
}
- @Override
- public void beforeShipped() throws IOException {
- // For now these writer will always be of type ShipperListener true.
- // TODO : Change all writers to be specifically created for compaction
context
- writer.beforeShipped();
- if (generalBloomFilterWriter != null) {
- generalBloomFilterWriter.beforeShipped();
+ /**
+ * @param dir Directory to create file in.
+ * @return random filename inside passed <code>dir</code>
+ */
+ public static Path getUniqueFile(final FileSystem fs, final Path dir) throws
IOException {
+ if (!fs.getFileStatus(dir).isDirectory()) {
+ throw new IOException("Expecting " + dir.toString() + " to be a
directory");
}
- if (deleteFamilyBloomFilterWriter != null) {
- deleteFamilyBloomFilterWriter.beforeShipped();
+ return new Path(dir,
dash.matcher(UUID.randomUUID().toString()).replaceAll(""));
+ }
+
+ private SingleStoreFileWriter getHistoricalFileWriter() throws IOException {
+ if (historicalFileWriter == null) {
+ historicalFileWriter =
+ new SingleStoreFileWriter(fs, historicalFilePath, conf, cacheConf,
bloomType, maxKeys,
+ favoredNodes, fileContext, shouldDropCacheBehind,
compactedFilesSupplier);
}
+ return historicalFileWriter;
}
- public Path getPath() {
- return this.writer.getPath();
+ private void initRowState() {
+ deleteFamily = null;
+ deleteFamilyVersionList.clear();
+ lastCell = null;
}
- public boolean hasGeneralBloom() {
- return this.generalBloomFilterWriter != null;
+ private void initColumnState() {
+ livePutCellCount = 0;
+ deleteColumn = null;
+ deleteColumnVersionList.clear();
+
}
- /**
- * For unit testing only.
- * @return the Bloom filter used by this writer.
- */
- BloomFilterWriter getGeneralBloomWriter() {
- return generalBloomFilterWriter;
+ private boolean isDeletedByDeleteFamily(Cell cell) {
+ return deleteFamily != null && (deleteFamily.getTimestamp() >
cell.getTimestamp()
+ || (deleteFamily.getTimestamp() == cell.getTimestamp()
+ && (!newVersionBehavior || cell.getSequenceId() <
deleteFamily.getSequenceId())));
}
- private boolean closeBloomFilter(BloomFilterWriter bfw) throws IOException {
- boolean haveBloom = (bfw != null && bfw.getKeyCount() > 0);
- if (haveBloom) {
- bfw.compactBloom();
+ private boolean isDeletedByDeleteFamilyVersion(Cell cell) {
+ for (Cell deleteFamilyVersion : deleteFamilyVersionList) {
+ if (
+ deleteFamilyVersion.getTimestamp() == cell.getTimestamp()
+ && (!newVersionBehavior || cell.getSequenceId() <
deleteFamilyVersion.getSequenceId())
+ ) {
+ return true;
+ }
}
- return haveBloom;
+ return false;
}
- private boolean closeGeneralBloomFilter() throws IOException {
- boolean hasGeneralBloom = closeBloomFilter(generalBloomFilterWriter);
+ private boolean isDeletedByDeleteColumn(Cell cell) {
+ return deleteColumn != null && (deleteColumn.getTimestamp() >
cell.getTimestamp()
+ || (deleteColumn.getTimestamp() == cell.getTimestamp()
+ && (!newVersionBehavior || cell.getSequenceId() <
deleteColumn.getSequenceId())));
+ }
- // add the general Bloom filter writer and append file info
- if (hasGeneralBloom) {
- writer.addGeneralBloomFilter(generalBloomFilterWriter);
- writer.appendFileInfo(BLOOM_FILTER_TYPE_KEY,
Bytes.toBytes(bloomType.toString()));
- if (bloomParam != null) {
- writer.appendFileInfo(BLOOM_FILTER_PARAM_KEY, bloomParam);
+ private boolean isDeletedByDeleteColumnVersion(Cell cell) {
+ for (Cell deleteColumnVersion : deleteColumnVersionList) {
+ if (
+ deleteColumnVersion.getTimestamp() == cell.getTimestamp()
+ && (!newVersionBehavior || cell.getSequenceId() <
deleteColumnVersion.getSequenceId())
+ ) {
+ return true;
}
- bloomContext.addLastBloomKey(writer);
}
- return hasGeneralBloom;
+ return false;
}
- private boolean closeDeleteFamilyBloomFilter() throws IOException {
- boolean hasDeleteFamilyBloom =
closeBloomFilter(deleteFamilyBloomFilterWriter);
+ private boolean isDeleted(Cell cell) {
+ return isDeletedByDeleteFamily(cell) || isDeletedByDeleteColumn(cell)
+ || isDeletedByDeleteFamilyVersion(cell) ||
isDeletedByDeleteColumnVersion(cell);
+ }
- // add the delete family Bloom filter writer
- if (hasDeleteFamilyBloom) {
- writer.addDeleteFamilyBloomFilter(deleteFamilyBloomFilterWriter);
+ private void appendCell(Cell cell) throws IOException {
+ if ((lastCell == null || !CellUtil.matchingColumn(lastCell, cell))) {
+ initColumnState();
}
+ if (cell.getType() == Cell.Type.DeleteFamily) {
+ if (deleteFamily == null) {
+ deleteFamily = cell;
+ liveFileWriter.append(cell);
+ } else {
+ getHistoricalFileWriter().append(cell);
+ }
+ } else if (cell.getType() == Cell.Type.DeleteFamilyVersion) {
+ if (!isDeletedByDeleteFamily(cell)) {
+ deleteFamilyVersionList.add(cell);
+ if (deleteFamily != null && deleteFamily.getTimestamp() ==
cell.getTimestamp()) {
+ // This means both the delete-family and delete-family-version
markers have the same
+ // timestamp but the sequence id of delete-family-version marker is
higher than that of
+ // the delete-family marker. In this case, there is no need to add
the
+ // delete-family-version marker to the live version file. This case
happens only with
+ // the new version behavior.
+ liveFileWriter.append(cell);
Review Comment:
Hi @kadirozde . Is there any inconsistency between the code and the comments
here? The comment says `In this case, there is no need to add the
delete-family-version marker to the live version file.`, but the actual code
still writes to the liveFileWriter.
##########
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java:
##########
@@ -256,156 +251,560 @@ public void appendMobMetadata(SetMultimap<TableName,
String> mobRefSet) throws I
public void appendTrackedTimestampsToMetadata() throws IOException {
// TODO: The StoreFileReader always converts the byte[] to TimeRange
// via TimeRangeTracker, so we should write the serialization data of
TimeRange directly.
- appendFileInfo(TIMERANGE_KEY,
TimeRangeTracker.toByteArray(timeRangeTracker));
- appendFileInfo(EARLIEST_PUT_TS, Bytes.toBytes(earliestPutTs));
+ liveFileWriter.appendTrackedTimestampsToMetadata();
+ if (historicalFileWriter != null) {
+ historicalFileWriter.appendTrackedTimestampsToMetadata();
+ }
}
- /**
- * Record the earlest Put timestamp. If the timeRangeTracker is not set,
update TimeRangeTracker
- * to include the timestamp of this key
- */
- public void trackTimestamps(final Cell cell) {
- if (KeyValue.Type.Put.getCode() == cell.getTypeByte()) {
- earliestPutTs = Math.min(earliestPutTs, cell.getTimestamp());
+ @Override
+ public void beforeShipped() throws IOException {
+ liveFileWriter.beforeShipped();
+ if (historicalFileWriter != null) {
+ historicalFileWriter.beforeShipped();
}
- timeRangeTracker.includeTimestamp(cell);
}
- private void appendGeneralBloomfilter(final Cell cell) throws IOException {
- if (this.generalBloomFilterWriter != null) {
- /*
- *
http://2.bp.blogspot.com/_Cib_A77V54U/StZMrzaKufI/AAAAAAAAADo/ZhK7bGoJdMQ/s400/KeyValue.png
- * Key = RowLen + Row + FamilyLen + Column [Family + Qualifier] +
Timestamp 3 Types of
- * Filtering: 1. Row = Row 2. RowCol = Row + Qualifier 3.
RowPrefixFixedLength = Fixed Length
- * Row Prefix
- */
- bloomContext.writeBloom(cell);
+ public Path getPath() {
+ return liveFileWriter.getPath();
+ }
+
+ public List<Path> getPaths() {
+ if (historicalFileWriter == null) {
+ return Lists.newArrayList(liveFileWriter.getPath());
}
+ return Lists.newArrayList(liveFileWriter.getPath(),
historicalFileWriter.getPath());
}
- private void appendDeleteFamilyBloomFilter(final Cell cell) throws
IOException {
- if (!PrivateCellUtil.isDeleteFamily(cell) &&
!PrivateCellUtil.isDeleteFamilyVersion(cell)) {
- return;
+ public boolean hasGeneralBloom() {
+ return liveFileWriter.hasGeneralBloom();
+ }
+
+ /**
+ * For unit testing only.
+ * @return the Bloom filter used by this writer.
+ */
+ BloomFilterWriter getGeneralBloomWriter() {
+ return liveFileWriter.generalBloomFilterWriter;
+ }
+
+ public void close() throws IOException {
+ liveFileWriter.appendFileInfo(HISTORICAL_KEY, Bytes.toBytes(false));
+ liveFileWriter.close();
+ if (historicalFileWriter != null) {
+ historicalFileWriter.appendFileInfo(HISTORICAL_KEY, Bytes.toBytes(true));
+ historicalFileWriter.close();
}
+ }
- // increase the number of delete family in the store file
- deleteFamilyCnt++;
- if (this.deleteFamilyBloomFilterWriter != null) {
- deleteFamilyBloomContext.writeBloom(cell);
+ public void appendFileInfo(byte[] key, byte[] value) throws IOException {
+ liveFileWriter.appendFileInfo(key, value);
+ if (historicalFileWriter != null) {
+ historicalFileWriter.appendFileInfo(key, value);
}
}
- @Override
- public void append(final Cell cell) throws IOException {
- appendGeneralBloomfilter(cell);
- appendDeleteFamilyBloomFilter(cell);
- writer.append(cell);
- trackTimestamps(cell);
+ /**
+ * For use in testing.
+ */
+ HFile.Writer getLiveFileWriter() {
+ return liveFileWriter.getHFileWriter();
}
- @Override
- public void beforeShipped() throws IOException {
- // For now these writer will always be of type ShipperListener true.
- // TODO : Change all writers to be specifically created for compaction
context
- writer.beforeShipped();
- if (generalBloomFilterWriter != null) {
- generalBloomFilterWriter.beforeShipped();
+ /**
+ * @param dir Directory to create file in.
+ * @return random filename inside passed <code>dir</code>
+ */
+ public static Path getUniqueFile(final FileSystem fs, final Path dir) throws
IOException {
+ if (!fs.getFileStatus(dir).isDirectory()) {
+ throw new IOException("Expecting " + dir.toString() + " to be a
directory");
}
- if (deleteFamilyBloomFilterWriter != null) {
- deleteFamilyBloomFilterWriter.beforeShipped();
+ return new Path(dir,
dash.matcher(UUID.randomUUID().toString()).replaceAll(""));
+ }
+
+ private SingleStoreFileWriter getHistoricalFileWriter() throws IOException {
+ if (historicalFileWriter == null) {
+ historicalFileWriter =
+ new SingleStoreFileWriter(fs, historicalFilePath, conf, cacheConf,
bloomType, maxKeys,
+ favoredNodes, fileContext, shouldDropCacheBehind,
compactedFilesSupplier);
}
+ return historicalFileWriter;
}
- public Path getPath() {
- return this.writer.getPath();
+ private void initRowState() {
+ deleteFamily = null;
+ deleteFamilyVersionList.clear();
+ lastCell = null;
}
- public boolean hasGeneralBloom() {
- return this.generalBloomFilterWriter != null;
+ private void initColumnState() {
+ livePutCellCount = 0;
+ deleteColumn = null;
+ deleteColumnVersionList.clear();
+
}
- /**
- * For unit testing only.
- * @return the Bloom filter used by this writer.
- */
- BloomFilterWriter getGeneralBloomWriter() {
- return generalBloomFilterWriter;
+ private boolean isDeletedByDeleteFamily(Cell cell) {
+ return deleteFamily != null && (deleteFamily.getTimestamp() >
cell.getTimestamp()
+ || (deleteFamily.getTimestamp() == cell.getTimestamp()
+ && (!newVersionBehavior || cell.getSequenceId() <
deleteFamily.getSequenceId())));
}
- private boolean closeBloomFilter(BloomFilterWriter bfw) throws IOException {
- boolean haveBloom = (bfw != null && bfw.getKeyCount() > 0);
- if (haveBloom) {
- bfw.compactBloom();
+ private boolean isDeletedByDeleteFamilyVersion(Cell cell) {
+ for (Cell deleteFamilyVersion : deleteFamilyVersionList) {
+ if (
+ deleteFamilyVersion.getTimestamp() == cell.getTimestamp()
+ && (!newVersionBehavior || cell.getSequenceId() <
deleteFamilyVersion.getSequenceId())
+ ) {
+ return true;
+ }
}
- return haveBloom;
+ return false;
}
- private boolean closeGeneralBloomFilter() throws IOException {
- boolean hasGeneralBloom = closeBloomFilter(generalBloomFilterWriter);
+ private boolean isDeletedByDeleteColumn(Cell cell) {
+ return deleteColumn != null && (deleteColumn.getTimestamp() >
cell.getTimestamp()
+ || (deleteColumn.getTimestamp() == cell.getTimestamp()
+ && (!newVersionBehavior || cell.getSequenceId() <
deleteColumn.getSequenceId())));
+ }
- // add the general Bloom filter writer and append file info
- if (hasGeneralBloom) {
- writer.addGeneralBloomFilter(generalBloomFilterWriter);
- writer.appendFileInfo(BLOOM_FILTER_TYPE_KEY,
Bytes.toBytes(bloomType.toString()));
- if (bloomParam != null) {
- writer.appendFileInfo(BLOOM_FILTER_PARAM_KEY, bloomParam);
+ private boolean isDeletedByDeleteColumnVersion(Cell cell) {
+ for (Cell deleteColumnVersion : deleteColumnVersionList) {
+ if (
+ deleteColumnVersion.getTimestamp() == cell.getTimestamp()
+ && (!newVersionBehavior || cell.getSequenceId() <
deleteColumnVersion.getSequenceId())
+ ) {
+ return true;
}
- bloomContext.addLastBloomKey(writer);
}
- return hasGeneralBloom;
+ return false;
}
- private boolean closeDeleteFamilyBloomFilter() throws IOException {
- boolean hasDeleteFamilyBloom =
closeBloomFilter(deleteFamilyBloomFilterWriter);
+ private boolean isDeleted(Cell cell) {
+ return isDeletedByDeleteFamily(cell) || isDeletedByDeleteColumn(cell)
+ || isDeletedByDeleteFamilyVersion(cell) ||
isDeletedByDeleteColumnVersion(cell);
+ }
- // add the delete family Bloom filter writer
- if (hasDeleteFamilyBloom) {
- writer.addDeleteFamilyBloomFilter(deleteFamilyBloomFilterWriter);
+ private void appendCell(Cell cell) throws IOException {
+ if ((lastCell == null || !CellUtil.matchingColumn(lastCell, cell))) {
+ initColumnState();
}
+ if (cell.getType() == Cell.Type.DeleteFamily) {
+ if (deleteFamily == null) {
+ deleteFamily = cell;
+ liveFileWriter.append(cell);
+ } else {
+ getHistoricalFileWriter().append(cell);
+ }
+ } else if (cell.getType() == Cell.Type.DeleteFamilyVersion) {
+ if (!isDeletedByDeleteFamily(cell)) {
+ deleteFamilyVersionList.add(cell);
+ if (deleteFamily != null && deleteFamily.getTimestamp() ==
cell.getTimestamp()) {
+ // This means both the delete-family and delete-family-version
markers have the same
+ // timestamp but the sequence id of delete-family-version marker is
higher than that of
+ // the delete-family marker. In this case, there is no need to add
the
+ // delete-family-version marker to the live version file. This case
happens only with
+ // the new version behavior.
+ liveFileWriter.append(cell);
Review Comment:
Also I would like to ask, in this scenario, why don't we need to add the
delete-family-version marker to the live version file? Thx!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]