This is an automated email from the ASF dual-hosted git repository.
stoty pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/phoenix.git
The following commit(s) were added to refs/heads/master by this push:
new 80414aa6e8 PHOENIX-7331 Fix incompatibilities with HBASE-28644
80414aa6e8 is described below
commit 80414aa6e896ae364c3029bcabad192a2fc5f93f
Author: Istvan Toth <[email protected]>
AuthorDate: Tue Jun 18 07:27:30 2024 +0200
PHOENIX-7331 Fix incompatibilities with HBASE-28644
---
.../hbase/index/util/IndexManagementUtil.java | 13 +++---
.../apache/phoenix/util/PhoenixKeyValueUtil.java | 51 +++++++++++++++++++++-
2 files changed, 55 insertions(+), 9 deletions(-)
diff --git
a/phoenix-core-client/src/main/java/org/apache/phoenix/hbase/index/util/IndexManagementUtil.java
b/phoenix-core-client/src/main/java/org/apache/phoenix/hbase/index/util/IndexManagementUtil.java
index 48894b5306..3d8c111d8e 100644
---
a/phoenix-core-client/src/main/java/org/apache/phoenix/hbase/index/util/IndexManagementUtil.java
+++
b/phoenix-core-client/src/main/java/org/apache/phoenix/hbase/index/util/IndexManagementUtil.java
@@ -31,7 +31,6 @@ import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
@@ -43,6 +42,7 @@ import org.apache.phoenix.hbase.index.covered.Batch;
import org.apache.phoenix.hbase.index.covered.data.LazyValueGetter;
import org.apache.phoenix.hbase.index.covered.update.ColumnReference;
import
org.apache.phoenix.hbase.index.scanner.ScannerBuilder.CoveredDeleteScanner;
+import org.apache.phoenix.util.PhoenixKeyValueUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -216,8 +216,7 @@ public class IndexManagementUtil {
}
/**
- * Batch all the {@link KeyValue}s in a collection of kvs by timestamp.
Updates any {@link KeyValue} with a
- * timestamp == {@link HConstants#LATEST_TIMESTAMP} to the timestamp at
the time the method is called.
+ * Batch all the {@link KeyValue}s in a collection of kvs by timestamp.
*
* @param kvs {@link KeyValue}s to break into batches
* @param batches to update with the given kvs
@@ -236,16 +235,16 @@ public class IndexManagementUtil {
}
/**
- * Batch all the {@link KeyValue}s in a {@link Mutation} by timestamp.
Updates any {@link KeyValue} with a timestamp
- * == {@link HConstants#LATEST_TIMESTAMP} to the timestamp at the time the
method is called.
- *
+ * Batch all the {@link KeyValue}s in a {@link Mutation} by timestamp.
+ *
* @param m {@link Mutation} from which to extract the {@link KeyValue}s
* @return the mutation, broken into batches and sorted in ascending order
(smallest first)
*/
public static Collection<Batch>
createTimestampBatchesFromMutation(Mutation m) {
Map<Long, Batch> batches = new HashMap<Long, Batch>();
for (List<Cell> family : m.getFamilyCellMap().values()) {
- List<KeyValue> familyKVs = KeyValueUtil.ensureKeyValues(family);
+ // TODO do we really need this to be on-heap ?
+ List<KeyValue> familyKVs =
PhoenixKeyValueUtil.ensureKeyValues(family);
createTimestampBatchesFromKeyValues(familyKVs, batches);
}
// sort the batches
diff --git
a/phoenix-core-client/src/main/java/org/apache/phoenix/util/PhoenixKeyValueUtil.java
b/phoenix-core-client/src/main/java/org/apache/phoenix/util/PhoenixKeyValueUtil.java
index 23dfc2a31d..a2bced47ee 100644
---
a/phoenix-core-client/src/main/java/org/apache/phoenix/util/PhoenixKeyValueUtil.java
+++
b/phoenix-core-client/src/main/java/org/apache/phoenix/util/PhoenixKeyValueUtil.java
@@ -17,6 +17,7 @@
*/
package org.apache.phoenix.util;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
@@ -36,6 +37,8 @@ import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hbase.thirdparty.com.google.common.base.Function;
+import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.phoenix.compat.hbase.CompatUtil;
import org.apache.phoenix.execute.MutationState.MultiRowMutationState;
import org.apache.phoenix.execute.MutationState.RowMutationState;
@@ -210,13 +213,55 @@ public class PhoenixKeyValueUtil {
return size;
}
- public static KeyValue maybeCopyCell(Cell c) {
+ /**
+ * If c is a KeyValue, return it unchanged.
+ * If c is not a KeyValue, return an on-heap KeyValue copy of it.
+ *
+ * @param c cell
+ * @return either c cast to KeyValue, or its copy as a KeyValue
+ */
+ public static KeyValue ensureKeyValue(Cell c) {
// Same as KeyValueUtil, but HBase has deprecated this method. Avoid
depending on something
// that will likely be removed at some point in time.
if (c == null) return null;
// TODO Do we really want to return only KeyValues, or would it be
enough to
// copy ByteBufferExtendedCells to heap ?
// i.e can we avoid copying on-heap cells like
BufferedDataBlockEncoder.OnheapDecodedCell ?
+ if (c instanceof KeyValue) {
+ return (KeyValue) c;
+ } else {
+ return KeyValueUtil.copyToNewKeyValue(c);
+ }
+ }
+
+ /**
+ * Replace non-KeyValue cells with their KeyValue copies.
+ *
+ * This ensures that all returned Cells are on-heap KeyValues,
+ * but will do extra copying work for any non-KeyValue on-heap cells
+ *
+ * @param cells the input cells; the list itself is not modified
+ * @return a new list containing the cells as KeyValues
+ */
+ public static List<KeyValue> ensureKeyValues(List<Cell> cells) {
+ List<KeyValue> lazyList = Lists.transform(cells, new Function<Cell,
KeyValue>() {
+ @Override
+ public KeyValue apply(Cell arg0) {
+ return ensureKeyValue(arg0);
+ }
+ });
+ return new ArrayList<>(lazyList);
+ }
+
+ public static KeyValue maybeCopyCell(Cell c) {
+ // Same as KeyValueUtil, but HBase has deprecated this method. Avoid
depending on something
+ // that will likely be removed at some point in time.
+ if (c == null) {
+ return null;
+ }
+ // TODO Do we really want to return only KeyValues, or would it be
enough to
+ // copy ByteBufferExtendedCells to heap ?
+ // i.e can we avoid copying on-heap cells like
BufferedDataBlockEncoder.OnheapDecodedCell ?
if (c instanceof KeyValue) {
return (KeyValue) c;
}
@@ -234,6 +279,7 @@ public class PhoenixKeyValueUtil {
ListIterator<Cell> cellsIt = cells.listIterator();
while (cellsIt.hasNext()) {
Cell c = cellsIt.next();
+ //FIXME this does not catch all off-heap cells
if (c instanceof ByteBufferExtendedCell) {
cellsIt.set(KeyValueUtil.copyToNewKeyValue(c));
}
@@ -259,7 +305,8 @@ public class PhoenixKeyValueUtil {
public static void setTimestamp(Mutation m, long timestamp) {
byte[] tsBytes = Bytes.toBytes(timestamp);
for (List<Cell> family : m.getFamilyCellMap().values()) {
- List<KeyValue> familyKVs =
org.apache.hadoop.hbase.KeyValueUtil.ensureKeyValues(family);
+ // TODO Do we really need to copy everything to the HEAP here ?
+ List<KeyValue> familyKVs = ensureKeyValues(family);
for (KeyValue kv : familyKVs) {
int tsOffset = kv.getTimestampOffset();
System.arraycopy(tsBytes, 0, kv.getBuffer(), tsOffset,
Bytes.SIZEOF_LONG);