This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 0e432a003a308203db19ea752c64a1bd457d851f Author: Ali Alsuliman <ali.al.solai...@gmail.com> AuthorDate: Tue Aug 25 21:35:22 2020 -0700 [NO ISSUE][RT] Fix upserting into secondary indexes - user model changes: no - storage format changes: no - interface changes: no Details: - do not upsert tuples that have a null/missing value in any SK field into secondary indexes (whether composite or non-composite ones). Change-Id: I9cc94de34b5ef1dfd5e1e7b6b9b35cc7316759ab Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/7443 Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Reviewed-by: Michael Blow <mb...@apache.org> --- .../asterix/common/storage/IndexCheckpoint.java | 2 +- .../LSMSecondaryUpsertOperatorNodePushable.java | 41 +++++++++++++--------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/IndexCheckpoint.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/IndexCheckpoint.java index 3d0b9cb..878c94e 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/IndexCheckpoint.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/IndexCheckpoint.java @@ -38,7 +38,7 @@ public class IndexCheckpoint { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final long INITIAL_CHECKPOINT_ID = 0; // TODO(mblow): remove this marker & related logic once we no longer are able to read indexes prior to the fix - private static final long HAS_NULL_MISSING_VALUES_FIX = -1; + private static final long HAS_NULL_MISSING_VALUES_FIX = -2; private long id; private long validComponentSequence; private long lowWatermark; diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/LSMSecondaryUpsertOperatorNodePushable.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/LSMSecondaryUpsertOperatorNodePushable.java index aa5775e..35ae904 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/LSMSecondaryUpsertOperatorNodePushable.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/LSMSecondaryUpsertOperatorNodePushable.java @@ -45,21 +45,20 @@ import org.apache.hyracks.storage.am.lsm.common.dataflow.LSMIndexInsertUpdateDel * This operator node is used for secondary indexes with upsert operations. * It works in the following way: * For each incoming tuple - * -If old secondary keys == new secondary keys + * -If old secondary index tuple == new secondary index tuple * --do nothing * -else - * --If old secondary keys are null? + * --If any old field is null/missing? * ---do nothing * --else - * ---delete old secondary keys - * --If new keys are null? + * ---delete old secondary index tuple + * --If any new field is null/missing? * ---do nothing * --else - * ---insert new keys + * ---insert new secondary index tuple */ public class LSMSecondaryUpsertOperatorNodePushable extends LSMIndexInsertUpdateDeleteOperatorNodePushable { - private static final int NULL_MISSING_FIELD_INDEX = 0; private final PermutingFrameTupleReference prevValueTuple = new PermutingFrameTupleReference(); private final int upsertIndicatorFieldIndex; private final IBinaryBooleanInspector upsertIndicatorInspector; @@ -105,9 +104,9 @@ public class LSMSecondaryUpsertOperatorNodePushable extends LSMIndexInsertUpdate tuple.reset(accessor, i); prevValueTuple.reset(accessor, i); - boolean isNewValueNullOrMissing = isNullOrMissing(tuple); - boolean isOldValueNullOrMissing = isNullOrMissing(prevValueTuple); - if (isNewValueNullOrMissing && isOldValueNullOrMissing) { + boolean newTupleHasNullOrMissing = hasNullOrMissing(tuple); + boolean oldTupleHasNullOrMissing = hasNullOrMissing(prevValueTuple); + if (newTupleHasNullOrMissing && oldTupleHasNullOrMissing) { // No op continue; } @@ -118,13 +117,13 @@ public class LSMSecondaryUpsertOperatorNodePushable extends LSMIndexInsertUpdate // which are always the same continue; } - if (!isOldValueNullOrMissing) { - // We need to delete previous + // if all old fields are known values, then delete. skip deleting if any is null or missing + if (!oldTupleHasNullOrMissing) { abstractModCallback.setOp(Operation.DELETE); lsmAccessor.forceDelete(prevValueTuple); } - if (isUpsert && !isNewValueNullOrMissing) { - // we need to insert the new value + // if all new fields are known values, then insert. skip inserting if any is null or missing + if (isUpsert && !newTupleHasNullOrMissing) { abstractModCallback.setOp(Operation.INSERT); lsmAccessor.forceInsert(tuple); } @@ -138,8 +137,18 @@ public class LSMSecondaryUpsertOperatorNodePushable extends LSMIndexInsertUpdate FrameUtils.flushFrame(writeBuffer.getBuffer(), writer); } - private static boolean isNullOrMissing(PermutingFrameTupleReference tuple) { - return TypeTagUtil.isType(tuple, NULL_MISSING_FIELD_INDEX, ATypeTag.SERIALIZED_NULL_TYPE_TAG) - || TypeTagUtil.isType(tuple, NULL_MISSING_FIELD_INDEX, ATypeTag.SERIALIZED_MISSING_TYPE_TAG); + private boolean hasNullOrMissing(PermutingFrameTupleReference tuple) { + int fieldCount = tuple.getFieldCount(); + for (int i = 0; i < fieldCount; i++) { + if (isNullOrMissing(tuple, i)) { + return true; + } + } + return false; + } + + private static boolean isNullOrMissing(PermutingFrameTupleReference tuple, int fieldIdx) { + return TypeTagUtil.isType(tuple, fieldIdx, ATypeTag.SERIALIZED_NULL_TYPE_TAG) + || TypeTagUtil.isType(tuple, fieldIdx, ATypeTag.SERIALIZED_MISSING_TYPE_TAG); } }