virajjasani commented on code in PR #1906: URL: https://github.com/apache/phoenix/pull/1906#discussion_r1709728727
########## phoenix-core-client/src/main/java/org/apache/phoenix/expression/util/bson/UpdateExpressionUtils.java: ########## @@ -0,0 +1,950 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.phoenix.expression.util.bson; + +import java.math.BigDecimal; +import java.text.NumberFormat; +import java.text.ParseException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.bson.BsonArray; +import org.bson.BsonDecimal128; +import org.bson.BsonDocument; +import org.bson.BsonDouble; +import org.bson.BsonInt32; +import org.bson.BsonInt64; +import org.bson.BsonNumber; +import org.bson.BsonString; +import org.bson.BsonValue; +import org.bson.types.Decimal128; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * BSON Update Expression Utility to perform the Document updates. All update expressions + * provided by this utility supports operations on nested document fields. The field key can + * represent any top level or nested fields within the document. The caller should use "." 
+ * notation for accessing nested document elements and "[n]" notation for accessing nested array + * elements. Top level fields do not require any additional character. + */ +public class UpdateExpressionUtils { + + private static final Logger LOGGER = LoggerFactory.getLogger(UpdateExpressionUtils.class); + + /** + * Updates the given document based on the update expression. + * <p/> + * { + * "$SET": { <field1>: <value1>, <field2>: <value2>, .... }, + * "$UNSET": { <field1>: null, <field2>: null, ... }, + * "$ADD": { <field1>: <value1>, <field2>: <value2>, .... }, + * "$DELETE_FROM_SET": { <field1>: <value1>, <field2>: <value2>, .... } + * } + * <p/> + * "$SET": Use the SET action in an update expression to add one or more fields to a BSON + * Document. If any of these fields already exists, they are overwritten by the new values. + * To perform multiple SET actions, provide multiple fields key-value entries within the nested + * document under $SET field key. + * "$UNSET": Use the UNSET action in an update expression to unset or remove one or more fields + * from a BSON Document. To perform multiple UNSET actions, provide multiple field key-value + * entries within the nested document under $UNSET field key. + * "$ADD": Use the ADD action in an update expression to add a new field and its values to a + * BSON document. If the field already exists, the behavior of ADD depends on the field's + * data type: + * <p/> + * 1. If the field is a number, and the value you are adding is also a number, the value is + * mathematically added to the existing field. + * <p/> + * 2. If the field is a set, and the value you are adding is also a set, the value is appended + * to the existing set. + * <p/> + * "$DELETE_FROM_SET": Use the DELETE action in an update expression to remove one or more + * elements from a set. To perform multiple DELETE actions, provide multiple field key-value + * entries within the nested document under $DELETE_FROM_SET field key. 
+ * Definition of path and subset in the context of the expression: + * <p/> + * 1. The path element is the document path to a field. The field must be a set data type. + * 2. The subset is one or more elements that you want to delete from the given path. Subset + * must be of set type. + * <p/> + * + * @param updateExpression Update Expression as a document. + * @param bsonDocument Document contents to be updated. + */ + public static void updateExpression(final BsonDocument updateExpression, + final BsonDocument bsonDocument) { + + LOGGER.info("Update Expression: {} , current bsonDocument: {}", updateExpression, bsonDocument); + + if (updateExpression.containsKey("$SET")) { + executeSetExpression((BsonDocument) updateExpression.get("$SET"), bsonDocument); + } + + if (updateExpression.containsKey("$UNSET")) { + executeRemoveExpression((BsonDocument) updateExpression.get("$UNSET"), bsonDocument); + } + + if (updateExpression.containsKey("$ADD")) { + executeAddExpression((BsonDocument) updateExpression.get("$ADD"), bsonDocument); + } + + if (updateExpression.containsKey("$DELETE_FROM_SET")) { + executeDeleteExpression((BsonDocument) updateExpression.get("$DELETE_FROM_SET"), + bsonDocument); + } + } + + /** + * Update the given document by performing DELETE operation. This operation is applicable + * only on Set data structure. The document is updated by removing the given set of elements from + * the given set of elements. + * Let's say if the document field is of string set data type, and the elements are: + * {"yellow", "green", "red", "blue"}. The elements to be removed from the set are provided + * as {"blue", "yellow"} with the delete expression. The operation is expected to update the + * existing field by removing "blue" and "yellow" from the given set and the resultant set is + * expected to contain: {"green", "red"}. + * + * @param deleteExpr Delete Expression Document with key-value pairs. 
Key represents field in the + * given document, on which operation is to be performed. Value represents set of elements to be + * removed from the existing set. + * @param bsonDocument Document contents to be updated. + */ + private static void executeDeleteExpression(final BsonDocument deleteExpr, + final BsonDocument bsonDocument) { + for (Map.Entry<String, BsonValue> deleteEntry : deleteExpr.entrySet()) { + String fieldKey = deleteEntry.getKey(); + BsonValue newVal = deleteEntry.getValue(); + BsonValue topLevelValue = bsonDocument.get(fieldKey); + if (!isBsonSet(newVal)) { + throw new RuntimeException("Type of new value to be removed should be sets only"); + } + // If the top level field exists, perform the operation here and return. + if (topLevelValue != null) { + BsonValue value = modifyFieldValueByDelete(topLevelValue, newVal); + if (value == null) { + bsonDocument.remove(fieldKey); + } else { + bsonDocument.put(fieldKey, value); + } + } else if (!fieldKey.contains(".") && !fieldKey.contains("[")) { + LOGGER.info("Nothing to be removed as field with key {} does not exist", fieldKey); + } else { + // If the top level field does not exist and the field key contains "." or "[]" notations + // for nested document or nested array, use the self-recursive function to go through + // the document tree, one level at a time. + updateNestedFieldByDelete(bsonDocument, 0, fieldKey, newVal); + } + } + } + + /** + * Update the nested field with $DELETE_FROM_SET operation. The field key is expected to contain + * "." and/or "[]" notations for nested documents and/or nested array elements. This function + * keeps recursively calling itself until it reaches the leaf node in the given tree. + * For instance, for field key "category.subcategories.brands[5]", first the function + * evaluates and retrieves the value for top-level field "category". The value of "category" + * is expected to be nested document. 
First function call has value as full document, it retries + * nested document under "category" field and calls the function recursively with index value + * same as index value of first "." (dot) in the field key. For field key + * "category.subcategories.brands[5]", the index value would be 8. The second function call + * retrieves value of field key "subcategories", which is expected to be nested document. + * The third function call gets this nested document as BsonValue and index value as 22 as the + * field key has second "." (dot) notation at index 22. The third function call searches for + * field key "brands" and expects its value as nested array. The forth function call gets + * this nested array as BsonValue and index 29 as the field key has "[]" array notation starting + * at index 29. The forth function call retrieves value of nested array element at index 5. + * As the function is at leaf node in the tree, now it performs the $DELETE_FROM_SET operation + * as per the $DELETE_FROM_SET semantics. + * + * @param value Bson value at the given level of the document hierarchy. + * @param idx The index value for the given field key. The function is expected to retrieve + * the value of the nested document or array at the given level of the tree. + * @param fieldKey The full field key. + * @param setValuesToDelete The set values that need to be removed from the existing set. + */ + private static void updateNestedFieldByDelete(final BsonValue value, + final int idx, final String fieldKey, final BsonValue setValuesToDelete) { + int curIdx = idx; + if (fieldKey.charAt(curIdx) == '.') { Review Comment: Also, even if we use function to return ancestor node, for (deeply) nested arrays, the caller still needs the array index on which to perform the update operation so the caller will need to parse the field key string from the last index position to find the leaf node (nested array) index position and then perform the operation. 
Hence, this approach is also not any simpler than the current way of doing recursion (the procedural way) in which, although we return void, we do take care of updating the value at the target leaf node. The recursion followed by updating the target node is not expected to be buggy. The same applies to (deeply) nested documents too. While the caller gets the value of the ancestor node, it still needs to reverse-search the target leaf node key from the full field key. This way of doing recursion actually looks far too complicated to read. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
