tpalfy commented on a change in pull request #5381:
URL: https://github.com/apache/nifi/pull/5381#discussion_r744962782



##########
File path: nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/RecordFieldRemover.java
##########
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.record.path;
+
+import org.apache.nifi.serialization.record.Record;
+import org.apache.nifi.serialization.record.RecordFieldRemovalPath;
+import org.apache.nifi.serialization.record.RecordSchema;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+public class RecordFieldRemover {
+    private Record record;
+
+    public RecordFieldRemover(Record record) {
+        this.record = record;
+    }
+
+    public Record remove(String path) {
+        RecordPath recordPath = new RecordPath(path);
+        RecordPathResult recordPathResult = 
org.apache.nifi.record.path.RecordPath.compile(recordPath.toString()).evaluate(record);
+        List<FieldValue> selectedFields = 
recordPathResult.getSelectedFields().collect(Collectors.toList());
+
+        if (recordPath.isAppliedToAllElementsInCollection()) {
+            removeAllElementsFromCollection(selectedFields);
+        } else {
+            selectedFields.forEach(field -> field.remove());
+        }
+
+        if (recordPath.pathRemovalRequiresSchemaModification()) {
+            modifySchema(selectedFields);
+        }
+
+        record.regenerateSchema();
+        return record;
+    }
+
+    private void removeAllElementsFromCollection(List<FieldValue> 
selectedFields) {
+        if (!selectedFields.isEmpty()) {
+            Optional<FieldValue> parentOptional = 
selectedFields.get(0).getParent();
+            if (parentOptional.isPresent()) {
+                FieldValue parent = parentOptional.get();
+                parent.removeContent();
+            }
+        }
+    }
+
+    private void modifySchema(List<FieldValue> selectedFields) {
+        List<RecordFieldRemovalPath> concretePaths = 
getConcretePaths(selectedFields);
+        removePathsFromSchema(concretePaths);
+    }
+
+    private List<RecordFieldRemovalPath> getConcretePaths(List<FieldValue> 
selectedFields) {
+        List<RecordFieldRemovalPath> paths = new 
ArrayList<>(selectedFields.size());
+        for (FieldValue field : selectedFields) {
+            RecordFieldRemovalPath path = new RecordFieldRemovalPath();
+            addToPathIfNotRoot(field, path);
+
+            Optional<FieldValue> parentOptional = field.getParent();
+            while (parentOptional.isPresent()) {
+                FieldValue parent = parentOptional.get();
+                addToPathIfNotRoot(parent, path);
+                parentOptional = parent.getParent();
+            }
+
+            paths.add(path);
+        }
+        return paths;
+    }
+
+    private void removePathsFromSchema(List<RecordFieldRemovalPath> paths) {
+        for (RecordFieldRemovalPath path : paths) {
+            RecordSchema schema = record.getSchema();
+            schema.removePath(path);
+        }
+    }
+
+    private void addToPathIfNotRoot(FieldValue field, RecordFieldRemovalPath 
path) {
+        if (field.getParent().isPresent()) {
+            path.add(field.getField().getFieldName());
+        }
+    }
+
+    private static class RecordPath {
+        private String recordPath;
+
+        public RecordPath(final String recordPath) {
+            this.recordPath = preprocessRecordPath(recordPath);
+        }
+
+        public boolean isAppliedToAllElementsInCollection() {
+            return recordPath.endsWith("[*]") || 
recordPath.endsWith("[0..-1]");
+        }
+
+        @Override
+        public String toString() {
+            return recordPath;
+        }
+
+        private String preprocessRecordPath(final String recordPath) {
+            if (recordPath.endsWith("]")) {
+                return unifyRecordPathEnd(recordPath);
+            }
+            return recordPath;
+        }
+
+        private String unifyRecordPathEnd(final String recordPath) {
+            String lastSquareBracketsOperator = 
getLastSquareBracketsOperator(recordPath);
+            if (lastSquareBracketsOperator.equals("[*]")) {
+                return recordPath.substring(0, recordPath.lastIndexOf('[')) + 
"[*]";
+            } else if (lastSquareBracketsOperator.equals("[0..-1]")) {
+                return recordPath.substring(0, recordPath.lastIndexOf('[')) + 
"[0..-1]";
+            } else {
+                return recordPath;
+            }
+        }
+
+        private String getLastSquareBracketsOperator(final String recordPath) {
+            int beginIndex = recordPath.lastIndexOf('[');
+            return recordPath.substring(beginIndex).replaceAll("\\s","");
+        }
+
+        public boolean pathRemovalRequiresSchemaModification() {
+            return allSquareBracketsContainAsteriskOnly(recordPath);
+        }
+
+        private boolean allSquareBracketsContainAsteriskOnly(String 
recordPath) {
+            boolean allSquareBracketsContainAsteriskOnly = true;
+            boolean inSquareBrackets = false;
+            for (int i = 0; i < recordPath.length() && 
allSquareBracketsContainAsteriskOnly; ++i) {
+                char character = recordPath.charAt(i);
+                if (inSquareBrackets) {
+                    switch (character) {
+                        case ' ':
+                        case '*':
+                            break;
+                        case ']':
+                            inSquareBrackets = false;
+                            break;
+                        default:
+                            allSquareBracketsContainAsteriskOnly = false;
+                    }
+                } else {
+                    if (character == '[') {
+                        inSquareBrackets = true;
+                    }
+                }
+            }
+            return allSquareBracketsContainAsteriskOnly;
+        }
+    }

Review comment:
       The nested `RecordPath` helper class could be significantly simplified by replacing it with two regex-based methods:
   
   ```suggestion
       private boolean isAppliedToAllElementsInCollection(String recordPath) {
           // ends with [*] or [0..-1]
           return recordPath.matches(".*\\[\\s*\\*\\s*\\]$") || 
recordPath.matches(".*\\[\\s*0\\s*\\.\\.\\s*\\-1\\s*\\]$");
       }
   
       private boolean pathRemovalRequiresSchemaModification(String recordPath) 
{
           boolean pathNotReferencesIndividualArrayElements = 
!recordPath.matches(".*\\[(\\s*\\d+(\\s*,\\s*)?)+\\].*");
           boolean pathNotReferencesIndividualMapElements = 
!recordPath.matches(".*\\[\\s*'.*'\\s*\\].*");
   
           return pathNotReferencesIndividualArrayElements && 
pathNotReferencesIndividualMapElements;
       }
   ```

##########
File path: nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestRemoveRecordField.java
##########
@@ -0,0 +1,424 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.processors.standard;
+
+import org.apache.nifi.json.JsonRecordSetWriter;
+import org.apache.nifi.json.JsonTreeReader;
+import org.apache.nifi.reporting.InitializationException;
+import org.apache.nifi.schema.access.SchemaAccessUtils;
+import org.apache.nifi.util.MockFlowFile;
+import org.apache.nifi.util.TestRunner;
+import org.apache.nifi.util.TestRunners;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.HashMap;
+import java.util.Map;
+
+public class TestRemoveRecordField {
+
+    private TestRunner runner;
+
+    @Before
+    public void setup() throws InitializationException {
+        runner = TestRunners.newTestRunner(RemoveRecordField.class);
+    }
+
+    @Test
+    public void testRemoveSimpleFieldThatIsMissingFromOneRecord() throws 
InitializationException, IOException {
+        // GIVEN
+        String inputSchema = 
"src/test/resources/TestRemoveRecordField/input_schema/complex-person.avsc";
+        String inputFlowFile = 
"src/test/resources/TestRemoveRecordField/input/complex-person.json";
+        String outPutSchema = 
"src/test/resources/TestRemoveRecordField/output_schema/complex-person-no-dateOfBirth.avsc";
+        String outPutFlowFile = 
"src/test/resources/TestRemoveRecordField/output/complex-person-no-dateOfBirth.json";
+        String fieldToRemove = "/dateOfBirth";
+
+        executeRemovalTest(inputSchema, inputFlowFile, outPutSchema, 
outPutFlowFile, fieldToRemove);
+    }
+
+    @Test
+    public void testRemoveComplexFieldThatIsMissingFromOneRecord() throws 
InitializationException, IOException {
+        // GIVEN
+        String inputSchema = 
"src/test/resources/TestRemoveRecordField/input_schema/complex-person.avsc";
+        String inputFlowFile = 
"src/test/resources/TestRemoveRecordField/input/complex-person.json";
+        String outPutSchema = 
"src/test/resources/TestRemoveRecordField/output_schema/complex-person-no-workAddress.avsc";
+        String outPutFlowFile = 
"src/test/resources/TestRemoveRecordField/output/complex-person-no-workAddress.json";
+        String fieldToRemove = "/workAddress";
+
+        executeRemovalTest(inputSchema, inputFlowFile, outPutSchema, 
outPutFlowFile, fieldToRemove);
+    }
+
+    @Test
+    public void testRemoveFieldFromDeepStructure() throws 
InitializationException, IOException {
+        // GIVEN
+        String inputSchema = 
"src/test/resources/TestRemoveRecordField/input_schema/complex-person.avsc";
+        String inputFlowFile = 
"src/test/resources/TestRemoveRecordField/input/complex-person.json";
+        String outPutSchema = 
"src/test/resources/TestRemoveRecordField/output_schema/complex-person-no-workAddress-zip.avsc";
+        String outPutFlowFile = 
"src/test/resources/TestRemoveRecordField/output/complex-person-no-workAddress-zip.json";
+        String fieldToRemove = "/workAddress/zip";
+
+        executeRemovalTest(inputSchema, inputFlowFile, outPutSchema, 
outPutFlowFile, fieldToRemove);
+    }
+
+    @Test
+    public void testRemoveFieldFromDeepStructureWithRelativePath() throws 
InitializationException, IOException {
+        // GIVEN
+        String inputSchema = 
"src/test/resources/TestRemoveRecordField/input_schema/complex-person.avsc";
+        String inputFlowFile = 
"src/test/resources/TestRemoveRecordField/input/complex-person.json";
+        String outPutSchema = 
"src/test/resources/TestRemoveRecordField/output_schema/complex-person-no-zip.avsc";
+        String outPutFlowFile = 
"src/test/resources/TestRemoveRecordField/output/complex-person-no-zip.json";
+        String fieldToRemove = "//zip";
+
+        executeRemovalTest(inputSchema, inputFlowFile, outPutSchema, 
outPutFlowFile, fieldToRemove);
+    }
+
+    @Test
+    public void testRemoveFieldFrom3LevelDeepStructure() throws 
InitializationException, IOException {
+        // GIVEN
+        String inputSchema = 
"src/test/resources/TestRemoveRecordField/input_schema/complex-person.avsc";
+        String inputFlowFile = 
"src/test/resources/TestRemoveRecordField/input/complex-person.json";
+        String outPutSchema = 
"src/test/resources/TestRemoveRecordField/output_schema/complex-person-no-workAddress-building-letter.avsc";
+        String outPutFlowFile = 
"src/test/resources/TestRemoveRecordField/output/complex-person-no-workAddress-building-letter.json";
+        String fieldToRemove = "/workAddress/building/letter";
+
+        executeRemovalTest(inputSchema, inputFlowFile, outPutSchema, 
outPutFlowFile, fieldToRemove);
+    }
+

Review comment:
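       I'd add two more tests around here: one that removes a nested field through a descendant path (`/workAddress//letter`), and one whose path goes through a non-existent field (`/workAddress/nonExistent/letter`), which is expected to leave the schema and the records unmodified: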
   ```java
       @Test
       public void betterNamePending() throws InitializationException, 
IOException {
           // GIVEN
           String fieldToRemove = "/workAddress//letter";
   
           String inputSchema = 
"src/test/resources/TestRemoveRecordField/input_schema/complex-person.avsc";
           String outPutSchema = 
"src/test/resources/TestRemoveRecordField/output_schema/complex-person-no-workAddress-building-letter.avsc";
           
           String inputFlowFile = 
"src/test/resources/TestRemoveRecordField/input/complex-person.json";
           String outPutFlowFile = 
"src/test/resources/TestRemoveRecordField/output/complex-person-no-workAddress-building-letter.json";
   
           executeRemovalTest(inputSchema, inputFlowFile, outPutSchema, 
outPutFlowFile, fieldToRemove);
       }
   
       @Test
       public void betterNamePending() throws InitializationException, 
IOException {
           // GIVEN
           String fieldToRemove = "/workAddress/nonExistent/letter";
           
           String inputSchema = 
"src/test/resources/TestRemoveRecordField/input_schema/complex-person.avsc";
           String outPutSchema = 
"src/test/resources/TestRemoveRecordField/output_schema/complex-person.avsc";
           
           String inputFlowFile = 
"src/test/resources/TestRemoveRecordField/input/complex-person.json";
           String outPutFlowFile = 
"src/test/resources/TestRemoveRecordField/output/complex-person.json";
   
           executeRemovalTest(inputSchema, inputFlowFile, outPutSchema, 
outPutFlowFile, fieldToRemove);
       }
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to