dan-s1 commented on code in PR #8229:
URL: https://github.com/apache/nifi/pull/8229#discussion_r1450508701
##########
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateCsv.java:
##########
@@ -619,18 +632,83 @@ public NifiCsvListReader(Reader reader, CsvPreference
preferences) {
super(reader, preferences);
}
- @Override
- public List<Object> read(CellProcessor... processors) throws
IOException {
+ public List<Object> read(boolean getAllViolations, CellProcessor...
processors) throws IOException {
if( processors == null ) {
throw new NullPointerException("Processors should not be
null");
}
if( readRow() ) {
- super.executeProcessors(new
ArrayList<Object>(getColumns().size()), processors);
+ executeProcessors(new ArrayList<Object>(getColumns().size()),
processors, getAllViolations);
return new ArrayList<Object>(getColumns());
Review Comment:
```suggestion
executeProcessors(new ArrayList<>(getColumns().size()),
processors, getAllViolations);
return new ArrayList<>(getColumns());
```
##########
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateCsv.java:
##########
@@ -619,18 +632,83 @@ public NifiCsvListReader(Reader reader, CsvPreference
preferences) {
super(reader, preferences);
}
- @Override
- public List<Object> read(CellProcessor... processors) throws
IOException {
+ public List<Object> read(boolean getAllViolations, CellProcessor...
processors) throws IOException {
if( processors == null ) {
throw new NullPointerException("Processors should not be
null");
}
if( readRow() ) {
- super.executeProcessors(new
ArrayList<Object>(getColumns().size()), processors);
+ executeProcessors(new ArrayList<Object>(getColumns().size()),
processors, getAllViolations);
Review Comment:
See earlier comment
```suggestion
executeProcessors(new
ArrayList<Object>(getColumns().size()), processors, allViolations);
```
##########
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateCsv.java:
##########
@@ -619,18 +632,83 @@ public NifiCsvListReader(Reader reader, CsvPreference
preferences) {
super(reader, preferences);
}
- @Override
- public List<Object> read(CellProcessor... processors) throws
IOException {
+ public List<Object> read(boolean getAllViolations, CellProcessor...
processors) throws IOException {
if( processors == null ) {
throw new NullPointerException("Processors should not be
null");
}
if( readRow() ) {
- super.executeProcessors(new
ArrayList<Object>(getColumns().size()), processors);
+ executeProcessors(new ArrayList<Object>(getColumns().size()),
processors, getAllViolations);
return new ArrayList<Object>(getColumns());
}
return null; // EOF
}
+ protected List<Object> executeProcessors(List<Object>
processedColumns, CellProcessor[] processors, boolean getAllViolations) {
+ this.executeCellProcessors(processedColumns, getColumns(),
processors, getLineNumber(), getRowNumber(), getAllViolations);
+ return processedColumns;
Review Comment:
See earlier comment
```suggestion
protected List<Object> executeProcessors(List<Object>
processedColumns, CellProcessor[] processors, boolean allViolations) {
this.executeCellProcessors(processedColumns, getColumns(),
processors, getLineNumber(), getRowNumber(), allViolations);
return processedColumns;
```
##########
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateCsv.java:
##########
@@ -619,18 +632,83 @@ public NifiCsvListReader(Reader reader, CsvPreference
preferences) {
super(reader, preferences);
}
- @Override
- public List<Object> read(CellProcessor... processors) throws
IOException {
+ public List<Object> read(boolean getAllViolations, CellProcessor...
processors) throws IOException {
if( processors == null ) {
throw new NullPointerException("Processors should not be
null");
}
if( readRow() ) {
- super.executeProcessors(new
ArrayList<Object>(getColumns().size()), processors);
+ executeProcessors(new ArrayList<Object>(getColumns().size()),
processors, getAllViolations);
return new ArrayList<Object>(getColumns());
}
return null; // EOF
}
+ protected List<Object> executeProcessors(List<Object>
processedColumns, CellProcessor[] processors, boolean getAllViolations) {
+ this.executeCellProcessors(processedColumns, getColumns(),
processors, getLineNumber(), getRowNumber(), getAllViolations);
+ return processedColumns;
+ }
+
+ private void executeCellProcessors(final List<Object> destination,
final List<?> source,
+ final CellProcessor[] processors, final int lineNo, final int
rowNo, boolean getAllViolations) {
+
+ if( destination == null ) {
+ throw new NullPointerException("destination should not be
null");
+ } else if( source == null ) {
+ throw new NullPointerException("source should not be
null");
+ } else if( processors == null ) {
+ throw new NullPointerException("processors should not be
null");
+ }
+
+ // the context used when cell processors report exceptions
+ final CsvContext context = new CsvContext(lineNo, rowNo, 1);
+ context.setRowSource(new ArrayList<Object>(source));
+
+ if( source.size() != processors.length ) {
+ throw new SuperCsvException(String.format(
+ "The number of columns to be processed (%d) must match
the number of CellProcessors (%d): check that the number"
+ + " of CellProcessors you have defined matches the
expected number of columns being read/written",
+ source.size(), processors.length), context);
+ }
+
+ destination.clear();
+
+ List<String> errors = new ArrayList<String>();
Review Comment:
```suggestion
List<String> errors = new ArrayList<>();
```
##########
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateCsv.java:
##########
@@ -174,6 +175,19 @@ public class ValidateCsv extends AbstractProcessor {
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
+ public static final PropertyDescriptor GET_ALL_VIOLATIONS = new
PropertyDescriptor.Builder()
+ .name("validate-csv-violations")
+ .displayName("Get all violations")
+ .description("If true, the validation.error.message attribute
would contain the list of all the violations"
+ + " for the first invalid line. Note that setting this
property to true would slightly decrease"
+ + " the performances as all columns would be validated. If
false, a line is invalid as soon as a"
+ + " column is found violating the specified constraint and
only this violation for the first invalid"
+ + " line will be indicated in the validation.error.message
attribute.")
+ .required(true)
+ .allowableValues("true", "false")
+ .defaultValue("false")
+ .build();
Review Comment:
Verbs such as `get` are usually for methods. In my opinion I think
`ALL_VIOLATIONS` conveys the same point.
```suggestion
public static final PropertyDescriptor ALL_VIOLATIONS = new
PropertyDescriptor.Builder()
.name("validate-csv-violations")
.displayName("All violations")
.description("If true, the validation.error.message attribute
would contain the list of all the violations"
+ " for the first invalid line. Note that setting this
property to true would slightly decrease"
+ " the performances as all columns would be validated.
If false, a line is invalid as soon as a"
+ " column is found violating the specified constraint
and only this violation for the first invalid"
+ " line will be indicated in the
validation.error.message attribute.")
.required(true)
.allowableValues("true", "false")
.defaultValue("false")
.build();
```
##########
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateCsv.java:
##########
@@ -619,18 +632,83 @@ public NifiCsvListReader(Reader reader, CsvPreference
preferences) {
super(reader, preferences);
}
- @Override
- public List<Object> read(CellProcessor... processors) throws
IOException {
+ public List<Object> read(boolean getAllViolations, CellProcessor...
processors) throws IOException {
if( processors == null ) {
throw new NullPointerException("Processors should not be
null");
}
if( readRow() ) {
- super.executeProcessors(new
ArrayList<Object>(getColumns().size()), processors);
+ executeProcessors(new ArrayList<Object>(getColumns().size()),
processors, getAllViolations);
return new ArrayList<Object>(getColumns());
}
return null; // EOF
}
+ protected List<Object> executeProcessors(List<Object>
processedColumns, CellProcessor[] processors, boolean getAllViolations) {
+ this.executeCellProcessors(processedColumns, getColumns(),
processors, getLineNumber(), getRowNumber(), getAllViolations);
+ return processedColumns;
+ }
+
+ private void executeCellProcessors(final List<Object> destination,
final List<?> source,
+ final CellProcessor[] processors, final int lineNo, final int
rowNo, boolean getAllViolations) {
+
+ if( destination == null ) {
+ throw new NullPointerException("destination should not be
null");
+ } else if( source == null ) {
+ throw new NullPointerException("source should not be
null");
+ } else if( processors == null ) {
+ throw new NullPointerException("processors should not be
null");
+ }
+
+ // the context used when cell processors report exceptions
+ final CsvContext context = new CsvContext(lineNo, rowNo, 1);
+ context.setRowSource(new ArrayList<Object>(source));
Review Comment:
```suggestion
context.setRowSource(new ArrayList<>(source));
```
##########
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateCsv.java:
##########
@@ -446,6 +461,7 @@ public void onTrigger(final ProcessContext context, final
ProcessSession session
final String schema =
context.getProperty(SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
final CellProcessor[] cellProcs = this.parseSchema(schema);
final boolean isWholeFFValidation =
context.getProperty(VALIDATION_STRATEGY).getValue().equals(VALIDATE_WHOLE_FLOWFILE.getValue());
+ final boolean getAllViolations =
context.getProperty(GET_ALL_VIOLATIONS).asBoolean();
Review Comment:
See earlier comment.
```suggestion
final boolean allViolations =
context.getProperty(ALL_VIOLATIONS).asBoolean();
```
##########
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateCsv.java:
##########
@@ -619,18 +632,83 @@ public NifiCsvListReader(Reader reader, CsvPreference
preferences) {
super(reader, preferences);
}
- @Override
- public List<Object> read(CellProcessor... processors) throws
IOException {
+ public List<Object> read(boolean getAllViolations, CellProcessor...
processors) throws IOException {
if( processors == null ) {
throw new NullPointerException("Processors should not be
null");
}
if( readRow() ) {
- super.executeProcessors(new
ArrayList<Object>(getColumns().size()), processors);
+ executeProcessors(new ArrayList<Object>(getColumns().size()),
processors, getAllViolations);
return new ArrayList<Object>(getColumns());
}
return null; // EOF
}
+ protected List<Object> executeProcessors(List<Object>
processedColumns, CellProcessor[] processors, boolean getAllViolations) {
+ this.executeCellProcessors(processedColumns, getColumns(),
processors, getLineNumber(), getRowNumber(), getAllViolations);
+ return processedColumns;
+ }
+
+ private void executeCellProcessors(final List<Object> destination,
final List<?> source,
+ final CellProcessor[] processors, final int lineNo, final int
rowNo, boolean getAllViolations) {
+
+ if( destination == null ) {
+ throw new NullPointerException("destination should not be
null");
+ } else if( source == null ) {
+ throw new NullPointerException("source should not be
null");
+ } else if( processors == null ) {
+ throw new NullPointerException("processors should not be
null");
+ }
Review Comment:
I do not think this whole block is needed.
- Following the code it seems to me `destination` will never be null as
there is always a new list instantiated on line 640.
- `source` I do not think can be null as it comes from the call of
`getColumns()` (line 647) which I see in `org.supercsv.io.AbstractCsvReader` on
line 20 that columns is not null since its instantiated with an empty list.
- `processors` do not need to be checked as this check was already done on
lines 636-638
```suggestion
```
##########
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateCsv.java:
##########
@@ -619,18 +632,83 @@ public NifiCsvListReader(Reader reader, CsvPreference
preferences) {
super(reader, preferences);
}
- @Override
- public List<Object> read(CellProcessor... processors) throws
IOException {
+ public List<Object> read(boolean getAllViolations, CellProcessor...
processors) throws IOException {
if( processors == null ) {
throw new NullPointerException("Processors should not be
null");
}
if( readRow() ) {
- super.executeProcessors(new
ArrayList<Object>(getColumns().size()), processors);
+ executeProcessors(new ArrayList<Object>(getColumns().size()),
processors, getAllViolations);
return new ArrayList<Object>(getColumns());
}
return null; // EOF
}
+ protected List<Object> executeProcessors(List<Object>
processedColumns, CellProcessor[] processors, boolean getAllViolations) {
+ this.executeCellProcessors(processedColumns, getColumns(),
processors, getLineNumber(), getRowNumber(), getAllViolations);
+ return processedColumns;
+ }
+
+ private void executeCellProcessors(final List<Object> destination,
final List<?> source,
+ final CellProcessor[] processors, final int lineNo, final int
rowNo, boolean getAllViolations) {
Review Comment:
See earlier comment
```suggestion
final CellProcessor[] processors, final int lineNo, final
int rowNo, boolean allViolations) {
```
##########
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateCsv.java:
##########
@@ -619,18 +632,83 @@ public NifiCsvListReader(Reader reader, CsvPreference
preferences) {
super(reader, preferences);
}
- @Override
- public List<Object> read(CellProcessor... processors) throws
IOException {
+ public List<Object> read(boolean getAllViolations, CellProcessor...
processors) throws IOException {
if( processors == null ) {
throw new NullPointerException("Processors should not be
null");
}
if( readRow() ) {
- super.executeProcessors(new
ArrayList<Object>(getColumns().size()), processors);
+ executeProcessors(new ArrayList<Object>(getColumns().size()),
processors, getAllViolations);
return new ArrayList<Object>(getColumns());
}
return null; // EOF
}
+ protected List<Object> executeProcessors(List<Object>
processedColumns, CellProcessor[] processors, boolean getAllViolations) {
+ this.executeCellProcessors(processedColumns, getColumns(),
processors, getLineNumber(), getRowNumber(), getAllViolations);
+ return processedColumns;
+ }
+
+ private void executeCellProcessors(final List<Object> destination,
final List<?> source,
+ final CellProcessor[] processors, final int lineNo, final int
rowNo, boolean getAllViolations) {
+
+ if( destination == null ) {
+ throw new NullPointerException("destination should not be
null");
+ } else if( source == null ) {
+ throw new NullPointerException("source should not be
null");
+ } else if( processors == null ) {
+ throw new NullPointerException("processors should not be
null");
+ }
+
+ // the context used when cell processors report exceptions
+ final CsvContext context = new CsvContext(lineNo, rowNo, 1);
+ context.setRowSource(new ArrayList<Object>(source));
+
+ if( source.size() != processors.length ) {
+ throw new SuperCsvException(String.format(
+ "The number of columns to be processed (%d) must match
the number of CellProcessors (%d): check that the number"
+ + " of CellProcessors you have defined matches the
expected number of columns being read/written",
+ source.size(), processors.length), context);
+ }
+
+ destination.clear();
+
+ List<String> errors = new ArrayList<String>();
+
+ for( int i = 0; i < source.size(); i++ ) {
+
+ try {
+
+ context.setColumnNumber(i + 1); // update context
(columns start at 1)
+
+ if( processors[i] == null ) {
+ destination.add(source.get(i)); // no processing
required
+ } else {
+
destination.add(processors[i].execute(source.get(i), context)); // execute the
processor chain
+ }
+
+ } catch (SuperCsvException e) {
+
+ if(getAllViolations) {
Review Comment:
See earlier comment
```suggestion
if(allViolations) {
```
##########
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateCsv.java:
##########
@@ -496,7 +512,7 @@ public void process(OutputStream out) throws IOException {
try {
// read next row and check if no more row
- stop = listReader.read(cellProcs) == null;
+ stop = listReader.read(getAllViolations &&
valid.get(), cellProcs) == null;
Review Comment:
See earlier comment
```suggestion
stop = listReader.read(allViolations &&
valid.get(), cellProcs) == null;
```
##########
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestValidateCsv.java:
##########
@@ -111,18 +111,19 @@ public void testValidDateOptionalDouble() {
runner.setProperty(ValidateCsv.END_OF_LINE_CHARACTER, "\r\n");
runner.setProperty(ValidateCsv.QUOTE_CHARACTER, "\"");
runner.setProperty(ValidateCsv.HEADER, "false");
+ runner.setProperty(ValidateCsv.GET_ALL_VIOLATIONS, "true");
Review Comment:
See earlier comment
```suggestion
runner.setProperty(ValidateCsv.ALL_VIOLATIONS, "true");
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]