[ 
https://issues.apache.org/jira/browse/NIFI-3497?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15896382#comment-15896382
 ] 

ASF GitHub Bot commented on NIFI-3497:
--------------------------------------

Github user joewitt commented on a diff in the pull request:

    https://github.com/apache/nifi/pull/1564#discussion_r104316866
  
    --- Diff: 
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ScanAttribute.java
 ---
    @@ -207,36 +253,62 @@ public void onTrigger(final ProcessContext context, 
final ProcessSession session
     
             final boolean matchAll = 
context.getProperty(MATCHING_CRITERIA).getValue().equals(MATCH_CRITERIA_ALL);
     
    -        for (final FlowFile flowFile : flowFiles) {
    -            final boolean matched = matchAll ? allMatch(flowFile, 
attributePattern, dictionaryTerms) : anyMatch(flowFile, attributePattern, 
dictionaryTerms);
    -            final Relationship relationship = matched ? REL_MATCHED : 
REL_UNMATCHED;
    +        for  (FlowFile flowFile : flowFiles) {
    +            final Map<String,String> matched = (matchAll ? 
matchAll(flowFile, attributePattern, dictionaryTerms) : matchAny(flowFile, 
attributePattern, dictionaryTerms));
    +            flowFile = session.putAllAttributes(flowFile, matched);
    +
    +            final Relationship relationship = (((matched.size() == 
(attributeNameMatches.size() * 3) && matchAll) || (matched.size() > 0 && 
!matchAll))) ? REL_MATCHED : REL_UNMATCHED;
                 session.getProvenanceReporter().route(flowFile, relationship);
                 session.transfer(flowFile, relationship);
                 logger.info("Transferred {} to {}", new Object[]{flowFile, 
relationship});
             }
         }
     
    -    private boolean allMatch(final FlowFile flowFile, final Pattern 
attributePattern, final Set<String> dictionary) {
    -        for (final Map.Entry<String, String> entry : 
flowFile.getAttributes().entrySet()) {
    -            if (attributePattern == null || 
attributePattern.matcher(entry.getKey()).matches()) {
    -                if (!dictionary.contains(entry.getValue())) {
    -                    return false;
    +    private Map<String,String> matchAny(final FlowFile flowFile, final 
Pattern attributePattern, final Map<String,String> dictionary) {
    +        Map<String,String> dictionaryTermMatches = new 
HashMap<String,String>();
    +        attributeNameMatches = new HashSet<String>();
    +
    +        int hitCounter = 0;
    +
    +        for (final Map.Entry<String, String> attribute : 
flowFile.getAttributes().entrySet()) {
    +            if (attributePattern == null || 
attributePattern.matcher(attribute.getKey()).matches()) {
    +                attributeNameMatches.add(attribute.getKey());
    +
    +                if (dictionary.containsKey(attribute.getValue())) {
    +                    hitCounter = setDictionaryTermMatch(dictionary, 
dictionaryTermMatches, hitCounter, attribute);
                     }
                 }
             }
    -
    -        return true;
    +        return dictionaryTermMatches;
         }
     
    -    private boolean anyMatch(final FlowFile flowFile, final Pattern 
attributePattern, final Set<String> dictionary) {
    -        for (final Map.Entry<String, String> entry : 
flowFile.getAttributes().entrySet()) {
    -            if (attributePattern == null || 
attributePattern.matcher(entry.getKey()).matches()) {
    -                if (dictionary.contains(entry.getValue())) {
    -                    return true;
    +    private Map<String,String> matchAll(final FlowFile flowFile, final 
Pattern attributePattern, final Map<String,String> dictionary) {
    +        Map<String,String> dictionaryTermMatches = new 
HashMap<String,String>();
    +        attributeNameMatches = new HashSet<String>();
    +
    +        int hitCounter = 0;
    +
    +        for (final Map.Entry<String, String> attribute : 
flowFile.getAttributes().entrySet()) {
    +            if (attributePattern == null || 
attributePattern.matcher(attribute.getKey()).matches()) {
    +                attributeNameMatches.add(attribute.getKey());
    +
    +                if (dictionary.containsKey(attribute.getValue())) {
    +                    hitCounter = setDictionaryTermMatch(dictionary, 
dictionaryTermMatches, hitCounter, attribute);
    --- End diff --
    
    Avoid assigning hitCounter from the return value. Increment it first, then make the call.


> ScanAttribute should support tagging a flowfile with metadata value from the 
> supplied dictionary
> ------------------------------------------------------------------------------------------------
>
>                 Key: NIFI-3497
>                 URL: https://issues.apache.org/jira/browse/NIFI-3497
>             Project: Apache NiFi
>          Issue Type: Improvement
>            Reporter: Joseph Witt
>            Assignee: Joseph Witt
>
> Today ScanAttribute just looks through the supplied dictionary and given 
> object for a string matching hit.  If it hits then it is a match otherwise it 
> is a 'not found'.  However, when a hit occurs it can often be quite useful to 
> gather additional metadata about that hit.  This makes cases like 
> enrichment/tagging much easier.
> So, the plan is to have ScanAttribute support a dictionary value demarcator which 
> would separate the dictionary term from some string response that will be 
> added to the flowfile.  For instance a dictionary might have
> apples:These are red or green
> bananas:These are yellow unless you should toss them or make bread
> Then if a hit occurs on 'apples', the flowfile that contained such an 
> attribute would have new attributes such as 'dictionary.hit.term' = 'apples' 
> and 'dictionary.hit.metadata' = 'These are red or green'.
> This means downstream processors could extract that metadata and do 
> interesting things with it.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Reply via email to