Repository: nifi Updated Branches: refs/heads/master 8f688d492 -> 2c9fb676c
HTML Parsing Processors Bundle NIFI-1156 HTML Parsing Processors Bundle Project: http://git-wip-us.apache.org/repos/asf/nifi/repo Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/c82fc18f Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/c82fc18f Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/c82fc18f Branch: refs/heads/master Commit: c82fc18f8e306c5a31345856e529cfd9fe4c81ef Parents: 56ad22a Author: Jeremy Dyer <[email protected]> Authored: Fri Nov 13 15:01:10 2015 -0500 Committer: Jeremy Dyer <[email protected]> Committed: Fri Nov 13 15:01:10 2015 -0500 ---------------------------------------------------------------------- nifi-assembly/pom.xml | 5 + .../nifi-html-bundle/nifi-html-nar/pom.xml | 41 +++ .../nifi-html-processors/pom.xml | 59 ++++ .../org/apache/nifi/AbstractHTMLProcessor.java | 120 +++++++ .../java/org/apache/nifi/GetHTMLElement.java | 243 ++++++++++++++ .../java/org/apache/nifi/ModifyHTMLElement.java | 164 ++++++++++ .../java/org/apache/nifi/PutHTMLElement.java | 150 +++++++++ .../org.apache.nifi.processor.Processor | 17 + .../java/org/apache/nifi/AbstractHTMLTest.java | 74 +++++ .../org/apache/nifi/TestGetHTMLElement.java | 319 +++++++++++++++++++ .../org/apache/nifi/TestModifyHTMLElement.java | 223 +++++++++++++ .../org/apache/nifi/TestPutHTMLElement.java | 137 ++++++++ nifi-nar-bundles/nifi-html-bundle/pom.xml | 43 +++ nifi-nar-bundles/pom.xml | 3 +- pom.xml | 8 +- 15 files changed, 1604 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nifi/blob/c82fc18f/nifi-assembly/pom.xml ---------------------------------------------------------------------- diff --git a/nifi-assembly/pom.xml b/nifi-assembly/pom.xml index 961349f..593de99 100644 --- a/nifi-assembly/pom.xml +++ b/nifi-assembly/pom.xml @@ -164,6 +164,11 @@ language governing permissions and limitations under the License. 
--> </dependency> <dependency> <groupId>org.apache.nifi</groupId> + <artifactId>nifi-html-nar</artifactId> + <type>nar</type> + </dependency> + <dependency> + <groupId>org.apache.nifi</groupId> <artifactId>nifi-kite-nar</artifactId> <type>nar</type> </dependency> http://git-wip-us.apache.org/repos/asf/nifi/blob/c82fc18f/nifi-nar-bundles/nifi-html-bundle/nifi-html-nar/pom.xml ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-html-bundle/nifi-html-nar/pom.xml b/nifi-nar-bundles/nifi-html-bundle/nifi-html-nar/pom.xml new file mode 100644 index 0000000..fd23f7b --- /dev/null +++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-nar/pom.xml @@ -0,0 +1,41 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-html-bundle</artifactId> + <version>0.4.0-SNAPSHOT</version> + </parent> + + <artifactId>nifi-html-nar</artifactId> + <packaging>nar</packaging> + + <dependencies> + <dependency> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-standard-services-api-nar</artifactId> + <type>nar</type> + </dependency> + <dependency> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-html-processors</artifactId> + <version>0.4.0-SNAPSHOT</version> + </dependency> + </dependencies> + +</project> http://git-wip-us.apache.org/repos/asf/nifi/blob/c82fc18f/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/pom.xml ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/pom.xml b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/pom.xml new file mode 100644 index 0000000..609d679 --- /dev/null +++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/pom.xml @@ -0,0 +1,59 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-html-bundle</artifactId> + <version>0.4.0-SNAPSHOT</version> + </parent> + + <artifactId>nifi-html-processors</artifactId> + <description>Support for parsing HTML documents</description> + + <dependencies> + <dependency> + <groupId>org.jsoup</groupId> + <artifactId>jsoup</artifactId> + <version>1.8.3</version> + </dependency> + <dependency> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-api</artifactId> + </dependency> + <dependency> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-processor-utils</artifactId> + </dependency> + <dependency> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-mock</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-simple</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>4.11</version> + <scope>test</scope> + </dependency> + </dependencies> +</project> http://git-wip-us.apache.org/repos/asf/nifi/blob/c82fc18f/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/AbstractHTMLProcessor.java ---------------------------------------------------------------------- diff --git 
a/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/AbstractHTMLProcessor.java b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/AbstractHTMLProcessor.java new file mode 100644 index 0000000..49b4ffb --- /dev/null +++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/AbstractHTMLProcessor.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi; + +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.processor.AbstractProcessor; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.io.InputStreamCallback; +import org.apache.nifi.processor.util.StandardValidators; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; + +import java.io.IOException; +import java.io.InputStream; +import java.util.concurrent.atomic.AtomicReference; + +public abstract class AbstractHTMLProcessor extends AbstractProcessor { + + protected static final String ELEMENT_HTML = "HTML"; + protected static final String ELEMENT_TEXT = "Text"; + protected static final String ELEMENT_DATA = "Data"; + protected static final String ELEMENT_ATTRIBUTE = "Attribute"; + + public static final PropertyDescriptor URL = new PropertyDescriptor + .Builder().name("URL") + .description("Base URL for the HTML page being parsed.") + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .build(); + + public static final PropertyDescriptor CSS_SELECTOR = new PropertyDescriptor + .Builder().name("CSS Selector") + .description("CSS selector syntax string used to extract the desired HTML element(s).") + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .build(); + + public static final PropertyDescriptor HTML_CHARSET = new PropertyDescriptor + .Builder().name("HTML character encoding") + .description("Character encoding of the input HTML") + .defaultValue("UTF-8") + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .build(); + + public static final Relationship REL_ORIGINAL = new Relationship.Builder() + .name("original") + .description("The original HTML input") + .build(); + + public static final Relationship REL_SUCCESS = new 
Relationship.Builder() + .name("success") + .description("Successfully parsed HTML element") + .build(); + + public static final Relationship REL_FAILURE = new Relationship.Builder() + .name("failure") + .description("Failed to parse HTML content") + .build(); + + public static final Relationship REL_INVALID_HTML = new Relationship.Builder() + .name("invalid html") + .description("The input HTML syntax is invalid") + .build(); + + public static final Relationship REL_NOT_FOUND = new Relationship.Builder() + .name("element not found") + .description("Element could not be found in the HTML document. The original HTML input will remain " + + "in the flowfile content unchanged. Relationship '" + REL_ORIGINAL + "' will not be invoked " + + "in this scenario.") + .build(); + + /** + * Parses the Jsoup HTML document from the FlowFile input content. + * + * @param inputFlowFile + * Input FlowFile containing the HTML + * + * @param context + * ProcessContext + * + * @param session + * ProcessSession + * + * @return + * Jsoup Document + */ + protected Document parseHTMLDocumentFromFlowfile(FlowFile inputFlowFile, + final ProcessContext context, + final ProcessSession session) { + final AtomicReference<Document> doc = new AtomicReference<>(); + session.read(inputFlowFile, new InputStreamCallback() { + @Override + public void process(InputStream inputStream) throws IOException { + doc.set(Jsoup.parse(inputStream, + context.getProperty(HTML_CHARSET).getValue(), + context.getProperty(URL).getValue())); + } + }); + return doc.get(); + } +} http://git-wip-us.apache.org/repos/asf/nifi/blob/c82fc18f/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/GetHTMLElement.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/GetHTMLElement.java 
b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/GetHTMLElement.java new file mode 100644 index 0000000..63d457c --- /dev/null +++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/GetHTMLElement.java @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi; + +import org.apache.commons.lang3.StringUtils; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.ProcessorInitializationContext; +import org.apache.nifi.annotation.behavior.WritesAttribute; +import org.apache.nifi.annotation.behavior.WritesAttributes; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.SeeAlso; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.io.StreamCallback; +import org.apache.nifi.processor.util.StandardValidators; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.HashSet; +import java.util.Collections; + +@Tags({"get", "html", "dom", "css", "element"}) +@CapabilityDescription("Parses HTML input using CSS selector syntax and creates a new flowfile containing the extracted" + + " element content for each matching CSS selector.") +@SeeAlso({ModifyHTMLElement.class, PutHTMLElement.class}) +@WritesAttributes({@WritesAttribute(attribute="HTMLElement", description="Flowfile attribute where the element result" + + " parsed from the HTML using the CSS selector syntax are placed if the destination is a flowfile attribute.")}) +public class GetHTMLElement + extends AbstractHTMLProcessor { + + public static final String HTML_ELEMENT_ATTRIBUTE_NAME = "HTMLElement"; + public static final String DESTINATION_ATTRIBUTE = "flowfile-attribute"; + public static final String DESTINATION_CONTENT = 
"flowfile-content"; + + public static final PropertyDescriptor PREPEND_ELEMENT_VALUE = new PropertyDescriptor + .Builder().name("Prepend Element value") + .description("Prepends the specified value to the resulting Element") + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .build(); + + public static final PropertyDescriptor APPEND_ELEMENT_VALUE = new PropertyDescriptor + .Builder().name("Append Element value") + .description("Appends the specified value to the resulting Element") + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .build(); + + public static final PropertyDescriptor ATTRIBUTE_KEY = new PropertyDescriptor + .Builder().name("Attribute Name") + .description(("When getting the value of an element attribute this value is used as the key to determine" + + " which attribute on the selected element should be retrieved.")) + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .build(); + + + public static final PropertyDescriptor OUTPUT_TYPE = new PropertyDescriptor.Builder() + .name("Output Type") + .description("Controls the type of value that is retrieved from the element. 
" + + ELEMENT_HTML + "," + ELEMENT_TEXT + ", " + ELEMENT_ATTRIBUTE + " or " + ELEMENT_DATA) + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .allowableValues(ELEMENT_HTML, ELEMENT_TEXT, ELEMENT_ATTRIBUTE, ELEMENT_DATA) + .defaultValue(ELEMENT_HTML) + .build(); + + public static final PropertyDescriptor DESTINATION = new PropertyDescriptor.Builder() + .name("Destination") + .description("Control if element extracted is written as a flowfile attribute or " + + "as flowfile content.") + .required(true) + .allowableValues(DESTINATION_ATTRIBUTE, DESTINATION_CONTENT) + .defaultValue(DESTINATION_ATTRIBUTE) + .build(); + + private List<PropertyDescriptor> descriptors; + + private Set<Relationship> relationships; + + @Override + protected void init(final ProcessorInitializationContext context) { + final List<PropertyDescriptor> descriptors = new ArrayList<>(); + descriptors.add(URL); + descriptors.add(CSS_SELECTOR); + descriptors.add(HTML_CHARSET); + descriptors.add(OUTPUT_TYPE); + descriptors.add(DESTINATION); + descriptors.add(PREPEND_ELEMENT_VALUE); + descriptors.add(APPEND_ELEMENT_VALUE); + descriptors.add(ATTRIBUTE_KEY); + this.descriptors = Collections.unmodifiableList(descriptors); + + final Set<Relationship> relationships = new HashSet<>(); + relationships.add(REL_ORIGINAL); + relationships.add(REL_SUCCESS); + relationships.add(REL_FAILURE); + relationships.add(REL_NOT_FOUND); + this.relationships = Collections.unmodifiableSet(relationships); + } + + @Override + public Set<Relationship> getRelationships() { + return this.relationships; + } + + @Override + public final List<PropertyDescriptor> getSupportedPropertyDescriptors() { + return descriptors; + } + + @Override + public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { + final FlowFile flowFile = session.get(); + if ( flowFile == null ) { + return; + } + + try { + + final Document doc = parseHTMLDocumentFromFlowfile(flowFile, 
context, session); + final Elements eles = doc.select(context.getProperty(CSS_SELECTOR) + .evaluateAttributeExpressions().getValue()); + final String prependValue = context.getProperty(PREPEND_ELEMENT_VALUE) + .evaluateAttributeExpressions(flowFile).getValue(); + final String appendValue = context.getProperty(APPEND_ELEMENT_VALUE) + .evaluateAttributeExpressions(flowFile).getValue(); + + if (eles == null || eles.size() == 0) { + //No element found + session.transfer(flowFile, REL_NOT_FOUND); + } else { + for (final Element ele : eles) { + final FlowFile ff = session.create(); + + switch (context.getProperty(DESTINATION).getValue()) { + case DESTINATION_ATTRIBUTE: + final FlowFile atFlowfile = session.putAttribute(ff, HTML_ELEMENT_ATTRIBUTE_NAME, + extractElementValue( + prependValue, + context.getProperty(OUTPUT_TYPE).getValue(), + appendValue, + ele, + context.getProperty(ATTRIBUTE_KEY).evaluateAttributeExpressions() + .getValue())); + session.getProvenanceReporter().create(atFlowfile); + session.transfer(atFlowfile, REL_SUCCESS); + break; + case DESTINATION_CONTENT: + final FlowFile conFlowfile = session.write(ff, new StreamCallback() { + @Override + public void process(InputStream inputStream, OutputStream outputStream) throws IOException { + try { + outputStream.write(extractElementValue( + prependValue, + context.getProperty(OUTPUT_TYPE).getValue(), + appendValue, + ele, + context.getProperty(ATTRIBUTE_KEY).evaluateAttributeExpressions() + .getValue()).getBytes()); + } catch (Exception ex) { + session.transfer(ff, REL_FAILURE); + } + } + }); + + session.getProvenanceReporter().create(conFlowfile); + session.transfer(conFlowfile, REL_SUCCESS); + break; + } + + } + + //Transfer the original HTML + session.transfer(flowFile, REL_ORIGINAL); + } + + } catch (Exception ex) { + getLogger().error(ex.getMessage()); + session.transfer(flowFile, REL_FAILURE); + } + + } + + + /** + * Extracts the HTML value based on the configuration values. 
+ * + * @return + * value from the parsed HTML element + */ + private String extractElementValue(String prependValue, String outputType, String appendValue, Element ele, + String attrKey) { + if (StringUtils.isEmpty(prependValue)) { + prependValue = ""; + } + if (StringUtils.isEmpty(appendValue)) { + appendValue = ""; + } + + switch (outputType) { + case ELEMENT_HTML: + return prependValue + ele.html() + appendValue; + case ELEMENT_TEXT: + return prependValue + ele.text() + appendValue; + case ELEMENT_DATA: + return prependValue + ele.data() + appendValue; + case ELEMENT_ATTRIBUTE: + return prependValue + ele.attr(attrKey) + appendValue; + default: + return prependValue + ele.html() + appendValue; + } + } + +} http://git-wip-us.apache.org/repos/asf/nifi/blob/c82fc18f/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/ModifyHTMLElement.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/ModifyHTMLElement.java b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/ModifyHTMLElement.java new file mode 100644 index 0000000..425d8fa --- /dev/null +++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/ModifyHTMLElement.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi; + +import org.apache.nifi.annotation.behavior.WritesAttribute; +import org.apache.nifi.annotation.behavior.WritesAttributes; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.SeeAlso; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.ProcessorInitializationContext; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.io.StreamCallback; +import org.apache.nifi.processor.util.StandardValidators; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.List; +import java.util.ArrayList; +import java.util.Set; +import java.util.HashSet; +import java.util.Collections; + +@Tags({"modify", "html", "dom", "css", "element"}) +@CapabilityDescription("Modifies the value of an existing HTML element in the original input HTML") +@SeeAlso({GetHTMLElement.class, PutHTMLElement.class}) +@WritesAttributes({@WritesAttribute(attribute="NumElementsModified", description="Total number of HTML " + + "element modifications made")}) +public class ModifyHTMLElement extends AbstractHTMLProcessor { + + 
public static final String NUM_ELEMENTS_MODIFIED_ATTR = "NumElementsModified"; + + public static final PropertyDescriptor OUTPUT_TYPE = new PropertyDescriptor.Builder() + .name("Output Type") + .description("Controls whether the HTML element is output as " + + ELEMENT_HTML + "," + ELEMENT_TEXT + " or " + ELEMENT_DATA) + .required(true) + .allowableValues(ELEMENT_HTML, ELEMENT_TEXT, ELEMENT_ATTRIBUTE) + .defaultValue(ELEMENT_HTML) + .build(); + + public static final PropertyDescriptor MODIFIED_VALUE = new PropertyDescriptor + .Builder().name("Modified Value") + .description("Value to update the found HTML elements with") + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .build(); + + public static final PropertyDescriptor ATTRIBUTE_KEY = new PropertyDescriptor + .Builder().name("Attribute Name") + .description(("When modifying the value of an element attribute this value is used as the key to determine" + + " which attribute on the selected element will be modified with the new value.")) + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .build(); + + private List<PropertyDescriptor> descriptors; + + private Set<Relationship> relationships; + + @Override + protected void init(final ProcessorInitializationContext context) { + final List<PropertyDescriptor> descriptors = new ArrayList<>(); + descriptors.add(URL); + descriptors.add(CSS_SELECTOR); + descriptors.add(HTML_CHARSET); + descriptors.add(OUTPUT_TYPE); + descriptors.add(MODIFIED_VALUE); + descriptors.add(ATTRIBUTE_KEY); + this.descriptors = Collections.unmodifiableList(descriptors); + + final Set<Relationship> relationships = new HashSet<Relationship>(); + relationships.add(REL_ORIGINAL); + relationships.add(REL_SUCCESS); + relationships.add(REL_FAILURE); + relationships.add(REL_INVALID_HTML); + relationships.add(REL_NOT_FOUND); + this.relationships = 
Collections.unmodifiableSet(relationships); + } + + @Override + public Set<Relationship> getRelationships() { + return this.relationships; + } + + @Override + public final List<PropertyDescriptor> getSupportedPropertyDescriptors() { + return descriptors; + } + + @Override + public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { + final FlowFile flowFile = session.get(); + if (flowFile == null) { + return; + } + + try { + final Document doc = parseHTMLDocumentFromFlowfile(flowFile, context, session); + final Elements eles = doc.select(context.getProperty(CSS_SELECTOR).evaluateAttributeExpressions().getValue()); + + if (eles == null || eles.size() == 0) { + //No element found + session.transfer(flowFile, REL_NOT_FOUND); + } else { + for (Element ele : eles) { + switch (context.getProperty(OUTPUT_TYPE).getValue()) { + case ELEMENT_HTML: + ele.html(context.getProperty(MODIFIED_VALUE).evaluateAttributeExpressions().getValue()); + break; + case ELEMENT_ATTRIBUTE: + ele.attr(context.getProperty(ATTRIBUTE_KEY).evaluateAttributeExpressions().getValue(), + context.getProperty(MODIFIED_VALUE).evaluateAttributeExpressions().getValue()); + break; + case ELEMENT_TEXT: + ele.text(context.getProperty(MODIFIED_VALUE).evaluateAttributeExpressions().getValue()); + break; + } + } + + FlowFile ff = session.write(session.create(flowFile), new StreamCallback() { + @Override + public void process(InputStream in, OutputStream out) throws IOException { + out.write(doc.html().getBytes()); + } + }); + ff = session.putAttribute(ff, NUM_ELEMENTS_MODIFIED_ATTR, new Integer(eles.size()).toString()); + session.transfer(ff, REL_SUCCESS); + + //Transfer the original HTML + session.transfer(flowFile, REL_ORIGINAL); + } + + } catch (Exception ex) { + getLogger().error(ex.getMessage()); + session.transfer(flowFile, REL_FAILURE); + } + } + +} 
http://git-wip-us.apache.org/repos/asf/nifi/blob/c82fc18f/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/PutHTMLElement.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/PutHTMLElement.java b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/PutHTMLElement.java new file mode 100644 index 0000000..f0a8c39 --- /dev/null +++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/PutHTMLElement.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.nifi;
+
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.SeeAlso;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.ProcessorInitializationContext;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.io.StreamCallback;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.Collections;
+
+/**
+ * Inserts a configured HTML fragment into every element matched by a CSS selector.
+ *
+ * <p>The fragment is either appended to or prepended to the children of each matched
+ * element. The modified document is written to a new child FlowFile routed to
+ * {@code REL_SUCCESS}; the incoming FlowFile is routed to {@code REL_ORIGINAL}.
+ * When the selector matches nothing the incoming FlowFile goes to {@code REL_NOT_FOUND},
+ * and any exception raised while processing sends it to {@code REL_FAILURE}.</p>
+ */
+@Tags({"put", "html", "dom", "css", "element"})
+@CapabilityDescription("Creates a new HTML element in the input HTML")
+@SeeAlso({GetHTMLElement.class, ModifyHTMLElement.class})
+public class PutHTMLElement extends AbstractHTMLProcessor {
+
+    public static final String APPEND_ELEMENT = "append-html";
+    public static final String PREPEND_ELEMENT = "prepend-html";
+
+    public static final PropertyDescriptor PUT_LOCATION_TYPE = new PropertyDescriptor.Builder()
+            .name("Element Insert Location Type")
+            .description("Controls whether the new element is prepended or appended to the children of the " +
+                    "Element located by the CSS selector. EX: prepended value '<b>Hi</b>' inside of " +
+                    "Element (using CSS Selector 'p') '<p>There</p>' would result in " +
+                    "'<p><b>Hi</b>There</p>'. Appending the value would result in '<p>There<b>Hi</b></p>'")
+            .required(true)
+            .allowableValues(APPEND_ELEMENT, PREPEND_ELEMENT)
+            .defaultValue(APPEND_ELEMENT)
+            .build();
+
+    public static final PropertyDescriptor PUT_VALUE = new PropertyDescriptor.Builder()
+            .name("Put Value")
+            .description("Value used when creating the new Element. Value should be a valid HTML element. " +
+                    "The text should be supplied unencoded: characters like '<', '>', etc will be properly HTML " +
+                    "encoded in the output.")
+            .required(true)
+            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+            .expressionLanguageSupported(true)
+            .build();
+
+    private List<PropertyDescriptor> descriptors;
+
+    private Set<Relationship> relationships;
+
+    /**
+     * Builds the immutable property and relationship collections exposed by this processor.
+     *
+     * @param context initialization context supplied by the framework (not used here)
+     */
+    @Override
+    protected void init(final ProcessorInitializationContext context) {
+        final List<PropertyDescriptor> descriptors = new ArrayList<>();
+        descriptors.add(URL);
+        descriptors.add(CSS_SELECTOR);
+        descriptors.add(HTML_CHARSET);
+        descriptors.add(PUT_LOCATION_TYPE);
+        descriptors.add(PUT_VALUE);
+        this.descriptors = Collections.unmodifiableList(descriptors);
+
+        final Set<Relationship> relationships = new HashSet<>();
+        relationships.add(REL_ORIGINAL);
+        relationships.add(REL_SUCCESS);
+        relationships.add(REL_FAILURE);
+        relationships.add(REL_INVALID_HTML);
+        // onTrigger routes FlowFiles here when the selector matches nothing, so the
+        // relationship has to be registered or that transfer is rejected.
+        relationships.add(REL_NOT_FOUND);
+        this.relationships = Collections.unmodifiableSet(relationships);
+    }
+
+    /**
+     * @return the unmodifiable set of relationships built in {@link #init}
+     */
+    @Override
+    public Set<Relationship> getRelationships() {
+        return this.relationships;
+    }
+
+    /**
+     * @return the unmodifiable list of supported properties built in {@link #init}
+     */
+    @Override
+    public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+        return descriptors;
+    }
+
+    /**
+     * Parses the incoming FlowFile as HTML, inserts the configured value into every element
+     * matched by the CSS selector and emits the modified document as a child FlowFile.
+     *
+     * @param context provides the configured property values
+     * @param session session used to read, create and route FlowFiles
+     * @throws ProcessException declared by the overridden method; this implementation catches
+     *                          every exception itself and routes the FlowFile to REL_FAILURE
+     */
+    @Override
+    public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
+        final FlowFile flowFile = session.get();
+        if (flowFile == null) {
+            return;
+        }
+
+        try {
+            final Document doc = parseHTMLDocumentFromFlowfile(flowFile, context, session);
+            // Evaluate against the FlowFile so expressions may reference its attributes.
+            final Elements eles = doc.select(
+                    context.getProperty(CSS_SELECTOR).evaluateAttributeExpressions(flowFile).getValue());
+
+            if (eles == null || eles.size() == 0) {
+                // No element found
+                session.transfer(flowFile, REL_NOT_FOUND);
+                return;
+            }
+
+            // Neither value depends on the matched element, so resolve them once.
+            final String putValue = context.getProperty(PUT_VALUE).evaluateAttributeExpressions(flowFile).getValue();
+            final String locationType = context.getProperty(PUT_LOCATION_TYPE).getValue();
+
+            for (final Element ele : eles) {
+                switch (locationType) {
+                    case APPEND_ELEMENT:
+                        ele.append(putValue);
+                        break;
+                    case PREPEND_ELEMENT:
+                        ele.prepend(putValue);
+                        break;
+                    default:
+                        // PUT_LOCATION_TYPE only allows the two values handled above.
+                        break;
+                }
+            }
+
+            // Encode with the charset jsoup used when serializing the document instead of the
+            // platform default, so the bytes agree with the entity escaping in doc.html().
+            final byte[] modifiedHtml = doc.html().getBytes(doc.outputSettings().charset());
+
+            FlowFile ff = session.write(session.create(flowFile), new StreamCallback() {
+                @Override
+                public void process(InputStream in, OutputStream out) throws IOException {
+                    out.write(modifiedHtml);
+                }
+            });
+            session.transfer(ff, REL_SUCCESS);
+
+            // Transfer the original HTML
+            session.transfer(flowFile, REL_ORIGINAL);
+
+        } catch (final Exception ex) {
+            // Pass the exception itself: getMessage() can be null and drops the stack trace.
+            getLogger().error("Failed to put element into HTML document", ex);
+            session.transfer(flowFile, REL_FAILURE);
+        }
+
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/nifi/blob/c82fc18f/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
new file mode 100644
index 0000000..aea1060
--- /dev/null
+++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +org.apache.nifi.GetHTMLElement +org.apache.nifi.ModifyHTMLElement +org.apache.nifi.PutHTMLElement \ No newline at end of file http://git-wip-us.apache.org/repos/asf/nifi/blob/c82fc18f/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/AbstractHTMLTest.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/AbstractHTMLTest.java b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/AbstractHTMLTest.java new file mode 100644 index 0000000..88f4c63 --- /dev/null +++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/AbstractHTMLTest.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi;
+
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.io.StreamCallback;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+/**
+ * Shared fixture for the HTML processor tests: a sample HTML page, the values embedded
+ * in it, and a helper that turns raw bytes into a FlowFile.
+ */
+public class AbstractHTMLTest {
+
+    // Values embedded in the sample page below; tests compare extracted data against them.
+    protected final String ATL_WEATHER_TEXT = "Atlanta Weather";
+    protected final String GDR_WEATHER_TEXT = "<i>Grand Rapids Weather</i>";
+    protected final String ATL_WEATHER_LINK = "http://w1.weather.gov/obhistory/KPDK.html";
+    protected final String GR_WEATHER_LINK = "http://w1.weather.gov/obhistory/KGRR.html";
+    protected final String AUTHOR_NAME = "Jeremy Dyer";
+    protected final String ATL_ID = "ATL";
+    protected final String GDR_ID = "GDR";
+
+    // Sample document: three <a> elements (two with ids), an author <meta> tag and a
+    // <div id="put"> container.
+    protected final String HTML = "<!doctype html>\n" +
+            "\n" +
+            "<html lang=\"en\">\n" +
+            "<head>\n" +
+            " <meta charset=\"utf-8\">\n" +
+            "\n" +
+            " <title>NiFi HTML Parsing Demo</title>\n" +
+            " <meta name=\"description\" content=\"NiFi HTML Parsing Demo\">\n" +
+            " <meta name=\"author\" content=\"" + AUTHOR_NAME + "\">\n" +
+            "\n" +
+            " <link rel=\"stylesheet\" href=\"css/styles.css?v=1.0\">\n" +
+            "\n" +
+            " <!--[if lt IE 9]>\n" +
+            " <script src=\"http://html5shiv.googlecode.com/svn/trunk/html5.js\"></script>\n" +
+            " <![endif]-->\n" +
+            "</head>\n" +
+            "\n" +
+            "<body>\n" +
+            " <script src=\"js/scripts.js\"></script>\n" +
+            " <p>Check out this weather! <a id=\"" + ATL_ID + "\" href=\"" +
+            ATL_WEATHER_LINK + "\">" + ATL_WEATHER_TEXT + "</a></p>\n" +
+            " <p>I guess it could be colder ... <a id=\"" + GDR_ID + "\" href=\"" +
+            GR_WEATHER_LINK + "\">" + GDR_WEATHER_TEXT + "</a></p>\n" +
+            " <div id=\"put\"><a href=\"httpd://localhost\" /></div>\n" +
+            "</body>\n" +
+            "</html>";
+
+
+    /**
+     * Creates a new FlowFile in the given session and writes {@code content} to it.
+     *
+     * @param content bytes that become the FlowFile content
+     * @param session session used to create and write the FlowFile
+     * @return the FlowFile returned by the session after the write
+     */
+    protected FlowFile writeContentToNewFlowFile(final byte[] content, ProcessSession session) {
+        final FlowFile emptyFlowFile = session.create();
+        return session.write(emptyFlowFile, new StreamCallback() {
+            @Override
+            public void process(InputStream source, OutputStream sink) throws IOException {
+                // The new FlowFile has no prior content, so the input stream is ignored.
+                sink.write(content);
+            }
+        });
+    }
+}
http://git-wip-us.apache.org/repos/asf/nifi/blob/c82fc18f/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/TestGetHTMLElement.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/TestGetHTMLElement.java b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/TestGetHTMLElement.java
new file mode 100644
index 0000000..ae11768
--- /dev/null
+++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/TestGetHTMLElement.java
@@ -0,0 +1,319 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi;
+
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.util.MockFlowFile;
+import org.apache.nifi.util.TestRunner;
+import org.apache.nifi.util.TestRunners;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.lang.Exception;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for {@link GetHTMLElement}, run against the sample page defined in
+ * {@link AbstractHTMLTest}.
+ */
+public class TestGetHTMLElement extends AbstractHTMLTest {
+
+    private TestRunner testRunner;
+
+    @Before
+    public void init() {
+        testRunner = TestRunners.newTestRunner(GetHTMLElement.class);
+        testRunner.setProperty(GetHTMLElement.URL, "http://localhost");
+        testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_HTML);
+        testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
+        testRunner.setProperty(GetHTMLElement.HTML_CHARSET, "UTF-8");
+    }
+
+    /** Enqueues the sample HTML page as a FlowFile and runs the processor once. */
+    private void runWithSampleHtml() {
+        final ProcessSession session = testRunner.getProcessSessionFactory().createSession();
+        // init() configures the processor for UTF-8, so encode the fixture the same way
+        // instead of relying on the platform default charset.
+        final FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(StandardCharsets.UTF_8), session);
+
+        testRunner.enqueue(ff);
+        testRunner.run();
+    }
+
+    /** Asserts the routing of a run in which the selector matched {@code expectedMatches} elements. */
+    private void assertElementsFound(final int expectedMatches) {
+        testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, expectedMatches);
+        testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
+        testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
+        testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
+    }
+
+    /** Asserts the routing of a run in which the selector matched nothing. */
+    private void assertElementNotFound() {
+        testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 0);
+        testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
+        testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 1);
+    }
+
+    /** Returns the only FlowFile routed to success, failing if there is not exactly one. */
+    private MockFlowFile singleSuccessFlowFile() {
+        final List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS);
+        assertEquals(1, ffs.size());
+        return ffs.get(0);
+    }
+
+    /** Returns the content of the only FlowFile routed to success, decoded as UTF-8. */
+    private String singleSuccessContent() {
+        return new String(testRunner.getContentAsByteArray(singleSuccessFlowFile()), StandardCharsets.UTF_8);
+    }
+
+    @Test
+    public void testNoElementFound() throws Exception {
+        testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "b");   //Bold element is not present in sample HTML
+
+        runWithSampleHtml();
+
+        assertElementNotFound();
+    }
+
+    @Test
+    public void testInvalidSelector() throws Exception {
+        // NOTE(review): despite the name, this value looks like an ordinary element-name
+        // selector rather than a syntax error; the expected outcome is "not found", not "failure".
+        testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "InvalidCSSSelectorSyntax");
+
+        runWithSampleHtml();
+
+        assertElementNotFound();
+    }
+
+    @Test
+    public void testSingleElementFound() throws Exception {
+        testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "head");
+
+        runWithSampleHtml();
+
+        assertElementsFound(1);
+    }
+
+    @Test
+    public void testMultipleElementFound() throws Exception {
+        testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "a");
+
+        runWithSampleHtml();
+
+        // The sample page contains three anchors.
+        assertElementsFound(3);
+    }
+
+    @Test
+    public void testElementFoundWriteToAttribute() throws Exception {
+        testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + ATL_ID);
+        testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_ATTRIBUTE);
+        testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE);
+        testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "href");
+
+        runWithSampleHtml();
+
+        assertElementsFound(1);
+        final String atValue = singleSuccessFlowFile().getAttribute(GetHTMLElement.HTML_ELEMENT_ATTRIBUTE_NAME);
+        assertEquals(ATL_WEATHER_LINK, atValue);
+    }
+
+    @Test
+    public void testElementFoundWriteToContent() throws Exception {
+        testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + ATL_ID);
+        testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
+        testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE);
+        testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "href");
+
+        runWithSampleHtml();
+
+        assertElementsFound(1);
+        assertEquals(ATL_WEATHER_LINK, singleSuccessContent());
+    }
+
+    @Test
+    public void testValidPrependValueToFoundElement() throws Exception {
+        final String PREPEND_VALUE = "TestPrepend";
+        testRunner.setProperty(GetHTMLElement.PREPEND_ELEMENT_VALUE, PREPEND_VALUE);
+        testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + ATL_ID);
+        testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
+        testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE);
+        testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "href");
+
+        runWithSampleHtml();
+
+        assertElementsFound(1);
+        assertEquals(PREPEND_VALUE + ATL_WEATHER_LINK, singleSuccessContent());
+    }
+
+    @Test
+    public void testValidPrependValueToNotFoundElement() throws Exception {
+        final String PREPEND_VALUE = "TestPrepend";
+        testRunner.setProperty(GetHTMLElement.PREPEND_ELEMENT_VALUE, PREPEND_VALUE);
+        testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "b");
+        testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
+        testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_TEXT);
+
+        runWithSampleHtml();
+
+        assertElementNotFound();
+        testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 0);
+    }
+
+    @Test
+    public void testValidAppendValueToFoundElement() throws Exception {
+        final String APPEND_VALUE = "TestAppend";
+        testRunner.setProperty(GetHTMLElement.APPEND_ELEMENT_VALUE, APPEND_VALUE);
+        testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + ATL_ID);
+        testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
+        testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE);
+        testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "href");
+
+        runWithSampleHtml();
+
+        assertElementsFound(1);
+        assertEquals(ATL_WEATHER_LINK + APPEND_VALUE, singleSuccessContent());
+    }
+
+    @Test
+    public void testValidAppendValueToNotFoundElement() throws Exception {
+        final String APPEND_VALUE = "TestAppend";
+        testRunner.setProperty(GetHTMLElement.APPEND_ELEMENT_VALUE, APPEND_VALUE);
+        testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "b");
+        testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
+        testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_TEXT);
+
+        runWithSampleHtml();
+
+        assertElementNotFound();
+        testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 0);
+    }
+
+    @Test
+    public void testExtractAttributeFromElement() throws Exception {
+        testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "meta[name=author]");
+        testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
+        testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE);
+        // The key is capitalised while the sample page uses lower-case "content".
+        testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "Content");
+
+        runWithSampleHtml();
+
+        assertElementsFound(1);
+        assertEquals(AUTHOR_NAME, singleSuccessContent());
+    }
+
+    @Test
+    public void testExtractTextFromElement() throws Exception {
+        testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + ATL_ID);
+        testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
+        testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_TEXT);
+
+        runWithSampleHtml();
+
+        assertElementsFound(1);
+        assertEquals(ATL_WEATHER_TEXT, singleSuccessContent());
+    }
+
+    @Test
+    public void testExtractHTMLFromElement() throws Exception {
+        testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + GDR_ID);
+        testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
+        testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_HTML);
+
+        runWithSampleHtml();
+
+        assertElementsFound(1);
+        assertEquals(GDR_WEATHER_TEXT, singleSuccessContent());
+    }
+}
http://git-wip-us.apache.org/repos/asf/nifi/blob/c82fc18f/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/TestModifyHTMLElement.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/TestModifyHTMLElement.java b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/TestModifyHTMLElement.java
new file mode 100644
index 0000000..010107f
--- /dev/null
+++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/TestModifyHTMLElement.java
@@ -0,0 +1,223 @@
+/*
+ * Licensed to the Apache Software
Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi;
+
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.util.MockFlowFile;
+import org.apache.nifi.util.TestRunner;
+import org.apache.nifi.util.TestRunners;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+/**
+ * Tests for {@link ModifyHTMLElement}, run against the sample page defined in
+ * {@link AbstractHTMLTest}.
+ */
+public class TestModifyHTMLElement extends AbstractHTMLTest {
+
+    private TestRunner testRunner;
+
+    @Before
+    public void init() {
+        testRunner = TestRunners.newTestRunner(ModifyHTMLElement.class);
+        testRunner.setProperty(ModifyHTMLElement.URL, "http://localhost");
+        testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_HTML);
+        testRunner.setProperty(ModifyHTMLElement.HTML_CHARSET, "UTF-8");
+    }
+
+    /** Enqueues the sample HTML page as a FlowFile and runs the processor once. */
+    private void runWithSampleHtml() {
+        final ProcessSession session = testRunner.getProcessSessionFactory().createSession();
+        // init() configures the processor for UTF-8, so encode the fixture the same way
+        // instead of relying on the platform default charset.
+        final FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(StandardCharsets.UTF_8), session);
+
+        testRunner.enqueue(ff);
+        testRunner.run();
+    }
+
+    /**
+     * Asserts that exactly one modified document was produced and returns the first element
+     * in it that matches {@code cssSelector}.
+     */
+    private Element selectFromModifiedDocument(final String cssSelector) {
+        testRunner.assertTransferCount(ModifyHTMLElement.REL_SUCCESS, 1);
+        testRunner.assertTransferCount(ModifyHTMLElement.REL_FAILURE, 0);
+        testRunner.assertTransferCount(ModifyHTMLElement.REL_ORIGINAL, 1);
+        testRunner.assertTransferCount(ModifyHTMLElement.REL_NOT_FOUND, 0);
+
+        final List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(ModifyHTMLElement.REL_SUCCESS);
+        assertEquals(1, ffs.size());
+        final String data = new String(testRunner.getContentAsByteArray(ffs.get(0)), StandardCharsets.UTF_8);
+
+        //Contents will be the entire HTML doc. So let's use Jsoup again just to grab the element we want.
+        final Document doc = Jsoup.parse(data);
+        final Elements eles = doc.select(cssSelector);
+        return eles.get(0);
+    }
+
+    @Test
+    public void testModifyText() throws Exception {
+        final String MOD_VALUE = "Newly modified value to replace " + ATL_WEATHER_TEXT;
+        testRunner.setProperty(ModifyHTMLElement.CSS_SELECTOR, "#" + ATL_ID);
+        testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_TEXT);
+        testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE);
+
+        runWithSampleHtml();
+
+        final Element ele = selectFromModifiedDocument("#" + ATL_ID);
+        assertEquals(MOD_VALUE, ele.text());
+    }
+
+    @Test
+    public void testModifyHTMLWithExpressionLanguage() throws Exception {
+
+        final String MOD_VALUE = "Newly modified value to replace " + ATL_WEATHER_TEXT;
+
+        testRunner.setProperty(ModifyHTMLElement.CSS_SELECTOR, "#" + ATL_ID);
+        testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_TEXT);
+        testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, "${\" " + MOD_VALUE + " \":trim()}");
+
+        runWithSampleHtml();
+
+        final Element ele = selectFromModifiedDocument("#" + ATL_ID);
+        // NOTE(review): this only checks that the element still has text; it does not verify
+        // that the expression was evaluated to MOD_VALUE - consider tightening once confirmed.
+        assertNotNull(ele.text());
+    }
+
+    @Test
+    public void testModifyHTML() throws Exception {
+        final String MOD_VALUE = "Newly modified HTML to replace " + GDR_WEATHER_TEXT;
+        testRunner.setProperty(ModifyHTMLElement.CSS_SELECTOR, "#" + GDR_ID);
+        testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_HTML);
+        testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE);
+
+        runWithSampleHtml();
+
+        final Element ele = selectFromModifiedDocument("#" + GDR_ID);
+        assertEquals(MOD_VALUE, ele.html());
+    }
+
+    @Test
+    public void testModifyAttribute() throws Exception {
+        final String MOD_VALUE = "http://localhost/newlink";
+        testRunner.setProperty(ModifyHTMLElement.CSS_SELECTOR, "#" + GDR_ID);
+        testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_ATTRIBUTE);
+        testRunner.setProperty(ModifyHTMLElement.ATTRIBUTE_KEY, "href");
+        testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE);
+
+        runWithSampleHtml();
+
+        final Element ele = selectFromModifiedDocument("#" + GDR_ID);
+        assertEquals(MOD_VALUE, ele.attr("href"));
+    }
+
+    @Test
+    public void testModifyElementNotFound() throws Exception {
+        final String MOD_VALUE = "http://localhost/newlink";
+        testRunner.setProperty(ModifyHTMLElement.CSS_SELECTOR, "b");
+        testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_HTML);
+        testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE);
+
+        runWithSampleHtml();
+
+        testRunner.assertTransferCount(ModifyHTMLElement.REL_SUCCESS, 0);
+        testRunner.assertTransferCount(ModifyHTMLElement.REL_FAILURE, 0);
+        testRunner.assertTransferCount(ModifyHTMLElement.REL_ORIGINAL, 0);
+        testRunner.assertTransferCount(ModifyHTMLElement.REL_NOT_FOUND, 1);
+    }
+
+    @Test
+    public void testModifyValueContainsHTMLCharacters() throws Exception {
+        final String MOD_VALUE = "Text that contains > and < characters";
+        testRunner.setProperty(ModifyHTMLElement.CSS_SELECTOR, "#" + GDR_ID);
+        testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_HTML);
+        testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE);
+
+        runWithSampleHtml();
+
+        final Element ele = selectFromModifiedDocument("#" + GDR_ID);
+        // text() yields the decoded characters, html() the entity-escaped markup.
+        assertEquals(MOD_VALUE, ele.text());
+        assertEquals(MOD_VALUE.replace(">", "&gt;").replace("<", "&lt;"), ele.html());
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/nifi/blob/c82fc18f/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/TestPutHTMLElement.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/TestPutHTMLElement.java b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/TestPutHTMLElement.java
new file mode 100644
index 0000000..1dcc085
--- /dev/null
+++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/TestPutHTMLElement.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.nifi; + +import org.apache.commons.lang3.StringUtils; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.util.MockFlowFile; +import org.apache.nifi.util.TestRunner; +import org.apache.nifi.util.TestRunners; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.junit.Before; +import org.junit.Test; + +import java.util.List; +import static org.junit.Assert.assertTrue; + + +public class TestPutHTMLElement extends AbstractHTMLTest { + + private TestRunner testRunner; + + @Before + public void init() { + testRunner = TestRunners.newTestRunner(PutHTMLElement.class); + testRunner.setProperty(PutHTMLElement.URL, "http://localhost"); + } + + @Test + public void testAddNewElementToRoot() throws Exception { + final String MOD_VALUE = "<p>modified value</p>"; + testRunner.setProperty(PutHTMLElement.CSS_SELECTOR, "body"); + testRunner.setProperty(PutHTMLElement.PUT_LOCATION_TYPE, PutHTMLElement.PREPEND_ELEMENT); + testRunner.setProperty(PutHTMLElement.PUT_VALUE, MOD_VALUE); + + ProcessSession session = testRunner.getProcessSessionFactory().createSession(); + FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session); + + testRunner.enqueue(ff); + testRunner.run(); + + testRunner.assertTransferCount(PutHTMLElement.REL_SUCCESS, 1); + testRunner.assertTransferCount(PutHTMLElement.REL_FAILURE, 0); + testRunner.assertTransferCount(PutHTMLElement.REL_ORIGINAL, 1); + testRunner.assertTransferCount(PutHTMLElement.REL_NOT_FOUND, 0); + + List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(PutHTMLElement.REL_SUCCESS); + assertTrue(ffs.size() == 1); + String data = new String(testRunner.getContentAsByteArray(ffs.get(0))); + + //Contents will be the entire HTML doc. So lets use Jsoup again just the grab the element we want. 
+ Document doc = Jsoup.parse(data); + Elements eles = doc.select("body > p"); + Element ele = eles.get(0); + + assertTrue(StringUtils.equals(MOD_VALUE.replace("<p>", "").replace("</p>", ""), ele.html())); + } + + @Test + public void testPrependPElementToDiv() throws Exception { + final String MOD_VALUE = "<p>modified value</p>"; + testRunner.setProperty(PutHTMLElement.CSS_SELECTOR, "#put"); + testRunner.setProperty(PutHTMLElement.PUT_LOCATION_TYPE, PutHTMLElement.PREPEND_ELEMENT); + testRunner.setProperty(PutHTMLElement.PUT_VALUE, MOD_VALUE); + + ProcessSession session = testRunner.getProcessSessionFactory().createSession(); + FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session); + + testRunner.enqueue(ff); + testRunner.run(); + + testRunner.assertTransferCount(PutHTMLElement.REL_SUCCESS, 1); + testRunner.assertTransferCount(PutHTMLElement.REL_FAILURE, 0); + testRunner.assertTransferCount(PutHTMLElement.REL_ORIGINAL, 1); + testRunner.assertTransferCount(PutHTMLElement.REL_NOT_FOUND, 0); + + List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(PutHTMLElement.REL_SUCCESS); + assertTrue(ffs.size() == 1); + String data = new String(testRunner.getContentAsByteArray(ffs.get(0))); + + //Contents will be the entire HTML doc. So lets use Jsoup again just the grab the element we want. 
+ Document doc = Jsoup.parse(data); + Elements eles = doc.select("#put"); + Element ele = eles.get(0); + + assertTrue(StringUtils.equals("<p>modified value</p> \n<a href=\"httpd://localhost\"></a>", ele.html())); + } + + @Test + public void testAppendPElementToDiv() throws Exception { + final String MOD_VALUE = "<p>modified value</p>"; + testRunner.setProperty(PutHTMLElement.CSS_SELECTOR, "#put"); + testRunner.setProperty(PutHTMLElement.PUT_LOCATION_TYPE, PutHTMLElement.APPEND_ELEMENT); + testRunner.setProperty(PutHTMLElement.PUT_VALUE, MOD_VALUE); + + ProcessSession session = testRunner.getProcessSessionFactory().createSession(); + FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session); + + testRunner.enqueue(ff); + testRunner.run(); + + testRunner.assertTransferCount(PutHTMLElement.REL_SUCCESS, 1); + testRunner.assertTransferCount(PutHTMLElement.REL_FAILURE, 0); + testRunner.assertTransferCount(PutHTMLElement.REL_ORIGINAL, 1); + testRunner.assertTransferCount(PutHTMLElement.REL_NOT_FOUND, 0); + + List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(PutHTMLElement.REL_SUCCESS); + assertTrue(ffs.size() == 1); + String data = new String(testRunner.getContentAsByteArray(ffs.get(0))); + + //Contents will be the entire HTML doc. So lets use Jsoup again just the grab the element we want. 
+ Document doc = Jsoup.parse(data); + Elements eles = doc.select("#put"); + Element ele = eles.get(0); + + assertTrue(StringUtils.equals("<a href=\"httpd://localhost\"></a> \n" + + "<p>modified value</p>", ele.html())); + } + +} http://git-wip-us.apache.org/repos/asf/nifi/blob/c82fc18f/nifi-nar-bundles/nifi-html-bundle/pom.xml ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-html-bundle/pom.xml b/nifi-nar-bundles/nifi-html-bundle/pom.xml new file mode 100644 index 0000000..186fef3 --- /dev/null +++ b/nifi-nar-bundles/nifi-html-bundle/pom.xml @@ -0,0 +1,43 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-nar-bundles</artifactId> + <version>0.4.0-SNAPSHOT</version> + </parent> + + <artifactId>nifi-html-bundle</artifactId> + <packaging>pom</packaging> + + <modules> + <module>nifi-html-processors</module> + <module>nifi-html-nar</module> + </modules> + + <dependencyManagement> + <dependencies> + <dependency> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-html-processors</artifactId> + <type>nar</type> + </dependency> + </dependencies> + </dependencyManagement> + +</project> http://git-wip-us.apache.org/repos/asf/nifi/blob/c82fc18f/nifi-nar-bundles/pom.xml ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/pom.xml b/nifi-nar-bundles/pom.xml index 4c0925f..5e3a97c 100644 --- a/nifi-nar-bundles/pom.xml +++ b/nifi-nar-bundles/pom.xml @@ -42,12 +42,13 @@ <module>nifi-language-translation-bundle</module> <module>nifi-mongodb-bundle</module> <module>nifi-flume-bundle</module> - <module>nifi-hbase-bundle</module> + <module>nifi-hbase-bundle</module> <module>nifi-ambari-bundle</module> <module>nifi-image-bundle</module> <module>nifi-avro-bundle</module> <module>nifi-couchbase-bundle</module> <module>nifi-azure-bundle</module> + <module>nifi-html-bundle</module> </modules> <dependencyManagement> <dependencies> http://git-wip-us.apache.org/repos/asf/nifi/blob/c82fc18f/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 8a8cdb0..5efc0c6 100644 --- a/pom.xml +++ b/pom.xml @@ -825,6 +825,12 @@ </dependency> <dependency> <groupId>org.apache.nifi</groupId> + <artifactId>nifi-html-nar</artifactId> + <version>0.4.0-SNAPSHOT</version> + <type>nar</type> + 
</dependency> + <dependency> + <groupId>org.apache.nifi</groupId> <artifactId>nifi-kite-nar</artifactId> <version>0.4.0-SNAPSHOT</version> <type>nar</type> @@ -1375,4 +1381,4 @@ </build> </profile> </profiles> -</project> +</project> \ No newline at end of file
