[
https://issues.apache.org/jira/browse/NIFI-3031?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15670600#comment-15670600
]
ASF GitHub Bot commented on NIFI-3031:
--------------------------------------
Github user mattyb149 commented on a diff in the pull request:
https://github.com/apache/nifi/pull/1217#discussion_r87858807
--- Diff:
nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHiveQLProcessor.java
---
@@ -18,18 +18,203 @@
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.dbcp.hive.HiveDBCPService;
+import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.io.InputStreamCallback;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.stream.io.StreamUtils;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.math.BigDecimal;
+import java.nio.charset.Charset;
+import java.sql.Time;
+import java.sql.Timestamp;
+import java.sql.Date;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Types;
+
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
/**
* An abstract base class for HiveQL processors to share common data,
methods, etc.
*/
public abstract class AbstractHiveQLProcessor extends AbstractProcessor {
+ protected static final Pattern HIVEQL_TYPE_ATTRIBUTE_PATTERN =
Pattern.compile("hiveql\\.args\\.(\\d+)\\.type");
+ protected static final Pattern NUMBER_PATTERN =
Pattern.compile("-?\\d+");
+
public static final PropertyDescriptor HIVE_DBCP_SERVICE = new
PropertyDescriptor.Builder()
.name("Hive Database Connection Pooling Service")
.description("The Hive Controller Service that is used to
obtain connection(s) to the Hive database")
.required(true)
.identifiesControllerService(HiveDBCPService.class)
.build();
+ public static final PropertyDescriptor CHARSET = new
PropertyDescriptor.Builder()
+ .name("hive-charset")
+ .displayName("Character Set")
+ .description("Specifies the character set of the record data.")
+ .required(true)
+ .defaultValue("UTF-8")
+ .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
+ .build();
+
+ /**
+ * Determines the HiveQL statement that should be executed for the
given FlowFile
+ *
+ * @param session the session that can be used to access the given
FlowFile
+ * @param flowFile the FlowFile whose HiveQL statement should be
executed
+ * @return the HiveQL that is associated with the given FlowFile
+ */
+ protected String getHiveQL(final ProcessSession session, final
FlowFile flowFile, final Charset charset) {
+ // Read the HiveQL from the FlowFile's content
+ final byte[] buffer = new byte[(int) flowFile.getSize()];
+ session.read(flowFile, new InputStreamCallback() {
+ @Override
+ public void process(final InputStream in) throws IOException {
+ StreamUtils.fillBuffer(in, buffer);
+ }
+ });
+
+ // Create the PreparedStatement to use for this FlowFile.
+ return new String(buffer, charset);
+ }
+
+ private class ParameterHolder {
+ String attributeName;
+ int jdbcType;
+ String value;
+ }
+
+ /**
+ * Sets all of the appropriate parameters on the given
PreparedStatement, based on the given FlowFile attributes.
+ *
+ * @param stmt the statement to set the parameters on
+ * @param attributes the attributes from which to derive parameter
indices, values, and types
+ * @throws SQLException if the PreparedStatement throws a SQLException
when the appropriate setter is called
+ */
+ protected int setParameters(int base, final PreparedStatement stmt,
int paramCount, final Map<String, String> attributes) throws SQLException {
+
+ Map<Integer, ParameterHolder> parmMap = new TreeMap<Integer,
ParameterHolder>();
+
+// int curr = 1;
--- End diff --
General comment: remove all dead code (e.g., comments that aren't
documentation)
> Support Multi-Statement Scripts in the PutHiveQL Processor
> ----------------------------------------------------------
>
> Key: NIFI-3031
> URL: https://issues.apache.org/jira/browse/NIFI-3031
> Project: Apache NiFi
> Issue Type: Improvement
> Reporter: Matt Burgess
>
> Trying to use the PutHiveQL processor to execute a HiveQL script that
> contains multiple statements.
> IE:
> USE my_database;
> FROM my_database_src.base_table
> INSERT OVERWRITE refined_table
> SELECT *;
> -- or --
> use my_database;
> create temporary table WORKING as
> select a,b,c from RAW;
> FROM RAW
> INSERT OVERWRITE refined_table
> SELECT *;
> The current implementation doesn't even like it when you have a semicolon at
> the end of the single statement.
> Either use a default delimiter like a semicolon to mark the boundaries of a
> statement within the file or allow them to define their own.
> This enables the building of pipelines that are testable by not embedding
> HiveQL into a product; rather, sourcing it from files. And the scripts can
> be complex. Each statement should run in a linear manner and be part of the
> same JDBC session to ensure things like "temporary" tables will work.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)