flume: Fix RegexpKuduOperationsProducer javadocs

This fixes some oversights in the Javadoc documentation of the Flume RegexpKuduOperationsProducer. It also makes the JDK8 javadoc linter pass (for this file only).

It also makes a minor improvement to avoid calling toLowerCase() in a loop and changes a couple of loose strings to be constants. Change-Id: I47fc65808f03e27872900a58b686b9204e46a2c6 Reviewed-on: http://gerrit.cloudera.org:8080/6020 Reviewed-by: Will Berkeley <[email protected]> Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/987cbc23 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/987cbc23 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/987cbc23 Branch: refs/heads/master Commit: 987cbc23f35e424fbfb1b5a7f9df3377d7dc46da Parents: 79ffb17 Author: Mike Percy <[email protected]> Authored: Wed Feb 15 12:59:07 2017 -0800 Committer: Mike Percy <[email protected]> Committed: Wed Feb 15 22:48:09 2017 +0000 ---------------------------------------------------------------------- .../kudu/flume/sink/KuduOperationsProducer.java | 4 +- .../sink/RegexpKuduOperationsProducer.java | 80 +++++++++++--------- 2 files changed, 46 insertions(+), 38 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/987cbc23/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/KuduOperationsProducer.java ---------------------------------------------------------------------- diff --git a/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/KuduOperationsProducer.java b/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/KuduOperationsProducer.java index b38f387..6c9c3cd 100644 --- a/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/KuduOperationsProducer.java +++ b/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/KuduOperationsProducer.java @@ -45,8 +45,8 @@ public interface KuduOperationsProducer extends Configurable, AutoCloseable { /** * Returns the operations that should be written to Kudu as a result of this event. 
- * @return List of {@link org.apache.kudu.client.Operation} that - * should be written to Kudu + * @param event Event to convert to one or more Operations + * @return List of Operations that should be written to Kudu */ List<Operation> getOperations(Event event); http://git-wip-us.apache.org/repos/asf/kudu/blob/987cbc23/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/RegexpKuduOperationsProducer.java ---------------------------------------------------------------------- diff --git a/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/RegexpKuduOperationsProducer.java b/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/RegexpKuduOperationsProducer.java index 4e261ff..14fd345 100644 --- a/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/RegexpKuduOperationsProducer.java +++ b/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/RegexpKuduOperationsProducer.java @@ -45,71 +45,80 @@ import org.apache.kudu.client.PartialRow; import org.apache.kudu.client.Upsert; /** - * A regular expression serializer that generates one {@link Insert} or - * {@link Upsert} per {@link Event} by parsing the payload into values using a - * regular expression. Values are coerced to the proper column types. + * A regular expression operations producer that generates one or more Kudu + * {@link Insert} or {@link Upsert} operations per Flume {@link Event} by + * parsing the event {@code body} using a regular expression. Values are + * coerced to the types of the named columns in the Kudu table. 
* - * Example: if the Kudu table has the schema + * <p>Example: If the Kudu table has the schema: * + * <pre> * key INT32 - * name STRING + * name STRING</pre> * - * and producer.pattern is '(?<key>\\d+),(?<name>\w+)', then the - * RegexpKuduOperationsProducer will parse the string + * <p>and {@code producer.pattern = (?<key>\\d+),(?<name>\\w+)} then + * {@code RegexpKuduOperationsProducer} will parse the string: * - * |12345,Mike||54321,Todd| + * <pre>|12345,Mike||54321,Todd|</pre> * - * into the rows (key=12345, name=Mike) and (key=54321, name=Todd). + * into the rows: {@code (key=12345, name=Mike)} and {@code (key=54321, name=Todd)}. * - * Note: this class relies on JDK7 named capturing groups, which are documented - * in {@link Pattern}. + * <p>Note: This class relies on JDK7 named capturing groups, which are + * documented in {@link Pattern}. The name of each capturing group must + * correspond to a column name in the destination Kudu table. * - * <p><strong>Regular Expression Kudu Operations Producer configuration parameters</strong></p> + * <p><strong><code>RegexpKuduOperationsProducer</code> Flume Configuration Parameters</strong></p> * - * <table cellpadding=3 cellspacing=0 border=1> + * <table cellpadding=3 cellspacing=0 border=1 summary="Flume Configuration Parameters"> * <tr> * <th>Property Name</th> * <th>Default</th> * <th>Required?</th> * <th>Description</th> * </tr> - * <tr></tr><td>producer.pattern</td><td></td><td>Yes</td> - * <td>The regular expression used to parse the event body.</td> + * <tr> + * <td>producer.pattern</td> + * <td></td> + * <td>Yes</td> + * <td>The regular expression used to parse the event body.</td> * </tr> * <tr> * <td>producer.charset</td> * <td>utf-8</td> * <td>No</td> - * <td>The charset of the event body.</td> + * <td>The character set of the event body.</td> * </tr> * <tr> * <td>producer.operation</td> * <td>upsert</td> * <td>No</td> - * <td>Operation type used to write the event to Kudu. 
Must be 'insert' or - * 'upsert'.</td> + * <td>Operation type used to write the event to Kudu. Must be either + * {@code insert} or {@code upsert}.</td> * </tr> * <tr> * <td>producer.skipMissingColumn</td> * <td>false</td> * <td>No</td> - * <td>Whether to ignore a column if it has no corresponding capture group, or - * instead completely abandon the attempt to parse and insert/upsert the row. + * <td>What to do if a column in the Kudu table has no corresponding capture group. + * If set to {@code true}, a warning message is logged and the operation is still attempted. + * If set to {@code false}, an exception is thrown and the sink will not process the + * {@code Event}, causing a Flume {@code Channel} rollback. * </tr> * <tr> * <td>producer.skipBadColumnValue</td> * <td>false</td> * <td>No</td> - * <td>Whether to omit a column value from the row if its raw value cannot be - * coerced to the right type, or instead complete abandon the attempt to parse - * and insert/operation the row.</td> + * <td>What to do if a value in the pattern match cannot be coerced to the required type. + * If set to {@code true}, a warning message is logged and the operation is still attempted. + * If set to {@code false}, an exception is thrown and the sink will not process the + * {@code Event}, causing a Flume {@code Channel} rollback. * </tr> * <tr> * <td>producer.warnUnmatchedRows</td> * <td>true</td> * <td>No</td> - * <td>Whether to warn about payloads that do not match the pattern. If this - * option is not set, event bodies with no matches will be silently dropped.</td> + * <td>Whether to log a warning about payloads that do not match the pattern. 
If set to + * {@code false}, event bodies with no matches will be silently dropped.</td> * </tr> * </table> * @@ -119,12 +128,15 @@ import org.apache.kudu.client.Upsert; @InterfaceStability.Evolving public class RegexpKuduOperationsProducer implements KuduOperationsProducer { private static final Logger logger = LoggerFactory.getLogger(RegexpKuduOperationsProducer.class); + private static final String INSERT = "insert"; + private static final String UPSERT = "upsert"; + private static final List<String> validOperations = Lists.newArrayList(UPSERT, INSERT); public static final String PATTERN_PROP = "pattern"; public static final String ENCODING_PROP = "encoding"; public static final String DEFAULT_ENCODING = "utf-8"; public static final String OPERATION_PROP = "operation"; - public static final String DEFAULT_OPERATION = "upsert"; + public static final String DEFAULT_OPERATION = UPSERT; public static final String SKIP_MISSING_COLUMN_PROP = "skipMissingColumn"; public static final boolean DEFAULT_SKIP_MISSING_COLUMN = false; public static final String SKIP_BAD_COLUMN_VALUE_PROP = "skipBadColumnValue"; @@ -132,9 +144,6 @@ public class RegexpKuduOperationsProducer implements KuduOperationsProducer { public static final String WARN_UNMATCHED_ROWS_PROP = "skipUnmatchedRows"; public static final boolean DEFAULT_WARN_UNMATCHED_ROWS = true; - private static final List<String> validOperations = - Lists.newArrayList("upsert", "insert"); - private KuduTable table; private Pattern pattern; private Charset charset; @@ -165,10 +174,9 @@ public class RegexpKuduOperationsProducer implements KuduOperationsProducer { throw new FlumeException( String.format("Invalid or unsupported charset %s", charsetName), e); } - operation = context.getString(OPERATION_PROP, - DEFAULT_OPERATION); + operation = context.getString(OPERATION_PROP, DEFAULT_OPERATION).toLowerCase(); Preconditions.checkArgument( - validOperations.contains(operation.toLowerCase()), + validOperations.contains(operation), 
"Unrecognized operation '%s'", operation); skipMissingColumn = context.getBoolean(SKIP_MISSING_COLUMN_PROP, @@ -194,16 +202,16 @@ public class RegexpKuduOperationsProducer implements KuduOperationsProducer { while (m.find()) { match = true; Operation op; - switch (operation.toLowerCase()) { - case "upsert": + switch (operation) { + case UPSERT: op = table.newUpsert(); break; - case "insert": + case INSERT: op = table.newInsert(); break; default: throw new FlumeException( - String.format("Unrecognized operation type '%s' in getOperations: " + + String.format("Unrecognized operation type '%s' in getOperations(): " + "this should never happen!", operation)); } PartialRow row = op.getRow();
