Github user markap14 commented on a diff in the pull request:

    https://github.com/apache/nifi/pull/158#discussion_r49475763
  
    --- Diff: 
nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/main/java/org/apache/nifi/processors/kite/InferAvroSchemaFromCSV.java
 ---
    @@ -0,0 +1,235 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *   http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing,
    + * software distributed under the License is distributed on an
    + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    + * KIND, either express or implied.  See the License for the
    + * specific language governing permissions and limitations
    + * under the License.
    + */
    +package org.apache.nifi.processors.kite;
    +
    +
    +import java.io.IOException;
    +import java.io.BufferedReader;
    +import java.io.OutputStream;
    +import java.io.InputStream;
    +import java.io.InputStreamReader;
    +import java.util.List;
    +import java.util.Set;
    +import java.util.HashSet;
    +import java.util.Collections;
    +import java.util.ArrayList;
    +import java.util.concurrent.atomic.AtomicReference;
    +
    +import org.apache.nifi.annotation.behavior.InputRequirement;
    +import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
    +import org.apache.nifi.annotation.documentation.CapabilityDescription;
    +import org.apache.nifi.annotation.documentation.SeeAlso;
    +import org.apache.nifi.annotation.documentation.Tags;
    +import org.apache.nifi.components.PropertyDescriptor;
    +import org.apache.nifi.flowfile.FlowFile;
    +import org.apache.nifi.processor.ProcessContext;
    +import org.apache.nifi.processor.ProcessSession;
    +import org.apache.nifi.processor.ProcessorInitializationContext;
    +import org.apache.nifi.processor.Relationship;
    +import org.apache.nifi.processor.exception.ProcessException;
    +import org.apache.nifi.processor.io.InputStreamCallback;
    +import org.apache.nifi.processor.io.OutputStreamCallback;
    +import org.apache.nifi.processor.util.StandardValidators;
    +import org.kitesdk.data.spi.filesystem.CSVProperties;
    +import org.kitesdk.data.spi.filesystem.CSVUtil;
    +import org.kitesdk.shaded.com.google.common.collect.ImmutableSet;
    +
    +
    +@Tags({"kite", "csv", "avro", "infer", "schema"})
    +@SeeAlso({InferAvroSchemaFromCSV.class})
    +@InputRequirement(Requirement.INPUT_REQUIRED)
    +@CapabilityDescription("Creates an Avro schema from a CSV file header. The 
header line definition can either be provided" +
    +        "as a property to the processor OR present in the first line of 
CSV in the incoming FlowFile content. If a header" +
    +        " property is specified for this processor no attempt will be made 
to use the header line that may be present" +
    +        " in the incoming CSV FlowFile content.")
    +public class InferAvroSchemaFromCSV
    +        extends AbstractKiteProcessor {
    +
    +    public static final String CSV_DELIMITER = ",";
    +
    +    public static final PropertyDescriptor HEADER_LINE = new 
PropertyDescriptor.Builder()
    +            .name("CSV Header Line")
    +            .description("Comma separated string defining the column names 
expected in the CSV data. " +
    +                    "EX: \"fname,lname,zip,address\"")
    +            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
    +            .build();
    +
    +    public static final PropertyDescriptor HEADER_LINE_SKIP_COUNT = new 
PropertyDescriptor.Builder()
    +            .name("CSV Header Line Skip Count")
    +            .description("Specifies the number of header lines that should 
be skipped when reading the CSV data. If the " +
    +                    " first line of the CSV data is a header line and you 
specify the \"CSV Header Line\" property " +
    +                    "you need to set this vlaue to 1 otherwise the header 
line will be treated as actual data.")
    +            .required(true)
    +            .defaultValue("0")
    +            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
    +            .build();
    +
    +    public static final PropertyDescriptor ESCAPE_STRING = new 
PropertyDescriptor.Builder()
    +            .name("CSV escape string")
    +            .description("String that represents an escape sequence in the 
CSV FlowFile content data.")
    +            .required(true)
    +            .defaultValue("\\")
    +            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
    +            .build();
    +
    +    public static final PropertyDescriptor QUOTE_STRING = new 
PropertyDescriptor.Builder()
    +            .name("CSV quote string")
    +            .description("String that represents a literal quote character 
in the CSV FlowFile content data.")
    +            .required(true)
    +            .defaultValue("'")
    +            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
    +            .build();
    +
    +    public static final PropertyDescriptor RECORD_NAME = new 
PropertyDescriptor.Builder()
    +            .name("Avro Record Name")
    +            .description("Value to be placed in the Avro record schema 
\"name\" field.")
    +            .required(true)
    +            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
    +            .build();
    +
    +    public static final PropertyDescriptor CHARSET = new 
PropertyDescriptor.Builder()
    +            .name("Charset")
    +            .description("Character encoding of CSV data.")
    +            .required(true)
    +            .defaultValue("UTF-8")
    +            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
    +            .build();
    +
    +    public static final PropertyDescriptor PRETTY_AVRO_OUTPUT = new 
PropertyDescriptor.Builder()
    +            .name("Pretty Avro Output")
    +            .description("If true the Avro output will be formatted.")
    +            .required(true)
    +            .defaultValue("true")
    +            .allowableValues("true", "false")
    +            .addValidator(StandardValidators.BOOLEAN_VALIDATOR)
    +            .build();
    +
    +
    +    public static final Relationship REL_SUCCESS = new 
Relationship.Builder().name("success")
    --- End diff --
    
    Would it make sense to call this relationship 'schema' rather than 
'success'? That seems more intuitive to me, but if you prefer 'success' 
I'll leave that decision up to you.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

Reply via email to