GitHub user fhueske commented on a diff in the pull request:

    https://github.com/apache/flink/pull/426#discussion_r27018833
  
    --- Diff: 
flink-scala/src/main/java/org/apache/flink/api/scala/operators/ScalaCsvInputFormat.java
 ---
    @@ -19,66 +19,91 @@
     package org.apache.flink.api.scala.operators;
     
     
    -import com.google.common.base.Charsets;
     import com.google.common.base.Preconditions;
    -
     import org.apache.flink.api.common.ExecutionConfig;
     import org.apache.flink.api.common.io.GenericCsvInputFormat;
     import org.apache.flink.api.common.typeinfo.TypeInformation;
    +import org.apache.flink.api.java.typeutils.PojoTypeInfo;
     import org.apache.flink.api.java.typeutils.TupleTypeInfoBase;
     import org.apache.flink.api.java.typeutils.runtime.TupleSerializerBase;
     import org.apache.flink.core.fs.FileInputSplit;
     import org.apache.flink.core.fs.Path;
    -import org.apache.flink.types.parser.FieldParser;
    -import org.apache.flink.util.StringUtils;
     
    +import org.apache.flink.types.parser.FieldParser;
     import org.slf4j.Logger;
     import org.slf4j.LoggerFactory;
     
     import java.io.IOException;
    -import java.nio.charset.Charset;
    -import java.nio.charset.IllegalCharsetNameException;
    -import java.nio.charset.UnsupportedCharsetException;
    -import java.util.Map;
    -import java.util.TreeMap;
    +import java.lang.reflect.Field;
    +import java.util.Arrays;
     
    -import scala.Product;
    -
    -public class ScalaCsvInputFormat<OUT extends Product> extends 
GenericCsvInputFormat<OUT> {
    +public class ScalaCsvInputFormat<OUT> extends GenericCsvInputFormat<OUT> {
     
        private static final long serialVersionUID = 1L;
     
        private static final Logger LOG = 
LoggerFactory.getLogger(ScalaCsvInputFormat.class);
    -   
    -   private transient Object[] parsedValues;
    -   
    -   // To speed up readRecord processing. Used to find windows line endings.
    -   // It is set when open so that readRecord does not have to evaluate it
    -   private boolean lineDelimiterIsLinebreak = false;
     
    -   private final TupleSerializerBase<OUT> serializer;
    +   private transient Object[] parsedValues;
     
    -   private byte[] commentPrefix = null;
    +   private final TupleSerializerBase<OUT> tupleSerializer;
     
    -   private transient int commentCount;
    -   private transient int invalidLineCount;
    +   private Class<OUT> pojoTypeClass = null;
    +   private String[] pojoFieldsName = null;
    +   private transient Field[] pojoFields = null;
    +   private transient PojoTypeInfo<OUT> pojoTypeInfo = null;
     
        public ScalaCsvInputFormat(Path filePath, TypeInformation<OUT> 
typeInfo) {
                super(filePath);
     
    -           if (!(typeInfo.isTupleType())) {
    -                   throw new UnsupportedOperationException("This only 
works on tuple types.");
    +           Class<?>[] classes = new Class[typeInfo.getArity()];
    +
    +           if (typeInfo instanceof TupleTypeInfoBase) {
    +                   TupleTypeInfoBase<OUT> tupleType = 
(TupleTypeInfoBase<OUT>) typeInfo;
    +                   // We can use an empty config here, since we only use 
the serializer to create
    +                   // the top-level case class
    +                   tupleSerializer = (TupleSerializerBase<OUT>) 
tupleType.createSerializer(new ExecutionConfig());
    +
    +                   for (int i = 0; i < tupleType.getArity(); i++) {
    +                           classes[i] = 
tupleType.getTypeAt(i).getTypeClass();
    +                   }
    +
    +                   setFieldTypes(classes);
    +           } else {
    +                   tupleSerializer = null;
    +                   pojoTypeInfo = (PojoTypeInfo<OUT>) typeInfo;
    +                   pojoTypeClass = typeInfo.getTypeClass();
    +                   pojoFieldsName = pojoTypeInfo.getFieldNames();
    +
    +                   for (int i = 0, arity = pojoTypeInfo.getArity(); i < 
arity; i++) {
    +                           classes[i] = 
pojoTypeInfo.getTypeAt(i).getTypeClass();
    +                   }
    +
    +                   setFieldTypes(classes);
    +                   setFieldsOrder(pojoFieldsName);
    +           }
    +   }
    +
    +   public void setFieldsOrder(String[] fieldsOrder) {
    +           Preconditions.checkNotNull(pojoTypeClass, "Field ordering 
feature can be used only with POJO fields.");
    --- End diff --
    
    Use the same error messages as in the Java CsvInputFormat.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

Reply via email to