Hey all,

I imported a table from PostgreSQL using Sqoop, generating a
SequenceFile. I'm now trying to read this file in Pig using
piggybank's SequenceFileLoader. The load function doesn't seem to
recognize the columns that were of type text in PostgreSQL. When I
dump the relation, I get the key (integer) but none of the other fields.

Here's the pig script:

-- Jars used by the script: piggybank (home of SequenceFileLoader),
-- /pesquisa.jar (presumably the Sqoop-generated record class -- confirm),
-- and the Sqoop runtime.
REGISTER /usr/lib/pig/contrib/piggybank/java/piggybank.jar;
REGISTER /pesquisa.jar;
REGISTER /usr/lib/sqoop/sqoop-1.3.0-cdh3u3.jar;

-- Short alias for the piggybank loader class.
DEFINE SequenceFileLoader org.apache.pig.piggybank.storage.SequenceFileLoader();

-- Load the imported data, declaring one int field followed by seven
-- chararray fields.
univ = LOAD 'universities'
       USING SequenceFileLoader
       AS (a:int, b:chararray, c:chararray, d:chararray,
           e:chararray, f:chararray, g:chararray, h:chararray);

DUMP univ;

The record class that Sqoop generated for the SequenceFile seems to
store the text columns as Java String fields (class copied below).

I'm new to Pig (love it) and Hadoop. I just want a simple way to
process my data (mostly text documents), which lives in PostgreSQL,
with Pig. I don't want to load directly from PostgreSQL for performance
reasons. I tried importing to HBase but had too many problems. I'm now
trying with SequenceFiles from Sqoop. I'd be happy to hear suggestions
of better approaches.


Thanks!
Luiz

--
// ORM class for universities
// WARNING: This class is AUTO-GENERATED. Modify at your own risk.
package pesquisa;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.lib.db.DBWritable;
import com.cloudera.sqoop.lib.JdbcWritableBridge;
import com.cloudera.sqoop.lib.DelimiterSet;
import com.cloudera.sqoop.lib.FieldFormatter;
import com.cloudera.sqoop.lib.RecordParser;
import com.cloudera.sqoop.lib.BooleanParser;
import com.cloudera.sqoop.lib.BlobRef;
import com.cloudera.sqoop.lib.ClobRef;
import com.cloudera.sqoop.lib.LargeObjectLoader;
import com.cloudera.sqoop.lib.SqoopRecord;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.sql.Date;
import java.sql.Time;
import java.sql.Timestamp;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class universities extends SqoopRecord  implements DBWritable, Writable {
  private final int PROTOCOL_VERSION = 3;
  public int getClassFormatVersion() { return PROTOCOL_VERSION; }
  protected ResultSet __cur_result_set;
  private Integer chave;
  public Integer get_chave() {
    return chave;
  }
  public void set_chave(Integer chave) {
    this.chave = chave;
  }
  public universities with_chave(Integer chave) {
    this.chave = chave;
    return this;
  }
  private String nome;
  public String get_nome() {
    return nome;
  }
  public void set_nome(String nome) {
    this.nome = nome;
  }
  public universities with_nome(String nome) {
    this.nome = nome;
    return this;
  }
  private String country;
  public String get_country() {
    return country;
  }
  public void set_country(String country) {
    this.country = country;
  }
  public universities with_country(String country) {
    this.country = country;
    return this;
  }
  private String class_size;
  public String get_class_size() {
    return class_size;
  }
  public void set_class_size(String class_size) {
    this.class_size = class_size;
  }
  public universities with_class_size(String class_size) {
    this.class_size = class_size;
    return this;
  }
  private String class_visibility;
  public String get_class_visibility() {
    return class_visibility;
  }
  public void set_class_visibility(String class_visibility) {
    this.class_visibility = class_visibility;
  }
  public universities with_class_visibility(String class_visibility) {
    this.class_visibility = class_visibility;
    return this;
  }
  private String class_richfiles;
  public String get_class_richfiles() {
    return class_richfiles;
  }
  public void set_class_richfiles(String class_richfiles) {
    this.class_richfiles = class_richfiles;
  }
  public universities with_class_richfiles(String class_richfiles) {
    this.class_richfiles = class_richfiles;
    return this;
  }
  private String class_scholar;
  public String get_class_scholar() {
    return class_scholar;
  }
  public void set_class_scholar(String class_scholar) {
    this.class_scholar = class_scholar;
  }
  public universities with_class_scholar(String class_scholar) {
    this.class_scholar = class_scholar;
    return this;
  }
  private String site;
  public String get_site() {
    return site;
  }
  public void set_site(String site) {
    this.site = site;
  }
  public universities with_site(String site) {
    this.site = site;
    return this;
  }
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof universities)) {
      return false;
    }
    universities that = (universities) o;
    boolean equal = true;
    equal = equal && (this.chave == null ? that.chave == null :
this.chave.equals(that.chave));
    equal = equal && (this.nome == null ? that.nome == null :
this.nome.equals(that.nome));
    equal = equal && (this.country == null ? that.country == null :
this.country.equals(that.country));
    equal = equal && (this.class_size == null ? that.class_size ==
null : this.class_size.equals(that.class_size));
    equal = equal && (this.class_visibility == null ?
that.class_visibility == null :
this.class_visibility.equals(that.class_visibility));
    equal = equal && (this.class_richfiles == null ?
that.class_richfiles == null :
this.class_richfiles.equals(that.class_richfiles));
    equal = equal && (this.class_scholar == null ? that.class_scholar
== null : this.class_scholar.equals(that.class_scholar));
    equal = equal && (this.site == null ? that.site == null :
this.site.equals(that.site));
    return equal;
  }
  public void readFields(ResultSet __dbResults) throws SQLException {
    this.__cur_result_set = __dbResults;
    this.chave = JdbcWritableBridge.readInteger(1, __dbResults);
    this.nome = JdbcWritableBridge.readString(2, __dbResults);
    this.country = JdbcWritableBridge.readString(3, __dbResults);
    this.class_size = JdbcWritableBridge.readString(4, __dbResults);
    this.class_visibility = JdbcWritableBridge.readString(5, __dbResults);
    this.class_richfiles = JdbcWritableBridge.readString(6, __dbResults);
    this.class_scholar = JdbcWritableBridge.readString(7, __dbResults);
    this.site = JdbcWritableBridge.readString(8, __dbResults);
  }
  public void loadLargeObjects(LargeObjectLoader __loader)
      throws SQLException, IOException, InterruptedException {
  }
  public void write(PreparedStatement __dbStmt) throws SQLException {
    write(__dbStmt, 0);
  }

  public int write(PreparedStatement __dbStmt, int __off) throws SQLException {
    JdbcWritableBridge.writeInteger(chave, 1 + __off, 4, __dbStmt);
    JdbcWritableBridge.writeString(nome, 2 + __off, 12, __dbStmt);
    JdbcWritableBridge.writeString(country, 3 + __off, 12, __dbStmt);
    JdbcWritableBridge.writeString(class_size, 4 + __off, 12, __dbStmt);
    JdbcWritableBridge.writeString(class_visibility, 5 + __off, 12, __dbStmt);
    JdbcWritableBridge.writeString(class_richfiles, 6 + __off, 12, __dbStmt);
    JdbcWritableBridge.writeString(class_scholar, 7 + __off, 12, __dbStmt);
    JdbcWritableBridge.writeString(site, 8 + __off, 12, __dbStmt);
    return 8;
  }
  public void readFields(DataInput __dataIn) throws IOException {
    if (__dataIn.readBoolean()) {
        this.chave = null;
    } else {
    this.chave = Integer.valueOf(__dataIn.readInt());
    }
    if (__dataIn.readBoolean()) {
        this.nome = null;
    } else {
    this.nome = Text.readString(__dataIn);
    }
    if (__dataIn.readBoolean()) {
        this.country = null;
    } else {
    this.country = Text.readString(__dataIn);
    }
    if (__dataIn.readBoolean()) {
        this.class_size = null;
    } else {
    this.class_size = Text.readString(__dataIn);
    }
    if (__dataIn.readBoolean()) {
        this.class_visibility = null;
    } else {
    this.class_visibility = Text.readString(__dataIn);
    }
    if (__dataIn.readBoolean()) {
        this.class_richfiles = null;
    } else {
    this.class_richfiles = Text.readString(__dataIn);
    }
    if (__dataIn.readBoolean()) {
        this.class_scholar = null;
    } else {
    this.class_scholar = Text.readString(__dataIn);
    }
    if (__dataIn.readBoolean()) {
        this.site = null;
    } else {
    this.site = Text.readString(__dataIn);
    }
  }
  public void write(DataOutput __dataOut) throws IOException {
    if (null == this.chave) {
        __dataOut.writeBoolean(true);
    } else {
        __dataOut.writeBoolean(false);
    __dataOut.writeInt(this.chave);
    }
    if (null == this.nome) {
        __dataOut.writeBoolean(true);
    } else {
        __dataOut.writeBoolean(false);
    Text.writeString(__dataOut, nome);
    }
    if (null == this.country) {
        __dataOut.writeBoolean(true);
    } else {
        __dataOut.writeBoolean(false);
    Text.writeString(__dataOut, country);
    }
    if (null == this.class_size) {
        __dataOut.writeBoolean(true);
    } else {
        __dataOut.writeBoolean(false);
    Text.writeString(__dataOut, class_size);
    }
    if (null == this.class_visibility) {
        __dataOut.writeBoolean(true);
    } else {
        __dataOut.writeBoolean(false);
    Text.writeString(__dataOut, class_visibility);
    }
    if (null == this.class_richfiles) {
        __dataOut.writeBoolean(true);
    } else {
        __dataOut.writeBoolean(false);
    Text.writeString(__dataOut, class_richfiles);
    }
    if (null == this.class_scholar) {
        __dataOut.writeBoolean(true);
    } else {
        __dataOut.writeBoolean(false);
    Text.writeString(__dataOut, class_scholar);
    }
    if (null == this.site) {
        __dataOut.writeBoolean(true);
    } else {
        __dataOut.writeBoolean(false);
    Text.writeString(__dataOut, site);
    }
  }
  private final DelimiterSet __outputDelimiters = new
DelimiterSet((char) 44, (char) 10, (char) 0, (char) 0, false);
  public String toString() {
    return toString(__outputDelimiters, true);
  }
  public String toString(DelimiterSet delimiters) {
    return toString(delimiters, true);
  }
  public String toString(boolean useRecordDelim) {
    return toString(__outputDelimiters, useRecordDelim);
  }
  public String toString(DelimiterSet delimiters, boolean useRecordDelim) {
    StringBuilder __sb = new StringBuilder();
    char fieldDelim = delimiters.getFieldsTerminatedBy();
    __sb.append(FieldFormatter.escapeAndEnclose(chave==null?"null":""
+ chave, delimiters));
    __sb.append(fieldDelim);
    __sb.append(FieldFormatter.escapeAndEnclose(nome==null?"null":nome,
delimiters));
    __sb.append(fieldDelim);
    __sb.append(FieldFormatter.escapeAndEnclose(country==null?"null":country,
delimiters));
    __sb.append(fieldDelim);
    
__sb.append(FieldFormatter.escapeAndEnclose(class_size==null?"null":class_size,
delimiters));
    __sb.append(fieldDelim);
    
__sb.append(FieldFormatter.escapeAndEnclose(class_visibility==null?"null":class_visibility,
delimiters));
    __sb.append(fieldDelim);
    
__sb.append(FieldFormatter.escapeAndEnclose(class_richfiles==null?"null":class_richfiles,
delimiters));
    __sb.append(fieldDelim);
    
__sb.append(FieldFormatter.escapeAndEnclose(class_scholar==null?"null":class_scholar,
delimiters));
    __sb.append(fieldDelim);
    __sb.append(FieldFormatter.escapeAndEnclose(site==null?"null":site,
delimiters));
    if (useRecordDelim) {
      __sb.append(delimiters.getLinesTerminatedBy());
    }
    return __sb.toString();
  }
  private final DelimiterSet __inputDelimiters = new
DelimiterSet((char) 44, (char) 10, (char) 0, (char) 0, false);
  private RecordParser __parser;
  public void parse(Text __record) throws RecordParser.ParseError {
    if (null == this.__parser) {
      this.__parser = new RecordParser(__inputDelimiters);
    }
    List<String> __fields = this.__parser.parseRecord(__record);
    __loadFromFields(__fields);
  }

  public void parse(CharSequence __record) throws RecordParser.ParseError {
    if (null == this.__parser) {
      this.__parser = new RecordParser(__inputDelimiters);
    }
    List<String> __fields = this.__parser.parseRecord(__record);
    __loadFromFields(__fields);
  }

  public void parse(byte [] __record) throws RecordParser.ParseError {
    if (null == this.__parser) {
      this.__parser = new RecordParser(__inputDelimiters);
    }
    List<String> __fields = this.__parser.parseRecord(__record);
    __loadFromFields(__fields);
  }

  public void parse(char [] __record) throws RecordParser.ParseError {
    if (null == this.__parser) {
      this.__parser = new RecordParser(__inputDelimiters);
    }
    List<String> __fields = this.__parser.parseRecord(__record);
    __loadFromFields(__fields);
  }

  public void parse(ByteBuffer __record) throws RecordParser.ParseError {
    if (null == this.__parser) {
      this.__parser = new RecordParser(__inputDelimiters);
    }
    List<String> __fields = this.__parser.parseRecord(__record);
    __loadFromFields(__fields);
  }

  public void parse(CharBuffer __record) throws RecordParser.ParseError {
    if (null == this.__parser) {
      this.__parser = new RecordParser(__inputDelimiters);
    }
    List<String> __fields = this.__parser.parseRecord(__record);
    __loadFromFields(__fields);
  }

  private void __loadFromFields(List<String> fields) {
    Iterator<String> __it = fields.listIterator();
    String __cur_str;
    __cur_str = __it.next();
    if (__cur_str.equals("null") || __cur_str.length() == 0) {
this.chave = null; } else {
      this.chave = Integer.valueOf(__cur_str);
    }

    __cur_str = __it.next();
    if (__cur_str.equals("null")) { this.nome = null; } else {
      this.nome = __cur_str;
    }

    __cur_str = __it.next();
    if (__cur_str.equals("null")) { this.country = null; } else {
      this.country = __cur_str;
    }

    __cur_str = __it.next();
    if (__cur_str.equals("null")) { this.class_size = null; } else {
      this.class_size = __cur_str;
    }

    __cur_str = __it.next();
    if (__cur_str.equals("null")) { this.class_visibility = null; } else {
      this.class_visibility = __cur_str;
    }

    __cur_str = __it.next();
    if (__cur_str.equals("null")) { this.class_richfiles = null; } else {
      this.class_richfiles = __cur_str;
    }

    __cur_str = __it.next();
    if (__cur_str.equals("null")) { this.class_scholar = null; } else {
      this.class_scholar = __cur_str;
    }

    __cur_str = __it.next();
    if (__cur_str.equals("null")) { this.site = null; } else {
      this.site = __cur_str;
    }

  }

  public Object clone() throws CloneNotSupportedException {
    universities o = (universities) super.clone();
    return o;
  }

  public Map<String, Object> getFieldMap() {
    Map<String, Object> __sqoop$field_map = new TreeMap<String, Object>();
    __sqoop$field_map.put("chave", this.chave);
    __sqoop$field_map.put("nome", this.nome);
    __sqoop$field_map.put("country", this.country);
    __sqoop$field_map.put("class_size", this.class_size);
    __sqoop$field_map.put("class_visibility", this.class_visibility);
    __sqoop$field_map.put("class_richfiles", this.class_richfiles);
    __sqoop$field_map.put("class_scholar", this.class_scholar);
    __sqoop$field_map.put("site", this.site);
    return __sqoop$field_map;
  }

  public void setField(String __fieldName, Object __fieldVal) {
    if ("chave".equals(__fieldName)) {
      this.chave = (Integer) __fieldVal;
    }
    else    if ("nome".equals(__fieldName)) {
      this.nome = (String) __fieldVal;
    }
    else    if ("country".equals(__fieldName)) {
      this.country = (String) __fieldVal;
    }
    else    if ("class_size".equals(__fieldName)) {
      this.class_size = (String) __fieldVal;
    }
    else    if ("class_visibility".equals(__fieldName)) {
      this.class_visibility = (String) __fieldVal;
    }
    else    if ("class_richfiles".equals(__fieldName)) {
      this.class_richfiles = (String) __fieldVal;
    }
    else    if ("class_scholar".equals(__fieldName)) {
      this.class_scholar = (String) __fieldVal;
    }
    else    if ("site".equals(__fieldName)) {
      this.site = (String) __fieldVal;
    }
    else {
      throw new RuntimeException("No such field: " + __fieldName);
    }
  }
}

Reply via email to