Hey all,
I imported a table from PostgreSQL using Sqoop, generating a
SequenceFile. I'm now trying to read this file in Pig using
SequenceFileLoader. The load function doesn't seem to recognize the
columns that were of type text in PostgreSQL. When I dump the
relation, I get the key (integer) but none of the other fields.
Here's the Pig script:
-- Jars needed by the script: piggybank (for the loader), pesquisa.jar
-- (presumably the Sqoop-generated record class), and Sqoop itself.
REGISTER /usr/lib/pig/contrib/piggybank/java/piggybank.jar;
REGISTER /pesquisa.jar;
REGISTER /usr/lib/sqoop/sqoop-1.3.0-cdh3u3.jar;

-- Short alias for the piggybank SequenceFile loader.
DEFINE SequenceFileLoader org.apache.pig.piggybank.storage.SequenceFileLoader();

-- NOTE(review): the schema below expects eight columns per record; confirm
-- that this loader can unpack a SqoopRecord value into separate fields
-- rather than yielding only (key, value) -- TODO verify against piggybank docs.
univ = LOAD 'universities' USING SequenceFileLoader AS (
    a:int,
    b:chararray,
    c:chararray,
    d:chararray,
    e:chararray,
    f:chararray,
    g:chararray,
    h:chararray
);

DUMP univ;
The record class that Sqoop generated for the SequenceFile seems to
treat the text columns as Java String fields (class copied below).
I'm new to Pig (love it) and Hadoop. I just want a simple way to
process my data (mostly text documents) stored in PostgreSQL with
Pig. I don't want to load directly from PostgreSQL for performance
reasons. I tried importing to HBase but had too many problems. I'm now
trying with SequenceFiles from Sqoop. I'd be happy to hear suggestions
for better approaches.
Thanks!
Luiz
--
// ORM class for universities
// WARNING: This class is AUTO-GENERATED. Modify at your own risk.
package pesquisa;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.lib.db.DBWritable;
import com.cloudera.sqoop.lib.JdbcWritableBridge;
import com.cloudera.sqoop.lib.DelimiterSet;
import com.cloudera.sqoop.lib.FieldFormatter;
import com.cloudera.sqoop.lib.RecordParser;
import com.cloudera.sqoop.lib.BooleanParser;
import com.cloudera.sqoop.lib.BlobRef;
import com.cloudera.sqoop.lib.ClobRef;
import com.cloudera.sqoop.lib.LargeObjectLoader;
import com.cloudera.sqoop.lib.SqoopRecord;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.sql.Date;
import java.sql.Time;
import java.sql.Timestamp;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
public class universities extends SqoopRecord implements DBWritable, Writable {
private final int PROTOCOL_VERSION = 3;
public int getClassFormatVersion() { return PROTOCOL_VERSION; }
protected ResultSet __cur_result_set;
private Integer chave;
public Integer get_chave() {
return chave;
}
public void set_chave(Integer chave) {
this.chave = chave;
}
public universities with_chave(Integer chave) {
this.chave = chave;
return this;
}
private String nome;
public String get_nome() {
return nome;
}
public void set_nome(String nome) {
this.nome = nome;
}
public universities with_nome(String nome) {
this.nome = nome;
return this;
}
private String country;
public String get_country() {
return country;
}
public void set_country(String country) {
this.country = country;
}
public universities with_country(String country) {
this.country = country;
return this;
}
private String class_size;
public String get_class_size() {
return class_size;
}
public void set_class_size(String class_size) {
this.class_size = class_size;
}
public universities with_class_size(String class_size) {
this.class_size = class_size;
return this;
}
private String class_visibility;
public String get_class_visibility() {
return class_visibility;
}
public void set_class_visibility(String class_visibility) {
this.class_visibility = class_visibility;
}
public universities with_class_visibility(String class_visibility) {
this.class_visibility = class_visibility;
return this;
}
private String class_richfiles;
public String get_class_richfiles() {
return class_richfiles;
}
public void set_class_richfiles(String class_richfiles) {
this.class_richfiles = class_richfiles;
}
public universities with_class_richfiles(String class_richfiles) {
this.class_richfiles = class_richfiles;
return this;
}
private String class_scholar;
public String get_class_scholar() {
return class_scholar;
}
public void set_class_scholar(String class_scholar) {
this.class_scholar = class_scholar;
}
public universities with_class_scholar(String class_scholar) {
this.class_scholar = class_scholar;
return this;
}
private String site;
public String get_site() {
return site;
}
public void set_site(String site) {
this.site = site;
}
public universities with_site(String site) {
this.site = site;
return this;
}
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof universities)) {
return false;
}
universities that = (universities) o;
boolean equal = true;
equal = equal && (this.chave == null ? that.chave == null :
this.chave.equals(that.chave));
equal = equal && (this.nome == null ? that.nome == null :
this.nome.equals(that.nome));
equal = equal && (this.country == null ? that.country == null :
this.country.equals(that.country));
equal = equal && (this.class_size == null ? that.class_size ==
null : this.class_size.equals(that.class_size));
equal = equal && (this.class_visibility == null ?
that.class_visibility == null :
this.class_visibility.equals(that.class_visibility));
equal = equal && (this.class_richfiles == null ?
that.class_richfiles == null :
this.class_richfiles.equals(that.class_richfiles));
equal = equal && (this.class_scholar == null ? that.class_scholar
== null : this.class_scholar.equals(that.class_scholar));
equal = equal && (this.site == null ? that.site == null :
this.site.equals(that.site));
return equal;
}
public void readFields(ResultSet __dbResults) throws SQLException {
this.__cur_result_set = __dbResults;
this.chave = JdbcWritableBridge.readInteger(1, __dbResults);
this.nome = JdbcWritableBridge.readString(2, __dbResults);
this.country = JdbcWritableBridge.readString(3, __dbResults);
this.class_size = JdbcWritableBridge.readString(4, __dbResults);
this.class_visibility = JdbcWritableBridge.readString(5, __dbResults);
this.class_richfiles = JdbcWritableBridge.readString(6, __dbResults);
this.class_scholar = JdbcWritableBridge.readString(7, __dbResults);
this.site = JdbcWritableBridge.readString(8, __dbResults);
}
public void loadLargeObjects(LargeObjectLoader __loader)
throws SQLException, IOException, InterruptedException {
}
public void write(PreparedStatement __dbStmt) throws SQLException {
write(__dbStmt, 0);
}
public int write(PreparedStatement __dbStmt, int __off) throws SQLException {
JdbcWritableBridge.writeInteger(chave, 1 + __off, 4, __dbStmt);
JdbcWritableBridge.writeString(nome, 2 + __off, 12, __dbStmt);
JdbcWritableBridge.writeString(country, 3 + __off, 12, __dbStmt);
JdbcWritableBridge.writeString(class_size, 4 + __off, 12, __dbStmt);
JdbcWritableBridge.writeString(class_visibility, 5 + __off, 12, __dbStmt);
JdbcWritableBridge.writeString(class_richfiles, 6 + __off, 12, __dbStmt);
JdbcWritableBridge.writeString(class_scholar, 7 + __off, 12, __dbStmt);
JdbcWritableBridge.writeString(site, 8 + __off, 12, __dbStmt);
return 8;
}
public void readFields(DataInput __dataIn) throws IOException {
if (__dataIn.readBoolean()) {
this.chave = null;
} else {
this.chave = Integer.valueOf(__dataIn.readInt());
}
if (__dataIn.readBoolean()) {
this.nome = null;
} else {
this.nome = Text.readString(__dataIn);
}
if (__dataIn.readBoolean()) {
this.country = null;
} else {
this.country = Text.readString(__dataIn);
}
if (__dataIn.readBoolean()) {
this.class_size = null;
} else {
this.class_size = Text.readString(__dataIn);
}
if (__dataIn.readBoolean()) {
this.class_visibility = null;
} else {
this.class_visibility = Text.readString(__dataIn);
}
if (__dataIn.readBoolean()) {
this.class_richfiles = null;
} else {
this.class_richfiles = Text.readString(__dataIn);
}
if (__dataIn.readBoolean()) {
this.class_scholar = null;
} else {
this.class_scholar = Text.readString(__dataIn);
}
if (__dataIn.readBoolean()) {
this.site = null;
} else {
this.site = Text.readString(__dataIn);
}
}
public void write(DataOutput __dataOut) throws IOException {
if (null == this.chave) {
__dataOut.writeBoolean(true);
} else {
__dataOut.writeBoolean(false);
__dataOut.writeInt(this.chave);
}
if (null == this.nome) {
__dataOut.writeBoolean(true);
} else {
__dataOut.writeBoolean(false);
Text.writeString(__dataOut, nome);
}
if (null == this.country) {
__dataOut.writeBoolean(true);
} else {
__dataOut.writeBoolean(false);
Text.writeString(__dataOut, country);
}
if (null == this.class_size) {
__dataOut.writeBoolean(true);
} else {
__dataOut.writeBoolean(false);
Text.writeString(__dataOut, class_size);
}
if (null == this.class_visibility) {
__dataOut.writeBoolean(true);
} else {
__dataOut.writeBoolean(false);
Text.writeString(__dataOut, class_visibility);
}
if (null == this.class_richfiles) {
__dataOut.writeBoolean(true);
} else {
__dataOut.writeBoolean(false);
Text.writeString(__dataOut, class_richfiles);
}
if (null == this.class_scholar) {
__dataOut.writeBoolean(true);
} else {
__dataOut.writeBoolean(false);
Text.writeString(__dataOut, class_scholar);
}
if (null == this.site) {
__dataOut.writeBoolean(true);
} else {
__dataOut.writeBoolean(false);
Text.writeString(__dataOut, site);
}
}
private final DelimiterSet __outputDelimiters = new
DelimiterSet((char) 44, (char) 10, (char) 0, (char) 0, false);
public String toString() {
return toString(__outputDelimiters, true);
}
public String toString(DelimiterSet delimiters) {
return toString(delimiters, true);
}
public String toString(boolean useRecordDelim) {
return toString(__outputDelimiters, useRecordDelim);
}
public String toString(DelimiterSet delimiters, boolean useRecordDelim) {
StringBuilder __sb = new StringBuilder();
char fieldDelim = delimiters.getFieldsTerminatedBy();
__sb.append(FieldFormatter.escapeAndEnclose(chave==null?"null":""
+ chave, delimiters));
__sb.append(fieldDelim);
__sb.append(FieldFormatter.escapeAndEnclose(nome==null?"null":nome,
delimiters));
__sb.append(fieldDelim);
__sb.append(FieldFormatter.escapeAndEnclose(country==null?"null":country,
delimiters));
__sb.append(fieldDelim);
__sb.append(FieldFormatter.escapeAndEnclose(class_size==null?"null":class_size,
delimiters));
__sb.append(fieldDelim);
__sb.append(FieldFormatter.escapeAndEnclose(class_visibility==null?"null":class_visibility,
delimiters));
__sb.append(fieldDelim);
__sb.append(FieldFormatter.escapeAndEnclose(class_richfiles==null?"null":class_richfiles,
delimiters));
__sb.append(fieldDelim);
__sb.append(FieldFormatter.escapeAndEnclose(class_scholar==null?"null":class_scholar,
delimiters));
__sb.append(fieldDelim);
__sb.append(FieldFormatter.escapeAndEnclose(site==null?"null":site,
delimiters));
if (useRecordDelim) {
__sb.append(delimiters.getLinesTerminatedBy());
}
return __sb.toString();
}
private final DelimiterSet __inputDelimiters = new
DelimiterSet((char) 44, (char) 10, (char) 0, (char) 0, false);
private RecordParser __parser;
public void parse(Text __record) throws RecordParser.ParseError {
if (null == this.__parser) {
this.__parser = new RecordParser(__inputDelimiters);
}
List<String> __fields = this.__parser.parseRecord(__record);
__loadFromFields(__fields);
}
public void parse(CharSequence __record) throws RecordParser.ParseError {
if (null == this.__parser) {
this.__parser = new RecordParser(__inputDelimiters);
}
List<String> __fields = this.__parser.parseRecord(__record);
__loadFromFields(__fields);
}
public void parse(byte [] __record) throws RecordParser.ParseError {
if (null == this.__parser) {
this.__parser = new RecordParser(__inputDelimiters);
}
List<String> __fields = this.__parser.parseRecord(__record);
__loadFromFields(__fields);
}
public void parse(char [] __record) throws RecordParser.ParseError {
if (null == this.__parser) {
this.__parser = new RecordParser(__inputDelimiters);
}
List<String> __fields = this.__parser.parseRecord(__record);
__loadFromFields(__fields);
}
public void parse(ByteBuffer __record) throws RecordParser.ParseError {
if (null == this.__parser) {
this.__parser = new RecordParser(__inputDelimiters);
}
List<String> __fields = this.__parser.parseRecord(__record);
__loadFromFields(__fields);
}
public void parse(CharBuffer __record) throws RecordParser.ParseError {
if (null == this.__parser) {
this.__parser = new RecordParser(__inputDelimiters);
}
List<String> __fields = this.__parser.parseRecord(__record);
__loadFromFields(__fields);
}
private void __loadFromFields(List<String> fields) {
Iterator<String> __it = fields.listIterator();
String __cur_str;
__cur_str = __it.next();
if (__cur_str.equals("null") || __cur_str.length() == 0) {
this.chave = null; } else {
this.chave = Integer.valueOf(__cur_str);
}
__cur_str = __it.next();
if (__cur_str.equals("null")) { this.nome = null; } else {
this.nome = __cur_str;
}
__cur_str = __it.next();
if (__cur_str.equals("null")) { this.country = null; } else {
this.country = __cur_str;
}
__cur_str = __it.next();
if (__cur_str.equals("null")) { this.class_size = null; } else {
this.class_size = __cur_str;
}
__cur_str = __it.next();
if (__cur_str.equals("null")) { this.class_visibility = null; } else {
this.class_visibility = __cur_str;
}
__cur_str = __it.next();
if (__cur_str.equals("null")) { this.class_richfiles = null; } else {
this.class_richfiles = __cur_str;
}
__cur_str = __it.next();
if (__cur_str.equals("null")) { this.class_scholar = null; } else {
this.class_scholar = __cur_str;
}
__cur_str = __it.next();
if (__cur_str.equals("null")) { this.site = null; } else {
this.site = __cur_str;
}
}
public Object clone() throws CloneNotSupportedException {
universities o = (universities) super.clone();
return o;
}
public Map<String, Object> getFieldMap() {
Map<String, Object> __sqoop$field_map = new TreeMap<String, Object>();
__sqoop$field_map.put("chave", this.chave);
__sqoop$field_map.put("nome", this.nome);
__sqoop$field_map.put("country", this.country);
__sqoop$field_map.put("class_size", this.class_size);
__sqoop$field_map.put("class_visibility", this.class_visibility);
__sqoop$field_map.put("class_richfiles", this.class_richfiles);
__sqoop$field_map.put("class_scholar", this.class_scholar);
__sqoop$field_map.put("site", this.site);
return __sqoop$field_map;
}
public void setField(String __fieldName, Object __fieldVal) {
if ("chave".equals(__fieldName)) {
this.chave = (Integer) __fieldVal;
}
else if ("nome".equals(__fieldName)) {
this.nome = (String) __fieldVal;
}
else if ("country".equals(__fieldName)) {
this.country = (String) __fieldVal;
}
else if ("class_size".equals(__fieldName)) {
this.class_size = (String) __fieldVal;
}
else if ("class_visibility".equals(__fieldName)) {
this.class_visibility = (String) __fieldVal;
}
else if ("class_richfiles".equals(__fieldName)) {
this.class_richfiles = (String) __fieldVal;
}
else if ("class_scholar".equals(__fieldName)) {
this.class_scholar = (String) __fieldVal;
}
else if ("site".equals(__fieldName)) {
this.site = (String) __fieldVal;
}
else {
throw new RuntimeException("No such field: " + __fieldName);
}
}
}