Updated Branches: refs/heads/trunk a220ae469 -> 20b16fd4b
SQOOP-862: Hbase import fails if there is a row where all columns are null (David Robson via Jarek Jarcec Cecho) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/20b16fd4 Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/20b16fd4 Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/20b16fd4 Branch: refs/heads/trunk Commit: 20b16fd4bd0b045a11006d81ddd1bcc8bccd01db Parents: a220ae4 Author: Jarek Jarcec Cecho <[email protected]> Authored: Thu Feb 7 18:37:50 2013 -0800 Committer: Jarek Jarcec Cecho <[email protected]> Committed: Thu Feb 7 18:37:50 2013 -0800 ---------------------------------------------------------------------- src/docs/user/hbase.txt | 3 +- .../org/apache/sqoop/hbase/HBasePutProcessor.java | 15 +++++++++++- .../com/cloudera/sqoop/hbase/HBaseImportTest.java | 15 +++++++++++ .../com/cloudera/sqoop/hbase/HBaseTestCase.java | 19 +++++++++++++++ 4 files changed, 50 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/20b16fd4/src/docs/user/hbase.txt ---------------------------------------------------------------------- diff --git a/src/docs/user/hbase.txt b/src/docs/user/hbase.txt index 7404929..24c8df8 100644 --- a/src/docs/user/hbase.txt +++ b/src/docs/user/hbase.txt @@ -48,6 +48,7 @@ using the default parameters from your HBase configuration. Sqoop currently serializes all values to HBase by converting each field to its string representation (as if you were importing to HDFS in text mode), and then inserts the UTF-8 bytes of this string in the target -cell. +cell. Sqoop will skip all rows containing null values in all columns +except the row key column. http://git-wip-us.apache.org/repos/asf/sqoop/blob/20b16fd4/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java b/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java index cca641f..ffa5f63 100644 --- a/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java +++ b/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java @@ -23,10 +23,13 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.util.ReflectionUtils; import org.apache.sqoop.mapreduce.ImportJobBase; @@ -41,6 +44,9 @@ import com.cloudera.sqoop.lib.ProcessingException; public class HBasePutProcessor implements Closeable, Configurable, FieldMapProcessor { + public static final Log LOG = LogFactory.getLog( + HBasePutProcessor.class.getName()); + /** Configuration key specifying the table to insert into. */ public static final String TABLE_NAME_KEY = "sqoop.hbase.insert.table"; @@ -124,7 +130,14 @@ public class HBasePutProcessor implements Closeable, Configurable, List<Put> putList = putTransformer.getPutCommand(fields); if (null != putList) { for (Put put : putList) { - this.table.put(put); + if (put!=null) { + if (put.isEmpty()) { + LOG.warn("Could not insert row with no columns " + + "for row-key column: " + Bytes.toString(put.getRow())); + } else { + this.table.put(put); + } + } } } } http://git-wip-us.apache.org/repos/asf/sqoop/blob/20b16fd4/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java ---------------------------------------------------------------------- diff --git a/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java b/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java index bf24608..e1f9696 100644 --- a/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java +++ b/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java @@ -112,4 +112,19 @@ public class HBaseImportTest extends HBaseTestCase { fail("should have gotten exception"); } + + @Test + public void testNullRow() throws IOException { + String [] argv = getArgv(true, "nullRowT", "nullRowF", true, null); + String [] types = { "INT", "INT" }; + String [] vals = { "0", "null" }; + createTableWithColTypes(types, vals); + runImport(argv); + + // This cell should not be placed in the results.. + verifyHBaseCell("nullRowT", "0", "nullRowF", getColName(1), null); + + int rowCount = countHBaseTable("nullRowT", "nullRowF"); + assertEquals(0, rowCount); + } } http://git-wip-us.apache.org/repos/asf/sqoop/blob/20b16fd4/src/test/com/cloudera/sqoop/hbase/HBaseTestCase.java ---------------------------------------------------------------------- diff --git a/src/test/com/cloudera/sqoop/hbase/HBaseTestCase.java b/src/test/com/cloudera/sqoop/hbase/HBaseTestCase.java index 65ff87b..37dc004 100644 --- a/src/test/com/cloudera/sqoop/hbase/HBaseTestCase.java +++ b/src/test/com/cloudera/sqoop/hbase/HBaseTestCase.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.util.StringUtils; @@ -234,4 +235,22 @@ public abstract class HBaseTestCase extends ImportJobTestCase { } throw new IllegalStateException("Failed to create directory"); } + + protected int countHBaseTable(String tableName, String colFamily) + throws IOException { + int count = 0; + HTable table = new HTable(new Configuration( + hbaseTestUtil.getConfiguration()), Bytes.toBytes(tableName)); + try { + ResultScanner scanner = table.getScanner(Bytes.toBytes(colFamily)); + for(Result result = scanner.next(); + result != null; + result = scanner.next()) { + count++; + } + } finally { + table.close(); + } + return count; + } }
