[hbase] 01/02: HBASE-27848:Should fast-fail if unmatched column family exists when using ImportTsv (#5225)

2023-05-21 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-2.5
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 91dd29e869cd9e35c264ac90b1246d95f8f64cd8
Author: guluo 
AuthorDate: Sun May 21 19:47:27 2023 +0800

HBASE-27848:Should fast-fail if unmatched column family exists when using ImportTsv (#5225)

Signed-off-by: Duo Zhang 
(cherry picked from commit ce29f97a809a849bf067fa3571fd775fb596fc10)
---
 .../apache/hadoop/hbase/mapreduce/ImportTsv.java   | 17 +
 .../hadoop/hbase/mapreduce/TestImportTsv.java  | 22 ++
 2 files changed, 39 insertions(+)

diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
index 665ff93a977..d7833fabeaf 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hbase.client.RegionLocator;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.client.TableDescriptor;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.Pair;
@@ -554,6 +555,22 @@ public class ImportTsv extends Configured implements Tool {
 LOG.error(errorMsg);
 throw new TableNotFoundException(errorMsg);
   }
+  try (Table table = connection.getTable(tableName)) {
+ArrayList unmatchedFamilies = new ArrayList<>();
+Set cfSet = getColumnFamilies(columns);
+TableDescriptor tDesc = table.getDescriptor();
+for (String cf : cfSet) {
+  if (!tDesc.hasColumnFamily(Bytes.toBytes(cf))) {
+unmatchedFamilies.add(cf);
+  }
+}
+if (unmatchedFamilies.size() > 0) {
+  String noSuchColumnFamiliesMsg =
+format("Column families: %s do not exist.", unmatchedFamilies);
+  LOG.error(noSuchColumnFamiliesMsg);
+  throw new NoSuchColumnFamilyException(noSuchColumnFamiliesMsg);
+}
+  }
   if (mapperClass.equals(TsvImporterTextMapper.class)) {
 usage(TsvImporterTextMapper.class.toString()
   + " should not be used for non bulkloading case. use "
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
index 737ae178b63..8a30e404cff 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
@@ -51,6 +51,7 @@ import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
 import org.apache.hadoop.hbase.io.hfile.HFile;
 import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -241,6 +242,27 @@ public class TestImportTsv implements Configurable {
   }, args));
   }
 
+  @Test
+  public void testMRNoMatchedColumnFamily() throws Exception {
+util.createTable(tn, FAMILY);
+
+String[] args = new String[] {
+  "-D" + ImportTsv.COLUMNS_CONF_KEY
++ "=HBASE_ROW_KEY,FAM:A,FAM01_ERROR:A,FAM01_ERROR:B,FAM02_ERROR:C",
+  tn.getNameAsString(), "/inputFile" };
+exception.expect(NoSuchColumnFamilyException.class);
+assertEquals("running test job configuration failed.", 0,
+  ToolRunner.run(new Configuration(util.getConfiguration()), new ImportTsv() {
+@Override
+public int run(String[] args) throws Exception {
+  createSubmittableJob(getConf(), args);
+  return 0;
+}
+  }, args));
+
+util.deleteTable(tn);
+  }
+
   @Test
   public void testMRWithoutAnExistingTable() throws Exception {
 String[] args = new String[] { tn.getNameAsString(), "/inputFile" };



[hbase] 01/02: HBASE-27848:Should fast-fail if unmatched column family exists when using ImportTsv (#5225)

2023-05-21 Thread zhangduo
This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit fd4a1c638378b92f281b8c8a1278bc3a9d54b19b
Author: guluo 
AuthorDate: Sun May 21 19:47:27 2023 +0800

HBASE-27848:Should fast-fail if unmatched column family exists when using ImportTsv (#5225)

Signed-off-by: Duo Zhang 
(cherry picked from commit ce29f97a809a849bf067fa3571fd775fb596fc10)
---
 .../apache/hadoop/hbase/mapreduce/ImportTsv.java   | 17 +
 .../hadoop/hbase/mapreduce/TestImportTsv.java  | 22 ++
 2 files changed, 39 insertions(+)

diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
index 2c1a2ddae24..3318dd3cc11 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hbase.client.RegionLocator;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.client.TableDescriptor;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.io.Text;
@@ -553,6 +554,22 @@ public class ImportTsv extends Configured implements Tool {
 LOG.error(errorMsg);
 throw new TableNotFoundException(errorMsg);
   }
+  try (Table table = connection.getTable(tableName)) {
+ArrayList unmatchedFamilies = new ArrayList<>();
+Set cfSet = getColumnFamilies(columns);
+TableDescriptor tDesc = table.getDescriptor();
+for (String cf : cfSet) {
+  if (!tDesc.hasColumnFamily(Bytes.toBytes(cf))) {
+unmatchedFamilies.add(cf);
+  }
+}
+if (unmatchedFamilies.size() > 0) {
+  String noSuchColumnFamiliesMsg =
+format("Column families: %s do not exist.", unmatchedFamilies);
+  LOG.error(noSuchColumnFamiliesMsg);
+  throw new NoSuchColumnFamilyException(noSuchColumnFamiliesMsg);
+}
+  }
   if (mapperClass.equals(TsvImporterTextMapper.class)) {
 usage(TsvImporterTextMapper.class.toString()
   + " should not be used for non bulkloading case. use "
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
index 43516f20e84..374f444a711 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java
@@ -51,6 +51,7 @@ import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
 import org.apache.hadoop.hbase.io.hfile.HFile;
 import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -241,6 +242,27 @@ public class TestImportTsv implements Configurable {
   }, args));
   }
 
+  @Test
+  public void testMRNoMatchedColumnFamily() throws Exception {
+util.createTable(tn, FAMILY);
+
+String[] args = new String[] {
+  "-D" + ImportTsv.COLUMNS_CONF_KEY
++ "=HBASE_ROW_KEY,FAM:A,FAM01_ERROR:A,FAM01_ERROR:B,FAM02_ERROR:C",
+  tn.getNameAsString(), "/inputFile" };
+exception.expect(NoSuchColumnFamilyException.class);
+assertEquals("running test job configuration failed.", 0,
+  ToolRunner.run(new Configuration(util.getConfiguration()), new ImportTsv() {
+@Override
+public int run(String[] args) throws Exception {
+  createSubmittableJob(getConf(), args);
+  return 0;
+}
+  }, args));
+
+util.deleteTable(tn);
+  }
+
   @Test
   public void testMRWithoutAnExistingTable() throws Exception {
 String[] args = new String[] { tn.getNameAsString(), "/inputFile" };