[
https://issues.apache.org/jira/browse/PHOENIX-6104?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17255528#comment-17255528
]
Viraj Jasani edited comment on PHOENIX-6104 at 12/28/20, 11:51 AM:
-------------------------------------------------------------------
[~stoty] I think now we have some better data, earlier when we got the
Exception stacktrace as I mentioned, that was generated because in test, we are
splitting same table over and over again in a loop without having to worry
about the fact that all split regions of table so far are online.
I tried to dig bit more and found that there is no issue at HBase or Phoenix
level w.r.t splitting small table e.g SYSTEM.CATALOG in our case. The only
change required is in splitting of table that we do in BaseTest.
I was able to get the successful split of SYSTEM.CATALOG to 6 regions with some
changes. Let me attach the small patch here:
{code:java}
diff --git
a/phoenix-core/src/it/java/org/apache/phoenix/end2end/TableSnapshotReadsMapReduceIT.java
b/phoenix-core/src/it/java/org/apache/phoenix/end2end/TableSnapshotReadsMapReduceIT.java
index e493b1770..39d70bbd4 100644
---
a/phoenix-core/src/it/java/org/apache/phoenix/end2end/TableSnapshotReadsMapReduceIT.java
+++
b/phoenix-core/src/it/java/org/apache/phoenix/end2end/TableSnapshotReadsMapReduceIT.java
@@ -405,35 +405,6 @@ public class TableSnapshotReadsMapReduceIT extends
BaseUniqueNamesOwnClusterIT {
}
}
- private void splitTableSync(Admin admin, TableName hbaseTableName,
- byte[] splitPoint, int expectedRegions) throws IOException,
- InterruptedException {
- admin.split(hbaseTableName, splitPoint);
- AssignmentManager assignmentManager =
- getUtility().getHBaseCluster().getMaster().getAssignmentManager();
- // wait for split daughter regions coming online for ~20s
- for (int i = 0; i < 20; i++) {
- Thread.sleep(1000);
- List<HRegion> regions = getUtility().getHBaseCluster()
- .getRegions(hbaseTableName);
- if (regions.size() >= expectedRegions) {
- boolean allRegionsOnline = true;
- for (HRegion region : regions) {
- if (!assignmentManager.getRegionStates()
- .isRegionOnline(region.getRegionInfo())) {
- allRegionsOnline = false;
- break;
- }
- }
- if (allRegionsOnline) {
- break;
- }
- }
- LOGGER.info("Sleeping for 1000 ms while waiting for {} to split and all
regions to come online",
- hbaseTableName.getNameAsString());
- }
- }
-
private void deleteSnapshotIfExists(String snapshotName) throws Exception {
try (Connection conn = DriverManager.getConnection(getUrl());
Admin admin =
conn.unwrap(PhoenixConnection.class).getQueryServices().getAdmin()) {
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/query/BaseTest.java
b/phoenix-core/src/test/java/org/apache/phoenix/query/BaseTest.java
index faf2a18d8..62ee7538a 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/query/BaseTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/query/BaseTest.java
@@ -130,6 +130,7 @@ import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.ipc.PhoenixRpcSchedulerFactory;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
+import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.RSRpcServices;
import org.apache.hadoop.hbase.util.Bytes;
@@ -1824,20 +1825,45 @@ public abstract class BaseTest {
}
phxConn.close();
}
-
/**
* Synchronously split table at the given split point
*/
- protected static void splitRegion(TableName fullTableName, byte[]
splitPoint) throws SQLException, IOException, InterruptedException {
- Admin admin =
- driver.getConnectionQueryServices(getUrl(),
TestUtil.TEST_PROPERTIES).getAdmin();
- admin.split(fullTableName, splitPoint);
- // make sure the split finishes (there's no synchronous splitting
before HBase 2.x)
- admin.disableTable(fullTableName);
- admin.enableTable(fullTableName);
+ protected static void splitTableSync(Admin admin, TableName hbaseTableName,
+ byte[] splitPoint, int expectedRegions) throws IOException,
+ InterruptedException {
+ admin.split(hbaseTableName, splitPoint);
+ AssignmentManager assignmentManager =
+ getUtility().getHBaseCluster().getMaster().getAssignmentManager();
+ boolean splitSuccessful = false;
+ for (int i = 0; i < 3; i++) {
+ Thread.sleep(10000);
+ List<HRegion> regions = getUtility().getHBaseCluster()
+ .getRegions(hbaseTableName);
+ if (regions.size() >= expectedRegions) {
+ boolean allRegionsOnline = true;
+ for (HRegion region : regions) {
+ if (!assignmentManager.getRegionStates()
+ .isRegionOnline(region.getRegionInfo())) {
+ allRegionsOnline = false;
+ break;
+ }
+ }
+ if (allRegionsOnline) {
+ splitSuccessful = true;
+ break;
+ }
+ }
+ LOGGER.info("Sleeping for 10000 ms while waiting for {} to split
and all regions to come online",
+ hbaseTableName.getNameAsString());
+ }
+ if (!splitSuccessful) {
+ throw new IOException("Split did not succeed for table: "
+ + hbaseTableName.getNameAsString() + " , expected regions
after split: "
+ + expectedRegions);
+ }
}
-
+
/**
* Returns true if the region contains atleast one of the metadata rows we
are interested in
*/
@@ -1866,8 +1892,10 @@ public abstract class BaseTest {
AssignmentManager am = master.getAssignmentManager();
// No need to split on the first splitPoint since the end key of
region boundaries are exclusive
for (int i=1; i<splitPoints.size(); ++i) {
- splitRegion(fullTableName, splitPoints.get(i));
+ splitTableSync(admin, fullTableName, splitPoints.get(i), i + 1);
}
+ List<RegionInfo> regionInfoList = admin.getRegions(fullTableName);
+ assertEquals(splitPoints.size(), regionInfoList.size());
HashMap<ServerName, List<HRegionInfo>> serverToRegionsList =
Maps.newHashMapWithExpectedSize(NUM_SLAVES_BASE);
Deque<ServerName> availableRegionServers = new
ArrayDeque<ServerName>(NUM_SLAVES_BASE);
for (int i=0; i<NUM_SLAVES_BASE; ++i) {
{code}
But you are right that the approach of disabling and re-enabling the table soon
after asynchronously calling split API is working only before HBase 2.3 only,
whether it is valid to follow this sequence for guaranteed splitting is worth
investigating (for another discussion).
If we don't follow this sequence and rather wait for table regions to get split
and come online (as per above patch), it should work for all versions IMHO.
was (Author: vjasani):
[~stoty] I think now we have some better data, earlier when we got the
Exception stacktrace as I mentioned, that was generated because in test, we are
splitting same table over and over again in a loop without having to worry
about the fact that all split regions of table so far are online.
I tried to dig bit more and found that there is no issue at HBase or Phoenix
level w.r.t splitting small table e.g SYSTEM.CATALOG in our case. The only
change required is in splitting of table that we do in BaseTest.
I was able to get the successful split of SYSTEM.CATALOG to 6 regions with some
changes. Let me attach the small patch here:
{code:java}
diff --git
a/phoenix-core/src/it/java/org/apache/phoenix/end2end/TableSnapshotReadsMapReduceIT.java
b/phoenix-core/src/it/java/org/apache/phoenix/end2end/TableSnapshotReadsMapReduceIT.java
index e493b1770..39d70bbd4 100644
---
a/phoenix-core/src/it/java/org/apache/phoenix/end2end/TableSnapshotReadsMapReduceIT.java
+++
b/phoenix-core/src/it/java/org/apache/phoenix/end2end/TableSnapshotReadsMapReduceIT.java
@@ -405,35 +405,6 @@ public class TableSnapshotReadsMapReduceIT extends
BaseUniqueNamesOwnClusterIT {
}
}
- private void splitTableSync(Admin admin, TableName hbaseTableName,
- byte[] splitPoint, int expectedRegions) throws IOException,
- InterruptedException {
- admin.split(hbaseTableName, splitPoint);
- AssignmentManager assignmentManager =
- getUtility().getHBaseCluster().getMaster().getAssignmentManager();
- // wait for split daughter regions coming online for ~20s
- for (int i = 0; i < 20; i++) {
- Thread.sleep(1000);
- List<HRegion> regions = getUtility().getHBaseCluster()
- .getRegions(hbaseTableName);
- if (regions.size() >= expectedRegions) {
- boolean allRegionsOnline = true;
- for (HRegion region : regions) {
- if (!assignmentManager.getRegionStates()
- .isRegionOnline(region.getRegionInfo())) {
- allRegionsOnline = false;
- break;
- }
- }
- if (allRegionsOnline) {
- break;
- }
- }
- LOGGER.info("Sleeping for 1000 ms while waiting for {} to split and all
regions to come online",
- hbaseTableName.getNameAsString());
- }
- }
-
private void deleteSnapshotIfExists(String snapshotName) throws Exception {
try (Connection conn = DriverManager.getConnection(getUrl());
Admin admin =
conn.unwrap(PhoenixConnection.class).getQueryServices().getAdmin()) {
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/query/BaseTest.java
b/phoenix-core/src/test/java/org/apache/phoenix/query/BaseTest.java
index faf2a18d8..62ee7538a 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/query/BaseTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/query/BaseTest.java
@@ -130,6 +130,7 @@ import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.ipc.PhoenixRpcSchedulerFactory;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
+import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.RSRpcServices;
import org.apache.hadoop.hbase.util.Bytes;
@@ -1824,20 +1825,45 @@ public abstract class BaseTest {
}
phxConn.close();
}
-
/**
* Synchronously split table at the given split point
*/
- protected static void splitRegion(TableName fullTableName, byte[]
splitPoint) throws SQLException, IOException, InterruptedException {
- Admin admin =
- driver.getConnectionQueryServices(getUrl(),
TestUtil.TEST_PROPERTIES).getAdmin();
- admin.split(fullTableName, splitPoint);
- // make sure the split finishes (there's no synchronous splitting
before HBase 2.x)
- admin.disableTable(fullTableName);
- admin.enableTable(fullTableName);
+ protected static void splitTableSync(Admin admin, TableName hbaseTableName,
+ byte[] splitPoint, int expectedRegions) throws IOException,
+ InterruptedException {
+ admin.split(hbaseTableName, splitPoint);
+ AssignmentManager assignmentManager =
+ getUtility().getHBaseCluster().getMaster().getAssignmentManager();
+ boolean splitSuccessful = false;
+ for (int i = 0; i < 3; i++) {
+ Thread.sleep(10000);
+ List<HRegion> regions = getUtility().getHBaseCluster()
+ .getRegions(hbaseTableName);
+ if (regions.size() >= expectedRegions) {
+ boolean allRegionsOnline = true;
+ for (HRegion region : regions) {
+ if (!assignmentManager.getRegionStates()
+ .isRegionOnline(region.getRegionInfo())) {
+ allRegionsOnline = false;
+ break;
+ }
+ }
+ if (allRegionsOnline) {
+ splitSuccessful = true;
+ break;
+ }
+ }
+ LOGGER.info("Sleeping for 10000 ms while waiting for {} to split
and all regions to come online",
+ hbaseTableName.getNameAsString());
+ }
+ if (!splitSuccessful) {
+ throw new IOException("Split did not succeed for table: "
+ + hbaseTableName.getNameAsString() + " , expected regions
after split: "
+ + expectedRegions);
+ }
}
-
+
/**
* Returns true if the region contains atleast one of the metadata rows we
are interested in
*/
@@ -1866,8 +1892,10 @@ public abstract class BaseTest {
AssignmentManager am = master.getAssignmentManager();
// No need to split on the first splitPoint since the end key of
region boundaries are exclusive
for (int i=1; i<splitPoints.size(); ++i) {
- splitRegion(fullTableName, splitPoints.get(i));
+ splitTableSync(admin, fullTableName, splitPoints.get(i), i + 1);
}
+ List<RegionInfo> regionInfoList = admin.getRegions(fullTableName);
+ assertEquals(splitPoints.size(), regionInfoList.size());
HashMap<ServerName, List<HRegionInfo>> serverToRegionsList =
Maps.newHashMapWithExpectedSize(NUM_SLAVES_BASE);
Deque<ServerName> availableRegionServers = new
ArrayDeque<ServerName>(NUM_SLAVES_BASE);
for (int i=0; i<NUM_SLAVES_BASE; ++i) {
{code}
> SplitSystemCatalogIT tests very unstable with Hbase 2.3
> -------------------------------------------------------
>
> Key: PHOENIX-6104
> URL: https://issues.apache.org/jira/browse/PHOENIX-6104
> Project: Phoenix
> Issue Type: Bug
> Components: core
> Affects Versions: 5.1.0
> Reporter: Istvan Toth
> Assignee: Istvan Toth
> Priority: Major
> Attachments: 6104-testouput.log
>
>
> The failure is in the test preparation code, where we split the system
> catalog table, and it seems to be a HBase issue, rather than a Phoenix one,
> but we need to track the issue.
--
This message was sent by Atlassian Jira
(v8.3.4#803005)