Repository: hive Updated Branches: refs/heads/master d31dc22ae -> b26065454
HIVE-15805: Some minor improvement on the validation tool (Aihua Xu, reviewed by Yongzhi Chen) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b2606545 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b2606545 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b2606545 Branch: refs/heads/master Commit: b260654541587b26b59b9c39e67bdd693a39d546 Parents: d31dc22 Author: Aihua Xu <aihu...@apache.org> Authored: Fri Feb 3 17:20:59 2017 -0500 Committer: Aihua Xu <aihu...@apache.org> Committed: Mon Feb 6 09:45:05 2017 -0500 ---------------------------------------------------------------------- .../org/apache/hive/beeline/HiveSchemaTool.java | 182 +++++++++---------- .../org/apache/hive/beeline/TestSchemaTool.java | 10 +- 2 files changed, 96 insertions(+), 96 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/b2606545/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java ---------------------------------------------------------------------- diff --git a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java index ea58776..96b2978 100644 --- a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java +++ b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java @@ -27,6 +27,7 @@ import org.apache.commons.cli.OptionGroup; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.io.output.NullOutputStream; +import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.fs.Path; @@ -70,6 +71,7 @@ public class HiveSchemaTool { private boolean dryRun = false; private boolean verbose = false; private String dbOpts = null; + private URI[] validationServers = null; // The list of servers the database/partition/table can locate on private final HiveConf hiveConf; private final String dbType; private final MetaStoreSchemaInfo metaStoreSchemaInfo; @@ -121,6 +123,16 @@ public class HiveSchemaTool { this.dbOpts = dbOpts; } + public void setValidationServers(String servers) { + if(StringUtils.isNotEmpty(servers)) { + String[] strServers = servers.split(","); + this.validationServers = new URI[strServers.length]; + for (int i = 0; i < validationServers.length; i++) { + validationServers[i] = new Path(strServers[i]).toUri(); + } + } + } + private static void printAndExit(Options cmdLineOptions) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("schemaTool", cmdLineOptions); @@ -181,12 +193,12 @@ public class HiveSchemaTool { } } - boolean validateLocations(Connection conn, String defaultLocPrefix) throws HiveMetaException { + boolean validateLocations(Connection conn, URI[] defaultServers) throws HiveMetaException { System.out.println("Validating database/table/partition locations"); boolean rtn; - rtn = checkMetaStoreDBLocation(conn, defaultLocPrefix); - rtn = checkMetaStoreTableLocation(conn, defaultLocPrefix) && rtn; - rtn = checkMetaStorePartitionLocation(conn, defaultLocPrefix) && rtn; + rtn = checkMetaStoreDBLocation(conn, defaultServers); + rtn = checkMetaStoreTableLocation(conn, defaultServers) && rtn; + rtn = checkMetaStorePartitionLocation(conn, defaultServers) && rtn; System.out.println((rtn ? "Succeeded" : "Failed") + " in database/table/partition location validation"); return rtn; } @@ -196,10 +208,8 @@ public class HiveSchemaTool { return (itemName == null || itemName.isEmpty()) ? "ID: " + res.getString(idInx) : "Name: " + itemName; } - // read schema version from metastore - private boolean checkMetaStoreDBLocation(Connection conn, String locHeader) + private boolean checkMetaStoreDBLocation(Connection conn, URI[] defaultServers) throws HiveMetaException { - String defaultPrefix = locHeader; String dbLoc; boolean isValid = true; int numOfInvalid = 0; @@ -213,33 +223,11 @@ public class HiveSchemaTool { ResultSet res = stmt.executeQuery(dbLoc)) { while (res.next()) { String locValue = res.getString(3); - if (locValue == null) { - System.err.println("NULL Location for DB with " + getNameOrID(res,2,1)); + String dbName = getNameOrID(res,2,1); + if (!checkLocation("Database " + dbName, locValue, defaultServers)) { numOfInvalid++; - } else { - URI currentUri = null; - try { - currentUri = new Path(locValue).toUri(); - } catch (Exception pe) { - System.err.println("Invalid Location for DB with " + getNameOrID(res,2,1)); - System.err.println(pe.getMessage()); - numOfInvalid++; - continue; - } - - if (currentUri.getScheme() == null || currentUri.getScheme().isEmpty()) { - System.err.println("Missing Location scheme for DB with " + getNameOrID(res,2,1)); - System.err.println("The Location is: " + locValue); - numOfInvalid++; - } else if (defaultPrefix != null && !defaultPrefix.isEmpty() && locValue.substring(0,defaultPrefix.length()) - .compareToIgnoreCase(defaultPrefix) != 0) { - System.err.println("Mismatch root Location for DB with " + getNameOrID(res,2,1)); - System.err.println("The Location is: " + locValue); - numOfInvalid++; - } } } - } catch (SQLException e) { throw new HiveMetaException("Failed to get DB Location Info.", e); } @@ -250,9 +238,8 @@ public class HiveSchemaTool { return isValid; } - private boolean checkMetaStoreTableLocation(Connection conn, String locHeader) + private boolean checkMetaStoreTableLocation(Connection conn, URI[] defaultServers) throws HiveMetaException { - String defaultPrefix = locHeader; String tabLoc, tabIDRange; boolean isValid = true; int numOfInvalid = 0; @@ -290,33 +277,10 @@ public class HiveSchemaTool { res = pStmt.executeQuery(); while (res.next()) { String locValue = res.getString(3); - if (locValue == null) { - System.err.println("In DB with " + getNameOrID(res,5,4)); - System.err.println("NULL Location for TABLE with " + getNameOrID(res,2,1)); + String entity = "Database " + getNameOrID(res, 5, 4) + + ", Table " + getNameOrID(res,2,1); + if (!checkLocation(entity, locValue, defaultServers)) { numOfInvalid++; - } else { - URI currentUri = null; - try { - currentUri = new Path(locValue).toUri(); - } catch (Exception pe) { - System.err.println("In DB with " + getNameOrID(res,5,4)); - System.err.println("Invalid location for Table with " + getNameOrID(res,2,1)); - System.err.println(pe.getMessage()); - numOfInvalid++; - continue; - } - if (currentUri.getScheme() == null || currentUri.getScheme().isEmpty()) { - System.err.println("In DB with " + getNameOrID(res,5,4)); - System.err.println("Missing Location scheme for Table with " + getNameOrID(res,2,1)); - System.err.println("The Location is: " + locValue); - numOfInvalid++; - } else if(defaultPrefix != null && !defaultPrefix.isEmpty() && locValue.substring(0,defaultPrefix.length()) - .compareToIgnoreCase(defaultPrefix) != 0) { - System.err.println("In DB with " + getNameOrID(res,5,4)); - System.err.println("Mismatch root Location for Table with " + getNameOrID(res,2,1)); - System.err.println("The Location is: " + locValue); - numOfInvalid++; - } } } res.close(); @@ -335,9 +299,8 @@ public class HiveSchemaTool { return isValid; } - private boolean checkMetaStorePartitionLocation(Connection conn, String locHeader) + private boolean checkMetaStorePartitionLocation(Connection conn, URI[] defaultServers) throws HiveMetaException { - String defaultPrefix = locHeader; String partLoc, partIDRange; boolean isValid = true; int numOfInvalid = 0; @@ -377,33 +340,11 @@ public class HiveSchemaTool { res = pStmt.executeQuery(); while (res.next()) { String locValue = res.getString(3); - if (locValue == null) { - System.err.println("In DB with " + getNameOrID(res,7,6) + ", TABLE with " + getNameOrID(res,5,4)); - System.err.println("NULL Location for PARTITION with " + getNameOrID(res,2,1)); + String entity = "Database " + getNameOrID(res,7,6) + + ", Table " + getNameOrID(res,5,4) + + ", Partition " + getNameOrID(res,2,1); + if (!checkLocation(entity, locValue, defaultServers)) { numOfInvalid++; - } else { - URI currentUri = null; - try { - currentUri = new Path(locValue).toUri(); - } catch (Exception pe) { - System.err.println("In DB with " + getNameOrID(res,7,6) + ", TABLE with " + getNameOrID(res,5,4)); - System.err.println("Invalid location for PARTITON with " + getNameOrID(res,2,1)); - System.err.println(pe.getMessage()); - numOfInvalid++; - continue; - } - if (currentUri.getScheme() == null || currentUri.getScheme().isEmpty()) { - System.err.println("In DB with " + getNameOrID(res,7,6) + ", TABLE with " + getNameOrID(res,5,4)); - System.err.println("Missing Location scheme for PARTITON with " + getNameOrID(res,2,1)); - System.err.println("The Location is: " + locValue); - numOfInvalid++; - } else if (defaultPrefix != null && !defaultPrefix.isEmpty() && locValue.substring(0,defaultPrefix.length()) - .compareToIgnoreCase(defaultPrefix) != 0) { - System.err.println("In DB with " + getNameOrID(res,7,6) + ", TABLE with " + getNameOrID(res,5,4)); - System.err.println("Mismatch root Location for PARTITON with " + getNameOrID(res,2,1)); - System.err.println("The Location is: " + locValue); - numOfInvalid++; - } } } res.close(); @@ -420,6 +361,54 @@ public class HiveSchemaTool { return isValid; } + /** + * Check if the location is valid for the given entity + * @param entity the entity to represent a database, partition or table + * @param entityLocation the location + * @param defaultServers a list of the servers that the location needs to match. + * The location host needs to match one of the given servers. + * If empty, then no check against such list. + * @return true if the location is valid + */ + private boolean checkLocation( + String entity, + String entityLocation, + URI[] defaultServers) { + boolean isValid = true; + if (entityLocation == null) { + System.err.println(entity + ", error: empty location"); + isValid = false; + } else { + try { + URI currentUri = new Path(entityLocation).toUri(); + String scheme = currentUri.getScheme(); + if (StringUtils.isEmpty(scheme)) { + System.err.println(entity + ", location: "+ entityLocation + ", error: missing location scheme"); + isValid = false; + } else if (ArrayUtils.isNotEmpty(defaultServers) && currentUri.getAuthority() != null) { + String authority = currentUri.getAuthority(); + boolean matchServer = false; + for(URI server : defaultServers) { + if (StringUtils.equalsIgnoreCase(server.getScheme(), scheme) && + StringUtils.equalsIgnoreCase(server.getAuthority(), authority)) { + matchServer = true; + break; + } + } + if (!matchServer) { + System.err.println(entity + ", location: " + entityLocation + ", error: mismatched server"); + isValid = false; + } + } + } catch (Exception pe) { + System.err.println(entity + ", error: invalid location " + pe.getMessage()); + isValid =false; + } + } + + return isValid; + } + // test the connection metastore using the config property private void testConnectionToMetastore() throws HiveMetaException { Connection conn = getConnectionToMetastore(true); @@ -551,7 +540,7 @@ public class HiveSchemaTool { validateSchemaVersions(conn); validateSequences(conn); validateSchemaTables(conn); - validateLocations(conn, null); + validateLocations(conn, this.validationServers); validateColumnNullValues(conn); } finally { if (conn != null) { @@ -605,9 +594,12 @@ public class HiveSchemaTool { long maxId = res.getLong(1); if (maxId > 0) { ResultSet resSeq = stmt.executeQuery(seqQuery); - if (!resSeq.next() || resSeq.getLong(1) < maxId) { + if (!resSeq.next()) { + isValid = false; + System.err.println("Missing SEQUENCE_NAME " + seqName + " from SEQUENCE_TABLE"); + } else if (resSeq.getLong(1) < maxId) { isValid = false; - System.err.println("Incorrect sequence number: table - " + tableName); + System.err.println("NEXT_VAL for " + seqName + " in SEQUENCE_TABLE < max("+ tableKey + ") in " + tableName); } } } @@ -749,7 +741,7 @@ public class HiveSchemaTool { String tableName = res.getString("TBL_NAME"); String tableType = res.getString("TBL_TYPE"); isValid = false; - System.err.println("Value of SD_ID in TBLS should not be NULL: hive table - " + tableName + " tableId - " + tableId + " tableType - " + tableType); + System.err.println("SD_ID in TBLS should not be NULL for Table Name=" + tableName + ", Table ID=" + tableId + ", Table Type=" + tableType); } System.out.println((isValid ? "Succeeded" : "Failed") + " in column validation for incorrect NULL values"); @@ -915,7 +907,9 @@ public class HiveSchemaTool { .create("dbOpts"); Option dryRunOpt = new Option("dryRun", "list SQL scripts (no execute)"); Option verboseOpt = new Option("verbose", "only print SQL statements"); - + Option serversOpt = OptionBuilder.withArgName("serverList") + .hasArgs().withDescription("a comma-separated list of servers used in location validation") + .create("servers"); cmdLineOptions.addOption(help); cmdLineOptions.addOption(dryRunOpt); cmdLineOptions.addOption(userNameOpt); @@ -923,6 +917,7 @@ public class HiveSchemaTool { cmdLineOptions.addOption(dbTypeOpt); cmdLineOptions.addOption(verboseOpt); cmdLineOptions.addOption(dbOpts); + cmdLineOptions.addOption(serversOpt); cmdLineOptions.addOptionGroup(optGroup); } @@ -982,6 +977,9 @@ public class HiveSchemaTool { if (line.hasOption("dbOpts")) { schemaTool.setDbOpts(line.getOptionValue("dbOpts")); } + if (line.hasOption("validate") && line.hasOption("servers")) { + schemaTool.setValidationServers(line.getOptionValue("servers")); + } if (line.hasOption("info")) { schemaTool.showInfo(); } else if (line.hasOption("upgradeSchema")) { http://git-wip-us.apache.org/repos/asf/hive/blob/b2606545/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java ---------------------------------------------------------------------- diff --git a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java index 724f990..5241807 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java @@ -25,6 +25,7 @@ import java.io.FileWriter; import java.io.IOException; import java.io.OutputStream; import java.io.PrintStream; +import java.net.URI; import java.sql.Connection; import java.util.Random; @@ -600,11 +601,12 @@ public class TestSchemaTool extends TestCase { */ public void testValidateLocations() throws Exception { schemaTool.doInit(); - String defaultRoot = "hdfs://myhost.com:8020"; + URI defaultRoot = new URI("hdfs://myhost.com:8020"); + URI defaultRoot2 = new URI("s3://myhost2.com:8888"); //check empty DB boolean isValid = schemaTool.validateLocations(conn, null); assertTrue(isValid); - isValid = schemaTool.validateLocations(conn, defaultRoot); + isValid = schemaTool.validateLocations(conn, new URI[] {defaultRoot,defaultRoot2}); assertTrue(isValid); // Test valid case @@ -621,7 +623,7 @@ public class TestSchemaTool extends TestCase { schemaTool.runBeeLine(scriptFile.getPath()); isValid = schemaTool.validateLocations(conn, null); assertTrue(isValid); - isValid = schemaTool.validateLocations(conn, defaultRoot); + isValid = schemaTool.validateLocations(conn, new URI[] {defaultRoot, defaultRoot2}); assertTrue(isValid); scripts = new String[] { "delete from PARTITIONS", @@ -642,7 +644,7 @@ public class TestSchemaTool extends TestCase { schemaTool.runBeeLine(scriptFile.getPath()); isValid = schemaTool.validateLocations(conn, null); assertFalse(isValid); - isValid = schemaTool.validateLocations(conn, defaultRoot); + isValid = schemaTool.validateLocations(conn, new URI[] {defaultRoot, defaultRoot2}); assertFalse(isValid); }