[jira] [Commented] (HIVE-4762) HMS cannot handle concurrent requests
[ https://issues.apache.org/jira/browse/HIVE-4762?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13689403#comment-13689403 ] Brock Noland commented on HIVE-4762: I documented this at https://cwiki.apache.org/confluence/display/Hive/AdminManual+MetastoreAdmin in revision https://cwiki.apache.org/confluence/pages/diffpagesbyversion.action?pageId=27362076&selectedPageVersions=12&selectedPageVersions=11. With the three fixes in HIVE-4759 I ran a long test overnight and I didn't have the same "Write" concurrency issues. > HMS cannot handle concurrent requests > - > > Key: HIVE-4762 > URL: https://issues.apache.org/jira/browse/HIVE-4762 > Project: Hive > Issue Type: Sub-task >Affects Versions: 0.11.0 >Reporter: Brock Noland >Assignee: Brock Noland >Priority: Critical > Fix For: 0.12.0 > > > It appears our use of DataNucleus is not correct or perhaps there is a bug > in the ancient version of DN we are using. On startup having multiple threads > performing "show tables" results in failures. Additionally concurrent DML > will fail even after startup. I used the program below to demonstrate this. 
> {noformat} > package org.apache.hadoop.hive.ql; > import java.sql.Connection; > import java.sql.DriverManager; > import java.sql.Statement; > import java.util.concurrent.ExecutorService; > import java.util.concurrent.Executors; > import org.apache.hive.jdbc.HiveDriver; > public class MultiThreadTest { > public static class QueryRunner implements Runnable { > int id; > double averageElapsedTime; > Connection connection; > Statement statement; > QueryRunner(int id) { > this.id = id; > } > @Override > public void run() { > long count = 0; > double elapsedTime = 0; > try { > connection = > DriverManager.getConnection("jdbc:hive2://localhost:1/default", "brock", > "password"); > statement = connection.createStatement(); > //statement.execute("DROP TABLE IF EXISTS t" + id); > for (int i = 0; i < 10; i++) { > // statement.execute("CREATE TABLE t" + id + " (key int)"); > long start = System.currentTimeMillis(); > // statement.execute("DROP TABLE t" + id); > statement.execute("SHOW TABLES"); > elapsedTime += System.currentTimeMillis() - start; > count++; > } > if(statement != null) { > statement.close(); > } > if(connection != null) { > connection.close(); > } > } catch (Exception e) { > e.printStackTrace(); > } finally { > if(count > 0) { > averageElapsedTime = elapsedTime / (double)count; > } > } > } > } > public static void main(String[] args) throws Exception { > int numThreads = 50; > Class.forName(HiveDriver.class.getName()); > ExecutorService executor = Executors.newFixedThreadPool(numThreads); > QueryRunner[] queryRunners = new QueryRunner[numThreads]; > for (int i = 0; i < numThreads; i++) { > queryRunners[i] = new QueryRunner(i); > executor.execute(queryRunners[i]); > } > executor.shutdown(); > while(!executor.isTerminated()) { > System.out.println("Waiting..."); > Thread.sleep(1000L); > } > for (int i = 0; i < numThreads; i++) { > System.out.println(Math.round(queryRunners[i].averageElapsedTime)); > } > } > } > {noformat} -- This message is automatically 
generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Commented] (HIVE-4762) HMS cannot handle concurrent requests
[ https://issues.apache.org/jira/browse/HIVE-4762?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13688852#comment-13688852 ] Brock Noland commented on HIVE-4762: The "Read" concurrency issue appears to be solved by placing the following in my hive-site.xml (HIVE-3521): {noformat} datanucleus.autoStartMechanism SchemaTable {noformat} > HMS cannot handle concurrent requests > - > > Key: HIVE-4762 > URL: https://issues.apache.org/jira/browse/HIVE-4762 > Project: Hive > Issue Type: Sub-task >Affects Versions: 0.11.0 >Reporter: Brock Noland >Assignee: Brock Noland >Priority: Critical > Fix For: 0.12.0 > > > It appears our use of DataNucleus is not correct or perhaps there is a bug > in the ancient version of DN we are using. On startup having multiple threads > performing "show tables" results in failures. Additionally concurrent DML > will fail even after startup. I used the program below to demonstrate this. > {noformat} > package org.apache.hadoop.hive.ql; > import java.sql.Connection; > import java.sql.DriverManager; > import java.sql.Statement; > import java.util.concurrent.ExecutorService; > import java.util.concurrent.Executors; > import org.apache.hive.jdbc.HiveDriver; > public class MultiThreadTest { > public static class QueryRunner implements Runnable { > int id; > double averageElapsedTime; > Connection connection; > Statement statement; > QueryRunner(int id) { > this.id = id; > } > @Override > public void run() { > long count = 0; > double elapsedTime = 0; > try { > connection = > DriverManager.getConnection("jdbc:hive2://localhost:1/default", "brock", > "password"); > statement = connection.createStatement(); > //statement.execute("DROP TABLE IF EXISTS t" + id); > for (int i = 0; i < 10; i++) { > // statement.execute("CREATE TABLE t" + id + " (key int)"); > long start = System.currentTimeMillis(); > // statement.execute("DROP TABLE t" + id); > statement.execute("SHOW TABLES"); > elapsedTime += 
System.currentTimeMillis() - start; > count++; > } > if(statement != null) { > statement.close(); > } > if(connection != null) { > connection.close(); > } > } catch (Exception e) { > e.printStackTrace(); > } finally { > if(count > 0) { > averageElapsedTime = elapsedTime / (double)count; > } > } > } > } > public static void main(String[] args) throws Exception { > int numThreads = 50; > Class.forName(HiveDriver.class.getName()); > ExecutorService executor = Executors.newFixedThreadPool(numThreads); > QueryRunner[] queryRunners = new QueryRunner[numThreads]; > for (int i = 0; i < numThreads; i++) { > queryRunners[i] = new QueryRunner(i); > executor.execute(queryRunners[i]); > } > executor.shutdown(); > while(!executor.isTerminated()) { > System.out.println("Waiting..."); > Thread.sleep(1000L); > } > for (int i = 0; i < numThreads; i++) { > System.out.println(Math.round(queryRunners[i].averageElapsedTime)); > } > } > } > {noformat} -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Commented] (HIVE-4762) HMS cannot handle concurrent requests
[ https://issues.apache.org/jira/browse/HIVE-4762?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13688617#comment-13688617 ] Brock Noland commented on HIVE-4762: This one is tough. The test above shows the HMS is not thread safe even with read requests. The DN docs state "A PersistenceManagerFactory is designed to be thread-safe. A PersistenceManager is not." Digging in, it looks like our PMF is, in fact, giving a new PM on each call for a new instance. I think we should fix this in two phases: *Break fix:* Put a lock around the HMS so it's single threaded. I have verified this works with the above test. *Long term fix:* Figure out if we are using DN wrong, there is a bug in the version we are using, or something else. > HMS cannot handle concurrent requests > - > > Key: HIVE-4762 > URL: https://issues.apache.org/jira/browse/HIVE-4762 > Project: Hive > Issue Type: Sub-task >Affects Versions: 0.11.0 >Reporter: Brock Noland >Assignee: Brock Noland >Priority: Critical > Fix For: 0.12.0 > > > It appears our use of DataNucleus is not correct or perhaps there is a bug > in the ancient version of DN we are using. On startup having multiple threads > performing "show tables" results in failures. Additionally concurrent DML > will fail even after startup. I used the program below to demonstrate this. 
> {noformat} > package org.apache.hadoop.hive.ql; > import java.sql.Connection; > import java.sql.DriverManager; > import java.sql.Statement; > import java.util.concurrent.ExecutorService; > import java.util.concurrent.Executors; > import org.apache.hive.jdbc.HiveDriver; > public class MultiThreadTest { > public static class QueryRunner implements Runnable { > int id; > double averageElapsedTime; > Connection connection; > Statement statement; > QueryRunner(int id) { > this.id = id; > } > @Override > public void run() { > long count = 0; > double elapsedTime = 0; > try { > connection = > DriverManager.getConnection("jdbc:hive2://localhost:1/default", "brock", > "password"); > statement = connection.createStatement(); > //statement.execute("DROP TABLE IF EXISTS t" + id); > for (int i = 0; i < 10; i++) { > // statement.execute("CREATE TABLE t" + id + " (key int)"); > long start = System.currentTimeMillis(); > // statement.execute("DROP TABLE t" + id); > statement.execute("SHOW TABLES"); > elapsedTime += System.currentTimeMillis() - start; > count++; > } > if(statement != null) { > statement.close(); > } > if(connection != null) { > connection.close(); > } > } catch (Exception e) { > e.printStackTrace(); > } finally { > if(count > 0) { > averageElapsedTime = elapsedTime / (double)count; > } > } > } > } > public static void main(String[] args) throws Exception { > int numThreads = 50; > Class.forName(HiveDriver.class.getName()); > ExecutorService executor = Executors.newFixedThreadPool(numThreads); > QueryRunner[] queryRunners = new QueryRunner[numThreads]; > for (int i = 0; i < numThreads; i++) { > queryRunners[i] = new QueryRunner(i); > executor.execute(queryRunners[i]); > } > executor.shutdown(); > while(!executor.isTerminated()) { > System.out.println("Waiting..."); > Thread.sleep(1000L); > } > for (int i = 0; i < numThreads; i++) { > System.out.println(Math.round(queryRunners[i].averageElapsedTime)); > } > } > } > {noformat} -- This message is automatically 
generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators For more information on JIRA, see: http://www.atlassian.com/software/jira