[ 
https://issues.apache.org/jira/browse/CASSANDRA-13266?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

liangsibin updated CASSANDRA-13266:
-----------------------------------
    Description: 
When I bulk load SSTables created with CQLSSTableWriter, the load is
sometimes very slow.
Setup: 2 nodes write SSTables and bulk load them.
1. Use CQLSSTableWriter to create the SSTables (60 threads).
2. When a directory exceeds 100000 rows, bulk load that directory (20 threads).
The normal bulk load speed is about 70M/s per node, and bulk loading 141G of
SSTables takes 90 minutes. Sometimes it is very slow, though: the same data
takes 4 hours. Why?
Here is the code that bulk loads the SSTables:

|public class JmxBulkLoader {
        
    static final Logger LOGGER = LoggerFactory.getLogger(JmxBulkLoader.class);
        private JMXConnector connector;
        private StorageServiceMBean storageBean;
        private Timer timer = new Timer();

        public JmxBulkLoader(String host, int port) throws Exception {
                connect(host, port);
        }


        private void connect(String host, int port) throws IOException, 
MalformedObjectNameException {
                JMXServiceURL jmxUrl = new JMXServiceURL(
                                
String.format("service:jmx:rmi:///jndi/rmi://%s:%d/jmxrmi", host, port));
                Map<String, Object> env = new HashMap<String, Object>();
                connector = JMXConnectorFactory.connect(jmxUrl, env);
                MBeanServerConnection mbeanServerConn = 
connector.getMBeanServerConnection();
                ObjectName name = new 
ObjectName("org.apache.cassandra.db:type=StorageService");
                storageBean = JMX.newMBeanProxy(mbeanServerConn, name, 
StorageServiceMBean.class);
        }

        public void close() throws IOException {
                connector.close();
        }

        public void bulkLoad(String path) {
                LOGGER.info("begin load data to cassandra " + new 
Path(path).getName());
                timer.start();
                storageBean.bulkLoad(path);
                timer.end();
                LOGGER.info("bulk load took " + timer.getTimeTakenMillis() + 
"ms, path: " + new Path(path).getName());
        }
}
bulkload thread 
|public class BulkThread implements Runnable {

        private String path;
        private String jmxHost;
        private int jmxPort;
        
        public BulkThread(String path, String jmxHost, int jmxPort) {
                super();
                this.path = path;
                this.jmxHost = jmxHost;
                this.jmxPort = jmxPort;
        }
        @Override
        public void run() {
                JmxBulkLoader bulkLoader = null;
                try {
                        bulkLoader = new JmxBulkLoader(jmxHost, jmxPort);
                        bulkLoader.bulkLoad(path);
                } catch (Exception e) {
                        e.printStackTrace();
                } finally {
                        if (bulkLoader != null)
                                try {
                                        bulkLoader.close();
                                        bulkLoader = null;
                                } catch (IOException e) {
                                        e.printStackTrace();
                                }
                }
        }
}

  was:
When I bulk load SSTables created with CQLSSTableWriter, the load is
sometimes very slow.
Setup: 2 nodes write SSTables and bulk load them.
1. Use CQLSSTableWriter to create the SSTables (60 threads).
2. When a directory exceeds 100000 rows, bulk load that directory (20 threads).
The normal bulk load speed is about 70M/s per node, and bulk loading 141G of
SSTables takes 90 minutes. Sometimes it is very slow, though: the same data
takes 4 hours. Why?
Here is the code that bulk loads the SSTables:

/**
 * Small JMX client that connects to a node's StorageService MBean and invokes
 * its {@code bulkLoad} operation for a given directory.
 *
 * <p>The constructor opens the JMX connection; call {@link #close()} when done.
 * The class implements {@link java.io.Closeable} so it can also be used in a
 * try-with-resources statement.
 */
public class JmxBulkLoader implements java.io.Closeable {

        static final Logger LOGGER = LoggerFactory.getLogger(JmxBulkLoader.class);

        private JMXConnector connector;
        private StorageServiceMBean storageBean;
        private final Timer timer = new Timer();

        /**
         * Opens a JMX connection to {@code host:port} and prepares the MBean proxy.
         *
         * @param host JMX host name or address
         * @param port JMX port
         * @throws Exception if the connection or the MBean lookup cannot be set up
         */
        public JmxBulkLoader(String host, int port) throws Exception {
                connect(host, port);
        }

        /**
         * Connects to the JMX service and builds the StorageService proxy.
         *
         * <p>If anything fails after the connector has been opened, the connector
         * is closed before the exception propagates; otherwise the constructor
         * would fail and nobody would hold a reference able to close it.
         */
        private void connect(String host, int port) throws IOException,
                        MalformedObjectNameException {
                JMXServiceURL jmxUrl = new JMXServiceURL(
                                String.format("service:jmx:rmi:///jndi/rmi://%s:%d/jmxrmi", host, port));
                Map<String, Object> env = new HashMap<String, Object>();
                JMXConnector opened = JMXConnectorFactory.connect(jmxUrl, env);
                boolean ready = false;
                try {
                        MBeanServerConnection mbeanServerConn = opened.getMBeanServerConnection();
                        ObjectName name = new ObjectName("org.apache.cassandra.db:type=StorageService");
                        storageBean = JMX.newMBeanProxy(mbeanServerConn, name, StorageServiceMBean.class);
                        connector = opened;
                        ready = true;
                } finally {
                        if (!ready) {
                                try {
                                        opened.close();
                                } catch (IOException ignored) {
                                        // Best-effort cleanup: the original failure is the one
                                        // the caller needs to see, so it is left to propagate.
                                }
                        }
                }
        }

        /**
         * Closes the underlying JMX connection.
         *
         * @throws IOException if the connection cannot be closed cleanly
         */
        @Override
        public void close() throws IOException {
                connector.close();
        }

        /**
         * Invokes {@code bulkLoad(path)} on the StorageService MBean and logs how
         * long the call took.
         *
         * @param path directory to hand to the MBean's {@code bulkLoad} operation
         */
        public void bulkLoad(String path) {
                // Only the last path component is logged, as before.
                String name = new Path(path).getName();
                LOGGER.info("begin load data to cassandra {}", name);
                timer.start();
                storageBean.bulkLoad(path);
                timer.end();
                LOGGER.info("bulk load took {}ms, path: {}", timer.getTimeTakenMillis(), name);
        }
}

bulkload thread 
/**
 * Runnable that bulk loads a single directory through its own
 * {@link JmxBulkLoader}, which it creates and closes within {@link #run()}.
 */
public class BulkThread implements Runnable {

        private final String jmxHost;
        private final int jmxPort;
        private final String path;

        /**
         * @param path    directory passed to {@link JmxBulkLoader#bulkLoad(String)}
         * @param jmxHost JMX host to connect to
         * @param jmxPort JMX port to connect to
         */
        public BulkThread(String path, String jmxHost, int jmxPort) {
                this.path = path;
                this.jmxHost = jmxHost;
                this.jmxPort = jmxPort;
        }

        /**
         * Connects, runs the bulk load, and always closes the loader afterwards.
         * Exceptions are printed to stderr rather than propagated.
         */
        @Override
        public void run() {
                final JmxBulkLoader loader;
                try {
                        loader = new JmxBulkLoader(jmxHost, jmxPort);
                } catch (Exception e) {
                        // Nothing was opened, so there is nothing to close.
                        e.printStackTrace();
                        return;
                }

                try {
                        loader.bulkLoad(path);
                } catch (Exception e) {
                        e.printStackTrace();
                } finally {
                        closeLoader(loader);
                }
        }

        /** Closes the loader, printing (not propagating) an I/O failure. */
        private static void closeLoader(JmxBulkLoader loader) {
                try {
                        loader.close();
                } catch (IOException e) {
                        e.printStackTrace();
                }
        }
}


> Bulk loading sometimes is very slow?
> ------------------------------------
>
>                 Key: CASSANDRA-13266
>                 URL: https://issues.apache.org/jira/browse/CASSANDRA-13266
>             Project: Cassandra
>          Issue Type: Improvement
>            Reporter: liangsibin
>
> When I bulk load SSTables created with CQLSSTableWriter, the load is
> sometimes very slow.
> Setup: 2 nodes write SSTables and bulk load them.
> 1. Use CQLSSTableWriter to create the SSTables (60 threads).
> 2. When a directory exceeds 100000 rows, bulk load that directory (20 threads).
> The normal bulk load speed is about 70M/s per node, and bulk loading 141G of
> SSTables takes 90 minutes. Sometimes it is very slow, though: the same data
> takes 4 hours. Why?
> Here is the code that bulk loads the SSTables:
> |public class JmxBulkLoader {
>       
>     static final Logger LOGGER = LoggerFactory.getLogger(JmxBulkLoader.class);
>       private JMXConnector connector;
>       private StorageServiceMBean storageBean;
>       private Timer timer = new Timer();
>       public JmxBulkLoader(String host, int port) throws Exception {
>               connect(host, port);
>       }
>       private void connect(String host, int port) throws IOException, 
> MalformedObjectNameException {
>               JMXServiceURL jmxUrl = new JMXServiceURL(
>                               
> String.format("service:jmx:rmi:///jndi/rmi://%s:%d/jmxrmi", host, port));
>               Map<String, Object> env = new HashMap<String, Object>();
>               connector = JMXConnectorFactory.connect(jmxUrl, env);
>               MBeanServerConnection mbeanServerConn = 
> connector.getMBeanServerConnection();
>               ObjectName name = new 
> ObjectName("org.apache.cassandra.db:type=StorageService");
>               storageBean = JMX.newMBeanProxy(mbeanServerConn, name, 
> StorageServiceMBean.class);
>       }
>       public void close() throws IOException {
>               connector.close();
>       }
>       public void bulkLoad(String path) {
>               LOGGER.info("begin load data to cassandra " + new 
> Path(path).getName());
>               timer.start();
>               storageBean.bulkLoad(path);
>               timer.end();
>               LOGGER.info("bulk load took " + timer.getTimeTakenMillis() + 
> "ms, path: " + new Path(path).getName());
>       }
> }
> bulkload thread 
> |public class BulkThread implements Runnable {
>       private String path;
>       private String jmxHost;
>       private int jmxPort;
>       
>       public BulkThread(String path, String jmxHost, int jmxPort) {
>               super();
>               this.path = path;
>               this.jmxHost = jmxHost;
>               this.jmxPort = jmxPort;
>       }
>       @Override
>       public void run() {
>               JmxBulkLoader bulkLoader = null;
>               try {
>                       bulkLoader = new JmxBulkLoader(jmxHost, jmxPort);
>                       bulkLoader.bulkLoad(path);
>               } catch (Exception e) {
>                       e.printStackTrace();
>               } finally {
>                       if (bulkLoader != null)
>                               try {
>                                       bulkLoader.close();
>                                       bulkLoader = null;
>                               } catch (IOException e) {
>                                       e.printStackTrace();
>                               }
>               }
>       }
> }



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Reply via email to