Your config. looks fine.

Only thing that gives me pause is:

"-XX:NewSize=6m -XX:MaxNewSize=6m"

Any reason for the above?

If you study your GC logs, lots of pauses?

Oh, and this: replication is set to 6.  Why 6?  Each write must commit to 6
datanodes before complete.  In the tests posted on wiki, we replicate to 3
nodes.

At the end of this message you say you are doing gets?  The numbers you posted were
for writes?

St.Ack


On Wed, Aug 12, 2009 at 1:15 PM, llpind <[email protected]> wrote:

>
> Not sure why my performance is so slow.  Here is my configuration:
>
> box1:
> 10395 SecondaryNameNode
> 11628 Jps
> 10131 NameNode
> 10638 HQuorumPeer
> 10705 HMaster
>
> box 2-5:
> 6741 HQuorumPeer
> 6841 HRegionServer
> 7881 Jps
> 6610 DataNode
>
>
> hbase site: =======================
> <?xml version="1.0"?>
> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
> <!--
> /**
>  * Copyright 2007 The Apache Software Foundation
>  *
>  * Licensed to the Apache Software Foundation (ASF) under one
>  * or more contributor license agreements.  See the NOTICE file
>  * distributed with this work for additional information
>  * regarding copyright ownership.  The ASF licenses this file
>  * to you under the Apache License, Version 2.0 (the
>  * "License"); you may not use this file except in compliance
>  * with the License.  You may obtain a copy of the License at
>  *
>  *     http://www.apache.org/licenses/LICENSE-2.0
>  *
>  * Unless required by applicable law or agreed to in writing, software
>  * distributed under the License is distributed on an "AS IS" BASIS,
>  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>  * See the License for the specific language governing permissions and
>  * limitations under the License.
>  */
> -->
> <configuration>
>  <property>
>    <name>hbase.rootdir</name>
>    <value>hdfs://box1:9000/hbase</value>
>    <description>The directory shared by region servers.
>    </description>
>  </property>
>  <property>
>    <name>hbase.master.port</name>
>    <value>60000</value>
>    <description>The port that the HBase master runs at.
>    </description>
>  </property>
>  <property>
>    <name>hbase.cluster.distributed</name>
>    <value>true</value>
>    <description>The mode the cluster will be in. Possible values are
>      false: standalone and pseudo-distributed setups with managed Zookeeper
>      true: fully-distributed with unmanaged Zookeeper Quorum (see
> hbase-env.sh)
>    </description>
>  </property>
>  <property>
>    <name>hbase.regionserver.lease.period</name>
>    <value>120000</value>
>    <description>HRegion server lease period in milliseconds. Default is
>    60 seconds. Clients must report in within this period else they are
>    considered dead.</description>
>  </property>
>
>  <property>
>      <name>hbase.zookeeper.property.clientPort</name>
>      <value>2222</value>
>      <description>Property from ZooKeeper's config zoo.cfg.
>      The port at which the clients will connect.
>      </description>
>  </property>
>  <property>
>      <name>hbase.zookeeper.property.dataDir</name>
>      <value>/home/hadoop/zookeeper</value>
>  </property>
>  <property>
>      <name>hbase.zookeeper.property.syncLimit</name>
>      <value>5</value>
>  </property>
>  <property>
>      <name>hbase.zookeeper.property.tickTime</name>
>      <value>2000</value>
>  </property>
>  <property>
>      <name>hbase.zookeeper.property.initLimit</name>
>      <value>10</value>
>  </property>
>  <property>
>      <name>hbase.zookeeper.quorum</name>
>      <value>box1,box2,box3,box4</value>
>      <description>Comma separated list of servers in the ZooKeeper Quorum.
>      For example,
> "host1.mydomain.com,host2.mydomain.com,host3.mydomain.com".
>      By default this is set to localhost for local and pseudo-distributed
> modes
>      of operation. For a fully-distributed setup, this should be set to a
> full
>      list of ZooKeeper quorum servers. If HBASE_MANAGES_ZK is set in
> hbase-env.sh
>      this is the list of servers which we will start/stop ZooKeeper on.
>      </description>
>  </property>
>  <property>
>    <name>hfile.block.cache.size</name>
>    <value>.5</value>
>    <description>text</description>
>  </property>
>
> </configuration>
>
>
> hbase env:====================================================
>
> export HBASE_CLASSPATH=${HADOOP_CONF_DIR}
>
> export HBASE_HEAPSIZE=3000
>
> export HBASE_OPTS="-XX:NewSize=6m -XX:MaxNewSize=6m -XX:+UseConcMarkSweepGC
> -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps
> -XX:+CMSIncrementalMode
> -Xloggc:/home/hadoop/hbase-0.20.0/logs/gc-hbase.log"
>
> export HBASE_MANAGES_ZK=true
>
> Hadoop core site===========================================================
>
> <?xml version="1.0"?>
> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
>
> <!-- Put site-specific property overrides in this file. -->
>
> <configuration>
> <property>
>   <name>fs.default.name</name>
>   <value>hdfs://box1:9000</value>
>   <description>The name of the default file system.  A URI whose
>   scheme and authority determine the FileSystem implementation.  The
>   uri's scheme determines the config property (fs.SCHEME.impl) naming
>   the FileSystem implementation class.  The uri's authority is used to
>   determine the host, port, etc. for a filesystem.</description>
> </property>
> <property>
>  <name>hadoop.tmp.dir</name>
>  <value>/data/hadoop-0.20.0-${user.name}</value>
>  <description>A base for other temporary directories.</description>
> </property>
> </configuration>
>
> ==============
>
> replication is set to 6.
>
> hadoop env=================
>
> export HADOOP_HEAPSIZE=3000
> export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote
> $HADOOP_NAMENODE_OPTS"
> export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote
> $HADOOP_SECONDARYNAMENODE_OPTS"
> export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote
> $HADOOP_DATANODE_OPTS"
> export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote
> $HADOOP_BALANCER_OPTS"
> export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote
> $HADOOP_JOBTRACKER_OPTS"
>  ==================
>
>
> Very basic setup.  then i start the cluster do simple random Get operations
> on a tall table (~60 M rows):
>
> {NAME => 'tallTable', FAMILIES => [{NAME => 'family1', COMPRESSION =>
> 'NONE', VERSIONS => '3', TTL => '2147483647', BLOCKSIZE => '65536',
> IN_MEMORY => 'false', BLOCKCACHE => 'true'}]}
>
> Are these fairly normal speeds?  I'm unsure if this is a result of having a
> small cluster?  Please advise...
>
> stack-3 wrote:
> >
> > Yeah, seems slow.  In old hbase, it could do 5-10k writes a second going
> > by
> > performance eval page up on wiki.  SequentialWrite was about same as
> > RandomWrite.  Check out the stats on hw up on that page and description
> of
> > how the test was set up.  Can you figure out where it's slow?
> >
> > St.Ack
> >
> > On Wed, Aug 12, 2009 at 10:10 AM, llpind <[email protected]> wrote:
> >
> >>
> >> Thanks Stack.
> >>
> >> I will try mapred with more clients.   I tried it without mapred using 3
> >> clients Random Write operations here was the output:
> >>
> >> 09/08/12 09:22:52 INFO hbase.PerformanceEvaluation: client-0 Start
> >> randomWrite at offset 0 for 1048576 rows
> >> 09/08/12 09:22:52 INFO hbase.PerformanceEvaluation: client-1 Start
> >> randomWrite at offset 1048576 for 1048576 rows
> >> 09/08/12 09:22:52 INFO hbase.PerformanceEvaluation: client-2 Start
> >> randomWrite at offset 2097152 for 1048576 rows
> >> 09/08/12 09:24:23 INFO hbase.PerformanceEvaluation: client-1
> >> 1048576/1153427/2097152
> >> 09/08/12 09:24:23 INFO hbase.PerformanceEvaluation: client-2
> >> 2097152/2201997/3145728
> >> 09/08/12 09:24:25 INFO hbase.PerformanceEvaluation: client-0
> >> 0/104857/1048576
> >> 09/08/12 09:27:42 INFO hbase.PerformanceEvaluation: client-0
> >> 0/209714/1048576
> >> 09/08/12 09:27:46 INFO hbase.PerformanceEvaluation: client-1
> >> 1048576/1258284/2097152
> >> 09/08/12 09:27:46 INFO hbase.PerformanceEvaluation: client-2
> >> 2097152/2306854/3145728
> >> 09/08/12 09:32:32 INFO hbase.PerformanceEvaluation: client-1
> >> 1048576/1363141/2097152
> >> 09/08/12 09:32:33 INFO hbase.PerformanceEvaluation: client-0
> >> 0/314571/1048576
> >> 09/08/12 09:32:41 INFO hbase.PerformanceEvaluation: client-2
> >> 2097152/2411711/3145728
> >> 09/08/12 09:35:31 INFO hbase.PerformanceEvaluation: client-0
> >> 0/419428/1048576
> >> 09/08/12 09:35:34 INFO hbase.PerformanceEvaluation: client-1
> >> 1048576/1467998/2097152
> >> 09/08/12 09:35:53 INFO hbase.PerformanceEvaluation: client-2
> >> 2097152/2516568/3145728
> >> 09/08/12 09:39:02 INFO hbase.PerformanceEvaluation: client-0
> >> 0/524285/1048576
> >> 09/08/12 09:39:03 INFO hbase.PerformanceEvaluation: client-2
> >> 2097152/2621425/3145728
> >> 09/08/12 09:40:07 INFO hbase.PerformanceEvaluation: client-1
> >> 1048576/1572855/2097152
> >> 09/08/12 09:42:53 INFO hbase.PerformanceEvaluation: client-0
> >> 0/629142/1048576
> >> 09/08/12 09:44:25 INFO hbase.PerformanceEvaluation: client-2
> >> 2097152/2726282/3145728
> >> 09/08/12 09:44:44 INFO hbase.PerformanceEvaluation: client-1
> >> 1048576/1677712/2097152
> >> 09/08/12 09:46:43 INFO hbase.PerformanceEvaluation: client-0
> >> 0/733999/1048576
> >> 09/08/12 09:48:11 INFO hbase.PerformanceEvaluation: client-2
> >> 2097152/2831139/3145728
> >> 09/08/12 09:48:29 INFO hbase.PerformanceEvaluation: client-1
> >> 1048576/1782569/2097152
> >> 09/08/12 09:50:12 INFO hbase.PerformanceEvaluation: client-0
> >> 0/838856/1048576
> >> 09/08/12 09:52:47 INFO hbase.PerformanceEvaluation: client-2
> >> 2097152/2935996/3145728
> >> 09/08/12 09:53:51 INFO hbase.PerformanceEvaluation: client-1
> >> 1048576/1887426/2097152
> >> 09/08/12 09:56:32 INFO hbase.PerformanceEvaluation: client-0
> >> 0/943713/1048576
> >> 09/08/12 09:58:32 INFO hbase.PerformanceEvaluation: client-2
> >> 2097152/3040853/3145728
> >> 09/08/12 09:59:14 INFO hbase.PerformanceEvaluation: client-1
> >> 1048576/1992283/2097152
> >> 09/08/12 10:02:28 INFO hbase.PerformanceEvaluation: client-0
> >> 0/1048570/1048576
> >> 09/08/12 10:02:30 INFO hbase.PerformanceEvaluation: client-0 Finished
> >> randomWrite in 2376615ms at offset 0 for 1048576 rows
> >> 09/08/12 10:02:30 INFO hbase.PerformanceEvaluation: Finished 0 in
> >> 2376615ms
> >> writing 1048576 rows
> >> 09/08/12 10:06:35 INFO hbase.PerformanceEvaluation: client-2
> >> 2097152/3145710/3145728
> >> 09/08/12 10:06:38 INFO hbase.PerformanceEvaluation: client-2 Finished
> >> randomWrite in 2623395ms at offset 2097152 for 1048576 rows
> >> 09/08/12 10:06:38 INFO hbase.PerformanceEvaluation: Finished 2 in
> >> 2623395ms
> >> writing 1048576 rows
> >> 09/08/12 10:06:42 INFO hbase.PerformanceEvaluation: client-1
> >> 1048576/2097140/2097152
> >> 09/08/12 10:06:43 INFO hbase.PerformanceEvaluation: client-1 Finished
> >> randomWrite in 2630199ms at offset 1048576 for 1048576 rows
> >> 09/08/12 10:06:43 INFO hbase.PerformanceEvaluation: Finished 1 in
> >> 2630199ms
> >> writing 1048576 rows
> >>
> >>
> >>
> >> Seems kind of slow for ~3M records.  I have a 4 node cluster up at the
> >> moment.  HMaster & Namenode running on same box.
> >> --
> >> View this message in context:
> >>
> http://www.nabble.com/HBase-in-a-real-world-application-tp24920888p24940922.html
> >> Sent from the HBase User mailing list archive at Nabble.com.
> >>
> >>
> >
> >
>
> --
> View this message in context:
> http://www.nabble.com/HBase-in-a-real-world-application-tp24920888p24943406.html
> Sent from the HBase User mailing list archive at Nabble.com.
>
>

Reply via email to