Author: jbellis
Date: Fri Aug 7 20:57:02 2009
New Revision: 802185
URL: http://svn.apache.org/viewvc?rev=802185&view=rev
Log:
add SSTableWriter buffer size option; increase default.
patch by jbellis; reviewed by Sammy Yu for CASSANDRA-339
Modified:
incubator/cassandra/trunk/conf/storage-conf.xml
incubator/cassandra/trunk/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
incubator/cassandra/trunk/src/java/org/apache/cassandra/io/SSTableWriter.java
incubator/cassandra/trunk/test/system/stress.py
Modified: incubator/cassandra/trunk/conf/storage-conf.xml
URL:
http://svn.apache.org/viewvc/incubator/cassandra/trunk/conf/storage-conf.xml?rev=802185&r1=802184&r2=802185&view=diff
==============================================================================
--- incubator/cassandra/trunk/conf/storage-conf.xml (original)
+++ incubator/cassandra/trunk/conf/storage-conf.xml Fri Aug 7 20:57:02 2009
@@ -190,29 +190,41 @@
<!-- Memory, Disk, and Performance
-->
<!--======================================================================-->
- <!-- Add column indexes to a row after its contents reach this size -->
- <ColumnIndexSizeInKB>256</ColumnIndexSizeInKB>
+ <!-- Buffer size to use when flushing memtables to disk.
+ (Only one memtable is ever flushed at a time.)
+ Increase (decrease) the index buffer size relative to the data buffer
+ if you have few (many) columns per key. -->
+ <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
+ <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
+
+ <!-- Add column indexes to a row after its contents reach this size.
+ Increase if your column values are large, or if you have a very large
+ number of columns. The competing causes are, Cassandra has to
+ deserialize this much of the row to read a single column, so you
+ want it to be small - at least if you do many partial-row reads
+ - but all the index data is read for each access, so
+ you don't want to generate that wastefully either. -->
+ <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
<!--
- The maximum amount of data to store in memory before flushing to
+ The maximum amount of data to store in memory per ColumnFamily before flushing to
disk. Note: There is one memtable per column family, and this threshold
is based solely on the amount of data stored, not actual heap memory
usage (there is some overhead in indexing the columns).
-->
- <MemtableSizeInMB>32</MemtableSizeInMB>
-
+ <MemtableSizeInMB>64</MemtableSizeInMB>
<!--
- The maximum number of columns in millions to store in memory
+ The maximum number of columns in millions to store in memory per ColumnFamily
before flushing to disk. This is also a per-memtable setting.
Use with MemtableSizeInMB to tune memory usage.
-->
- <MemtableObjectCountInMillions>0.01</MemtableObjectCountInMillions>
+ <MemtableObjectCountInMillions>0.1</MemtableObjectCountInMillions>
<!-- Unlike most systems, in Cassandra writes are faster than
reads, so you can afford more of those in parallel.
A good rule of thumb is 2 concurrent reads per processor core.
- You especially want more concurrentwrites if you are using
- CommitLogSync + CommitLogSyncDelay. -->
+ Increase ConcurrentWrites to the number of clients writing
+ at once if you enable CommitLogSync + CommitLogSyncDelay. -->
<ConcurrentReads>8</ConcurrentReads>
<ConcurrentWrites>32</ConcurrentWrites>
Modified:
incubator/cassandra/trunk/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
URL:
http://svn.apache.org/viewvc/incubator/cassandra/trunk/src/java/org/apache/cassandra/config/DatabaseDescriptor.java?rev=802185&r1=802184&r2=802185&view=diff
==============================================================================
--- incubator/cassandra/trunk/src/java/org/apache/cassandra/config/DatabaseDescriptor.java (original)
+++ incubator/cassandra/trunk/src/java/org/apache/cassandra/config/DatabaseDescriptor.java Fri Aug 7 20:57:02 2009
@@ -67,6 +67,9 @@
private static int consistencyThreads_ = 4; // not configurable
private static int concurrentReaders_ = 8;
private static int concurrentWriters_ = 32;
+
+ private static int flushDataBufferSizeInMB_ = 32;
+ private static int flushIndexBufferSizeInMB_ = 32;
private static List<String> tables_ = new ArrayList<String>();
private static Set<String> applicationColumnFamilies_ = new HashSet<String>();
@@ -224,6 +227,17 @@
concurrentWriters_ = Integer.parseInt(rawWriters);
}
+ String rawFlushData = xmlUtils.getNodeValue("/Storage/FlushDataBufferSizeInMB");
+ if (rawFlushData != null)
+ {
+ flushDataBufferSizeInMB_ = Integer.parseInt(rawFlushData);
+ }
+ String rawFlushIndex = xmlUtils.getNodeValue("/Storage/FlushIndexBufferSizeInMB");
+ if (rawFlushIndex != null)
+ {
+ flushIndexBufferSizeInMB_ = Integer.parseInt(rawFlushIndex);
+ }
+
/* TCP port on which the storage system listens */
String port = xmlUtils.getNodeValue("/Storage/StoragePort");
if ( port != null )
@@ -909,4 +923,14 @@
{
return commitLogSync_;
}
+
+ public static int getFlushDataBufferSizeInMB()
+ {
+ return flushDataBufferSizeInMB_;
+ }
+
+ public static int getFlushIndexBufferSizeInMB()
+ {
+ return flushIndexBufferSizeInMB_;
+ }
}
Modified:
incubator/cassandra/trunk/src/java/org/apache/cassandra/io/SSTableWriter.java
URL:
http://svn.apache.org/viewvc/incubator/cassandra/trunk/src/java/org/apache/cassandra/io/SSTableWriter.java?rev=802185&r1=802184&r2=802185&view=diff
==============================================================================
--- incubator/cassandra/trunk/src/java/org/apache/cassandra/io/SSTableWriter.java (original)
+++ incubator/cassandra/trunk/src/java/org/apache/cassandra/io/SSTableWriter.java Fri Aug 7 20:57:02 2009
@@ -11,6 +11,7 @@
import org.apache.cassandra.dht.IPartitioner;
import org.apache.cassandra.utils.BloomFilter;
+import org.apache.cassandra.config.DatabaseDescriptor;
import com.reardencommerce.kernel.collections.shared.evictable.ConcurrentLinkedHashMap;
public class SSTableWriter extends SSTable
@@ -26,8 +27,8 @@
public SSTableWriter(String filename, int keyCount, IPartitioner partitioner) throws IOException
{
super(filename, partitioner);
- dataFile = new BufferedRandomAccessFile(path, "rw", 4 * 1024 * 1024);
- indexFile = new BufferedRandomAccessFile(indexFilename(), "rw", 1024 * 1024);
+ dataFile = new BufferedRandomAccessFile(path, "rw", DatabaseDescriptor.getFlushDataBufferSizeInMB() * 1024 * 1024);
+ indexFile = new BufferedRandomAccessFile(indexFilename(), "rw", DatabaseDescriptor.getFlushIndexBufferSizeInMB() * 1024 * 1024);
bf = new BloomFilter(keyCount, 15);
}
Modified: incubator/cassandra/trunk/test/system/stress.py
URL:
http://svn.apache.org/viewvc/incubator/cassandra/trunk/test/system/stress.py?rev=802185&r1=802184&r2=802185&view=diff
==============================================================================
--- incubator/cassandra/trunk/test/system/stress.py (original)
+++ incubator/cassandra/trunk/test/system/stress.py Fri Aug 7 20:57:02 2009
@@ -30,7 +30,7 @@
self.count = 0
client = get_client(port=9160)
client.transport.open()
- for i in xrange(0, 1000):
+ for i in xrange(0, 200):
data = md5(str(i)).hexdigest()
for j in xrange(0, 1000):
key = '%s.%s.%s' % (time.time(), id, j)