[jira] [Resolved] (CASSANDRA-3643) Cassandra C CQL driver
[ https://issues.apache.org/jira/browse/CASSANDRA-3643?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Sylvain Lebresne resolved CASSANDRA-3643.
-----------------------------------------
    Resolution: Duplicate

Closing this as a duplicate of CASSANDRA-2478. This is *not* saying C support wouldn't be useful/needed, but there are just not that many possibilities:
* either thrift adds better support for it -- this has to do with the thrift project.
* or we wait for CASSANDRA-2478 and then someone creates a C driver using that (note that CQL drivers live out of tree, so even post-2478 there is not much point in leaving this ticket open).

Cassandra C CQL driver
----------------------

                 Key: CASSANDRA-3643
                 URL: https://issues.apache.org/jira/browse/CASSANDRA-3643
             Project: Cassandra
          Issue Type: Wish
          Components: Drivers
    Affects Versions: 1.0.6
         Environment: all
            Reporter: Vlad Paiu
            Priority: Blocker
              Labels: C, cql, driver

It's really a shame that such a great project as Cassandra doesn't offer a way to be used from within a C application. Thrift has never worked well for C, or it is very poorly documented. Either way, integrating Cassandra with an application written in C is just not possible in an elegant manner at the moment. With the development of CQL, it would really be great if one could run CQL commands from within a C library, very much like libmysqlclient.

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira
svn commit: r1220661 - /cassandra/trunk/src/java/org/apache/cassandra/db/context/CounterContext.java
Author: slebresne
Date: Mon Dec 19 09:16:07 2011
New Revision: 1220661

URL: http://svn.apache.org/viewvc?rev=1220661&view=rev
Log:
document reference to #1938

Modified:
    cassandra/trunk/src/java/org/apache/cassandra/db/context/CounterContext.java

Modified: cassandra/trunk/src/java/org/apache/cassandra/db/context/CounterContext.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/db/context/CounterContext.java?rev=1220661&r1=1220660&r2=1220661&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/db/context/CounterContext.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/db/context/CounterContext.java Mon Dec 19 09:16:07 2011
@@ -60,6 +60,10 @@ import org.apache.cassandra.utils.NodeId
  *   - delta + delta = sum counts (and logical clock)
  *   - delta + other = keep the delta one
  *   - other + other = keep the shard with highest logical clock
+ *
+ * For a detailed description of the meaning of a delta and why the merging
+ * rules work this way, see CASSANDRA-1938 - specifically the 1938_discussion
+ * attachment.
  */
 public class CounterContext implements IContext
 {
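The three merge rules quoted in that comment can be illustrated with a small self-contained sketch. The types and fields below are hypothetical stand-ins, not the actual CounterContext shard representation:

```java
// Illustrative sketch of the documented shard-merge rules:
// delta + delta sums counts (and logical clocks), delta + other keeps the
// delta, other + other keeps the shard with the highest logical clock.
final class Shard {
    final long clock;    // logical clock
    final long count;
    final boolean delta;

    Shard(long clock, long count, boolean delta) {
        this.clock = clock;
        this.count = count;
        this.delta = delta;
    }

    static Shard merge(Shard a, Shard b) {
        if (a.delta && b.delta)
            return new Shard(a.clock + b.clock, a.count + b.count, true);
        if (a.delta)
            return a;            // delta + other = keep the delta one
        if (b.delta)
            return b;
        return a.clock >= b.clock ? a : b; // other + other = highest clock wins
    }
}
```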
svn commit: r1220662 - in /cassandra/trunk: CHANGES.txt src/java/org/apache/cassandra/io/sstable/SSTableSimpleUnsortedWriter.java
Author: slebresne
Date: Mon Dec 19 09:18:36 2011
New Revision: 1220662

URL: http://svn.apache.org/viewvc?rev=1220662&view=rev
Log:
Use separate writer thread in SSTableSimpleUnsortedWriter
patch by slebresne; reviewed by yukim for CASSANDRA-3619

Modified:
    cassandra/trunk/CHANGES.txt
    cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableSimpleUnsortedWriter.java

Modified: cassandra/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/cassandra/trunk/CHANGES.txt?rev=1220662&r1=1220661&r2=1220662&view=diff
==============================================================================
--- cassandra/trunk/CHANGES.txt (original)
+++ cassandra/trunk/CHANGES.txt Mon Dec 19 09:18:36 2011
@@ -25,6 +25,7 @@
    (CASSANDRA-3538)
  * Improve memtable slice iteration performance (CASSANDRA-3545)
  * more efficient allocation of small bloom filters (CASSANDRA-3618)
+ * Use separate writer thread in SSTableSimpleUnsortedWriter (CASSANDRA-3619)
 
 1.0.7

Modified: cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableSimpleUnsortedWriter.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableSimpleUnsortedWriter.java?rev=1220662&r1=1220661&r2=1220662&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableSimpleUnsortedWriter.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableSimpleUnsortedWriter.java Mon Dec 19 09:18:36 2011
@@ -23,6 +23,9 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.Map;
 import java.util.TreeMap;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.SynchronousQueue;
+import java.util.concurrent.locks.Condition;
 
 import org.apache.cassandra.config.CFMetaData;
 import org.apache.cassandra.db.*;
@@ -31,6 +34,7 @@ import org.apache.cassandra.service.Stor
 import org.apache.cassandra.utils.HeapAllocator;
 import org.apache.cassandra.utils.ByteBufferUtil;
+import org.apache.cassandra.utils.SimpleCondition;
 
 /**
  * A SSTable writer that doesn't assume rows are in sorted order.
@@ -43,10 +47,15 @@ import org.apache.cassandra.utils.ByteBu
  */
 public class SSTableSimpleUnsortedWriter extends AbstractSSTableSimpleWriter
 {
-    private final Map<DecoratedKey, ColumnFamily> keys = new TreeMap<DecoratedKey, ColumnFamily>();
+    private static final Buffer SENTINEL = new Buffer();
+
+    private Buffer buffer = new Buffer();
     private final long bufferSize;
     private long currentSize;
 
+    private final BlockingQueue<Buffer> writeQueue = new SynchronousQueue<Buffer>();
+    private final DiskWriter diskWriter = new DiskWriter();
+
     /**
      * Create a new buffering writer.
      * @param directory the directory where to write the sstables
@@ -67,6 +76,7 @@ public class SSTableSimpleUnsortedWriter
     {
         super(directory, new CFMetaData(keyspace, columnFamily, subComparator == null ? ColumnFamilyType.Standard : ColumnFamilyType.Super, comparator, subComparator));
         this.bufferSize = bufferSizeInMB * 1024L * 1024L;
+        this.diskWriter.start();
     }
 
     protected void writeRow(DecoratedKey key, ColumnFamily columnFamily) throws IOException
@@ -79,12 +89,12 @@ public class SSTableSimpleUnsortedWriter
     protected ColumnFamily getColumnFamily()
     {
-        ColumnFamily previous = keys.get(currentKey);
+        ColumnFamily previous = buffer.get(currentKey);
         // If the CF already exist in memory, we'll just continue adding to it
         if (previous == null)
         {
             previous = ColumnFamily.create(metadata, TreeMapBackedSortedColumns.factory());
-            keys.put(currentKey, previous);
+            buffer.put(currentKey, previous);
         }
         else
         {
@@ -98,20 +108,77 @@ public class SSTableSimpleUnsortedWriter
     public void close() throws IOException
     {
         sync();
+        try
+        {
+            writeQueue.put(SENTINEL);
+            diskWriter.join();
+        }
+        catch (InterruptedException e)
+        {
+            throw new RuntimeException(e);
+        }
+
+        checkForWriterException();
     }
 
     private void sync() throws IOException
     {
-        if (keys.isEmpty())
+        if (buffer.isEmpty())
             return;
 
-        SSTableWriter writer = getWriter();
-        for (Map.Entry<DecoratedKey, ColumnFamily> entry : keys.entrySet())
+        checkForWriterException();
+
+        try
         {
-            writer.append(entry.getKey(), entry.getValue());
+            writeQueue.put(buffer);
         }
-        writer.closeAndOpenReader();
+        catch (InterruptedException e)
+        {
+            throw new RuntimeException(e);
+        }
+        buffer = new Buffer();
         currentSize = 0;
-        keys.clear();
+    }
+
+    private void checkForWriterException() throws IOException
+    {
+        //
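The hand-off protocol this patch introduces — sync() passes the full buffer to a dedicated disk-writer thread through a SynchronousQueue, and close() pushes a SENTINEL and joins — can be sketched in miniature. All names below are illustrative stand-ins, not the real Cassandra classes:

```java
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.SynchronousQueue;

// Illustrative sketch of the writer-thread pattern only: the hand-off and
// shutdown protocol, with a List standing in for the Buffer and an
// in-memory sink standing in for writing an sstable.
class BufferedWriter {
    private static final List<String> SENTINEL = new ArrayList<>(); // identity marker

    private final BlockingQueue<List<String>> writeQueue = new SynchronousQueue<>();
    private final List<String> written = new ArrayList<>();
    private final Thread diskWriter = new Thread(() -> {
        try {
            while (true) {
                List<String> b = writeQueue.take();
                if (b == SENTINEL)     // sentinel: no more buffers coming
                    return;
                written.addAll(b);     // stands in for flushing a buffer to disk
            }
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
    });

    BufferedWriter() { diskWriter.start(); }

    // Blocks until the writer thread accepts the buffer (SynchronousQueue has
    // no capacity), so at most one buffer is ever in flight.
    void sync(List<String> buffer) {
        try {
            writeQueue.put(buffer);
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
    }

    List<String> close() {
        try {
            writeQueue.put(SENTINEL);
            diskWriter.join();  // join() gives a happens-before edge on 'written'
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
        return written;
    }
}
```

The SynchronousQueue is the interesting choice: the producer is paused while a buffer is being written, but filling the next buffer overlaps with the previous write, which is exactly the overlap the ticket is after.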
svn commit: r1220663 - in /cassandra/branches/cassandra-1.0: CHANGES.txt src/java/org/apache/cassandra/io/util/SequentialWriter.java src/java/org/apache/cassandra/utils/CLibrary.java
Author: xedin
Date: Mon Dec 19 09:20:18 2011
New Revision: 1220663

URL: http://svn.apache.org/viewvc?rev=1220663&view=rev
Log:
fsync the directory after new sstable or commitlog segment are created
patch by Pavel Yaskevich; reviewed by Jonathan Ellis for CASSANDRA-3250

Modified:
    cassandra/branches/cassandra-1.0/CHANGES.txt
    cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/util/SequentialWriter.java
    cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/utils/CLibrary.java

Modified: cassandra/branches/cassandra-1.0/CHANGES.txt
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/CHANGES.txt?rev=1220663&r1=1220662&r2=1220663&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/CHANGES.txt (original)
+++ cassandra/branches/cassandra-1.0/CHANGES.txt Mon Dec 19 09:20:18 2011
@@ -4,6 +4,8 @@
  * CLibrary.createHardLinkWithExec() to check for errors (CASSANDRA-3101)
 Merged from 0.8:
  * prevent new nodes from thinking down nodes are up forever (CASSANDRA-3626)
+ * fsync the directory after new sstable or commitlog segment are created (CASSANDRA-3250)
+
 1.0.6
  * (CQL) fix cqlsh support for replicate_on_write (CASSANDRA-3596)

Modified: cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/util/SequentialWriter.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/util/SequentialWriter.java?rev=1220663&r1=1220662&r2=1220663&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/util/SequentialWriter.java (original)
+++ cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/util/SequentialWriter.java Mon Dec 19 09:20:18 2011
@@ -39,6 +39,9 @@ public class SequentialWriter extends Ou
     protected byte[] buffer;
     private final boolean skipIOCache;
     private final int fd;
+    private final int directoryFD;
+    // directory should be synced only after first file sync, in other words, only once per file
+    private boolean directorySynced = false;
 
     protected long current = 0, bufferOffset;
     protected int validBufferBytes;
@@ -60,6 +63,7 @@ public class SequentialWriter extends Ou
         buffer = new byte[bufferSize];
         this.skipIOCache = skipIOCache;
         fd = CLibrary.getfd(out.getFD());
+        directoryFD = CLibrary.tryOpenDirectory(file.getParent());
         stream = new DataOutputStream(this);
     }
@@ -148,6 +152,12 @@ public class SequentialWriter extends Ou
             flushInternal();
             out.getFD().sync();
+            if (!directorySynced)
+            {
+                CLibrary.trySync(directoryFD);
+                directorySynced = true;
+            }
+
             syncNeeded = false;
         }
     }
@@ -288,6 +298,7 @@ public class SequentialWriter extends Ou
             CLibrary.trySkipCache(fd, 0, 0);
 
         out.close();
+        CLibrary.tryCloseFD(directoryFD);
     }
 
     /**

Modified: cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/utils/CLibrary.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/utils/CLibrary.java?rev=1220663&r1=1220662&r2=1220663&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/utils/CLibrary.java (original)
+++ cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/utils/CLibrary.java Mon Dec 19 09:20:18 2011
@@ -46,6 +46,7 @@ public final class CLibrary
     private static final int F_SETFL   = 4;  /* set file status flags */
     private static final int F_NOCACHE = 48; /* Mac OS X specific flag, turns cache on/off */
     private static final int O_DIRECT  = 04; /* fcntl.h */
+    private static final int O_RDONLY  = 0;  /* fcntl.h */
 
     private static final int POSIX_FADV_NORMAL = 0; /* fadvise.h */
     private static final int POSIX_FADV_RANDOM = 1; /* fadvise.h */
@@ -84,7 +85,11 @@ public final class CLibrary
     // fadvice
     public static native int posix_fadvise(int fd, long offset, int len, int flag) throws LastErrorException;
-
+
+    public static native int open(String path, int flags) throws LastErrorException;
+    public static native int fsync(int fd) throws LastErrorException;
+    public static native int close(int fd) throws LastErrorException;
+
     private static int errno(RuntimeException e)
     {
         assert e instanceof LastErrorException;
@@ -261,6 +266,73 @@ public final class CLibrary
         return result;
     }
 
+    public static int tryOpenDirectory(String path)
+    {
+        int fd = -1;
+
+        try
+        {
+            return open(path, O_RDONLY);
+        }
+        catch (UnsatisfiedLinkError e)
+        {
+            // JNA is
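The idea behind CASSANDRA-3250 — after the first fsync of a new file, also fsync its parent directory so the directory entry itself survives a crash — can be sketched without JNA using plain NIO. This is an assumption-laden stand-in, not the committed code: fsyncing a directory via FileChannel works on Linux but may fail on other platforms, hence the best-effort catch mirroring CLibrary.trySync:

```java
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

// Sketch only: sync the file, then sync its parent directory exactly once
// per file, matching the directorySynced flag in the patch.
final class DurableWriter implements AutoCloseable {
    private final FileChannel file;
    private final Path directory;
    private boolean directorySynced = false; // directory needs syncing only once per file

    DurableWriter(Path path) throws IOException {
        this.file = FileChannel.open(path, StandardOpenOption.CREATE, StandardOpenOption.WRITE);
        this.directory = path.toAbsolutePath().getParent();
    }

    void write(byte[] data) throws IOException {
        file.write(ByteBuffer.wrap(data));
    }

    void sync() throws IOException {
        file.force(true); // fsync the file's data and metadata
        if (!directorySynced) {
            try (FileChannel dir = FileChannel.open(directory, StandardOpenOption.READ)) {
                dir.force(true); // make the new directory entry durable
            } catch (IOException e) {
                // best effort: some platforms refuse fsync on a directory fd
            }
            directorySynced = true;
        }
    }

    @Override
    public void close() throws IOException {
        file.close();
    }

    // Small demo: durably write a file and return its contents.
    static String demo() {
        try {
            Path f = Files.createTempDirectory("durable").resolve("segment");
            try (DurableWriter w = new DurableWriter(f)) {
                w.write("hello".getBytes());
                w.sync();
            }
            return new String(Files.readAllBytes(f));
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}
```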
svn commit: r1220664 - /cassandra/branches/cassandra-1.0/CHANGES.txt
Author: xedin
Date: Mon Dec 19 09:22:42 2011
New Revision: 1220664

URL: http://svn.apache.org/viewvc?rev=1220664&view=rev
Log:
fix CHANGES.txt

Modified:
    cassandra/branches/cassandra-1.0/CHANGES.txt

Modified: cassandra/branches/cassandra-1.0/CHANGES.txt
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/CHANGES.txt?rev=1220664&r1=1220663&r2=1220664&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/CHANGES.txt (original)
+++ cassandra/branches/cassandra-1.0/CHANGES.txt Mon Dec 19 09:22:42 2011
@@ -2,9 +2,9 @@
  * fix assertion when dropping a columnfamily with no sstables (CASSANDRA-3614)
  * more efficient allocation of small bloom filters (CASSANDRA-3618)
  * CLibrary.createHardLinkWithExec() to check for errors (CASSANDRA-3101)
+ * fsync the directory after new sstable or commitlog segment are created (CASSANDRA-3250)
 Merged from 0.8:
  * prevent new nodes from thinking down nodes are up forever (CASSANDRA-3626)
- * fsync the directory after new sstable or commitlog segment are created (CASSANDRA-3250)
 
 1.0.6
svn commit: r1220667 - in /cassandra/trunk: ./ contrib/ interface/thrift/gen-java/org/apache/cassandra/thrift/ src/java/org/apache/cassandra/db/compaction/
Author: slebresne Date: Mon Dec 19 09:25:25 2011 New Revision: 1220667 URL: http://svn.apache.org/viewvc?rev=1220667view=rev Log: merge from 1.0 Modified: cassandra/trunk/ (props changed) cassandra/trunk/CHANGES.txt cassandra/trunk/contrib/ (props changed) cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java (props changed) cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/Column.java (props changed) cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/InvalidRequestException.java (props changed) cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/NotFoundException.java (props changed) cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/SuperColumn.java (props changed) cassandra/trunk/src/java/org/apache/cassandra/db/compaction/CompactionTask.java Propchange: cassandra/trunk/ -- --- svn:mergeinfo (original) +++ svn:mergeinfo Mon Dec 19 09:25:25 2011 @@ -4,7 +4,7 @@ /cassandra/branches/cassandra-0.8:1090934-1125013,1125019-1198724,1198726-1206097,1206099-1212854,1212938 /cassandra/branches/cassandra-0.8.0:1125021-1130369 /cassandra/branches/cassandra-0.8.1:1101014-1125018 -/cassandra/branches/cassandra-1.0:1167085-1213775 +/cassandra/branches/cassandra-1.0:1167085-1213775,1220665 /cassandra/branches/cassandra-1.0.0:1167104-1167229,1167232-1181093,1181741,1181816,1181820,1182951,1183243 /cassandra/branches/cassandra-1.0.5:1208016 /cassandra/tags/cassandra-0.7.0-rc3:1051699-1053689 Modified: cassandra/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/cassandra/trunk/CHANGES.txt?rev=1220667r1=1220666r2=1220667view=diff == --- cassandra/trunk/CHANGES.txt (original) +++ cassandra/trunk/CHANGES.txt Mon Dec 19 09:25:25 2011 @@ -30,6 +30,12 @@ 1.0.7 * fix assertion when dropping a columnfamily with no sstables (CASSANDRA-3614) + * more efficient allocation of small bloom filters (CASSANDRA-3618) + * CLibrary.createHardLinkWithExec() to check for errors (CASSANDRA-3101) 
+ * fsync the directory after new sstable or commitlog segment are created (CASSANDRA-3250) + * Avoid creating empty and non cleaned writer during compaction (CASSANDRA-3616) +Merged from 0.8: + * prevent new nodes from thinking down nodes are up forever (CASSANDRA-3626) 1.0.6 Propchange: cassandra/trunk/contrib/ -- --- svn:mergeinfo (original) +++ svn:mergeinfo Mon Dec 19 09:25:25 2011 @@ -4,7 +4,7 @@ /cassandra/branches/cassandra-0.8/contrib:1090934-1125013,1125019-1198724,1198726-1206097,1206099-1212854,1212938 /cassandra/branches/cassandra-0.8.0/contrib:1125021-1130369 /cassandra/branches/cassandra-0.8.1/contrib:1101014-1125018 -/cassandra/branches/cassandra-1.0/contrib:1167085-1213775 +/cassandra/branches/cassandra-1.0/contrib:1167085-1213775,1220665 /cassandra/branches/cassandra-1.0.0/contrib:1167104-1167229,1167232-1181093,1181741,1181816,1181820,1182951,1183243 /cassandra/branches/cassandra-1.0.5/contrib:1208016 /cassandra/tags/cassandra-0.7.0-rc3/contrib:1051699-1053689 Propchange: cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java -- --- svn:mergeinfo (original) +++ svn:mergeinfo Mon Dec 19 09:25:25 2011 @@ -4,7 +4,7 @@ /cassandra/branches/cassandra-0.8/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1090934-1125013,1125019-1198724,1198726-1206097,1206099-1212854,1212938 /cassandra/branches/cassandra-0.8.0/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1125021-1130369 /cassandra/branches/cassandra-0.8.1/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1101014-1125018 -/cassandra/branches/cassandra-1.0/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1167085-1213775 +/cassandra/branches/cassandra-1.0/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1167085-1213775,1220665 
/cassandra/branches/cassandra-1.0.0/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1167104-1167229,1167232-1181093,1181741,1181816,1181820,1182951,1183243 /cassandra/branches/cassandra-1.0.5/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1208016 /cassandra/tags/cassandra-0.7.0-rc3/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1051699-1053689 Propchange: cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/Column.java -- --- svn:mergeinfo (original) +++ svn:mergeinfo Mon Dec 19 09:25:25 2011 @@ -4,7 +4,7 @@
svn commit: r1220672 - in /cassandra/trunk: ./ contrib/ interface/thrift/gen-java/org/apache/cassandra/thrift/ src/java/org/apache/cassandra/db/context/ src/java/org/apache/cassandra/gms/ src/java/org
Author: xedin Date: Mon Dec 19 09:33:17 2011 New Revision: 1220672 URL: http://svn.apache.org/viewvc?rev=1220672view=rev Log: merge from 1.0 Modified: cassandra/trunk/ (props changed) cassandra/trunk/build.xml cassandra/trunk/contrib/ (props changed) cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java (props changed) cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/Column.java (props changed) cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/InvalidRequestException.java (props changed) cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/NotFoundException.java (props changed) cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/SuperColumn.java (props changed) cassandra/trunk/src/java/org/apache/cassandra/db/context/CounterContext.java cassandra/trunk/src/java/org/apache/cassandra/gms/Gossiper.java cassandra/trunk/src/java/org/apache/cassandra/io/util/SequentialWriter.java cassandra/trunk/src/java/org/apache/cassandra/service/AbstractCassandraDaemon.java cassandra/trunk/src/java/org/apache/cassandra/service/MigrationManager.java cassandra/trunk/src/java/org/apache/cassandra/service/StorageService.java cassandra/trunk/src/java/org/apache/cassandra/utils/CLibrary.java Propchange: cassandra/trunk/ -- --- svn:mergeinfo (original) +++ svn:mergeinfo Mon Dec 19 09:33:17 2011 @@ -1,10 +1,10 @@ /cassandra/branches/cassandra-0.6:922689-1052356,1052358-1053452,1053454,1053456-1131291 /cassandra/branches/cassandra-0.7:1026516-1211709 /cassandra/branches/cassandra-0.7.0:1053690-1055654 -/cassandra/branches/cassandra-0.8:1090934-1125013,1125019-1198724,1198726-1206097,1206099-1212854,1212938 +/cassandra/branches/cassandra-0.8:1090934-1125013,1125019-1198724,1198726-1206097,1206099-1212854,1212938,1214916 /cassandra/branches/cassandra-0.8.0:1125021-1130369 /cassandra/branches/cassandra-0.8.1:1101014-1125018 -/cassandra/branches/cassandra-1.0:1167085-1213775,1220665 
+/cassandra/branches/cassandra-1.0:1167085-1220666 /cassandra/branches/cassandra-1.0.0:1167104-1167229,1167232-1181093,1181741,1181816,1181820,1182951,1183243 /cassandra/branches/cassandra-1.0.5:1208016 /cassandra/tags/cassandra-0.7.0-rc3:1051699-1053689 Modified: cassandra/trunk/build.xml URL: http://svn.apache.org/viewvc/cassandra/trunk/build.xml?rev=1220672r1=1220671r2=1220672view=diff == --- cassandra/trunk/build.xml (original) +++ cassandra/trunk/build.xml Mon Dec 19 09:33:17 2011 @@ -376,7 +376,9 @@ url=${svn.entry.url}?pathrev=${svn.entry dependency groupId=com.thoughtworks.paranamer artifactId=paranamer-ant version=2.1/ dependency groupId=junit artifactId=junit version=4.6 / dependency groupId=commons-logging artifactId=commons-logging version=1.1.1/ - dependency groupId=org.apache.rat artifactId=apache-rat version=0.6 / + dependency groupId=org.apache.rat artifactId=apache-rat version=0.6 + exclusion groupId=commons-lang artifactId=commons-lang/ + /dependency dependency groupId=org.apache.hadoop artifactId=hadoop-core version=0.20.203.0/ dependency groupId=net.sf.jopt-simple artifactId=jopt-simple version=3.2/ dependency groupId=net.java.dev.jna artifactId=jna version=3.2.7/ Propchange: cassandra/trunk/contrib/ -- --- svn:mergeinfo (original) +++ svn:mergeinfo Mon Dec 19 09:33:17 2011 @@ -1,10 +1,10 @@ /cassandra/branches/cassandra-0.6/contrib:922689-1052356,1052358-1053452,1053454,1053456-1068009 /cassandra/branches/cassandra-0.7/contrib:1026516-1211709 /cassandra/branches/cassandra-0.7.0/contrib:1053690-1055654 -/cassandra/branches/cassandra-0.8/contrib:1090934-1125013,1125019-1198724,1198726-1206097,1206099-1212854,1212938 +/cassandra/branches/cassandra-0.8/contrib:1090934-1125013,1125019-1198724,1198726-1206097,1206099-1212854,1212938,1214916 /cassandra/branches/cassandra-0.8.0/contrib:1125021-1130369 /cassandra/branches/cassandra-0.8.1/contrib:1101014-1125018 -/cassandra/branches/cassandra-1.0/contrib:1167085-1213775,1220665 
+/cassandra/branches/cassandra-1.0/contrib:1167085-1220666 /cassandra/branches/cassandra-1.0.0/contrib:1167104-1167229,1167232-1181093,1181741,1181816,1181820,1182951,1183243 /cassandra/branches/cassandra-1.0.5/contrib:1208016 /cassandra/tags/cassandra-0.7.0-rc3/contrib:1051699-1053689 Propchange: cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java -- --- svn:mergeinfo (original) +++ svn:mergeinfo Mon Dec 19 09:33:17 2011 @@ -1,10 +1,10 @@
[jira] [Commented] (CASSANDRA-3635) Throttle validation separately from other compaction
[ https://issues.apache.org/jira/browse/CASSANDRA-3635?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172147#comment-13172147 ]

Sylvain Lebresne commented on CASSANDRA-3635:
---------------------------------------------

bq. Lets say we create a tree on A first after completion, we can create a tree on B and then on C

In theory we kind of could. We do need to make sure trees are computed on roughly the same data on all nodes, so we'll need to keep the flushes at the same time, but then we don't have to start the computation on all nodes right away. However, for that to work we would need to keep references to the sstables after the initial flush, which adds its own set of complications: if for some reason a node never receives its 'you can start computing your tree' message, it will keep some sstables around forever. We can add a number of protections so that this never happens, but it is still a potentially very nasty effect. In any case, this is probably not a discussion related to this ticket.

Throttle validation separately from other compaction
-----------------------------------------------------

                 Key: CASSANDRA-3635
                 URL: https://issues.apache.org/jira/browse/CASSANDRA-3635
             Project: Cassandra
          Issue Type: Improvement
          Components: Core
            Reporter: Sylvain Lebresne
            Assignee: Sylvain Lebresne
            Priority: Minor
              Labels: repair
             Fix For: 0.8.10, 1.0.7
         Attachments: 0001-separate-validation-throttling.patch

Validation compaction is fairly resource intensive. It is possible to throttle it together with other compaction, but there are cases where you really want to throttle it rather aggressively without having minor compactions throttled that much. The goal is to (optionally) allow setting a separate throttling value for validation.

PS: I'm not pretending this will solve every repair problem or anything.
[jira] [Commented] (CASSANDRA-3619) Use a separate writer thread for the SSTableSimpleUnsortedWriter
[ https://issues.apache.org/jira/browse/CASSANDRA-3619?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172172#comment-13172172 ]

Hudson commented on CASSANDRA-3619:
-----------------------------------

Integrated in Cassandra #1259 (See [https://builds.apache.org/job/Cassandra/1259/])

Use separate writer thread in SSTableSimpleUnsortedWriter
patch by slebresne; reviewed by yukim for CASSANDRA-3619

slebresne : http://svn.apache.org/viewcvs.cgi/?root=Apache-SVN&view=rev&rev=1220662
Files :
* /cassandra/trunk/CHANGES.txt
* /cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableSimpleUnsortedWriter.java

Use a separate writer thread for the SSTableSimpleUnsortedWriter
----------------------------------------------------------------

                 Key: CASSANDRA-3619
                 URL: https://issues.apache.org/jira/browse/CASSANDRA-3619
             Project: Cassandra
          Issue Type: Improvement
          Components: Tools
    Affects Versions: 0.8.1
            Reporter: Sylvain Lebresne
            Assignee: Sylvain Lebresne
            Priority: Minor
             Fix For: 1.1
         Attachments: 0001-Add-separate-writer-thread.patch

Currently SSTableSimpleUnsortedWriter doesn't use any threading. This means that the thread using it is blocked while the buffered data is written to disk, and that nothing is written to disk while data is added.
[jira] [Commented] (CASSANDRA-3641) inconsistent/corrupt counters w/ broken shards never converge
[ https://issues.apache.org/jira/browse/CASSANDRA-3641?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172181#comment-13172181 ]

Sylvain Lebresne commented on CASSANDRA-3641:
---------------------------------------------

The fix lgtm, but I'd be in favor of removing the JMX reporting. Logging at ERROR seems enough, and I fear a JMX counter will confuse users more than anything else (you can always use a specific log4j logger if you're so inclined to monitor logged errors through JMX). Nit: I'd also change the comment from "We should never see shards w/ same id+clock but different counts" to "We should never see *non-delta* shards w/ same id+clock but different counts", just to make sure someone reading this comment too quickly doesn't leave with the wrong info.

inconsistent/corrupt counters w/ broken shards never converge
-------------------------------------------------------------

                 Key: CASSANDRA-3641
                 URL: https://issues.apache.org/jira/browse/CASSANDRA-3641
             Project: Cassandra
          Issue Type: Bug
            Reporter: Peter Schuller
            Assignee: Peter Schuller
         Attachments: 3641-0.8-internal-not-for-inclusion.txt, 3641-trunk.txt

We ran into a case (which MIGHT be related to CASSANDRA-3070) whereby we had counters that were corrupt (hopefully due to CASSANDRA-3178). The corruption was that there would exist shards with the *same* node_id, the *same* clock id, but *different* counts. The counter column diffing and reconciliation code assumes that this never happens, and ignores the count. The problem is that if there is an inconsistency, the result of a reconciliation will depend on the order of the shards. In our case, for example, we would see the value of the counter randomly fluctuating on a CL.ALL read, but we would get a consistent value (whatever the node had) on CL.ONE (submitted to one of the nodes in the replica set for the key). In addition, read repair would not work despite digest mismatches, because the diffing algorithm also did not care about the counts when determining the differences to send. I'm attaching patches that fix this.
The first patch is against our 0.8 branch, which is not terribly useful to people, but I include it because it is the well-tested version that we have used on the production cluster which was subject to this corruption. The other patch is against trunk and contains the same change. What the patch does is:
* On diffing, treat as DISJOINT if there is a count discrepancy.
* On reconciliation, look at the count and *deterministically* pick the higher one, and:
** log the fact that we detected a corrupt counter
** increment a JMX observable counter for monitoring purposes

A cluster which is subject to such corruption and has this patch will fix itself with an AES + compact (or just repeated compactions, assuming the replicate-on-compact is able to deliver correctly).
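The reconciliation rule described above — deterministically keep the higher count when two shards carry the same id and clock — can be sketched as follows. This is a simplified stand-in for illustration, not the actual CounterContext/CounterColumn code:

```java
// Sketch of order-independent shard reconciliation for two shards with the
// same node id: returns the count the merged shard should keep.
final class ShardReconciler {
    static long reconcile(long clockA, long countA, long clockB, long countB) {
        if (clockA != clockB)
            return clockA > clockB ? countA : countB; // normal rule: higher clock wins
        if (countA != countB) {
            // Corrupt state: same clock, different counts. Picking the higher
            // count deterministically means every replica converges to the
            // same value regardless of the order the shards are merged in
            // (this is also where the patch logs the detected corruption).
            return Math.max(countA, countB);
        }
        return countA; // identical shards
    }
}
```

The key property is symmetry: reconcile(a, b) and reconcile(b, a) agree, which is exactly what the pre-patch code (keeping whichever shard came first) lacked.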
[jira] [Commented] (CASSANDRA-3638) It may iterate the whole memtable while just querying one row. This seriously affects the performance of Cassandra
[ https://issues.apache.org/jira/browse/CASSANDRA-3638?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172252#comment-13172252 ]

MaHaiyang commented on CASSANDRA-3638:
--------------------------------------

{code:title=RowIteratorFactory.java|borderStyle=solid}
public IColumnIterator computeNext()
{
    while (iter.hasNext())
    {
        Map.Entry<DecoratedKey, ColumnFamily> entry = iter.next();
        IColumnIterator ici = filter.getMemtableColumnIterator(entry.getValue(), entry.getKey(), comparator);
        if (pred.apply(ici))
            return ici;
    }
    return endOfData();
{code}

The 'iter' is a submap of the memtable's columnFamilies. If pred.apply(ici) == false, 'iter' will iterate to the end. Suppose 'iter' is [c, d, e, f], pred.startKey is a, and endKey is b; then pred.apply(ici) will always be false. In fact, in this case 'iter' never needs to iterate at all and should directly return endOfData(). This is the problem! So, can anybody review the code too?

It may iterate the whole memtable while just querying one row. This seriously affects the performance of Cassandra
-------------------------------------------------------------------------------------------------------------------

                 Key: CASSANDRA-3638
                 URL: https://issues.apache.org/jira/browse/CASSANDRA-3638
             Project: Cassandra
          Issue Type: Bug
          Components: Core
    Affects Versions: 1.0.0
            Reporter: MaHaiyang

RangeSliceVerbHandler may query only one row, but Cassandra may iterate the whole memtable. The problem is in the ColumnFamilyStore.getRangeSlice() method.

{color:red}// this iterator may iterate the whole memtable!!{color}
{code:title=ColumnFamilyStore.java|borderStyle=solid}
public List<Row> getRangeSlice(ByteBuffer superColumn, final AbstractBounds range, int maxResults, IFilter columnFilter) throws ExecutionException, InterruptedException
{
    ...
    DecoratedKey startWith = new DecoratedKey(range.left, null);
    DecoratedKey stopAt = new DecoratedKey(range.right, null);
    QueryFilter filter = new QueryFilter(null, new QueryPath(columnFamily, superColumn, null), columnFilter);
    int gcBefore = (int)(System.currentTimeMillis() / 1000) - metadata.getGcGraceSeconds();
    List<Row> rows;
    ViewFragment view = markReferenced(startWith, stopAt);
    try
    {
        CloseableIterator<Row> iterator = RowIteratorFactory.getIterator(view.memtables, view.sstables, startWith, stopAt, filter, getComparator(), this);
        rows = new ArrayList<Row>();
        try
        {
            // pull rows out of the iterator
            boolean first = true;
            while (iterator.hasNext()) // this iterator may iterate the whole memtable!!
            {
            }
        }
        .
    }
    .
    return rows;
}
{code}

{color:red}// Just querying one row, but a submap of columnFamilies is returned{color}
{code:title=Memtable.java|borderStyle=solid}
public Iterator<Map.Entry<DecoratedKey, ColumnFamily>> getEntryIterator(DecoratedKey startWith)
{
    return columnFamilies.tailMap(startWith).entrySet().iterator();
}
{code}

{color:red}// entry.getKey() will never be bigger than or equal to startKey, and then the whole submap of the memtable is iterated{color}
{code:title=RowIteratorFactory.java|borderStyle=solid}
public IColumnIterator computeNext()
{
    while (iter.hasNext())
    {
        Map.Entry<DecoratedKey, ColumnFamily> entry = iter.next();
        IColumnIterator ici = filter.getMemtableColumnIterator(entry.getValue(), entry.getKey(), comparator);
        // entry.getKey() will never be bigger than or equal to startKey, and then the whole submap of the memtable is iterated
        if (pred.apply(ici))
            return ici;
    }
    return endOfData();
{code}
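The scan-to-the-end behaviour described above, and the kind of bound check that would avoid it, can be reproduced with a plain TreeMap. This is a simplified stand-in to demonstrate the issue, not the actual Cassandra code or fix:

```java
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;

// Iterating a tailMap with only an accept-predicate scans to the end of the
// map when the requested range matches nothing; an explicit stop-key check
// bounds the scan instead.
final class BoundedRangeScan {
    // Returns how many entries were examined before stopping.
    static int scan(TreeMap<String, String> memtable, String start, String stop) {
        int examined = 0;
        Iterator<Map.Entry<String, String>> iter = memtable.tailMap(start).entrySet().iterator();
        while (iter.hasNext()) {
            Map.Entry<String, String> e = iter.next();
            examined++;
            if (e.getKey().compareTo(stop) > 0)
                break; // past the requested range: stop instead of draining the iterator
            // ... produce a row for e ...
        }
        return examined;
    }
}
```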
[jira] [Commented] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172299#comment-13172299 ] T Jake Luciani commented on CASSANDRA-2474:
---

In the non-sparse case you would always ignore the column value? I think we need to expose that somehow. (first non-transposed, non-key, non-sparse column?)

Overall I like this because it forces a user to think at schema creation time and not access time. This approach makes sense for CQL-only access, but users who are coming from Thrift will be asking: how do I access data from my current data model?

On the negative side, this approach feels a bit too restrictive, since you *MUST* use the same kind of schema across all rows within a CF. What if a user doesn't know what the sparse columns will be ahead of time?

Also, I know that's best practice but want to make the point: what if a user wants to access data in composite form and raw mode? Should we support multiple views on the CF?

CQL support for compound columns
--
Key: CASSANDRA-2474
URL: https://issues.apache.org/jira/browse/CASSANDRA-2474
Project: Cassandra
Issue Type: New Feature
Components: API, Core
Reporter: Eric Evans
Assignee: Pavel Yaskevich
Labels: cql
Fix For: 1.1
Attachments: screenshot-1.jpg, screenshot-2.jpg

For the most part, this boils down to supporting the specification of compound column names (the CQL syntax is colon-delimited terms), and then teaching the decoders (drivers) to create structures from the results.
[jira] [Issue Comment Edited] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172299#comment-13172299 ] T Jake Luciani edited comment on CASSANDRA-2474 at 12/19/11 2:21 PM:
-

In the non-sparse case you would always ignore the column value? I think we need to expose that somehow. (first non-transposed, non-key, non-sparse column?) Overall I like this because it forces a user to think at schema creation time and not access time. This approach makes sense for CQL only access, but for users who are coming from thrift they will be asking how do i access data from my current data model? On the negative side, this approach feels a bit too restrictive since you *MUST* use the same kind of schema across all rows within a CF. What if a user doesn't know what the sparse columns will be ahead of time? Also, I know that's best practice but want to make the point, what if a user wants to access data in composite form and raw mode, should we support multiple views on the CF?

was (Author: tjake):

In the non-sparse case you always would always ignore the column value? I think we need to expose that somehow. (first non-transposed, non-key, non-sparse column?) Overall I like this because it forces a user to think at schema creation time and not access time. This approach makes sense for CQL only access, but for users who are coming from thrift they will be asking how do i access data from my current data model? On the negative side, this approach feels a bit too restrictive since you *MUST* use the same kind of schema across all rows within a CF. What if a user doesn't know what the sparse columns will be ahead of time? Also, I know that's best practice but want to make the point, what if a user wants to access data in composite form and raw mode, should we support multiple views on the CF?
[jira] [Reopened] (CASSANDRA-3638) It may iterate the whole memtable while querying just one row. This seriously affects the performance of Cassandra
[ https://issues.apache.org/jira/browse/CASSANDRA-3638?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sylvain Lebresne reopened CASSANDRA-3638:
-
Assignee: Sylvain Lebresne

It may iterate the whole memtable while querying just one row. This seriously affects the performance of Cassandra
--
Key: CASSANDRA-3638
URL: https://issues.apache.org/jira/browse/CASSANDRA-3638
Project: Cassandra
Issue Type: Bug
Components: Core
Affects Versions: 1.0.0
Reporter: MaHaiyang
Assignee: Sylvain Lebresne
[jira] [Updated] (CASSANDRA-3638) It may iterate the whole memtable while querying just one row. This seriously affects the performance of Cassandra
[ https://issues.apache.org/jira/browse/CASSANDRA-3638?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sylvain Lebresne updated CASSANDRA-3638:

Attachment: 3638.patch

I think you're right. And this goes way back, I believe; we've never used the stopAt bound to stop iteration early in the range slice case. Patch attached to fix this (against trunk).

It may iterate the whole memtable while querying just one row. This seriously affects the performance of Cassandra
--
Key: CASSANDRA-3638
URL: https://issues.apache.org/jira/browse/CASSANDRA-3638
Project: Cassandra
Issue Type: Bug
Components: Core
Affects Versions: 1.0.0
Reporter: MaHaiyang
Assignee: Sylvain Lebresne
Attachments: 3638.patch
[jira] [Commented] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172307#comment-13172307 ] Pavel Yaskevich commented on CASSANDRA-2474:

bq. but for users who are coming from thrift they will be asking how do i access data from my current data model?

We can support a special command to set metadata about CF transposition, something like TRANSPOSE cf AS (col, ...) WITH SPARSE (id, ...) or ALTER TABLE cf SET TRANSPOSED AS (col, ...) WITH SPARSE (id, ...)

bq. On the negative side, this approach feels a bit too restrictive since you MUST use the same kind of schema across all rows within a CF. What if a user doesn't know what the sparse columns will be ahead of time?

Users will be able to add sparse columns using the ALTER TABLE command.

bq. Also, I know that's best practice but want to make the point, what if a user wants to access data in composite form and raw mode, should we support multiple views on the CF?

Can you elaborate on raw mode?

CQL support for compound columns
--
Key: CASSANDRA-2474
URL: https://issues.apache.org/jira/browse/CASSANDRA-2474
Project: Cassandra
Issue Type: New Feature
Components: API, Core
Reporter: Eric Evans
Assignee: Pavel Yaskevich
Labels: cql
Fix For: 1.1
Attachments: screenshot-1.jpg, screenshot-2.jpg
[jira] [Commented] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172317#comment-13172317 ] T Jake Luciani commented on CASSANDRA-2474:
---

bq. User will be able to add sparse columns using ALTER TABLE command.

Requiring an ALTER when you may not know what columns you have is too restrictive. Example: an ETL from a 3rd-party manufacturer that provides a custom set of attributes per product, some standard (Unit Price, Model, Color, etc.) and some specific (DPI, Shipping Size, Contrast Ratio). We don't want to go back to having to know exactly what your data will look like before you can write/read it. That's one of the important tenets of NoSQL I'd like to keep :)

bq. Can you elaborate raw mode?

I mean non-transposed mode.

CQL support for compound columns
--
Key: CASSANDRA-2474
URL: https://issues.apache.org/jira/browse/CASSANDRA-2474
Project: Cassandra
Issue Type: New Feature
Components: API, Core
Reporter: Eric Evans
Assignee: Pavel Yaskevich
Labels: cql
Fix For: 1.1
Attachments: screenshot-1.jpg, screenshot-2.jpg
[jira] [Commented] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172330#comment-13172330 ] Pavel Yaskevich commented on CASSANDRA-2474:

{quote}
Requiring a ALTER when you may not know what columns you have is too restrictive. Example, a ETL from a 3rd party manufacturer that provides a custom set of attributes per product: some standard (Unit Price, Model, Color, etc) some specific (DPI, Shipping Size, Contrast Ratio). We don't want to go back to having to know exactly what your data will look like before you can write/read it. That's one of the important tenants of nosql I'd like to keep :)
{quote}

Users will still be able to add sparse columns on insert without an ALTER, as with current inserts (it will just use the default value validator).

bq. I mean non-transposed mode.

I think we can do that.

CQL support for compound columns
--
Key: CASSANDRA-2474
URL: https://issues.apache.org/jira/browse/CASSANDRA-2474
Project: Cassandra
Issue Type: New Feature
Components: API, Core
Reporter: Eric Evans
Assignee: Pavel Yaskevich
Labels: cql
Fix For: 1.1
Attachments: screenshot-1.jpg, screenshot-2.jpg
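To make the proposal in the comments above concrete, the two alternatives could look like the following. This syntax is purely hypothetical (neither statement existed in CQL at the time of this discussion), and the table and column names are invented for illustration.

```sql
-- Hypothetical syntax per the comments above; not real CQL.
-- Declare transposition at (or after) schema creation:
TRANSPOSE products AS (vendor, model) WITH SPARSE (attrs);

-- Or retrofit an existing column family:
ALTER TABLE products SET TRANSPOSED AS (vendor, model) WITH SPARSE (attrs);
```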
[jira] [Commented] (CASSANDRA-3632) using an ant builder in Eclipse is painful
[ https://issues.apache.org/jira/browse/CASSANDRA-3632?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172339#comment-13172339 ] T Jake Luciani commented on CASSANDRA-3632:
---

+1!

using an ant builder in Eclipse is painful
--
Key: CASSANDRA-3632
URL: https://issues.apache.org/jira/browse/CASSANDRA-3632
Project: Cassandra
Issue Type: Bug
Components: Packaging, Tools
Affects Versions: 1.0.6
Reporter: Eric Evans
Assignee: Eric Evans
Priority: Minor
Attachments: v1-0001-CASSANDRA-3632-remove-ant-builder-restore-java-builder.txt

The {{generate-eclipse-files}} target creates project files that use an Ant builder. Besides being painfully slow (I've had the runs stack up behind frequent saves), many of Eclipse's errors and warnings do not show unless an internal builder is used.
[jira] [Commented] (CASSANDRA-3638) It may iterate the whole memtable while querying just one row. This seriously affects the performance of Cassandra
[ https://issues.apache.org/jira/browse/CASSANDRA-3638?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172347#comment-13172347 ] Jonathan Ellis commented on CASSANDRA-3638:
---

+1

It may iterate the whole memtable while querying just one row. This seriously affects the performance of Cassandra
--
Key: CASSANDRA-3638
URL: https://issues.apache.org/jira/browse/CASSANDRA-3638
Project: Cassandra
Issue Type: Bug
Components: Core
Affects Versions: 1.0.0
Reporter: MaHaiyang
Assignee: Sylvain Lebresne
Fix For: 1.1
Attachments: 3638.patch
svn commit: r1220817 - /cassandra/branches/cassandra-1.0/build.xml
Author: eevans
Date: Mon Dec 19 15:58:44 2011
New Revision: 1220817
URL: http://svn.apache.org/viewvc?rev=1220817&view=rev
Log: remove ant builder; restore java builder

Patch by eevans; reviewed by tjake for CASSANDRA-3632

Modified: cassandra/branches/cassandra-1.0/build.xml

Modified: cassandra/branches/cassandra-1.0/build.xml
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/build.xml?rev=1220817&r1=1220816&r2=1220817&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/build.xml (original)
+++ cassandra/branches/cassandra-1.0/build.xml Mon Dec 19 15:58:44 2011
@@ -1150,18 +1150,11 @@ url="${svn.entry.url}?pathrev=${svn.entry
         <comment></comment>
         <projects>
         </projects>
-        <buildSpec>
-          <buildCommand>
-            <name>org.eclipse.ui.externaltools.ExternalToolBuilder</name>
-            <triggers>full,incremental,auto,clean,</triggers>
-            <arguments>
-              <dictionary>
-                <key>LaunchConfigHandle</key>
-                <value>&lt;project&gt;/.externalToolBuilders/Cassandra_Ant_Builder.launch</value>
-              </dictionary>
-            </arguments>
-          </buildCommand>
-        </buildSpec>
+        <buildSpec>
+          <buildCommand>
+            <name>org.eclipse.jdt.core.javabuilder</name>
+          </buildCommand>
+        </buildSpec>
         <natures>
           <nature>org.eclipse.jdt.core.javanature</nature>
         </natures>
@@ -1208,31 +1201,6 @@ url="${svn.entry.url}?pathrev=${svn.entry
       echo.perform();
       ]]>
     </script>
     <mkdir dir=".settings" />
-    <mkdir dir=".externalToolBuilders" />
-    <echo file=".externalToolBuilders/Cassandra_Ant_Builder.launch"><![CDATA[<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<launchConfiguration type="org.eclipse.ant.AntBuilderLaunchConfigurationType">
-<stringAttribute key="org.eclipse.ant.ui.ATTR_ANT_AFTER_CLEAN_TARGETS" value="build-test,"/>
-<stringAttribute key="org.eclipse.ant.ui.ATTR_ANT_AUTO_TARGETS" value="build-test,"/>
-<stringAttribute key="org.eclipse.ant.ui.ATTR_ANT_CLEAN_TARGETS" value="clean"/>
-<stringAttribute key="org.eclipse.ant.ui.ATTR_ANT_MANUAL_TARGETS" value="build-test,"/>
-<booleanAttribute key="org.eclipse.ant.ui.ATTR_TARGETS_UPDATED" value="true"/>
-<booleanAttribute key="org.eclipse.ant.ui.DEFAULT_VM_INSTALL" value="false"/>
-<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
-<listEntry value="/${eclipse.project.name}/build.xml"/>
-</listAttribute>
-<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
-<listEntry value="1"/>
-</listAttribute>
-<booleanAttribute key="org.eclipse.debug.ui.ATTR_LAUNCH_IN_BACKGROUND" value="false"/>
-<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.ant.ui.AntClasspathProvider"/>
-<booleanAttribute key="org.eclipse.jdt.launching.DEFAULT_CLASSPATH" value="true"/>
-<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="${eclipse.project.name}"/>
-<stringAttribute key="org.eclipse.ui.externaltools.ATTR_LOCATION" value="$${workspace_loc:/${eclipse.project.name}/build.xml}"/>
-<stringAttribute key="org.eclipse.ui.externaltools.ATTR_RUN_BUILD_KINDS" value="full,incremental,auto,clean,"/>
-<booleanAttribute key="org.eclipse.ui.externaltools.ATTR_TRIGGERS_CONFIGURED" value="true"/>
-<stringAttribute key="org.eclipse.ui.externaltools.ATTR_WORKING_DIRECTORY" value="$${workspace_loc:/${eclipse.project.name}}"/>
-</launchConfiguration>]]>
-</echo>
   </target>

   <pathconvert property="eclipse.project.name"
svn commit: r1220838 - /cassandra/trunk/build.xml
Author: eevans
Date: Mon Dec 19 16:49:30 2011
New Revision: 1220838
URL: http://svn.apache.org/viewvc?rev=1220838&view=rev
Log: remove ant builder; restore java builder

Patch by eevans; reviewed by Rick Shaw for CASSANDRA-3632

Modified: cassandra/trunk/build.xml

Modified: cassandra/trunk/build.xml
URL: http://svn.apache.org/viewvc/cassandra/trunk/build.xml?rev=1220838&r1=1220837&r2=1220838&view=diff
==============================================================================
--- cassandra/trunk/build.xml (original)
+++ cassandra/trunk/build.xml Mon Dec 19 16:49:30 2011
@@ -1146,18 +1146,11 @@ url="${svn.entry.url}?pathrev=${svn.entry
         <comment></comment>
         <projects>
         </projects>
-        <buildSpec>
-          <buildCommand>
-            <name>org.eclipse.ui.externaltools.ExternalToolBuilder</name>
-            <triggers>full,incremental,auto,clean,</triggers>
-            <arguments>
-              <dictionary>
-                <key>LaunchConfigHandle</key>
-                <value>&lt;project&gt;/.externalToolBuilders/Cassandra_Ant_Builder.launch</value>
-              </dictionary>
-            </arguments>
-          </buildCommand>
-        </buildSpec>
+        <buildSpec>
+          <buildCommand>
+            <name>org.eclipse.jdt.core.javabuilder</name>
+          </buildCommand>
+        </buildSpec>
         <natures>
           <nature>org.eclipse.jdt.core.javanature</nature>
         </natures>
@@ -1205,31 +1198,6 @@ url="${svn.entry.url}?pathrev=${svn.entry
       echo.perform();
       ]]>
     </script>
     <mkdir dir=".settings" />
-    <mkdir dir=".externalToolBuilders" />
-    <echo file=".externalToolBuilders/Cassandra_Ant_Builder.launch"><![CDATA[<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<launchConfiguration type="org.eclipse.ant.AntBuilderLaunchConfigurationType">
-<stringAttribute key="org.eclipse.ant.ui.ATTR_ANT_AFTER_CLEAN_TARGETS" value="build-test,"/>
-<stringAttribute key="org.eclipse.ant.ui.ATTR_ANT_AUTO_TARGETS" value="build-test,"/>
-<stringAttribute key="org.eclipse.ant.ui.ATTR_ANT_CLEAN_TARGETS" value="clean"/>
-<stringAttribute key="org.eclipse.ant.ui.ATTR_ANT_MANUAL_TARGETS" value="build-test,"/>
-<booleanAttribute key="org.eclipse.ant.ui.ATTR_TARGETS_UPDATED" value="true"/>
-<booleanAttribute key="org.eclipse.ant.ui.DEFAULT_VM_INSTALL" value="false"/>
-<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
-<listEntry value="/${eclipse.project.name}/build.xml"/>
-</listAttribute>
-<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
-<listEntry value="1"/>
-</listAttribute>
-<booleanAttribute key="org.eclipse.debug.ui.ATTR_LAUNCH_IN_BACKGROUND" value="false"/>
-<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.ant.ui.AntClasspathProvider"/>
-<booleanAttribute key="org.eclipse.jdt.launching.DEFAULT_CLASSPATH" value="true"/>
-<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="${eclipse.project.name}"/>
-<stringAttribute key="org.eclipse.ui.externaltools.ATTR_LOCATION" value="$${workspace_loc:/${eclipse.project.name}/build.xml}"/>
-<stringAttribute key="org.eclipse.ui.externaltools.ATTR_RUN_BUILD_KINDS" value="full,incremental,auto,clean,"/>
-<booleanAttribute key="org.eclipse.ui.externaltools.ATTR_TRIGGERS_CONFIGURED" value="true"/>
-<stringAttribute key="org.eclipse.ui.externaltools.ATTR_WORKING_DIRECTORY" value="$${workspace_loc:/${eclipse.project.name}}"/>
-</launchConfiguration>]]>
-</echo>
   </target>

   <pathconvert property="eclipse.project.name"
svn commit: r1220839 - /cassandra/trunk/src/java/org/apache/cassandra/cql/QueryProcessor.java
Author: eevans
Date: Mon Dec 19 16:50:08 2011
New Revision: 1220839
URL: http://svn.apache.org/viewvc?rev=1220839&view=rev
Log: properly report number of markers in a statement

Patch by eevans; reviewed by Rick Shaw for CASSANDRA-2475

Modified: cassandra/trunk/src/java/org/apache/cassandra/cql/QueryProcessor.java

Modified: cassandra/trunk/src/java/org/apache/cassandra/cql/QueryProcessor.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/cql/QueryProcessor.java?rev=1220839&r1=1220838&r2=1220839&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/cql/QueryProcessor.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/cql/QueryProcessor.java Mon Dec 19 16:50:08 2011
@@ -1109,7 +1109,7 @@ public class QueryProcessor
         if (logger.isTraceEnabled())
             logger.trace(String.format("Stored prepared statement #%d with %d bind markers",
                                        statementId,
-                                       clientState.getPrepared().size()));
+                                       statement.boundTerms));

         return new CqlPreparedResult(statementId, statement.boundTerms);
     }
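The bug fixed above is easy to reproduce in miniature: the log line reported the size of the prepared-statement cache rather than the number of bind markers in the statement just prepared. The sketch below uses invented names (`PreparedSketch`, `store`) rather than Cassandra's real QueryProcessor API, and its marker counting is deliberately naive (it ignores '?' inside string literals).

```java
import java.util.HashMap;
import java.util.Map;

public class PreparedSketch {
    // Hypothetical stand-in for the prepared-statement cache.
    private final Map<Integer, Integer> markersById = new HashMap<>();

    // Stores a statement and returns the number of bind markers in *this*
    // statement - the value the fixed log line reports. The pre-fix code
    // logged the cache size instead, i.e. how many statements were stored.
    public int store(int statementId, String cql) {
        int boundTerms = 0;
        for (char c : cql.toCharArray())
            if (c == '?') boundTerms++; // naive: ignores '?' in literals
        markersById.put(statementId, boundTerms);
        return boundTerms;
    }

    public int cachedStatements() {
        return markersById.size();
    }

    public static void main(String[] args) {
        PreparedSketch cache = new PreparedSketch();
        cache.store(0, "SELECT v FROM cf WHERE k = ?");
        int terms = cache.store(1, "UPDATE cf SET v = ? WHERE k = ?");
        // The two counts only coincide here by accident; they diverge as
        // soon as more statements are cached.
        System.out.println(terms + " markers, " + cache.cachedStatements() + " cached"); // prints "2 markers, 2 cached"
    }
}
```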
svn commit: r1220840 - in /cassandra/trunk/src/java/org/apache/cassandra/cql: CQLStatement.java Cql.g QueryProcessor.java Term.java
Author: eevans
Date: Mon Dec 19 16:50:35 2011
New Revision: 1220840
URL: http://svn.apache.org/viewvc?rev=1220840&view=rev
Log: index bind markers using parser

Patch by eevans; reviewed by Rick Shaw for CASSANDRA-2475

Modified:
cassandra/trunk/src/java/org/apache/cassandra/cql/CQLStatement.java
cassandra/trunk/src/java/org/apache/cassandra/cql/Cql.g
cassandra/trunk/src/java/org/apache/cassandra/cql/QueryProcessor.java
cassandra/trunk/src/java/org/apache/cassandra/cql/Term.java

Modified: cassandra/trunk/src/java/org/apache/cassandra/cql/CQLStatement.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/cql/CQLStatement.java?rev=1220840&r1=1220839&r2=1220840&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/cql/CQLStatement.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/cql/CQLStatement.java Mon Dec 19 16:50:35 2011
@@ -26,9 +26,10 @@ public class CQLStatement
     public Object statement;
     public int boundTerms = 0;

-    public CQLStatement(StatementType type, Object statement)
+    public CQLStatement(StatementType type, Object statement, int lastMarker)
     {
         this.type = type;
         this.statement = statement;
+        this.boundTerms = lastMarker + 1;
     }
 }

Modified: cassandra/trunk/src/java/org/apache/cassandra/cql/Cql.g
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/cql/Cql.g?rev=1220840&r1=1220839&r2=1220840&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/cql/Cql.g (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/cql/Cql.g Mon Dec 19 16:50:35 2011
@@ -42,6 +42,7 @@ options {
 @members {
     private List<String> recognitionErrors = new ArrayList<String>();
+    private int currentBindMarkerIdx = -1;

     public void displayRecognitionError(String[] tokenNames, RecognitionException e)
     {
@@ -111,20 +112,20 @@ options {
 }

 query returns [CQLStatement stmnt]
-    : selectStatement { $stmnt = new CQLStatement(StatementType.SELECT, $selectStatement.expr); }
-    | insertStatement endStmnt { $stmnt = new CQLStatement(StatementType.INSERT, $insertStatement.expr); }
-    | updateStatement endStmnt { $stmnt = new CQLStatement(StatementType.UPDATE, $updateStatement.expr); }
-    | batchStatement { $stmnt = new CQLStatement(StatementType.BATCH, $batchStatement.expr); }
-    | useStatement { $stmnt = new CQLStatement(StatementType.USE, $useStatement.keyspace); }
-    | truncateStatement { $stmnt = new CQLStatement(StatementType.TRUNCATE, $truncateStatement.cf); }
-    | deleteStatement endStmnt { $stmnt = new CQLStatement(StatementType.DELETE, $deleteStatement.expr); }
-    | createKeyspaceStatement { $stmnt = new CQLStatement(StatementType.CREATE_KEYSPACE, $createKeyspaceStatement.expr); }
-    | createColumnFamilyStatement { $stmnt = new CQLStatement(StatementType.CREATE_COLUMNFAMILY, $createColumnFamilyStatement.expr); }
-    | createIndexStatement { $stmnt = new CQLStatement(StatementType.CREATE_INDEX, $createIndexStatement.expr); }
-    | dropIndexStatement { $stmnt = new CQLStatement(StatementType.DROP_INDEX, $dropIndexStatement.expr); }
-    | dropKeyspaceStatement { $stmnt = new CQLStatement(StatementType.DROP_KEYSPACE, $dropKeyspaceStatement.ksp); }
-    | dropColumnFamilyStatement { $stmnt = new CQLStatement(StatementType.DROP_COLUMNFAMILY, $dropColumnFamilyStatement.cfam); }
-    | alterTableStatement { $stmnt = new CQLStatement(StatementType.ALTER_TABLE, $alterTableStatement.expr); }
+    : selectStatement { $stmnt = new CQLStatement(StatementType.SELECT, $selectStatement.expr, currentBindMarkerIdx); }
+    | insertStatement endStmnt { $stmnt = new CQLStatement(StatementType.INSERT, $insertStatement.expr, currentBindMarkerIdx); }
+    | updateStatement endStmnt { $stmnt = new CQLStatement(StatementType.UPDATE, $updateStatement.expr, currentBindMarkerIdx); }
+    | batchStatement { $stmnt = new CQLStatement(StatementType.BATCH, $batchStatement.expr, currentBindMarkerIdx); }
+    | useStatement { $stmnt = new CQLStatement(StatementType.USE, $useStatement.keyspace, currentBindMarkerIdx); }
+    | truncateStatement { $stmnt = new CQLStatement(StatementType.TRUNCATE, $truncateStatement.cf, currentBindMarkerIdx); }
+    | deleteStatement endStmnt { $stmnt = new CQLStatement(StatementType.DELETE, $deleteStatement.expr, currentBindMarkerIdx); }
+    | createKeyspaceStatement { $stmnt = new CQLStatement(StatementType.CREATE_KEYSPACE, $createKeyspaceStatement.expr, currentBindMarkerIdx); }
+    | createColumnFamilyStatement { $stmnt = new CQLStatement(StatementType.CREATE_COLUMNFAMILY, $createColumnFamilyStatement.expr, currentBindMarkerIdx); }
+    | createIndexStatement { $stmnt = new CQLStatement(StatementType.CREATE_INDEX,
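The grammar change above threads a `currentBindMarkerIdx` counter (starting at -1) through parsing, so each `?` gets the next index and `boundTerms` becomes `lastMarker + 1`. The counting scheme can be sketched outside the parser; this is an illustrative stand-in, not the ANTLR-generated code, and like any character scan it ignores '?' inside string literals.

```java
import java.util.ArrayList;
import java.util.List;

public class BindMarkerIndexer {
    // Assigns each '?' an increasing index, mirroring the grammar's
    // currentBindMarkerIdx counter, which starts at -1 and is
    // pre-incremented at each marker.
    static List<Integer> indexMarkers(String cql) {
        List<Integer> indices = new ArrayList<>();
        int current = -1;
        for (char c : cql.toCharArray())
            if (c == '?') indices.add(++current);
        return indices;
    }

    // boundTerms = lastMarker + 1, exactly as in the new CQLStatement ctor;
    // a statement with no markers passes lastMarker = -1, giving 0.
    static int boundTerms(String cql) {
        List<Integer> idx = indexMarkers(cql);
        int lastMarker = idx.isEmpty() ? -1 : idx.get(idx.size() - 1);
        return lastMarker + 1;
    }

    public static void main(String[] args) {
        System.out.println(indexMarkers("INSERT INTO cf (k, v) VALUES (?, ?)")); // prints [0, 1]
    }
}
```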
svn commit: r1220843 - /cassandra/trunk/src/java/org/apache/cassandra/cql/Term.java
Author: eevans
Date: Mon Dec 19 16:55:52 2011
New Revision: 1220843

URL: http://svn.apache.org/viewvc?rev=1220843&view=rev
Log: clean up Term ctors

Patch by eevans; reviewed by Rick Shaw for CASSANDRA-2475

Modified: cassandra/trunk/src/java/org/apache/cassandra/cql/Term.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/cql/Term.java?rev=1220843&r1=1220842&r2=1220843&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/cql/Term.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/cql/Term.java Mon Dec 19 16:55:52 2011
@@ -37,7 +37,13 @@ public class Term
     private final String text;
     private final TermType type;
     private Integer bindIndex = -1;
-
+
+    public Term(String text, TermType type)
+    {
+        this.text = text == null ? "" : text;
+        this.type = type;
+    }
+
     /**
      * Create new Term instance from a string, and an integer that corresponds
      * with the token ID from CQLParser.
@@ -47,28 +53,19 @@ public class Term
      */
     public Term(String text, int type)
     {
-        this.text = text == null ? "" : text;
-        this.type = TermType.forInt(type);
-    }
-
-    public Term(String text, TermType type)
-    {
-        this.text = text == null ? "" : text;
-        this.type = type;
+        this(text == null ? "" : text, TermType.forInt(type));
     }

     public Term(long value, TermType type)
     {
-        this.text = String.valueOf(value);
-        this.type = type;
+        this(String.valueOf(value), type);
     }
-
+
     protected Term()
     {
-        this.text = "";
-        this.type = TermType.STRING;
+        this("", TermType.STRING);
     }
-
+
     public Term(String text, int type, int index)
     {
         this(text, type);
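The commit is an instance of a common cleanup: collapse duplicated field assignments into one canonical constructor and have every other constructor delegate to it. A simplified standalone sketch of the pattern (this is not the real Term class; TermType is reduced to an int):

```java
// Simplified sketch of the constructor-chaining pattern in this commit:
// all convenience constructors delegate to one canonical constructor, so
// the null guard for text lives in exactly one place.
public class Term {
    private final String text;
    private final int type;

    public Term(String text, int type) {      // canonical constructor
        this.text = text == null ? "" : text;
        this.type = type;
    }

    public Term(long value, int type) {       // delegate, don't re-assign fields
        this(String.valueOf(value), type);
    }

    protected Term() {
        this("", 0);
    }

    public String text() { return text; }

    public static void main(String[] args) {
        System.out.println(new Term(null, 1).text().isEmpty()); // prints true
    }
}
```

One small observation on the diff itself: the int-typed constructor still re-applies the `text == null ? "" : text` guard before delegating; once the canonical constructor performs that check, the extra guard is redundant.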
svn commit: r1220847 - /cassandra/trunk/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
Author: jbellis
Date: Mon Dec 19 17:03:13 2011
New Revision: 1220847

URL: http://svn.apache.org/viewvc?rev=1220847&view=rev
Log: clarify that loadNewSSTables doesn't 'rebuild' indexes, just adds entries for the new data

Modified: cassandra/trunk/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/db/ColumnFamilyStore.java?rev=1220847&r1=1220846&r2=1220847&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/db/ColumnFamilyStore.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/db/ColumnFamilyStore.java Mon Dec 19 17:03:13 2011
@@ -577,10 +577,9 @@ public class ColumnFamilyStore implement
             return;
         }

-        logger.info("Loading new SSTable Set for " + table.name + "/" + columnFamily + ": " + sstables);
+        logger.info("Loading new SSTables and building secondary indexes for " + table.name + "/" + columnFamily + ": " + sstables);
         SSTableReader.acquireReferences(sstables);
         data.addSSTables(sstables); // this will call updateCacheSizes() for us

-        logger.info("Requesting a full secondary index re-build for " + table.name + "/" + columnFamily);
         try
         {
             indexManager.maybeBuildSecondaryIndexes(sstables, indexManager.getIndexedColumns());
[jira] [Commented] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172398#comment-13172398 ] Jonathan Ellis commented on CASSANDRA-2474:
---

bq. In the non-sparse case you would always ignore the column value?

No, it would be associated with the last column definition.

bq. for users who are coming from thrift they will be asking how do i access data from my current data model?
bq. Requiring an ALTER when you may not know what columns you have is too restrictive. Example, an ETL from a 3rd-party manufacturer that provides a custom set of attributes per product: some standard (Unit Price, Model, Color, etc.), some specific (DPI, Shipping Size, Contrast Ratio).

But that's exactly when you *do* know what columns you have. (We're totally fine with having most sparse columns be null.) What we can't do is query undefined columns. *This is inherent to any transposition approach*, even the earlier ones above. The only way we can support that is by destructuring each column into a resultset of (key, columnname, columnvalue). I can't think of a single example where wide rows with actual different structure (as opposed to just sparse columns, as in your example here) are the Right Way to do things. If this is actually necessary, though, then I think we should add separate syntax for the destructuring approach.

bq. what if a user wants to access data in composite form and raw [non-transposed] mode, should we support multiple views on the CF

No. Nested-but-not-transposed data aka documents is another separate case.
CQL support for compound columns Key: CASSANDRA-2474 URL: https://issues.apache.org/jira/browse/CASSANDRA-2474 Project: Cassandra Issue Type: New Feature Components: API, Core Reporter: Eric Evans Assignee: Pavel Yaskevich Labels: cql Fix For: 1.1 Attachments: screenshot-1.jpg, screenshot-2.jpg For the most part, this boils down to supporting the specification of compound column names (the CQL syntax is colon-delimited terms), and then teaching the decoders (drivers) to create structures from the results. -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Commented] (CASSANDRA-3571) make stream throttling configurable at runtime with nodetool
[ https://issues.apache.org/jira/browse/CASSANDRA-3571?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172399#comment-13172399 ] Yuki Morishita commented on CASSANDRA-3571:
---

Peter, thanks for the update. I think it is better to have a getter, because when I look up the StreamThroughputMbPerSec value using jconsole, I cannot see what the current value is set to.

make stream throttling configurable at runtime with nodetool Key: CASSANDRA-3571 URL: https://issues.apache.org/jira/browse/CASSANDRA-3571 Project: Cassandra Issue Type: Improvement Reporter: Peter Schuller Assignee: Peter Schuller Priority: Minor Attachments: CASSANDRA-3571-1.0-rebased.txt, CASSANDRA-3571-1.0.txt Attaching patch that does this, against 1.0.
[jira] [Updated] (CASSANDRA-2749) fine-grained control over data directories
[ https://issues.apache.org/jira/browse/CASSANDRA-2749?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sylvain Lebresne updated CASSANDRA-2749: Attachment: 0002-fix-unit-tests-v2.patch 0001-2749-v2.patch

Patch attached (0001-2749-v2.patch and 0002-fix-unit-tests-v2.patch) that:
# only supports the new layout with sstables in cf directories
# migrates files automagically the first time
# adds the keyspace into the file name (to not rely on the directory sstables are in)
# limits keyspace and cf names to 32 characters

During the initial upgrade, we also check whether the user would be fine with the current keyspace and cf names. If not, we just refuse to start. I hope this won't happen to anyone, because renaming a CF/keyspace in a rolling fashion is not something fun (or even possible, for that matter). Note that this check doesn't fully enforce the 32-char limitation, however, but rather tries to be as permissive as possible, checking that any file path resulting from the upgrade is less than 256 chars (the Windows limit).

PS: the first patch is the bulk of the change, the second one is the unit tests. The latter is huge because it renames the sstables in test/data to include the keyspace name in them.
fine-grained control over data directories -- Key: CASSANDRA-2749 URL: https://issues.apache.org/jira/browse/CASSANDRA-2749 Project: Cassandra Issue Type: New Feature Components: Core Reporter: Jonathan Ellis Priority: Minor Fix For: 1.1 Attachments: 0001-2749-v2.patch, 0001-Make-it-possible-to-put-column-families-in-subdirect.patch, 0001-add-new-directory-layout.patch, 0001-non-backwards-compatible-patch-for-2749-putting-cfs-.patch.gz, 0002-fix-unit-tests-v2.patch, 0002-fix-unit-tests.patch, 2749.tar.gz, 2749_backwards_compatible_v1.patch, 2749_backwards_compatible_v2.patch, 2749_backwards_compatible_v3.patch, 2749_backwards_compatible_v4.patch, 2749_backwards_compatible_v4_rebase1.patch, 2749_not_backwards.tar.gz, 2749_proper.tar.gz Currently Cassandra supports multiple data directories but no way to control what sstables are placed where. Particularly for systems with mixed SSDs and rotational disks, it would be nice to pin frequently accessed columnfamilies to the SSDs. Postgresql does this with tablespaces (http://www.postgresql.org/docs/9.0/static/manage-ag-tablespaces.html) but we should probably avoid using that name because of confusing similarity to keyspaces. -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira
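The upgrade check described above (be permissive on name length, but make sure no resulting file path exceeds the 256-character Windows limit) can be sketched as follows. The layout string and all names here are hypothetical, not the patch's actual code:

```java
// Illustrative sketch of the upgrade-time validation described above:
// instead of strictly enforcing the 32-character name cap, verify that every
// file path the new per-cf directory layout would produce stays under the
// 256-character Windows path limit. Names and layout are hypothetical.
public class PathLimitCheck {
    static final int WINDOWS_PATH_LIMIT = 256;

    public static boolean fitsLimit(String dataDir, String keyspace, String cf, String suffix) {
        // assumed new-style layout: <dataDir>/<keyspace>/<cf>/<keyspace>-<cf>-<suffix>
        String path = dataDir + "/" + keyspace + "/" + cf + "/"
                    + keyspace + "-" + cf + "-" + suffix;
        return path.length() < WINDOWS_PATH_LIMIT;
    }

    public static void main(String[] args) {
        System.out.println(fitsLimit("/var/lib/cassandra/data", "ss", "users", "hc-1-Data.db"));
    }
}
```

Checking the assembled path rather than the individual name lengths is what makes the check "as permissive as possible": short data directories leave room for longer keyspace and cf names.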
[jira] [Commented] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172416#comment-13172416 ] T Jake Luciani commented on CASSANDRA-2474:
---

bq. But that's exactly when you do know what columns you have.

In my example, when a new column is added to the file and inserted by the loader, it's hidden from view till someone explicitly adds it as a sparse column. That makes us no longer schemaless.

bq. Nested-but-not-transposed data aka documents is another separate case.

This is the case I'm thinking of then. Would this be handled in CQL or a document api?
[jira] [Commented] (CASSANDRA-3632) using an ant builder in Eclipse is painful
[ https://issues.apache.org/jira/browse/CASSANDRA-3632?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172422#comment-13172422 ] Hudson commented on CASSANDRA-3632:
---

Integrated in Cassandra #1260 (See [https://builds.apache.org/job/Cassandra/1260/]) remove ant builder; restore java builder Patch by eevans; reviewed by Rick Shaw for CASSANDRA-3632

eevans : http://svn.apache.org/viewcvs.cgi/?root=Apache-SVN&view=rev&rev=1220838 Files : * /cassandra/trunk/build.xml

using an ant builder in Eclipse is painful -- Key: CASSANDRA-3632 URL: https://issues.apache.org/jira/browse/CASSANDRA-3632 Project: Cassandra Issue Type: Bug Components: Packaging, Tools Affects Versions: 1.0.6 Reporter: Eric Evans Assignee: Eric Evans Priority: Minor Attachments: v1-0001-CASSANDRA-3632-remove-ant-builder-restore-java-builder.txt The {{generate-eclipse-files}} target creates project files that use an Ant builder. Besides being painfully slow (I've had the runs stack up behind frequent saves), many of Eclipse's errors and warnings do not show unless an internal builder is used.
[jira] [Commented] (CASSANDRA-2475) Prepared statements
[ https://issues.apache.org/jira/browse/CASSANDRA-2475?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172423#comment-13172423 ] Hudson commented on CASSANDRA-2475:
---

Integrated in Cassandra #1260 (See [https://builds.apache.org/job/Cassandra/1260/]) clean up Term ctors Patch by eevans; reviewed by Rick Shaw for CASSANDRA-2475 index bind markers using parser Patch by eevans; reviewed by Rick Shaw for CASSANDRA-2475 properly report number of markers in a statement Patch by eevans; reviewed by Rick Shaw for CASSANDRA-2475

eevans : http://svn.apache.org/viewcvs.cgi/?root=Apache-SVN&view=rev&rev=1220843 Files : * /cassandra/trunk/src/java/org/apache/cassandra/cql/Term.java
eevans : http://svn.apache.org/viewcvs.cgi/?root=Apache-SVN&view=rev&rev=1220840 Files : * /cassandra/trunk/src/java/org/apache/cassandra/cql/CQLStatement.java * /cassandra/trunk/src/java/org/apache/cassandra/cql/Cql.g * /cassandra/trunk/src/java/org/apache/cassandra/cql/QueryProcessor.java * /cassandra/trunk/src/java/org/apache/cassandra/cql/Term.java
eevans : http://svn.apache.org/viewcvs.cgi/?root=Apache-SVN&view=rev&rev=1220839 Files : * /cassandra/trunk/src/java/org/apache/cassandra/cql/QueryProcessor.java

Prepared statements --- Key: CASSANDRA-2475 URL: https://issues.apache.org/jira/browse/CASSANDRA-2475 Project: Cassandra Issue Type: New Feature Components: API, Core Affects Versions: 1.0.5 Reporter: Eric Evans Assignee: Rick Shaw Priority: Minor Labels: cql Fix For: 1.1 Attachments: 2475-v1.patch, 2475-v2.patch, 2475-v3.1.patch, 2475-v3.2-Thrift.patch, v1-0001-CASSANDRA-2475-prepared-statement-patch.txt, v1-0002-regenerated-thrift-java.txt, v10-0001-CASSANDRA-2475-properly-report-number-of-markers-in-a-.txt, v10-0002-index-bind-markers-using-parser.txt, v10-0003-clean-up-Term-ctors.txt, v2-0001-CASSANDRA-2475-rickshaw-2475-v3.1.patch.txt, v2-0002-rickshaw-2475-v3.2-Thrift.patch-w-changes.txt, v2-0003-eevans-increment-thrift-version-by-1-not-3.txt, v2-0004-eevans-misc-cleanups.txt, v2-0005-eevans-refactor-for-better-encapsulation-of-prepare.txt, v2-0006-eevans-log-queries-at-TRACE.txt, v2-0007-use-an-LRU-map-for-storage-of-prepared-statements.txt
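One of the attachments above is titled "use an LRU map for storage of prepared statements". The standard Java idiom for such an LRU map is a `LinkedHashMap` in access order with `removeEldestEntry` overridden; a sketch of that idiom (capacity and types here are illustrative, not the values the patch uses):

```java
import java.util.LinkedHashMap;
import java.util.Map;

// Sketch of the LinkedHashMap-based LRU idiom that the
// "use-an-LRU-map-for-storage-of-prepared-statements" patch title alludes to.
public class PreparedStatementCache<K, V> extends LinkedHashMap<K, V> {
    private final int capacity;

    public PreparedStatementCache(int capacity) {
        super(16, 0.75f, true);               // accessOrder=true -> LRU iteration order
        this.capacity = capacity;
    }

    @Override
    protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
        return size() > capacity;             // evict least-recently-used on overflow
    }

    public static void main(String[] args) {
        PreparedStatementCache<Integer, String> cache = new PreparedStatementCache<>(2);
        cache.put(1, "SELECT ...");
        cache.put(2, "UPDATE ...");
        cache.get(1);                          // touch 1 so 2 becomes eldest
        cache.put(3, "DELETE ...");            // evicts statement 2
        System.out.println(cache.keySet());    // prints [1, 3]
    }
}
```

Bounding the map this way keeps a misbehaving client that prepares unbounded numbers of statements from growing server memory without limit.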
[jira] [Created] (CASSANDRA-3647) Support arbitrarily nested documents
Support arbitrarily nested documents -- Key: CASSANDRA-3647 URL: https://issues.apache.org/jira/browse/CASSANDRA-3647 Project: Cassandra Issue Type: New Feature Components: API, Core Reporter: Jonathan Ellis Composite columns introduce the ability to have arbitrarily nested data in a Cassandra row. We should expose this through CQL. -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Commented] (CASSANDRA-3647) Support arbitrarily nested documents
[ https://issues.apache.org/jira/browse/CASSANDRA-3647?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172424#comment-13172424 ] Jonathan Ellis commented on CASSANDRA-3647:
---

Ed's AnyType is useful here, since you really need self-describing data for documents: CASSANDRA-3281
[jira] [Commented] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172426#comment-13172426 ] Jonathan Ellis commented on CASSANDRA-2474:
---

bq. In my example, when a new column is added to the file and inserted by the loader, it's hidden from view till someone explicitly adds it as a sparse column. That makes us no longer schemaless.

Right. But schemaless is a non-feature; painless schema is what people care about. (1000 columns? No problem! ALTER without rewriting your data? No problem!)

bq. This is the case I'm thinking of then. Would this be handled in CQL or a document api?

I think it should be CQL, although we can support additional APIs on top of that. CASSANDRA-3647
[jira] [Commented] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172437#comment-13172437 ] T Jake Luciani commented on CASSANDRA-2474:
---

ok +1

On the Hive side we can support the same semantics; however, achieving the same syntax will be hard. On the other hand, since this is now DDL and no longer DML, I think it's not a big deal.
[jira] [Commented] (CASSANDRA-3641) inconsistent/corrupt counters w/ broken shards never converge
[ https://issues.apache.org/jira/browse/CASSANDRA-3641?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172436#comment-13172436 ] Peter Schuller commented on CASSANDRA-3641:
---

I'll fix the comment (it was written before I fully understood the role of deltas).

As for the JMX counter: I kind of see your concern, but at the same time, most people who have monitoring of Cassandra at all will have setups to easily monitor/graph/alert on JMX-exposed values, and I really think it's a shame if we can't add additional instrumentation because it would confuse users. How about putting it somewhere else, where it's clearly nothing you need to worry about normally? I actually had an original patch, before I submitted upstream, where I had created a separate MBean I called RedFlags, because I found no good place to put the counter. The idea was that it felt completely overkill to have a dedicated MBean for the purpose, but at the same time I really wanted it accounted for. RedFlags was intended as a place to put counters that you essentially always expect to be exactly 0 during healthy production use. I could see putting more stuff there, like exception counts in places where any exception indicates a severe problem, or a count of out-of-disk-space conditions preventing or affecting (different bucket) compaction, or a count of GC pauses above a certain threshold, etc. If you agree, I'll volunteer to go through and add some things I can think of, along with this count. Else I can certainly re-submit without the JMX counter. Or just submit a separate JIRA for it (but that's only worth it if you might be okay with a RedFlags-style approach and it's not just this one counter).
inconsistent/corrupt counters w/ broken shards never converge - Key: CASSANDRA-3641 URL: https://issues.apache.org/jira/browse/CASSANDRA-3641 Project: Cassandra Issue Type: Bug Reporter: Peter Schuller Assignee: Peter Schuller Attachments: 3641-0.8-internal-not-for-inclusion.txt, 3641-trunk.txt

We ran into a case (which MIGHT be related to CASSANDRA-3070) whereby we had counters that were corrupt (hopefully due to CASSANDRA-3178). The corruption was that there would exist shards with the *same* node_id, *same* clock id, but *different* counts. The counter column diffing and reconciliation code assumes that this never happens, and ignores the count. The problem with this is that if there is an inconsistency, the result of a reconciliation will depend on the order of the shards. In our case, for example, we would see the value of the counter randomly fluctuating on a CL.ALL read, but we would get consistent values (whatever the node had) on CL.ONE (submitted to one of the nodes in the replica set for the key). In addition, read repair would not work despite digest mismatches, because the diffing algorithm also did not care about the counts when determining the differences to send. I'm attaching patches that fix this. The first patch is against our 0.8 branch, which is not terribly useful to people, but I include it because it is the well-tested version that we have used on the production cluster which was subject to this corruption. The other patch is against trunk, and contains the same change. What the patch does is:

* On diffing, treat as DISJOINT if there is a count discrepancy.
* On reconciliation, look at the count and *deterministically* pick the higher one, and:
** log the fact that we detected a corrupt counter
** increment a JMX-observable counter for monitoring purposes

A cluster which is subject to such corruption and has this patch will fix itself with an AES + compact (or just repeated compactions, assuming the replicate-on-compact is able to deliver correctly).
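The deterministic reconciliation rule described in the ticket can be sketched as follows. Plain longs stand in for the real shard encoding, and the names are hypothetical; this is not the patch's actual code:

```java
// Sketch of the deterministic rule described above for two shards carrying
// the same node_id: a higher logical clock wins outright; on equal clocks
// with differing counts (the corruption case), the higher count is kept, so
// the merge result no longer depends on the order the shards are visited in.
public class ShardReconcile {
    /** Returns {clock, count} of the winning shard. */
    public static long[] reconcile(long clockA, long countA, long clockB, long countB) {
        if (clockA != clockB)
            return clockA > clockB ? new long[]{clockA, countA} : new long[]{clockB, countB};
        // Equal clocks should imply equal counts; if not, pick deterministically.
        return new long[]{clockA, Math.max(countA, countB)};
    }

    public static void main(String[] args) {
        long[] merged = reconcile(5, 10, 5, 7);   // corrupt pair: same clock
        System.out.println(merged[1]);            // prints 10, in either argument order
    }
}
```

Any deterministic tie-break (max, min, or a hash) would make replicas converge; taking the higher count is simply the choice the ticket describes.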
[jira] [Created] (CASSANDRA-3648) Repair should validate checksums before streaming
Repair should validate checksums before streaming - Key: CASSANDRA-3648 URL: https://issues.apache.org/jira/browse/CASSANDRA-3648 Project: Cassandra Issue Type: New Feature Reporter: Jonathan Ellis Priority: Minor Fix For: 1.2 We have block checksums and sha1 full-file hashes, but currently we do not validate these before streaming in a repair. -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Commented] (CASSANDRA-3648) Repair should validate checksums before streaming
[ https://issues.apache.org/jira/browse/CASSANDRA-3648?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172464#comment-13172464 ] Jonathan Ellis commented on CASSANDRA-3648:
---

However, I'm unsure how useful this is without also validating during read repair. Presumably we do this during the normal read path for block checksums, but full-file hashes are too heavyweight for that.
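Validating a full-file hash before streaming amounts to recomputing the digest over the data and comparing it with the stored one. A minimal sketch with `java.security.MessageDigest` (file I/O is elided, the input is a byte array; this is not Cassandra's actual code):

```java
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

// Sketch of the full-file hash check proposed above: recompute the SHA-1
// digest of the data and compare it with the stored digest before streaming.
public class StreamChecksum {
    public static boolean sha1Matches(byte[] data, byte[] expected) throws NoSuchAlgorithmException {
        byte[] actual = MessageDigest.getInstance("SHA-1").digest(data);
        return MessageDigest.isEqual(actual, expected);
    }

    public static void main(String[] args) throws NoSuchAlgorithmException {
        byte[] data = "sstable-bytes".getBytes();
        byte[] stored = MessageDigest.getInstance("SHA-1").digest(data);
        System.out.println(sha1Matches(data, stored));   // prints true
    }
}
```

The cost concern in the comment follows directly from this shape: the digest requires one full pass over the file, which is acceptable once per repair stream but not per read.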
[jira] [Commented] (CASSANDRA-3592) Major Compaction Incredibly Slow
[ https://issues.apache.org/jira/browse/CASSANDRA-3592?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172467#comment-13172467 ] Jonathan Ellis commented on CASSANDRA-3592:
---

If you have a test system available, one way to make progress here would be to bisect the changelog to see which checkin caused the slowness.

Major Compaction Incredibly Slow Key: CASSANDRA-3592 URL: https://issues.apache.org/jira/browse/CASSANDRA-3592 Project: Cassandra Issue Type: Bug Components: Core Affects Versions: 1.0.3 Environment: RHEL6 - 24 core machines 24 GB mem total, 11 GB java heap java version 1.6.0_26 6 node cluster (4@0.8.6, 2@1.0.3) Reporter: Dan Hendry Labels: compaction

Twice now (on different nodes), I have observed major compaction for certain column families take *significantly* longer on 1.0.3 in comparison to 0.8.6. For example:

On the 0.8.6 node, the post-compaction log message: {noformat}CompactionManager.java (line 608) Compacted to XXX. 339,164,959,170 to 158,825,469,883 (~46% of original) bytes for 25,996 keys. Time: 26,934,317ms.{noformat}

On the 1.0.3 node, the post-compaction log message: {noformat}CompactionTask.java (line 213) Compacted to [XXX]. 222,338,354,529 to 147,751,403,084 (~66% of original) bytes for 26,100 keys at 0.562045MB/s. Time: 250,703,563ms.{noformat}

So... literally an order of magnitude slower on 1.0.3 in comparison to 0.8.6.

Relevant configuration settings:
* compaction_throughput_mb_per_sec: 0 (why? because the compaction throttling logic as currently implemented is highly unsuitable for wide rows, but that's a different issue)
* in_memory_compaction_limit_in_mb: 128

Column family characteristics:
* Many wide rows (~5% of rows greater than 10MB and hundreds of rows greater than 100 MB, with many small columns).
* Heavy use of expiring columns - each row represents data for a particular hour so typically all columns in the row will expire together.
* The significant size shrinkage as reported by the log messages is due mainly to expired data being cleaned up (I typically trigger major compaction when 30-50% of the on disk data has expired, which is about once every 3 weeks per node).
* Perhaps obviously: size-tiered compaction and no compression (the schema has not changed since the partial upgrade to 1.0.x)
* Standard column family

Performance notes during compaction:
* Nice CPU usage and load average is basically the same between 0.8.6 and 1.0.3 - ie, compaction IS running and is not getting stalled or hung up anywhere.
* Compaction is IO bound on the 0.8.6 machines - the disks see heavy, constant utilization when compaction is running.
* Compaction uses virtually no IO on the 1.0.3 machines - disk utilization is virtually no different when compacting vs not compacting (but at the same time, CPU usage and load average clearly indicate that compaction IS running).

Finally, I have not had time to profile more thoroughly, but jconsole always shows the following stack trace for the active compaction thread (for the 1.0.3 machine):

{noformat}
Stack trace:
org.apache.cassandra.db.ColumnFamilyStore.removeDeletedStandard(ColumnFamilyStore.java:851)
org.apache.cassandra.db.ColumnFamilyStore.removeDeletedColumnsOnly(ColumnFamilyStore.java:835)
org.apache.cassandra.db.ColumnFamilyStore.removeDeleted(ColumnFamilyStore.java:826)
org.apache.cassandra.db.compaction.PrecompactedRow.removeDeletedAndOldShards(PrecompactedRow.java:77)
org.apache.cassandra.db.compaction.PrecompactedRow.<init>(PrecompactedRow.java:102)
org.apache.cassandra.db.compaction.CompactionController.getCompactedRow(CompactionController.java:133)
org.apache.cassandra.db.compaction.CompactionIterable$Reducer.getReduced(CompactionIterable.java:102)
org.apache.cassandra.db.compaction.CompactionIterable$Reducer.getReduced(CompactionIterable.java:87)
org.apache.cassandra.utils.MergeIterator$ManyToOne.consume(MergeIterator.java:116)
org.apache.cassandra.utils.MergeIterator$ManyToOne.computeNext(MergeIterator.java:99)
com.google.common.collect.AbstractIterator.tryToComputeNext(AbstractIterator.java:140)
com.google.common.collect.AbstractIterator.hasNext(AbstractIterator.java:135)
com.google.common.collect.Iterators$7.computeNext(Iterators.java:614)
com.google.common.collect.AbstractIterator.tryToComputeNext(AbstractIterator.java:140)
com.google.common.collect.AbstractIterator.hasNext(AbstractIterator.java:135)
org.apache.cassandra.db.compaction.CompactionTask.execute(CompactionTask.java:172)
org.apache.cassandra.db.compaction.CompactionManager$4.call(CompactionManager.java:277)
java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303)
[jira] [Updated] (CASSANDRA-2261) During Compaction, Corrupt SSTables with rows that cause failures should be identified and blacklisted.
[ https://issues.apache.org/jira/browse/CASSANDRA-2261?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jonathan Ellis updated CASSANDRA-2261: -- Reviewer: xedin (was: stuhood) During Compaction, Corrupt SSTables with rows that cause failures should be identified and blacklisted. --- Key: CASSANDRA-2261 URL: https://issues.apache.org/jira/browse/CASSANDRA-2261 Project: Cassandra Issue Type: Improvement Components: Core Reporter: Benjamin Coverston Assignee: Benjamin Coverston Priority: Minor Labels: not_a_pony Fix For: 1.1 Attachments: 2261-v2.patch, 2261.patch When a compaction of a set of SSTables fails because of corruption it will continue to try to compact that SSTable causing pending compactions to build up. One way to mitigate this problem would be to log the error, then identify the specific SSTable that caused the failure, subsequently blacklisting that SSTable and ensuring that it is no longer included in future compactions. For this we could simply store the problematic SSTable's name in memory. If it's not possible to identify the SSTable that caused the issue, then perhaps blacklisting the (ordered) permutation of SSTables to be compacted together is something that can be done to solve this problem in a more general case, and avoid issues where two (or more) SSTables have trouble compacting a particular row. For this option we would probably want to store the lists of the bad combinations in the system table somewhere s.t. these can survive a node failure (there have been a few cases where I have seen a compaction cause a node failure). -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Resolved] (CASSANDRA-3645) Can't delete row with cqlsh via row key
[ https://issues.apache.org/jira/browse/CASSANDRA-3645?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jonathan Ellis resolved CASSANDRA-3645. --- Resolution: Duplicate You're right, this is normal tombstone behavior. (See CASSANDRA-2569 for an earlier example.) Can't delete row with cqlsh via row key --- Key: CASSANDRA-3645 URL: https://issues.apache.org/jira/browse/CASSANDRA-3645 Project: Cassandra Issue Type: Bug Components: Core Affects Versions: 1.0.6 Reporter: Oleksandr Shyshko Labels: cql, cqlsh, delete This is probably not a bug, but standard tombstone/deletion behavior. Maybe it would be nice to have a built-in filter for tombstones, so they won't appear in queries. Reproduce by: == cqlsh> CREATE KEYSPACE ss WITH strategy_class = 'SimpleStrategy' AND strategy_options:replication_factor = 1; cqlsh> use ss; cqlsh:ss> create columnfamily users (name text primary key, pass text); cqlsh:ss> select * from users; cqlsh:ss> insert into users (name, pass) values ('john', 'secret'); cqlsh:ss> select * from users; name | pass | john | secret | cqlsh:ss> delete from users where name = 'john'; cqlsh:ss> select * from users; name | john | cqlsh:ss> == Desired behavior: == cqlsh:ss> delete from users where name = 'john'; cqlsh:ss> select * from users; cqlsh:ss> ==
[jira] [Commented] (CASSANDRA-3615) CommitLog BufferOverflowException
[ https://issues.apache.org/jira/browse/CASSANDRA-3615?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172495#comment-13172495 ] Rick Branson commented on CASSANDRA-3615: - Any hints on how to reproduce this one? CommitLog BufferOverflowException - Key: CASSANDRA-3615 URL: https://issues.apache.org/jira/browse/CASSANDRA-3615 Project: Cassandra Issue Type: Bug Components: Core Affects Versions: 1.1 Reporter: Rick Branson Assignee: Rick Branson Attachments: cl-buffer-overflow.patch Reported on mailing list http://mail-archives.apache.org/mod_mbox/cassandra-dev/201112.mbox/%3CCAJHHpg2Rw_BWFJ9DycRGSYkmwMwrJDK3%3Dzw3HwRoutWHbUcULw%40mail.gmail.com%3E ERROR 14:07:31,215 Fatal exception in thread Thread[COMMIT-LOG-WRITER,5,main] java.nio.BufferOverflowException at java.nio.Buffer.nextPutIndex(Buffer.java:501) at java.nio.DirectByteBuffer.putInt(DirectByteBuffer.java:654) at org.apache.cassandra.db.commitlog.CommitLogSegment.write(CommitLogSegment.java:259) at org.apache.cassandra.db.commitlog.CommitLog$LogRecordAdder.run(CommitLog.java:568) at org.apache.cassandra.db.commitlog.PeriodicCommitLogExecutorService$1.runMayThrow(PeriodicCommitLogExecutorService.java:49) at org.apache.cassandra.utils.WrappedRunnable.run(WrappedRunnable.java:30) at java.lang.Thread.run(Thread.java:662) INFO 14:07:31,504 flushing high-traffic column family CFS(Keyspace='***', ColumnFamily='***') (estimated 103394287 bytes) It happened during a fairly standard load process using M/R.
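The stack trace shows `putInt()` running past the segment buffer's limit. The following sketch illustrates the failure mode only, not the fix in cl-buffer-overflow.patch: a `ByteBuffer` write throws `BufferOverflowException` when fewer bytes remain than the write needs, so a defensive writer checks `remaining()` first.

```java
import java.nio.ByteBuffer;

// Illustration of the failure mode above (not the actual patch): putInt()
// past the buffer's limit throws BufferOverflowException, so a defensive
// writer checks remaining() before writing and signals the caller to roll
// over to a fresh segment instead.
class SegmentWriteCheck
{
    static boolean tryPutInt(ByteBuffer buf, int value)
    {
        if (buf.remaining() < 4)
            return false; // caller should allocate/switch to a new segment
        buf.putInt(value);
        return true;
    }

    public static void main(String[] args)
    {
        ByteBuffer buf = ByteBuffer.allocate(6);
        System.out.println(tryPutInt(buf, 1)); // true: 4 of 6 bytes used
        System.out.println(tryPutInt(buf, 2)); // false: only 2 bytes remain
    }
}
```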
[jira] [Commented] (CASSANDRA-1391) Allow Concurrent Schema Migrations
[ https://issues.apache.org/jira/browse/CASSANDRA-1391?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172502#comment-13172502 ] Jonathan Ellis commented on CASSANDRA-1391: --- I'd rather do them together in this case; it's pretty hard to work in trunk w/o schema announce working. Allow Concurrent Schema Migrations -- Key: CASSANDRA-1391 URL: https://issues.apache.org/jira/browse/CASSANDRA-1391 Project: Cassandra Issue Type: Improvement Components: Core Affects Versions: 0.7.0 Reporter: Stu Hood Assignee: Pavel Yaskevich Fix For: 1.1 Attachments: 0001-new-migration-schema-and-avro-methods-cleanup.patch, 0002-avro-removal.patch, 0003-oldVersion-removed-nit-fixed.patch, CASSANDRA-1391.patch CASSANDRA-1292 fixed multiple migrations started from the same node to properly queue themselves, but it is still possible for migrations initiated on different nodes to conflict and leave the cluster in a bad state. Since the system_add/drop/rename methods are accessible directly from the client API, they should be completely safe for concurrent use. It should be possible to allow for most types of concurrent migrations by converting the UUID schema ID into a VersionVectorClock (as provided by CASSANDRA-580).
[jira] [Updated] (CASSANDRA-1600) Merge get_indexed_slices with get_range_slices
[ https://issues.apache.org/jira/browse/CASSANDRA-1600?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jonathan Ellis updated CASSANDRA-1600: -- Reviewer: jbellis (was: slebresne) Assignee: Sylvain Lebresne Merge get_indexed_slices with get_range_slices -- Key: CASSANDRA-1600 URL: https://issues.apache.org/jira/browse/CASSANDRA-1600 Project: Cassandra Issue Type: New Feature Components: API Reporter: Stu Hood Assignee: Sylvain Lebresne Fix For: 1.1 Attachments: 0001-Add-optional-FilterClause-to-KeyRange-and-support-do-v2.patch, 0001-Add-optional-FilterClause-to-KeyRange-and-support-doin.txt, 0002-allow-get_range_slices-to-apply-filter-to-a-sequenti-v2.patch, 0002-allow-get_range_slices-to-apply-filter-to-a-sequential.txt From a comment on 1157: {quote} IndexClause only has a start key for get_indexed_slices, but it would seem that the reasoning behind using 'KeyRange' for get_range_slices applies there as well, since if you know the range you care about in the primary index, you don't want to continue scanning until you exhaust 'count' (or the cluster). Since it would appear that get_indexed_slices would benefit from a KeyRange, why not smash get_(range|indexed)_slices together, and make IndexClause an optional field on KeyRange? {quote}
[jira] [Updated] (CASSANDRA-2246) Enable Pig to use indexed data as described in CASSANDRA-2245
[ https://issues.apache.org/jira/browse/CASSANDRA-2246?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jonathan Ellis updated CASSANDRA-2246: -- Description: in contrib/pig, add query parameters to CassandraStorage keyspace/column family string to specify column search predicates. For example: rows = LOAD 'cassandra://mykeyspace/mycolumnfamily?country=UK' using CassandraStorage(); This depends on CASSANDRA-1600 was: in contrib/pig, add query parameters to CassandraStorage keyspace/column family string to specify column search predicates. For example: rows = LOAD 'cassandra://mykeyspace/mycolumnfamily?country=UK' using CassandraStorage(); This depends on CASSANDRA-2245 Enable Pig to use indexed data as described in CASSANDRA-2245 - Key: CASSANDRA-2246 URL: https://issues.apache.org/jira/browse/CASSANDRA-2246 Project: Cassandra Issue Type: Improvement Components: Contrib Affects Versions: 0.7.2 Reporter: Matt Kennedy Priority: Minor Labels: hadoop Fix For: 1.1 Original Estimate: 24h Remaining Estimate: 24h in contrib/pig, add query parameters to CassandraStorage keyspace/column family string to specify column search predicates. For example: rows = LOAD 'cassandra://mykeyspace/mycolumnfamily?country=UK' using CassandraStorage(); This depends on CASSANDRA-1600
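A load func handed a location like `cassandra://mykeyspace/mycolumnfamily?country=UK` has to pull the predicate out of the query string. A minimal sketch of that parsing step, using a hypothetical `LocationParser` helper rather than the real CassandraStorage code:

```java
import java.util.HashMap;
import java.util.Map;

// Hypothetical sketch of extracting search predicates from a location string
// such as 'cassandra://mykeyspace/mycolumnfamily?country=UK'. Not the actual
// CassandraStorage implementation; just the parsing idea the ticket implies.
class LocationParser
{
    static Map<String, String> queryParams(String location)
    {
        Map<String, String> params = new HashMap<String, String>();
        int q = location.indexOf('?');
        if (q < 0)
            return params; // no predicates given
        for (String pair : location.substring(q + 1).split("&"))
        {
            String[] kv = pair.split("=", 2);
            params.put(kv[0], kv.length > 1 ? kv[1] : "");
        }
        return params;
    }
}
```

Each resulting key/value pair would then be translated into an IndexExpression for the underlying get_indexed_slices-style query (per the CASSANDRA-1600 dependency).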
[jira] [Commented] (CASSANDRA-2392) Saving IndexSummaries to disk
[ https://issues.apache.org/jira/browse/CASSANDRA-2392?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172509#comment-13172509 ] Jonathan Ellis commented on CASSANDRA-2392: --- To answer the question: yes, let's ignore caches here. Would like to do this for 1.1 as well. Saving IndexSummaries to disk - Key: CASSANDRA-2392 URL: https://issues.apache.org/jira/browse/CASSANDRA-2392 Project: Cassandra Issue Type: Improvement Reporter: Chris Goffinet Assignee: Vijay Priority: Minor Fix For: 1.1 For nodes with millions of keys, doing rolling restarts that take over 10 minutes per node can be painful if you have a 100 node cluster. All of our time is spent on doing index summary computations on startup. It would be great if we could save those to disk as well. Our indexes are quite large.
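Persisting a computed summary is essentially serializing its sampled key/position entries so startup can read them back instead of rescanning the primary index. A minimal sketch with `DataOutputStream`, assuming a hypothetical `SummaryIO` helper; the on-disk format eventually chosen for this ticket may well differ (versioning, checksums, partitioner validation, etc.).

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

// Hypothetical sketch: an index summary reduced to sampled (key -> index
// position) entries, written as a count followed by UTF key + long offset
// pairs. Real summaries hold decorated keys and more metadata.
class SummaryIO
{
    static byte[] serialize(SortedMap<String, Long> entries)
    {
        try
        {
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            DataOutputStream out = new DataOutputStream(bos);
            out.writeInt(entries.size());
            for (Map.Entry<String, Long> e : entries.entrySet())
            {
                out.writeUTF(e.getKey());
                out.writeLong(e.getValue());
            }
            return bos.toByteArray();
        }
        catch (IOException e)
        {
            throw new RuntimeException(e); // cannot happen for in-memory streams
        }
    }

    static SortedMap<String, Long> deserialize(byte[] data)
    {
        try
        {
            DataInputStream in = new DataInputStream(new ByteArrayInputStream(data));
            SortedMap<String, Long> entries = new TreeMap<String, Long>();
            int n = in.readInt();
            for (int i = 0; i < n; i++)
                entries.put(in.readUTF(), in.readLong());
            return entries;
        }
        catch (IOException e)
        {
            throw new RuntimeException(e);
        }
    }
}
```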
[jira] [Commented] (CASSANDRA-2893) Add row-level isolation
[ https://issues.apache.org/jira/browse/CASSANDRA-2893?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172539#comment-13172539 ] Brandon Williams commented on CASSANDRA-2893: - Sylvain, can you rebase for trunk? Thanks. Add row-level isolation --- Key: CASSANDRA-2893 URL: https://issues.apache.org/jira/browse/CASSANDRA-2893 Project: Cassandra Issue Type: Improvement Reporter: Jonathan Ellis Assignee: Sylvain Lebresne Priority: Minor Fix For: 1.1 Attachments: 0001-Move-deletion-infos-into-ISortedColumns.patch, 0002-Make-memtable-use-CF.addAll.patch, 0003-Add-AtomicSortedColumn-and-snapTree.patch, snaptree-0.1-SNAPSHOT.jar This could be done using the atomic ConcurrentMap operations from the Memtable and something like http://code.google.com/p/pcollections/ to replace the ConcurrentSkipListMap in ThreadSafeSortedColumns. The trick is that pcollections does not provide a SortedMap, so we probably need to write our own. Googling [persistent sortedmap] I found http://code.google.com/p/actord/source/browse/trunk/actord/src/main/scala/ff/collection (in scala) and http://clojure.org/data_structures#Data Structures-Maps.
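The persistent-map idea can be illustrated with plain copy-on-write over a `TreeMap` behind an `AtomicReference`: readers grab an immutable snapshot, writers install a modified copy with compare-and-set. This is a sketch of the concept only (not the AtomicSortedColumns patch): copying the whole map is O(n) per write, which is exactly why a structure with cheap snapshots, such as pcollections or SnapTree, is attractive instead.

```java
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicReference;

// Sketch of copy-on-write row isolation: a reader sees a consistent snapshot
// of all columns in a row because each write replaces the whole map
// atomically, never mutating a published one.
class CowSortedColumns
{
    private final AtomicReference<TreeMap<String, String>> ref =
            new AtomicReference<TreeMap<String, String>>(new TreeMap<String, String>());

    void put(String name, String value)
    {
        while (true)
        {
            TreeMap<String, String> current = ref.get();
            // O(n) copy per write; a persistent/snapshottable tree avoids this
            TreeMap<String, String> updated = new TreeMap<String, String>(current);
            updated.put(name, value);
            if (ref.compareAndSet(current, updated))
                return; // lost races simply retry against the new map
        }
    }

    SortedMap<String, String> snapshot()
    {
        return ref.get(); // never mutated after publication
    }
}
```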
[cassandra-jdbc] push by wfs...@gmail.com - Resolve Issue #10. Missing support for Boolean in getBoolean()... on 2011-12-19 17:35 GMT
Revision: 2fbcd5bc3bb3 Author: Rick Shaw wfs...@gmail.com Date: Mon Dec 19 09:03:13 2011 Log: Resolve Issue #10. Missing support for Boolean in getBoolean() * Silly omission in getBoolean() method. * Fixed same problem in getInteger(). * Tidied up other methods for consistency. * Added a junit test for testing regressions. Signed-off-by: Rick Shaw wfs...@gmail.com http://code.google.com/a/apache-extras.org/p/cassandra-jdbc/source/detail?r=2fbcd5bc3bb3 Added: /src/test/java/org/apache/cassandra/cql/jdbc/JdbcRegressionTest.java Modified: /src/main/java/org/apache/cassandra/cql/jdbc/CResultSet.java /src/test/java/org/apache/cassandra/cql/jdbc/DataSourceTest.java === --- /dev/null +++ /src/test/java/org/apache/cassandra/cql/jdbc/JdbcRegressionTest.java Mon Dec 19 09:03:13 2011 @@ -0,0 +1,94 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.cassandra.cql.jdbc; + +import static org.junit.Assert.*; + +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.Statement; + +import org.apache.cassandra.cql.ConnectionDetails; +import org.junit.BeforeClass; +import org.junit.Test; + +public class JdbcRegressionTest +{ +private static java.sql.Connection con = null; + +@BeforeClass +public static void setUpBeforeClass() throws Exception +{ +Class.forName("org.apache.cassandra.cql.jdbc.CassandraDriver"); +con = DriverManager.getConnection(String.format("jdbc:cassandra://%s:%d/%s", +ConnectionDetails.getHost(), +ConnectionDetails.getPort(), +"JdbcTestKeyspace")); +Statement stmt = con.createStatement(); + +// Create KeySpace +String createKS = "CREATE KEYSPACE 'JdbcTestKeyspace' WITH " ++ "strategy_class = SimpleStrategy AND strategy_options:replication_factor = 1;"; +stmt.execute(createKS); + +// Create the target Column family +String createCF = "CREATE COLUMNFAMILY RegressionTest (KEY text PRIMARY KEY, " ++ "bValue boolean, " ++ "iValue int " ++ ") WITH comparator = ascii AND default_validation = bigint;"; + + +stmt.execute(createCF); +stmt.close(); +con.close(); + +// open it up again to see the new CF +con = DriverManager.getConnection(String.format("jdbc:cassandra://%s:%d/%s", +ConnectionDetails.getHost(), +ConnectionDetails.getPort(), +"JdbcTestKeyspace")); + +} + +@Test +public void testIssue10() throws Exception +{ +String insert = "INSERT INTO RegressionTest (KEY,bValue,iValue) VALUES( 'key0',true, 2000);"; +Statement statement = con.createStatement(); + +statement.executeUpdate(insert); +statement.close(); + +Thread.sleep(3000); + +statement = con.createStatement(); +ResultSet result = statement.executeQuery("SELECT bValue,iValue FROM RegressionTest WHERE KEY=key0;"); +result.next(); +boolean b = result.getBoolean(1); +System.out.println("b = " + b); +assertTrue(b); +int i = result.getInt(2); +System.out.println("i = " + i); +assertEquals(2000, i); + } + 
+} === --- /src/main/java/org/apache/cassandra/cql/jdbc/CResultSet.java Fri Nov 4 20:25:07 2011 +++ /src/main/java/org/apache/cassandra/cql/jdbc/CResultSet.java Mon Dec 19 09:03:13 2011 @@ -36,6 +36,27 @@ import org.apache.cassandra.thrift.CqlRow; import org.apache.cassandra.utils.ByteBufferUtil; +/** + * <p>The Supported Data types in CQL are as follows:</p> + * <table> + * <tr><th>type</th><th>java type</th><th>description</th></tr> + * <tr><td>ascii</td><td>String</td><td>ASCII character string</td></tr> + * <tr><td>bigint</td><td>Long</td><td>64-bit signed long</td></tr> + * <tr><td>blob</td><td>ByteBuffer</td><td>Arbitrary bytes (no validation)</td></tr> + * <tr><td>boolean</td><td>Boolean</td><td>true or false</td></tr> + * <tr><td>counter</td><td>Long</td><td>Counter column (64-bit long)</td></tr> + * <tr><td>decimal</td><td>BigDecimal</td><td>Variable-precision decimal</td></tr> + * <tr><td>double</td><td>Double</td><td>64-bit
[jira] [Commented] (CASSANDRA-3610) Checksum improvement for CompressedRandomAccessReader
[ https://issues.apache.org/jira/browse/CASSANDRA-3610?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172543#comment-13172543 ] Sylvain Lebresne commented on CASSANDRA-3610: - The patch lgtm, but pulling in hadoop-commons for just that feels a bit too much imo. I'd prefer extracting the code from there (the initial patch where they added their optimized CRC32 was HADOOP-6148). Checksum improvement for CompressedRandomAccessReader - Key: CASSANDRA-3610 URL: https://issues.apache.org/jira/browse/CASSANDRA-3610 Project: Cassandra Issue Type: Improvement Components: Core Affects Versions: 1.1 Environment: JVM Reporter: Vijay Assignee: Vijay Priority: Minor Fix For: 1.1 Attachments: 0001-use-pure-java-CRC32.patch When compression is on, we currently see checksumming take about 40% more CPU than the snappy library. It looks like Hadoop solved this by implementing their own checksum; we can either use it or implement something similar. http://images.slidesharecdn.com/1toddlipconyanpeichen-cloudera-hadoopandperformance-final-10132228-phpapp01-slide-15-768.jpg?1321043717 In our test env it provided a 50% improvement over the native implementation, which uses JNI to call the OS.
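For reference, the JDK checksum in question is `java.util.zip.CRC32`, which in this era delegated to native zlib via JNI; a pure-Java replacement such as Hadoop's PureJavaCrc32 (HADOOP-6148) computes the same CRC-32 polynomial and must therefore produce identical values for the same input. A minimal sketch of the API being optimized:

```java
import java.util.zip.CRC32;

// Sketch of the checksum call CompressedRandomAccessReader makes per chunk.
// java.util.zip.CRC32 here goes through JNI to zlib, which is the per-call
// overhead the ticket discusses; a drop-in pure-Java implementation must
// return the same standard CRC-32 value.
class ChecksumDemo
{
    static long crc(byte[] data)
    {
        CRC32 crc = new CRC32();
        crc.update(data, 0, data.length);
        return crc.getValue();
    }

    public static void main(String[] args)
    {
        System.out.printf("%08x%n", crc("hello".getBytes()));
    }
}
```

Because the polynomial is standardized, swapping implementations changes no on-disk data, only CPU cost.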
[jira] [Commented] (CASSANDRA-3628) Make Pig/CassandraStorage delete functionality disabled by default and configurable
[ https://issues.apache.org/jira/browse/CASSANDRA-3628?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172552#comment-13172552 ] Brandon Williams commented on CASSANDRA-3628: - It seems like 'glossing over nulls when deletes are disabled' might be a bad idea, and make for very difficult-to-debug problems later. Make Pig/CassandraStorage delete functionality disabled by default and configurable --- Key: CASSANDRA-3628 URL: https://issues.apache.org/jira/browse/CASSANDRA-3628 Project: Cassandra Issue Type: Task Reporter: Jeremy Hanna Assignee: Jeremy Hanna Labels: pig Fix For: 1.0.7, 1.1 Attachments: 3628.txt Right now, there is a way to delete a column with the CassandraStorage loadstorefunc. In practice it is a bad idea to have that enabled by default. A scenario: you do an outer join, you don't have a value for something, and then you write out to Cassandra all of the attributes of that relation. You've just inadvertently deleted a column for all the rows that didn't have that value as a result of the outer join. It can be argued that you want to be careful with how you project after the join. However, I would think disabling it by default and having a configurable property to enable it for the instances when you explicitly want to use it is the right plan. Fwiw, we had a bug in one of our scripts that did exactly as described above. It's good to fix the bug. It's bad to implicitly delete data.
[jira] [Commented] (CASSANDRA-3610) Checksum improvement for CompressedRandomAccessReader
[ https://issues.apache.org/jira/browse/CASSANDRA-3610?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172557#comment-13172557 ] Vijay commented on CASSANDRA-3610: -- Sure, will do, and I will also add the footnote about the same. Just a note: there is already a dependency in the org.apache.cassandra.hadoop package.
[jira] [Commented] (CASSANDRA-3327) Support TimeUUID in CassandraStorage
[ https://issues.apache.org/jira/browse/CASSANDRA-3327?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172568#comment-13172568 ] Rick Branson commented on CASSANDRA-3327: - Reviewed this, I am +1 Support TimeUUID in CassandraStorage Key: CASSANDRA-3327 URL: https://issues.apache.org/jira/browse/CASSANDRA-3327 Project: Cassandra Issue Type: Bug Components: Contrib Affects Versions: 0.8.7 Environment: Cassandra 0.8.6 Build #348 (CASSANDRA-2777 + CASSANDRA-2810) Reporter: Manuel Kreutz Assignee: Brandon Williams Labels: pig Fix For: 0.8.10 Attachments: 3327-v2.txt, 3327.txt Cassandra CLI: {code} grunt> raw = LOAD 'cassandra://TEST/CF' USING CassandraStorage() AS ( key:chararray, columns:bag { column:tuple( name, value ) }); grunt> describe raw; raw: {key: chararray,columns: {(name: bytearray,value: bytearray)}} log_test = FOREACH raw GENERATE (CHARARRAY) key, flatten(columns); grunt> DUMP log_test; {code} Returns: {code} org.apache.pig.impl.logicalLayer.FrontendException: ERROR 1066: Unable to open iterator for alias log_test. Backend error : Unexpected data type java.util.UUID found in stream. Note only standard Pig type is supported when you output from UDF/LoadFunc at org.apache.pig.PigServer.openIterator(PigServer.java:890) at org.apache.pig.tools.grunt.GruntParser.processDump(GruntParser.java:655) at org.apache.pig.tools.pigscript.parser.PigScriptParser.parse(PigScriptParser.java:303) at org.apache.pig.tools.grunt.GruntParser.parseStopOnError(GruntParser.java:188) at org.apache.pig.tools.grunt.GruntParser.parseStopOnError(GruntParser.java:164) at org.apache.pig.tools.grunt.Grunt.run(Grunt.java:67) at org.apache.pig.Main.run(Main.java:487) at org.apache.pig.Main.main(Main.java:108) Caused by: java.lang.RuntimeException: Unexpected data type java.util.UUID found in stream. Note only standard Pig type is supported when you output from UDF/LoadFunc at org.apache.pig.data.BinInterSedes.writeDatum(BinInterSedes.java:478) at org.apache.pig.data.BinInterSedes.writeTuple(BinInterSedes.java:542) at org.apache.pig.data.BinInterSedes.writeDatum(BinInterSedes.java:357) at org.apache.pig.impl.io.InterRecordWriter.write(InterRecordWriter.java:73) at org.apache.pig.impl.io.InterStorage.putNext(InterStorage.java:87) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat$PigRecordWriter.write(PigOutputFormat.java:138) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat$PigRecordWriter.write(PigOutputFormat.java:97) at org.apache.hadoop.mapred.MapTask$NewDirectOutputCollector.write(MapTask.java:498) at org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapOnly$Map.collect(PigMapOnly.java:48) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapBase.runPipeline(PigMapBase.java:263) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapBase.map(PigMapBase.java:256) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapBase.map(PigMapBase.java:58) at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144) at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:621) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305) {code} According to driftx on IRC the setTupleValue function in CassandraStorage needs to handle the uuid case and cast it to a DataByteArray.
[jira] [Commented] (CASSANDRA-3610) Checksum improvement for CompressedRandomAccessReader
[ https://issues.apache.org/jira/browse/CASSANDRA-3610?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172567#comment-13172567 ] Jonathan Ellis commented on CASSANDRA-3610: --- bq. The initial patch where they added their optimized CRC32 was HADOOP-6148 Related: HADOOP-7443 bq. there is already a dependency in org.apache.cassandra.hadoop package But, that's only required if you're going to use the hadoop classes -- we don't ship any hadoop jars, and the rest of C* works fine w/o them.
[jira] [Updated] (CASSANDRA-3327) Support TimeUUID in CassandraStorage
[ https://issues.apache.org/jira/browse/CASSANDRA-3327?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Brandon Williams updated CASSANDRA-3327: Reviewer: rbranson (was: jeromatron)
svn commit: r1220926 - /cassandra/branches/cassandra-0.8/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java
Author: brandonwilliams Date: Mon Dec 19 20:11:40 2011 New Revision: 1220926 URL: http://svn.apache.org/viewvc?rev=1220926&view=rev Log: TimeUUID support in CassandraStorage. Patch by brandonwilliams, reviewed by Rick Branson for CASSANDRA-3327 Modified: cassandra/branches/cassandra-0.8/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java Modified: cassandra/branches/cassandra-0.8/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.8/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java?rev=1220926&r1=1220925&r2=1220926&view=diff == --- cassandra/branches/cassandra-0.8/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java (original) +++ cassandra/branches/cassandra-0.8/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java Mon Dec 19 20:11:40 2011 @@ -27,6 +27,7 @@ import org.apache.cassandra.db.marshal.I import org.apache.cassandra.db.marshal.TypeParser; import org.apache.cassandra.thrift.*; import org.apache.cassandra.utils.FBUtilities; +import org.apache.cassandra.utils.UUIDGen; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -171,6 +172,8 @@ public class CassandraStorage extends Lo pair.set(position, ((BigInteger) value).intValue()); else if (value instanceof ByteBuffer) pair.set(position, new DataByteArray(ByteBufferUtil.getArray((ByteBuffer) value))); + else if (value instanceof UUID) + pair.set(position, new DataByteArray(UUIDGen.decompose((java.util.UUID) value))); else pair.set(position, value); }
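The `UUIDGen.decompose()` call in the patch above amounts to taking the 16 raw bytes of the UUID, most-significant long first, so Pig can carry them as a plain byte array. A self-contained sketch of that conversion, using a hypothetical `UuidBytes` helper rather than Cassandra's class:

```java
import java.nio.ByteBuffer;
import java.util.UUID;

// Sketch of decomposing a java.util.UUID into its 16 raw bytes (big-endian,
// most significant long first), the form a Pig DataByteArray can wrap.
// UuidBytes is an illustrative stand-in for o.a.c.utils.UUIDGen.decompose().
class UuidBytes
{
    static byte[] decompose(UUID uuid)
    {
        ByteBuffer buf = ByteBuffer.allocate(16);
        buf.putLong(uuid.getMostSignificantBits());
        buf.putLong(uuid.getLeastSignificantBits());
        return buf.array();
    }
}
```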
[jira] [Commented] (CASSANDRA-3610) Checksum improvement for CompressedRandomAccessReader
[ https://issues.apache.org/jira/browse/CASSANDRA-3610?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172572#comment-13172572 ] Vijay commented on CASSANDRA-3610: -- Sure, I can copy it, but do we need CRC32C (HADOOP-7443) and to make it configurable? I think we should be fine just using CRC32.
[jira] [Commented] (CASSANDRA-3628) Make Pig/CassandraStorage delete functionality disabled by default and configurable
[ https://issues.apache.org/jira/browse/CASSANDRA-3628?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172573#comment-13172573 ] Jeremy Hanna commented on CASSANDRA-3628: - What are you suggesting - converting the columns written to blank values? Make Pig/CassandraStorage delete functionality disabled by default and configurable --- Key: CASSANDRA-3628 URL: https://issues.apache.org/jira/browse/CASSANDRA-3628 Project: Cassandra Issue Type: Task Reporter: Jeremy Hanna Assignee: Jeremy Hanna Labels: pig Fix For: 1.0.7, 1.1 Attachments: 3628.txt Right now, there is a way to delete a column with the CassandraStorage loadstorefunc. In practice it is a bad idea to have that enabled by default. A scenario: you do an outer join, you don't have a value for something, and then you write out to cassandra all of the attributes of that relation. You've just inadvertently deleted a column for all the rows that didn't have that value as a result of the outer join. It can be argued that you want to be careful with how you project after the join. However, I would think disabling it by default and having a configurable property to enable it for the instances when you explicitly want to use it is the right plan. Fwiw, we had a bug in one of our scripts that did exactly as described above. It's good to fix the bug. It's bad to implicitly delete data.
[jira] [Commented] (CASSANDRA-3628) Make Pig/CassandraStorage delete functionality disabled by default and configurable
[ https://issues.apache.org/jira/browse/CASSANDRA-3628?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172574#comment-13172574 ] Brandon Williams commented on CASSANDRA-3628: - I'm suggesting that passing null values when you don't intend to delete is invalid and should be reported as such, rather than silently accepted. An NPE isn't the clearest way to convey this, obviously, but it seems more correct than silent acceptance.
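The validate-instead-of-NPE idea Brandon describes could be sketched as follows; the `NullValuePolicy` class and its `allowDeletes` flag are hypothetical names for illustration, not the actual 3628 patch:

```java
public class NullValuePolicy {
    private final boolean allowDeletes;

    public NullValuePolicy(boolean allowDeletes) {
        this.allowDeletes = allowDeletes;
    }

    // Returns true when a null value should be treated as a column delete.
    // When deletes are disabled, fail fast with a descriptive error
    // instead of silently deleting or throwing a bare NPE later.
    public boolean shouldDelete(String column, Object value) {
        if (value != null)
            return false;
        if (!allowDeletes)
            throw new IllegalArgumentException(
                "null value for column '" + column + "' would delete it; "
                + "enable deletes explicitly if that is intended");
        return true;
    }
}
```

With the flag off by default, the outer-join scenario from the issue description produces a clear error rather than data loss.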
svn commit: r1220934 - in /cassandra/branches/cassandra-1.0: ./ contrib/ contrib/pig/src/java/org/apache/cassandra/hadoop/pig/ interface/thrift/gen-java/org/apache/cassandra/thrift/
Author: brandonwilliams Date: Mon Dec 19 20:26:47 2011 New Revision: 1220934 URL: http://svn.apache.org/viewvc?rev=1220934view=rev Log: Merge 3327 from 0.8 Modified: cassandra/branches/cassandra-1.0/ (props changed) cassandra/branches/cassandra-1.0/contrib/ (props changed) cassandra/branches/cassandra-1.0/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java cassandra/branches/cassandra-1.0/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java (props changed) cassandra/branches/cassandra-1.0/interface/thrift/gen-java/org/apache/cassandra/thrift/Column.java (props changed) cassandra/branches/cassandra-1.0/interface/thrift/gen-java/org/apache/cassandra/thrift/InvalidRequestException.java (props changed) cassandra/branches/cassandra-1.0/interface/thrift/gen-java/org/apache/cassandra/thrift/NotFoundException.java (props changed) cassandra/branches/cassandra-1.0/interface/thrift/gen-java/org/apache/cassandra/thrift/SuperColumn.java (props changed) Propchange: cassandra/branches/cassandra-1.0/ -- --- svn:mergeinfo (original) +++ svn:mergeinfo Mon Dec 19 20:26:47 2011 @@ -1,7 +1,7 @@ /cassandra/branches/cassandra-0.6:922689-1052356,1052358-1053452,1053454,1053456-1131291 /cassandra/branches/cassandra-0.7:1026516-1211709 /cassandra/branches/cassandra-0.7.0:1053690-1055654 -/cassandra/branches/cassandra-0.8:1090934-1125013,1125019-1212854,1212938,1214916 +/cassandra/branches/cassandra-0.8:1090934-1125013,1125019-1212854,1212938,1214916,1220926 /cassandra/branches/cassandra-0.8.0:1125021-1130369 /cassandra/branches/cassandra-0.8.1:1101014-1125018 /cassandra/branches/cassandra-1.0:1167106,1167185 Propchange: cassandra/branches/cassandra-1.0/contrib/ -- --- svn:mergeinfo (original) +++ svn:mergeinfo Mon Dec 19 20:26:47 2011 @@ -1,7 +1,7 @@ /cassandra/branches/cassandra-0.6/contrib:922689-1052356,1052358-1053452,1053454,1053456-1068009 /cassandra/branches/cassandra-0.7/contrib:1026516-1211709 
/cassandra/branches/cassandra-0.7.0/contrib:1053690-1055654 -/cassandra/branches/cassandra-0.8/contrib:1090934-1125013,1125019-1212854,1212938,1214916 +/cassandra/branches/cassandra-0.8/contrib:1090934-1125013,1125019-1212854,1212938,1214916,1220926 /cassandra/branches/cassandra-0.8.0/contrib:1125021-1130369 /cassandra/branches/cassandra-0.8.1/contrib:1101014-1125018 /cassandra/branches/cassandra-1.0/contrib:1167106,1167185 Modified: cassandra/branches/cassandra-1.0/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java?rev=1220934r1=1220933r2=1220934view=diff == --- cassandra/branches/cassandra-1.0/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java (original) +++ cassandra/branches/cassandra-1.0/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java Mon Dec 19 20:26:47 2011 @@ -27,6 +27,7 @@ import org.apache.cassandra.db.marshal.I import org.apache.cassandra.db.marshal.TypeParser; import org.apache.cassandra.thrift.*; import org.apache.cassandra.utils.Hex; +import org.apache.cassandra.utils.UUIDGen; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -171,6 +172,8 @@ public class CassandraStorage extends Lo pair.set(position, ((BigInteger) value).intValue()); else if (value instanceof ByteBuffer) pair.set(position, new DataByteArray(ByteBufferUtil.getArray((ByteBuffer) value))); + else if (value instanceof UUID) + pair.set(position, new DataByteArray(UUIDGen.decompose((java.util.UUID) value))); else pair.set(position, value); } Propchange: cassandra/branches/cassandra-1.0/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java -- --- svn:mergeinfo (original) +++ svn:mergeinfo Mon Dec 19 20:26:47 2011 @@ -1,7 +1,7 @@ 
/cassandra/branches/cassandra-0.6/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:922689-1052356,1052358-1053452,1053454,1053456-1131291 /cassandra/branches/cassandra-0.7/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1026516-1211709 /cassandra/branches/cassandra-0.7.0/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1053690-1055654 -/cassandra/branches/cassandra-0.8/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1090934-1125013,1125019-1212854,1212938,1214916
[jira] [Commented] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172594#comment-13172594 ] Sylvain Lebresne commented on CASSANDRA-2474: - For the sparse example: {noformat} CREATE TABLE timeline ( userid int primary key, posted_at uuid, posted_by int, body text ) TRANSPOSED AS (posted_at), SPARSE(posted_by, body); {noformat} Not sure I'm very fond of that. The fact that the types of 'posted_by' and 'body' are actually not the type of the component itself but the type of the value, while they do correspond to an actual component, means that: # you have no way to give the type of that last component; and if it's not text, the notation won't look so nice. # the notation only works if the 'sparse' component is the last one, which may be the case for transposition of super columns, but feels arbitrarily limited otherwise. I think that the fact that 'posted_by' and 'body' are actually string literals is not very intuitive and only mildly consistent with the rest of the syntax. I'd also note that as far as I can tell, we wouldn't be able to handle the dynamic composite type with this in a meaningful way. But as you said above, this can be handled by a destructuring syntax, which I think we definitely need. CQL support for compound columns Key: CASSANDRA-2474 URL: https://issues.apache.org/jira/browse/CASSANDRA-2474 Project: Cassandra Issue Type: New Feature Components: API, Core Reporter: Eric Evans Assignee: Pavel Yaskevich Labels: cql Fix For: 1.1 Attachments: screenshot-1.jpg, screenshot-2.jpg For the most part, this boils down to supporting the specification of compound column names (the CQL syntax is colon-delimited terms), and then teaching the decoders (drivers) to create structures from the results.
[jira] [Updated] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jonathan Ellis updated CASSANDRA-2474: -- Attachment: 2474-transposed-1.PNG 2474-transposed-raw.PNG The crucial part of this latest proposal is that it really highlights that transposition really is just an implementation detail from the relational perspective. So, to flesh that out: {code} INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('tjefferson', '1818', 'jadams', 'Revolution was effected before the war commenced'); INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('tjefferson', '1763', 'jadams', 'Democracy will soon degenerate into an anarchy'); INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('tjefferson', '1790', 'gwashington', 'To be prepared for war is one of the most effectual means of preserving peace'); INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('bfranklin', '1781', 'tjefferson', 'Every government degenerates when trusted to the rulers of the people alone'); {code} ... corresponding to the data in !2474-transposed-1.PNG!, which in raw form looks like !2474-transposed-raw.PNG! Does that make sense? We're using TRANSPOSED AS similarly to how databases have used storage hints like CLUSTERED. It doesn't affect the relational model of the data, but it gives you different performance characteristics. (The analogy is particularly apt in that both CLUSTERED and TRANSPOSED AS affect ordering of results.) 
[jira] [Issue Comment Edited] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=13172604#comment-13172604 ] Jonathan Ellis edited comment on CASSANDRA-2474 at 12/19/11 8:59 PM: - The crucial part of this latest proposal is that it really highlights that transposition really is just an implementation detail from the relational perspective. So, to flesh that out: {code} INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('tjefferson', '1818', 'jadams', 'Revolution was effected before the war commenced'); INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('tjefferson', '1763', 'jadams', 'Democracy will soon degenerate into an anarchy'); INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('tjefferson', '1790', 'gwashington', 'To be prepared for war is one of the most effectual means of preserving peace'); INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('bfranklin', '1781', 'tjefferson', 'Every government degenerates when trusted to the rulers of the people alone'); {code} ... corresponding to the data in !2474-transposed-1.PNG! which in raw form looks like !2474-transposed-raw.PNG! Does that make sense? We're using TRANSPOSED AS similarly to how databases have used storage hints like CLUSTERED. It doesn't affect the relational model of the data, but it gives you different performance characteristics. (The analogy is particularly apt in that both CLUSTERED and TRANSPOSED AS affect ordering of results.) was (Author: jbellis): The crucial part of this latest proposal is that it really highlights that transposition really is just an implementation detail from the relational perspective. 
So, to flesh that out: {code} INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('tjefferson', '1818', 'jadams', 'Revolution was effected before the war commenced'); INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('tjefferson', '1763', 'jadams', 'Democracy will soon degenerate into an anarchy'); INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('tjefferson', '1790', 'gwashington', 'To be prepared for war is one of the most effectual means of preserving peace'); INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('bfranklin', '1781', 'tjefferson', 'Every government degenerates when trusted to the rulers of the people alone'); {code} ... corresponding to the data in !2474-transposed-1.PNG!, which in raw form looks like !2474-transposed-raw.PNG! Does that make sense? We're using TRANSPOSED AS similarly to how databases have used storage hints like CLUSTERED. It doesn't affect the relational model of the data, but it gives you different performance characteristics. (The analogy is particularly apt in that both CLUSTERED and TRANSPOSED AS affect ordering of results.)
[jira] [Issue Comment Edited] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172608#comment-13172608 ] Jonathan Ellis edited comment on CASSANDRA-2474 at 12/19/11 9:03 PM: - So, you could do queries like {code} SELECT * FROM timeline WHERE user_id = 'tjefferson' AND posted_at > 1770; {code} Which would give the resultset shown in !2474-transposed-select.PNG! was (Author: jbellis): So, you could do queries like {code} SELECT * FROM timeline WHERE user_id = 'tjefferson' AND posted_ad > 1770; {code} Which would give the resultset shown in !2474-transposed-select.PNG!
[jira] [Issue Comment Edited] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=13172604#comment-13172604 ] Jonathan Ellis edited comment on CASSANDRA-2474 at 12/19/11 9:02 PM: - The crucial part of this latest proposal is that it really highlights that transposition really is just an implementation detail from the relational perspective. So, to flesh that out: {code} INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('tjefferson', '1818', 'jadams', 'Revolution was effected before the war commenced'); INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('tjefferson', '1763', 'jadams', 'Democracy will soon degenerate into an anarchy'); INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('tjefferson', '1790', 'gwashington', 'To be prepared for war is one of the most effectual means of preserving peace'); INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('bfranklin', '1781', 'tjefferson', 'Every government degenerates when trusted to the rulers of the people alone'); {code} ... corresponding to the data in !2474-transposed-1.PNG! which in raw form looks like !2474-transposed-raw.PNG! Does that make sense? We're using TRANSPOSED AS similarly to how databases have used storage hints like CLUSTERED. It doesn't affect the relational model of the data, but it gives you different performance characteristics. (The analogy is particularly apt in that both CLUSTERED and TRANSPOSED AS affect ordering of results. EDIT: oops, goofed in ordering of the tjefferson row in my diagram.) was (Author: jbellis): The crucial part of this latest proposal is that it really highlights that transposition really is just an implementation detail from the relational perspective. 
So, to flesh that out: {code} INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('tjefferson', '1818', 'jadams', 'Revolution was effected before the war commenced'); INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('tjefferson', '1763', 'jadams', 'Democracy will soon degenerate into an anarchy'); INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('tjefferson', '1790', 'gwashington', 'To be prepared for war is one of the most effectual means of preserving peace'); INSERT INTO timeline (user_id, posted_at, posted_by, body) VALUES ('bfranklin', '1781', 'tjefferson', 'Every government degenerates when trusted to the rulers of the people alone'); {code} ... corresponding to the data in !2474-transposed-1.PNG! which in raw form looks like !2474-transposed-raw.PNG! Does that make sense? We're using TRANSPOSED AS similarly to how databases have used storage hints like CLUSTERED. It doesn't affect the relational model of the data, but it gives you different performance characteristics. (The analogy is particularly apt in that both CLUSTERED and TRANSPOSED AS affect ordering of results.)
[jira] [Updated] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jonathan Ellis updated CASSANDRA-2474: -- Attachment: 2474-transposed-select.PNG So, you could do queries like {code} SELECT * FROM timeline WHERE user_id = 'tjefferson' AND posted_ad > 1770; {code} Which would give the resultset shown in !2474-transposed-select.PNG!
[jira] [Commented] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172625#comment-13172625 ] Jonathan Ellis commented on CASSANDRA-2474: --- bq. you have no way to give the type of that last component; and if it's not text, the notation won't look so nice Okay, so we can make the following minor changes to make the syntax more flexible: - Drop the AS and the first list from TRANSPOSED AS; all columns are transposed, so we can just list the sparse ones (which may occur anywhere in the list). - Allow an optional WITH (or AND, if there is already a WITH SPARSE) clause of COLUMN NAMES that includes the name's type. So my first example would become {noformat} CREATE TABLE timeline ( userid int primary key, posted_at uuid, posted_by int, body text ) TRANSPOSED WITH SPARSE(posted_by, body); {noformat} A more complex one including some non-utf8 types might be {noformat} CREATE TABLE events ( series text primary key, ts1 int, cat text, subcat text, 1337 uuid, 92d21d0a-d6cb-437c-9d3f-b67aa733a19f bigint ) TRANSPOSED WITH COLUMN NAMES (1337 int, 92d21d0a-d6cb-437c-9d3f-b67aa733a19f uuid); {noformat} (I'm waving my hands a bit here and using "" to denote quotes-to-help-parsing rather than quotes-to-indicate-string. This is the convention postgresql uses.) bq. the notation only works if the 'sparse' component is the last one, which may be the case for transposition of super columns, but feels arbitrarily limited otherwise True, but none of the other proposals even come *close* to being as friendly as this one for typical cases, so I think we're in the right space to make common things easy and unusual things possible. Tree-like sparse models may also belong in the CASSANDRA-3647 document/destructuring API ticket. If not, maybe we need the three-tuple model as well (but I'd rather let that slide until/unless we have a real need for it).
[jira] [Commented] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172640#comment-13172640 ] Pavel Yaskevich commented on CASSANDRA-2474: How about we change it to: {code} CREATE TRANSPOSED TABLE events ( series text primary key, ts1 int SPARSE, cat text, subcat text, 1337 uuid DENSE(int), 92d21d0a-d6cb-437c-9d3f-b67aa733a19f bigint SPARSE(uuid) ); {code} Where SPARSE could be used with or without an argument identifying the column comparator, DENSE could only be used with an argument, and columns without a keyword are DENSE by default?... This allows specifying the comparator and the order of sparse/dense columns.
[jira] [Commented] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172646#comment-13172646 ] Jonathan Ellis commented on CASSANDRA-2474: --- I like it better with the TRANSPOSED information in a separate block where it messes w/ standard CREATE grammar less.
[jira] [Commented] (CASSANDRA-3327) Support TimeUUID in CassandraStorage
[ https://issues.apache.org/jira/browse/CASSANDRA-3327?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172654#comment-13172654 ] Hudson commented on CASSANDRA-3327: --- Integrated in Cassandra-0.8 #420 (See [https://builds.apache.org/job/Cassandra-0.8/420/]) TimeUUID support in CassandraStorage. Patch by brandonwilliams, reviewed by Rick Branson for CASSANDRA-3327 brandonwilliams : http://svn.apache.org/viewcvs.cgi/?root=Apache-SVN&view=rev&rev=1220926 Files : * /cassandra/branches/cassandra-0.8/contrib/pig/src/java/org/apache/cassandra/hadoop/pig/CassandraStorage.java Support TimeUUID in CassandraStorage Key: CASSANDRA-3327 URL: https://issues.apache.org/jira/browse/CASSANDRA-3327 Project: Cassandra Issue Type: Bug Components: Contrib Affects Versions: 0.8.7 Environment: Cassandra 0.8.6 Build #348 (CASSANDRA-2777 + CASSANDRA-2810) Reporter: Manuel Kreutz Assignee: Brandon Williams Labels: pig Fix For: 0.8.10, 1.0.7 Attachments: 3327-v2.txt, 3327.txt Cassandra CLI: {code} grunt> raw = LOAD 'cassandra://TEST/CF' USING CassandraStorage() AS ( key:chararray, columns:bag { column:tuple( name, value ) }); grunt> describe raw; raw: {key: chararray,columns: {(name: bytearray,value: bytearray)}} log_test = FOREACH raw GENERATE (CHARARRAY) key, flatten(columns); grunt> DUMP log_test; {code} Returns: {code} org.apache.pig.impl.logicalLayer.FrontendException: ERROR 1066: Unable to open iterator for alias log_test. Backend error : Unexpected data type java.util.UUID found in stream.
Note only standard Pig type is supported when you output from UDF/LoadFunc at org.apache.pig.PigServer.openIterator(PigServer.java:890) at org.apache.pig.tools.grunt.GruntParser.processDump(GruntParser.java:655) at org.apache.pig.tools.pigscript.parser.PigScriptParser.parse(PigScriptParser.java:303) at org.apache.pig.tools.grunt.GruntParser.parseStopOnError(GruntParser.java:188) at org.apache.pig.tools.grunt.GruntParser.parseStopOnError(GruntParser.java:164) at org.apache.pig.tools.grunt.Grunt.run(Grunt.java:67) at org.apache.pig.Main.run(Main.java:487) at org.apache.pig.Main.main(Main.java:108) Caused by: java.lang.RuntimeException: Unexpected data type java.util.UUID found in stream. Note only standard Pig type is supported when you output from UDF/LoadFunc at org.apache.pig.data.BinInterSedes.writeDatum(BinInterSedes.java:478) at org.apache.pig.data.BinInterSedes.writeTuple(BinInterSedes.java:542) at org.apache.pig.data.BinInterSedes.writeDatum(BinInterSedes.java:357) at org.apache.pig.impl.io.InterRecordWriter.write(InterRecordWriter.java:73) at org.apache.pig.impl.io.InterStorage.putNext(InterStorage.java:87) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat$PigRecordWriter.write(PigOutputFormat.java:138) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat$PigRecordWriter.write(PigOutputFormat.java:97) at org.apache.hadoop.mapred.MapTask$NewDirectOutputCollector.write(MapTask.java:498) at org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapOnly$Map.collect(PigMapOnly.java:48) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapBase.runPipeline(PigMapBase.java:263) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapBase.map(PigMapBase.java:256) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapBase.map(PigMapBase.java:58) at 
org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144) at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:621) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305) {code} According to driftx on IRC the setTupleValue function in CassandraStorage needs to handle the uuid case and cast it to a DataByteArray.
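The fix driftx describes boils down to serializing the java.util.UUID into its 16 raw bytes so Pig can carry it as a bytearray (a Pig DataByteArray wraps exactly such a byte[]). A minimal JDK-only sketch of that conversion; the class and method names here are illustrative, not the actual contents of the 3327 patch:

```java
import java.nio.ByteBuffer;
import java.util.UUID;

public class UuidToBytes {
    // Serialize a java.util.UUID into its 16 raw bytes (big-endian,
    // most-significant long first). Wrapping this byte[] in a Pig
    // DataByteArray is then enough for BinInterSedes to stream it.
    public static byte[] toBytes(UUID uuid) {
        ByteBuffer buf = ByteBuffer.allocate(16);
        buf.putLong(uuid.getMostSignificantBits());
        buf.putLong(uuid.getLeastSignificantBits());
        return buf.array();
    }

    public static void main(String[] args) {
        UUID u = UUID.fromString("92d21d0a-d6cb-437c-9d3f-b67aa733a19f");
        System.out.println(toBytes(u).length); // prints 16
    }
}
```

With a conversion like this in setTupleValue, the tuple no longer contains a raw java.util.UUID, so the "Unexpected data type" error from BinInterSedes.writeDatum goes away.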
[jira] [Commented] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172670#comment-13172670 ] Pavel Yaskevich commented on CASSANDRA-2474: Here is another option for this: {code} TRANSPOSED AS (column ?MODIFIER(comparator), ) {code} where MODIFIER = SPARSE | DENSE and comparator = utf8, int, uuid, timeuuid, ...; if no MODIFIER set then MODIFIER = DENSE(default_type) {code} CREATE TABLE timeline ( userid int primary key, posted_at uuid, posted_by int, body text, 92d21d0a-d6cb-437c-9d3f-b67aa733a19f bigint ) TRANSPOSED AS (posted_at, posted_by SPARSE, 92d21d0a-d6cb-437c-9d3f-b67aa733a19f DENSE(uuid), body SPARSE); {code} CQL support for compound columns Key: CASSANDRA-2474 URL: https://issues.apache.org/jira/browse/CASSANDRA-2474 Project: Cassandra Issue Type: New Feature Components: API, Core Reporter: Eric Evans Assignee: Pavel Yaskevich Labels: cql Fix For: 1.1 Attachments: 2474-transposed-1.PNG, 2474-transposed-raw.PNG, 2474-transposed-select.PNG, screenshot-1.jpg, screenshot-2.jpg For the most part, this boils down to supporting the specification of compound column names (the CQL syntax is colon-delimited terms), and then teaching the decoders (drivers) to create structures from the results.
[jira] [Issue Comment Edited] (CASSANDRA-2474) CQL support for compound columns
[ https://issues.apache.org/jira/browse/CASSANDRA-2474?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13172670#comment-13172670 ] Pavel Yaskevich edited comment on CASSANDRA-2474 at 12/19/11 10:12 PM: --- Here is another option for this: {code} TRANSPOSED AS (column ?MODIFIER(?comparator), ) {code} where MODIFIER = SPARSE | DENSE and comparator = utf8, int, uuid, timeuuid, ...; if no MODIFIER set then MODIFIER = DENSE(default_type) {code} CREATE TABLE timeline ( userid int primary key, posted_at uuid, posted_by int, body text, 92d21d0a-d6cb-437c-9d3f-b67aa733a19f bigint ) TRANSPOSED AS (posted_at, posted_by SPARSE, 92d21d0a-d6cb-437c-9d3f-b67aa733a19f DENSE(uuid), body SPARSE); {code} was (Author: xedin): Here is another option for this: {code} TRANSPOSED AS (column ?MODIFIER(comparator), ) {code} where MODIFIER = SPARSE | DENSE and comparator = utf8, int, uuid, timeuuid, ...; if no MODIFIER set then MODIFIER = DENSE(default_type) {code} CREATE TABLE timeline ( userid int primary key, posted_at uuid, posted_by int, body text, 92d21d0a-d6cb-437c-9d3f-b67aa733a19f bigint ) TRANSPOSED AS (posted_at, posted_by SPARSE, 92d21d0a-d6cb-437c-9d3f-b67aa733a19f DENSE(uuid), body SPARSE); {code} CQL support for compound columns Key: CASSANDRA-2474 URL: https://issues.apache.org/jira/browse/CASSANDRA-2474 Project: Cassandra Issue Type: New Feature Components: API, Core Reporter: Eric Evans Assignee: Pavel Yaskevich Labels: cql Fix For: 1.1 Attachments: 2474-transposed-1.PNG, 2474-transposed-raw.PNG, 2474-transposed-select.PNG, screenshot-1.jpg, screenshot-2.jpg For the most part, this boils down to supporting the specification of compound column names (the CQL syntax is colon-delimited terms), and then teaching the decoders (drivers) to create structures from the results.
[jira] [Updated] (CASSANDRA-3568) cassandra-cli and nodetool should connect to localhost by default
[ https://issues.apache.org/jira/browse/CASSANDRA-3568?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Rick Branson updated CASSANDRA-3568: Attachment: 3568-v2.txt CLI will no longer bail if it can't connect to the initial host, so we no longer need the special case for localhost and users can still connect anywhere after launching the CLI. cassandra-cli and nodetool should connect to localhost by default - Key: CASSANDRA-3568 URL: https://issues.apache.org/jira/browse/CASSANDRA-3568 Project: Cassandra Issue Type: Improvement Components: Core Reporter: Rick Branson Assignee: Rick Branson Priority: Minor Attachments: 3568-v2.txt, 3568.txt The command line tools (cassandra-cli and nodetool) should connect by default to localhost. This behavior is a bit more user-friendly and reflects somewhat of a convention among command-line database tools for popular open source databases such as MySQL and PostgreSQL.
[jira] [Updated] (CASSANDRA-3554) Hints are not replayed unless node was marked down
[ https://issues.apache.org/jira/browse/CASSANDRA-3554?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jonathan Ellis updated CASSANDRA-3554: -- Attachment: 3554-1.0.txt combined patch against 1.0 Hints are not replayed unless node was marked down -- Key: CASSANDRA-3554 URL: https://issues.apache.org/jira/browse/CASSANDRA-3554 Project: Cassandra Issue Type: Bug Affects Versions: 1.0.0 Reporter: Jonathan Ellis Assignee: Jonathan Ellis Labels: hintedhandoff, jmx Fix For: 1.0.7 Attachments: 0001-cleanup.patch, 0002-deliver.patch, 3554-1.0.txt If B drops a write from A because it is overwhelmed (but not dead), A will hint the write. But it will never get notified that B is back up (since it was never down), so it will never attempt hint delivery.
[cassandra-node] push by tomaz.muraus - Allow user to pass optional connection info to amendError and if provi... on 2011-12-19 22:51 GMT
Revision: 3d53cb35fd30 Author: Tomaz Muraus to...@tomaz.me Date: Mon Dec 19 14:50:34 2011 Log: Allow user to pass optional connection info to amendError and if provided, attach it to the error object. http://code.google.com/a/apache-extras.org/p/cassandra-node/source/detail?r=3d53cb35fd30 Modified: /lib/driver.js === --- /lib/driver.js Fri Dec 16 08:46:02 2011 +++ /lib/driver.js Mon Dec 19 14:50:34 2011 @@ -126,8 +126,14 @@ UUID = module.exports.UUID = require('uuid-js'); -/** make sure that err.message is set to something that makes sense. */ -function amendError(err) { +/** + * Make sure that err.message is set to something that makes sense. + * + * @param {Object} err Error object. + * @param {Object} connectionInfo Optional connection info object which is + * attached to the error. + */ +function amendError(err, connectionInfo) { if (!err.message || err.message.length === 0) { if (err.name === 'NotFoundException') { err.message = 'ColumnFamily or Keyspace does not exist'; @@ -135,6 +141,8 @@ err.message = err.why; } } + + err.connectionInfo = connectionInfo; return err; } @@ -309,6 +317,8 @@ ], function(err, res, conn) { + var connectionInfo; + if (conn) { self.pool.release(conn); } @@ -322,7 +332,8 @@ errback(); } else { -err = amendError(err); +connectionInfo = (conn) ?
conn.connectionInfo : null; +err = amendError(err, connectionInfo); callback(err, res); } } @@ -379,7 +390,7 @@ this.con.on('error', function(err) { clearTimeout(timeoutId); -amendError(err); +amendError(err, self.connectionInfo); callback(err); }); @@ -411,7 +422,7 @@ self.client.login(creds, function(err) { if (timeoutId) { timeoutId = clearTimeout(timeoutId); -if (err) { amendError(err); } +if (err) { amendError(err, self.connectionInfo); } cb(err); } }); @@ -432,7 +443,7 @@ if (timeoutId) { timeoutId = clearTimeout(timeoutId); if (err) { -amendError(err); +amendError(err, self.connectionInfo); cb(err); } else { for (var i = 0; i < def.cf_defs.length; i++) { @@ -465,7 +476,7 @@ self.client.set_keyspace(self.connectionInfo.keyspace, function(err) { if (timeoutId) { timeoutId = clearTimeout(timeoutId); - if (err) { amendError(err); } + if (err) { amendError(err, self.connectionInfo); } cb(err); } }); @@ -540,7 +551,7 @@ } if (err) { -amendError(err); +amendError(err, self.connectionInfo); callback(err, null); } else if (!res) { callback(new Error('No results'), null);
[jira] [Updated] (CASSANDRA-3430) Break Big Compaction Lock apart
[ https://issues.apache.org/jira/browse/CASSANDRA-3430?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jonathan Ellis updated CASSANDRA-3430: -- Fix Version/s: (was: 1.0.7) 1.1 Break Big Compaction Lock apart --- Key: CASSANDRA-3430 URL: https://issues.apache.org/jira/browse/CASSANDRA-3430 Project: Cassandra Issue Type: Improvement Components: Core Reporter: Jonathan Ellis Assignee: Jonathan Ellis Priority: Minor Labels: compaction Fix For: 1.1 Attachments: 3430-1.0.txt, 3430-1.1.txt
[jira] [Reopened] (CASSANDRA-3250) fsync the directory after new sstable or commit log segment are created
[ https://issues.apache.org/jira/browse/CASSANDRA-3250?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jonathan Ellis reopened CASSANDRA-3250: --- ... actually, can you back this out of 1.0 and put it in 1.1 instead? It *should* be fine but let's not take chances with regressions. fsync the directory after new sstable or commit log segment are created --- Key: CASSANDRA-3250 URL: https://issues.apache.org/jira/browse/CASSANDRA-3250 Project: Cassandra Issue Type: Bug Components: Core Reporter: Zhu Han Assignee: Pavel Yaskevich Priority: Minor Fix For: 1.0.7 Attachments: CASSANDRA-3250-v2.patch, CASSANDRA-3250-v3.patch, CASSANDRA-3250.patch The manual of fsync says: bq. Calling fsync() does not necessarily ensure that the entry in the directory containing the file has also reached disk. For that an explicit fsync() on a file descriptor for the directory is also needed. At least on ext4, syncing the directory is a must-have step, as described by [1]. Otherwise, the new sstables or commit logs could be missed after a crash even if the file itself is synced. Unfortunately, the JVM does not provide an approach to sync the directory... [1] http://www.linuxfoundation.org/news-media/blogs/browse/2009/03/don%E2%80%99t-fear-fsync
[jira] [Updated] (CASSANDRA-3554) Hints are not replayed unless node was marked down
[ https://issues.apache.org/jira/browse/CASSANDRA-3554?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jonathan Ellis updated CASSANDRA-3554: -- Attachment: 3554-1.0.txt updated 1.0 rebase that is pre-1034 friendly Hints are not replayed unless node was marked down -- Key: CASSANDRA-3554 URL: https://issues.apache.org/jira/browse/CASSANDRA-3554 Project: Cassandra Issue Type: Bug Affects Versions: 1.0.0 Reporter: Jonathan Ellis Assignee: Jonathan Ellis Labels: hintedhandoff, jmx Fix For: 1.0.7 Attachments: 0001-cleanup.patch, 0002-deliver.patch, 3554-1.0.txt If B drops a write from A because it is overwhelmed (but not dead), A will hint the write. But it will never get notified that B is back up (since it was never down), so it will never attempt hint delivery.
[jira] [Updated] (CASSANDRA-3554) Hints are not replayed unless node was marked down
[ https://issues.apache.org/jira/browse/CASSANDRA-3554?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jonathan Ellis updated CASSANDRA-3554: -- Attachment: (was: 3554-1.0.txt) Hints are not replayed unless node was marked down -- Key: CASSANDRA-3554 URL: https://issues.apache.org/jira/browse/CASSANDRA-3554 Project: Cassandra Issue Type: Bug Affects Versions: 1.0.0 Reporter: Jonathan Ellis Assignee: Jonathan Ellis Labels: hintedhandoff, jmx Fix For: 1.0.7 Attachments: 0001-cleanup.patch, 0002-deliver.patch, 3554-1.0.txt If B drops a write from A because it is overwhelmed (but not dead), A will hint the write. But it will never get notified that B is back up (since it was never down), so it will never attempt hint delivery.
svn commit: r1221015 - in /cassandra/branches/cassandra-1.0: CHANGES.txt src/java/org/apache/cassandra/io/util/SequentialWriter.java src/java/org/apache/cassandra/utils/CLibrary.java
Author: xedin Date: Mon Dec 19 23:42:28 2011 New Revision: 1221015 URL: http://svn.apache.org/viewvc?rev=1221015view=rev Log: Revert fsync the directory after new sstable or commitlog segment are created Modified: cassandra/branches/cassandra-1.0/CHANGES.txt cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/util/SequentialWriter.java cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/utils/CLibrary.java Modified: cassandra/branches/cassandra-1.0/CHANGES.txt URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/CHANGES.txt?rev=1221015r1=1221014r2=1221015view=diff == --- cassandra/branches/cassandra-1.0/CHANGES.txt (original) +++ cassandra/branches/cassandra-1.0/CHANGES.txt Mon Dec 19 23:42:28 2011 @@ -2,7 +2,6 @@ * fix assertion when dropping a columnfamily with no sstables (CASSANDRA-3614) * more efficient allocation of small bloom filters (CASSANDRA-3618) * CLibrary.createHardLinkWithExec() to check for errors (CASSANDRA-3101) - * fsync the directory after new sstable or commitlog segment are created (CASSANDRA-3250) * Avoid creating empty and non cleaned writer during compaction (CASSANDRA-3616) Merged from 0.8: * prevent new nodes from thinking down nodes are up forever (CASSANDRA-3626) Modified: cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/util/SequentialWriter.java URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/util/SequentialWriter.java?rev=1221015r1=1221014r2=1221015view=diff == --- cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/util/SequentialWriter.java (original) +++ cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/util/SequentialWriter.java Mon Dec 19 23:42:28 2011 @@ -39,9 +39,6 @@ public class SequentialWriter extends Ou protected byte[] buffer; private final boolean skipIOCache; private final int fd; -private final int directoryFD; -// directory should be synced only after first file sync, in other 
words, only once per file -private boolean directorySynced = false; protected long current = 0, bufferOffset; protected int validBufferBytes; @@ -63,7 +60,6 @@ public class SequentialWriter extends Ou buffer = new byte[bufferSize]; this.skipIOCache = skipIOCache; fd = CLibrary.getfd(out.getFD()); -directoryFD = CLibrary.tryOpenDirectory(file.getParent()); stream = new DataOutputStream(this); } @@ -152,12 +148,6 @@ public class SequentialWriter extends Ou flushInternal(); out.getFD().sync(); -if (!directorySynced) -{ -CLibrary.trySync(directoryFD); -directorySynced = true; -} - syncNeeded = false; } } @@ -298,7 +288,6 @@ public class SequentialWriter extends Ou CLibrary.trySkipCache(fd, 0, 0); out.close(); -CLibrary.tryCloseFD(directoryFD); } /** Modified: cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/utils/CLibrary.java URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/utils/CLibrary.java?rev=1221015r1=1221014r2=1221015view=diff == --- cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/utils/CLibrary.java (original) +++ cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/utils/CLibrary.java Mon Dec 19 23:42:28 2011 @@ -46,7 +46,6 @@ public final class CLibrary private static final int F_SETFL = 4; /* set file status flags */ private static final int F_NOCACHE = 48; /* Mac OS X specific flag, turns cache on/off */ private static final int O_DIRECT = 04; /* fcntl.h */ -private static final int O_RDONLY = ; /* fcntl.h */ private static final int POSIX_FADV_NORMAL = 0; /* fadvise.h */ private static final int POSIX_FADV_RANDOM = 1; /* fadvise.h */ @@ -85,11 +84,7 @@ public final class CLibrary // fadvice public static native int posix_fadvise(int fd, long offset, int len, int flag) throws LastErrorException; - -public static native int open(String path, int flags) throws LastErrorException; -public static native int fsync(int fd) throws LastErrorException; -public static native int 
close(int fd) throws LastErrorException; - + private static int errno(RuntimeException e) { assert e instanceof LastErrorException; @@ -266,73 +261,6 @@ public final class CLibrary return result; } -public static int tryOpenDirectory(String path) -{ -int fd = -1; - -try -{ -return open(path, O_RDONLY);
[cassandra-node] push by tomaz.muraus - Update package.json and CHANGES. on 2011-12-19 22:58 GMT
Revision: 142906874cbf Author: Tomaz Muraus to...@tomaz.me Date: Mon Dec 19 14:58:12 2011 Log: Update package.json and CHANGES. http://code.google.com/a/apache-extras.org/p/cassandra-node/source/detail?r=142906874cbf Modified: /CHANGES /package.json === --- /CHANGES Fri Dec 2 12:57:55 2011 +++ /CHANGES Mon Dec 19 14:58:12 2011 @@ -1,3 +1,8 @@ +Changes with cassandra-client 0.6.1: + +- Attach 'connectionInfo' object to the error object which is passed to the + callbacks. + Changes with cassandra-client 0.6.0: - Set a timeout for login, learn and use steps. Defaults to 1000ms, 2000ms and === --- /package.json Fri Dec 16 08:57:33 2011 +++ /package.json Mon Dec 19 14:58:12 2011 @@ -8,7 +8,7 @@ ], "name": "cassandra-client", "description": "Node.js CQL driver for Apache Cassandra", - "version": "0.6.0", + "version": "0.6.1", "homepage": "https://github.com/racker/node-cassandra-client", "repository": { "type": "git",
svn commit: r1221020 - in /cassandra/trunk: ./ contrib/ interface/thrift/gen-java/org/apache/cassandra/thrift/
Author: xedin Date: Mon Dec 19 23:47:16 2011 New Revision: 1221020 URL: http://svn.apache.org/viewvc?rev=1221020view=rev Log: merge from 1.0 Modified: cassandra/trunk/ (props changed) cassandra/trunk/CHANGES.txt cassandra/trunk/contrib/ (props changed) cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java (props changed) cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/Column.java (props changed) cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/InvalidRequestException.java (props changed) cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/NotFoundException.java (props changed) cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/SuperColumn.java (props changed) Propchange: cassandra/trunk/ -- --- svn:mergeinfo (original) +++ svn:mergeinfo Mon Dec 19 23:47:16 2011 @@ -4,7 +4,7 @@ /cassandra/branches/cassandra-0.8:1090934-1125013,1125019-1198724,1198726-1206097,1206099-1212854,1212938,1214916 /cassandra/branches/cassandra-0.8.0:1125021-1130369 /cassandra/branches/cassandra-0.8.1:1101014-1125018 -/cassandra/branches/cassandra-1.0:1167085-1220666 +/cassandra/branches/cassandra-1.0:1167085-1221019 /cassandra/branches/cassandra-1.0.0:1167104-1167229,1167232-1181093,1181741,1181816,1181820,1182951,1183243 /cassandra/branches/cassandra-1.0.5:1208016 /cassandra/tags/cassandra-0.7.0-rc3:1051699-1053689 Modified: cassandra/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/cassandra/trunk/CHANGES.txt?rev=1221020r1=1221019r2=1221020view=diff == --- cassandra/trunk/CHANGES.txt (original) +++ cassandra/trunk/CHANGES.txt Mon Dec 19 23:47:16 2011 @@ -26,13 +26,13 @@ * Improve memtable slice iteration performance (CASSANDRA-3545) * more efficient allocation of small bloom filters (CASSANDRA-3618) * Use separate writer thread in SSTableSimpleUnsortedWriter (CASSANDRA-3619) + * fsync the directory after new sstable or commitlog segment are created (CASSANDRA-3250) 1.0.7 * fix 
assertion when dropping a columnfamily with no sstables (CASSANDRA-3614) * more efficient allocation of small bloom filters (CASSANDRA-3618) * CLibrary.createHardLinkWithExec() to check for errors (CASSANDRA-3101) - * fsync the directory after new sstable or commitlog segment are created (CASSANDRA-3250) * Avoid creating empty and non cleaned writer during compaction (CASSANDRA-3616) Merged from 0.8: * prevent new nodes from thinking down nodes are up forever (CASSANDRA-3626) Propchange: cassandra/trunk/contrib/ -- --- svn:mergeinfo (original) +++ svn:mergeinfo Mon Dec 19 23:47:16 2011 @@ -4,7 +4,7 @@ /cassandra/branches/cassandra-0.8/contrib:1090934-1125013,1125019-1198724,1198726-1206097,1206099-1212854,1212938,1214916 /cassandra/branches/cassandra-0.8.0/contrib:1125021-1130369 /cassandra/branches/cassandra-0.8.1/contrib:1101014-1125018 -/cassandra/branches/cassandra-1.0/contrib:1167085-1220666 +/cassandra/branches/cassandra-1.0/contrib:1167085-1221019 /cassandra/branches/cassandra-1.0.0/contrib:1167104-1167229,1167232-1181093,1181741,1181816,1181820,1182951,1183243 /cassandra/branches/cassandra-1.0.5/contrib:1208016 /cassandra/tags/cassandra-0.7.0-rc3/contrib:1051699-1053689 Propchange: cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java -- --- svn:mergeinfo (original) +++ svn:mergeinfo Mon Dec 19 23:47:16 2011 @@ -4,7 +4,7 @@ /cassandra/branches/cassandra-0.8/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1090934-1125013,1125019-1198724,1198726-1206097,1206099-1212854,1212938,1214916 /cassandra/branches/cassandra-0.8.0/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1125021-1130369 /cassandra/branches/cassandra-0.8.1/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1101014-1125018 -/cassandra/branches/cassandra-1.0/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1167085-1220666 
+/cassandra/branches/cassandra-1.0/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1167085-1221019 /cassandra/branches/cassandra-1.0.0/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1167104-1167229,1167232-1181093,1181741,1181816,1181820,1182951,1183243 /cassandra/branches/cassandra-1.0.5/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1208016 /cassandra/tags/cassandra-0.7.0-rc3/interface/thrift/gen-java/org/apache/cassandra/thrift/Cassandra.java:1051699-1053689 Propchange: cassandra/trunk/interface/thrift/gen-java/org/apache/cassandra/thrift/Column.java
[jira] [Updated] (CASSANDRA-3250) fsync the directory after new sstable or commit log segment are created
[ https://issues.apache.org/jira/browse/CASSANDRA-3250?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Pavel Yaskevich updated CASSANDRA-3250: --- Fix Version/s: (was: 1.0.7) 1.1 fsync the directory after new sstable or commit log segment are created --- Key: CASSANDRA-3250 URL: https://issues.apache.org/jira/browse/CASSANDRA-3250 Project: Cassandra Issue Type: Bug Components: Core Reporter: Zhu Han Assignee: Pavel Yaskevich Priority: Minor Fix For: 1.1 Attachments: CASSANDRA-3250-v2.patch, CASSANDRA-3250-v3.patch, CASSANDRA-3250.patch The manual of fsync says: bq. Calling fsync() does not necessarily ensure that the entry in the directory containing the file has also reached disk. For that an explicit fsync() on a file descriptor for the directory is also needed. At least on ext4, syncing the directory is a must-have step, as described by [1]. Otherwise, the new sstables or commit logs could be missed after a crash even if the file itself is synced. Unfortunately, the JVM does not provide an approach to sync the directory... [1] http://www.linuxfoundation.org/news-media/blogs/browse/2009/03/don%E2%80%99t-fear-fsync
[jira] [Resolved] (CASSANDRA-3250) fsync the directory after new sstable or commit log segment are created
[ https://issues.apache.org/jira/browse/CASSANDRA-3250?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Pavel Yaskevich resolved CASSANDRA-3250. Resolution: Fixed Changed to 1.1 only. fsync the directory after new sstable or commit log segment are created --- Key: CASSANDRA-3250 URL: https://issues.apache.org/jira/browse/CASSANDRA-3250 Project: Cassandra Issue Type: Bug Components: Core Reporter: Zhu Han Assignee: Pavel Yaskevich Priority: Minor Fix For: 1.1 Attachments: CASSANDRA-3250-v2.patch, CASSANDRA-3250-v3.patch, CASSANDRA-3250.patch The manual of fsync says: bq. Calling fsync() does not necessarily ensure that the entry in the directory containing the file has also reached disk. For that an explicit fsync() on a file descriptor for the directory is also needed. At least on ext4, syncing the directory is a must-have step, as described by [1]. Otherwise, the new sstables or commit logs could be missed after a crash even if the file itself is synced. Unfortunately, the JVM does not provide an approach to sync the directory... [1] http://www.linuxfoundation.org/news-media/blogs/browse/2009/03/don%E2%80%99t-fear-fsync
[jira] [Commented] (CASSANDRA-3511) Supercolumn key caches are not saved
[ https://issues.apache.org/jira/browse/CASSANDRA-3511?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=13172807#comment-13172807 ] Joaquin Casares commented on CASSANDRA-3511: http://aep.appspot.com/display/YS0VCvsOzFbkE-72IFOqBm8-URg/ I tried this multiple times and switching the super column family creation order and still saw all the cache fill up. On Cassandra 1.0.5. Supercolumn key caches are not saved Key: CASSANDRA-3511 URL: https://issues.apache.org/jira/browse/CASSANDRA-3511 Project: Cassandra Issue Type: Bug Components: Core Affects Versions: 1.0.2, 1.0.3 Reporter: Radim Kolar Priority: Minor Labels: supercolumns Attachments: failed-to-save-after-load-KeyCache, rapidshare-resultcache-KeyCache cache saving seems to be broken in 1.0.2 and 1.0.3 i have 2 CF in keyspace with enabled cache saving and only one gets its key cache saved. It worked perfectly in 0.8, both were saved. This one works: create column family query2 with column_type = 'Standard' and comparator = 'AsciiType' and default_validation_class = 'BytesType' and key_validation_class = 'UTF8Type' and rows_cached = 500.0 and row_cache_save_period = 0 and row_cache_keys_to_save = 2147483647 and keys_cached = 20.0 and key_cache_save_period = 14400 and read_repair_chance = 1.0 and gc_grace = 864000 and min_compaction_threshold = 5 and max_compaction_threshold = 10 and replicate_on_write = false and row_cache_provider = 'ConcurrentLinkedHashCacheProvider' and compaction_strategy = 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy' This does not create column family dkb13 with column_type = 'Super' and comparator = 'LongType' and subcomparator = 'AsciiType' and default_validation_class = 'BytesType' and key_validation_class = 'UTF8Type' and rows_cached = 600.0 and row_cache_save_period = 0 and row_cache_keys_to_save = 2147483647 and keys_cached = 20.0 and key_cache_save_period = 14400 and read_repair_chance = 1.0 and gc_grace = 864000 and 
min_compaction_threshold = 5 and max_compaction_threshold = 10 and replicate_on_write = false and row_cache_provider = 'ConcurrentLinkedHashCacheProvider' and compaction_strategy = 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy' in second test system i created these 2 column families and none of them got a single cache key saved. Both have save period 30 seconds - their cache should save often. It's not that standard column family works while super does not. create column family test1 with column_type = 'Standard' and comparator = 'BytesType' and default_validation_class = 'BytesType' and key_validation_class = 'BytesType' and rows_cached = 0.0 and row_cache_save_period = 0 and row_cache_keys_to_save = 2147483647 and keys_cached = 20.0 and key_cache_save_period = 30 and read_repair_chance = 1.0 and gc_grace = 864000 and min_compaction_threshold = 4 and max_compaction_threshold = 32 and replicate_on_write = true and row_cache_provider = 'SerializingCacheProvider' and compaction_strategy = 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'; create column family test2 with column_type = 'Standard' and comparator = 'BytesType' and default_validation_class = 'BytesType' and key_validation_class = 'BytesType' and rows_cached = 0.0 and row_cache_save_period = 0 and row_cache_keys_to_save = 2147483647 and keys_cached = 20.0 and key_cache_save_period = 30 and read_repair_chance = 1.0 and gc_grace = 864000 and min_compaction_threshold = 4 and max_compaction_threshold = 32 and replicate_on_write = true and row_cache_provider = 'SerializingCacheProvider' and compaction_strategy = 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'; If this is done on purpose, for example cassandra 1.0 is doing some heuristic decision if cache should be saved or not, then it should be removed. Saving cache is fast.
[jira] [Issue Comment Edited] (CASSANDRA-3511) Supercolumn key caches are not saved
[ https://issues.apache.org/jira/browse/CASSANDRA-3511?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=13172807#comment-13172807 ] Joaquin Casares edited comment on CASSANDRA-3511 at 12/20/11 12:58 AM: --- http://aep.appspot.com/display/YS0VCvsOzFbkE-72IFOqBm8-URg/ I tried this multiple times and switching the super column family creation order and still saw all the cache fill up. On Cassandra 1.0.5. What happens when you run these scripts on your machine? NOTE: This will drop all of your Keyspace1 data, you may have to edit that line. was (Author: j.casares): http://aep.appspot.com/display/YS0VCvsOzFbkE-72IFOqBm8-URg/ I tried this multiple times and switching the super column family creation order and still saw all the cache fill up. On Cassandra 1.0.5. Supercolumn key caches are not saved Key: CASSANDRA-3511 URL: https://issues.apache.org/jira/browse/CASSANDRA-3511 Project: Cassandra Issue Type: Bug Components: Core Affects Versions: 1.0.2, 1.0.3 Reporter: Radim Kolar Priority: Minor Labels: supercolumns Attachments: failed-to-save-after-load-KeyCache, rapidshare-resultcache-KeyCache cache saving seems to be broken in 1.0.2 and 1.0.3 i have 2 CF in keyspace with enabled cache saving and only one gets its key cache saved. It worked perfectly in 0.8, both were saved. 
This one works:

create column family query2 with column_type = 'Standard' and comparator = 'AsciiType' and default_validation_class = 'BytesType' and key_validation_class = 'UTF8Type' and rows_cached = 500.0 and row_cache_save_period = 0 and row_cache_keys_to_save = 2147483647 and keys_cached = 20.0 and key_cache_save_period = 14400 and read_repair_chance = 1.0 and gc_grace = 864000 and min_compaction_threshold = 5 and max_compaction_threshold = 10 and replicate_on_write = false and row_cache_provider = 'ConcurrentLinkedHashCacheProvider' and compaction_strategy = 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'

This one does not:

create column family dkb13 with column_type = 'Super' and comparator = 'LongType' and subcomparator = 'AsciiType' and default_validation_class = 'BytesType' and key_validation_class = 'UTF8Type' and rows_cached = 600.0 and row_cache_save_period = 0 and row_cache_keys_to_save = 2147483647 and keys_cached = 20.0 and key_cache_save_period = 14400 and read_repair_chance = 1.0 and gc_grace = 864000 and min_compaction_threshold = 5 and max_compaction_threshold = 10 and replicate_on_write = false and row_cache_provider = 'ConcurrentLinkedHashCacheProvider' and compaction_strategy = 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'

In a second test system I created these 2 column families and neither of them got a single cache key saved. Both have a save period of 30 seconds - their caches should save often. It's not that the standard column family works while the super one does not.
create column family test1 with column_type = 'Standard' and comparator = 'BytesType' and default_validation_class = 'BytesType' and key_validation_class = 'BytesType' and rows_cached = 0.0 and row_cache_save_period = 0 and row_cache_keys_to_save = 2147483647 and keys_cached = 20.0 and key_cache_save_period = 30 and read_repair_chance = 1.0 and gc_grace = 864000 and min_compaction_threshold = 4 and max_compaction_threshold = 32 and replicate_on_write = true and row_cache_provider = 'SerializingCacheProvider' and compaction_strategy = 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy';

create column family test2 with column_type = 'Standard' and comparator = 'BytesType' and default_validation_class = 'BytesType' and key_validation_class = 'BytesType' and rows_cached = 0.0 and row_cache_save_period = 0 and row_cache_keys_to_save = 2147483647 and keys_cached = 20.0 and key_cache_save_period = 30 and read_repair_chance = 1.0 and gc_grace = 864000 and min_compaction_threshold = 4 and max_compaction_threshold = 32 and replicate_on_write = true and row_cache_provider = 'SerializingCacheProvider' and compaction_strategy = 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy';

If this is done on purpose - for example, Cassandra 1.0 making a heuristic decision about whether the cache should be saved - then it should be removed. Saving the cache is fast. -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira
[Cassandra Wiki] Update of ByteOrderedPartitioner by bda
Dear Wiki user, You have subscribed to a wiki page or wiki category on Cassandra Wiki for change notification. The ByteOrderedPartitioner page has been changed by bda: http://wiki.apache.org/cassandra/ByteOrderedPartitioner

New page:

Byte Ordered Partitioner (BOP) is a scheme that determines how row keys are placed on the Cassandra cluster's node ring. Unlike the RandomPartitioner (RP), the raw byte array value of the row key is used to decide which nodes store the row. Depending on the distribution of the row keys, you may need to actively manage the tokens assigned to each node to maintain balance. As an example, if row keys are random (type 4) UUIDs, they are already evenly distributed. However, they are 128 bits, unlike the 127-bit tokens used by RP, and the initial tokens must be specified as hex byte strings instead of decimal integers. Here is Python code to generate the initial tokens, in a format suitable for cassandra.yaml and nodetool:

def get_cassandra_tokens_uuid4_keys_bop(node_count):
    # BOP expects tokens to be byte arrays, specified in hex
    return ["%032x" % (i*(2**128)/node_count) for i in xrange(0, node_count)]
[Cassandra Wiki] Update of ByteOrderedPartitioner by bda
Dear Wiki user, You have subscribed to a wiki page or wiki category on Cassandra Wiki for change notification. The ByteOrderedPartitioner page has been changed by bda: http://wiki.apache.org/cassandra/ByteOrderedPartitioner?action=diff&rev1=1&rev2=2

Byte Ordered Partitioner (BOP) is a scheme to organize how to place the keys in the Cassandra cluster node ring. Unlike the RandomPartitioner (RP), the raw byte array value of the row key is used to decide which nodes store the row. Depending on the distribution of the row keys, you may need to actively manage the tokens assigned to each node to maintain balance.

- As an example, if row keys are random (type 4) UUIDs, they are already evenly distributed. However they are 128 bits, unlike the 127 bit tokens used by RP, and the initial tokens must be specified as hex byte strings instead of decimal integers. Here is python code to generate the initial tokens, in a format suitable for cassandra.yaml and nodetool:
+ As an example, if all row keys are random (type 4) UUIDs, they are already evenly distributed. However they are 128 bits, unlike the 127 bit tokens used by RP, and the initial tokens must be specified as hex byte strings instead of decimal integers. Here is python code to generate the initial tokens, in a format suitable for cassandra.yaml and nodetool:
+ {{{
def get_cassandra_tokens_uuid4_keys_bop(node_count):
    # BOP expects tokens to be byte arrays, specified in hex
    return ["%032x" % (i*(2**128)/node_count) for i in xrange(0, node_count)]
+ }}}
+ Note that even if your application currently uses random UUID row keys for all data, you may run into balancing issues later on if you add new data with non-uniform keys, or keys of a different size. This is why RP is recommended for most applications.
+
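For convenience, the generator above can be exercised directly when ported to Python 3 (range and floor division // in place of Python 2's xrange and integer /); for a 4-node ring it splits the 128-bit key space into equal quarters:

```python
def get_cassandra_tokens_uuid4_keys_bop(node_count):
    # BOP expects tokens to be byte arrays, specified in hex
    return ["%032x" % (i * (2**128) // node_count) for i in range(node_count)]

for token in get_cassandra_tokens_uuid4_keys_bop(4):
    print(token)
# 00000000000000000000000000000000
# 40000000000000000000000000000000
# 80000000000000000000000000000000
# c0000000000000000000000000000000
```

Each token is 32 hex digits, i.e. 16 bytes, covering the full 128-bit space of type 4 UUID row keys.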
[jira] [Updated] (CASSANDRA-3615) CommitLog BufferOverflowException
[ https://issues.apache.org/jira/browse/CASSANDRA-3615?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Rick Branson updated CASSANDRA-3615: Attachment: 3615.txt I didn't have any luck with Piotr's patch specifically fixing the issue, but it's fairly close to the actual cause. Since the end-of-commit-log write is only necessary if it can actually write another mutation out to the log, it's unnecessary for cases that would trigger this BufferOverflowException. The attached patch fixes the exception and includes a test case to reproduce the problem and prevent regression. CommitLog BufferOverflowException - Key: CASSANDRA-3615 URL: https://issues.apache.org/jira/browse/CASSANDRA-3615 Project: Cassandra Issue Type: Bug Components: Core Affects Versions: 1.1 Reporter: Rick Branson Assignee: Rick Branson Fix For: 1.1 Attachments: 3615.txt, cl-buffer-overflow.patch Reported on mailing list http://mail-archives.apache.org/mod_mbox/cassandra-dev/201112.mbox/%3CCAJHHpg2Rw_BWFJ9DycRGSYkmwMwrJDK3%3Dzw3HwRoutWHbUcULw%40mail.gmail.com%3E

ERROR 14:07:31,215 Fatal exception in thread Thread[COMMIT-LOG-WRITER,5,main]
java.nio.BufferOverflowException
    at java.nio.Buffer.nextPutIndex(Buffer.java:501)
    at java.nio.DirectByteBuffer.putInt(DirectByteBuffer.java:654)
    at org.apache.cassandra.db.commitlog.CommitLogSegment.write(CommitLogSegment.java:259)
    at org.apache.cassandra.db.commitlog.CommitLog$LogRecordAdder.run(CommitLog.java:568)
    at org.apache.cassandra.db.commitlog.PeriodicCommitLogExecutorService$1.runMayThrow(PeriodicCommitLogExecutorService.java:49)
    at org.apache.cassandra.utils.WrappedRunnable.run(WrappedRunnable.java:30)
    at java.lang.Thread.run(Thread.java:662)
INFO 14:07:31,504 flushing high-traffic column family CFS(Keyspace='***', ColumnFamily='***') (estimated 103394287 bytes)

It happened during a fairly standard load process using M/R. -- This message is automatically generated by JIRA.
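As an illustration of the failure mode (a hypothetical sketch in Python, not the actual CommitLogSegment code, which writes to a Java ByteBuffer): a fixed-size end-of-log marker written unconditionally overflows a segment with no room left, so the write has to be guarded:

```python
import struct

def write_end_marker(segment: bytearray, position: int) -> int:
    # Hypothetical segment layout: a 4-byte zero marks "end of valid
    # entries" for replay. A segment too full to hold the marker also
    # can't hold another mutation, so the marker is unnecessary there;
    # writing it anyway is what raises BufferOverflowException in Java.
    if len(segment) - position < 4:
        return position  # no room: skip the marker instead of overflowing
    segment[position:position + 4] = struct.pack(">i", 0)
    return position + 4

segment = bytearray(32)
assert write_end_marker(segment, 30) == 30  # only 2 bytes left: skipped
assert write_end_marker(segment, 20) == 24  # marker written at offset 20
```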
If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Created] (CASSANDRA-3650) CASSANDRA-2335 does not appear to have been resolved
CASSANDRA-2335 does not appear to have been resolved Key: CASSANDRA-3650 URL: https://issues.apache.org/jira/browse/CASSANDRA-3650 Project: Cassandra Issue Type: Bug Affects Versions: 1.0.6 Environment: Windows Reporter: Yaron Y. Goland Priority: Minor The end of CASSANDRA-2335 points out that the issue can be resolved by installing the latest Maven Ant Tasks jarfile. But nothing is mentioned about this in the build instructions so Windows developers find their builds will fail with the error listed in CASSANDRA-2335. -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Resolved] (CASSANDRA-3650) CASSANDRA-2335 does not appear to have been resolved
[ https://issues.apache.org/jira/browse/CASSANDRA-3650?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Yaron Y. Goland resolved CASSANDRA-3650. Resolution: Fixed Next time read the build instructions correctly. This error is noted at the end. CASSANDRA-2335 does not appear to have been resolved Key: CASSANDRA-3650 URL: https://issues.apache.org/jira/browse/CASSANDRA-3650 Project: Cassandra Issue Type: Bug Affects Versions: 1.0.6 Environment: Windows Reporter: Yaron Y. Goland Priority: Minor The end of CASSANDRA-2335 points out that the issue can be resolved by installing the latest Maven Ant Tasks jarfile. But nothing is mentioned about this in the build instructions so Windows developers find their builds will fail with the error listed in CASSANDRA-2335. -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Updated] (CASSANDRA-3571) make stream throttling configurable at runtime with nodetool
[ https://issues.apache.org/jira/browse/CASSANDRA-3571?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Peter Schuller updated CASSANDRA-3571: -- Attachment: CASSANDRA-3571-1.0-rebased-v2.txt I completely agree; attached. make stream throttling configurable at runtime with nodetool Key: CASSANDRA-3571 URL: https://issues.apache.org/jira/browse/CASSANDRA-3571 Project: Cassandra Issue Type: Improvement Reporter: Peter Schuller Assignee: Peter Schuller Priority: Minor Attachments: CASSANDRA-3571-1.0-rebased-v2.txt, CASSANDRA-3571-1.0-rebased.txt, CASSANDRA-3571-1.0.txt Attaching patch that does this, against 1.0. -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Updated] (CASSANDRA-3644) parsing of chunk_length_kb silently overflows
[ https://issues.apache.org/jira/browse/CASSANDRA-3644?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Peter Schuller updated CASSANDRA-3644: -- Attachment: CASSANDRA-3644-1.0-v2.txt My apologies. I must have accidentally taken the wrong branch. v2 attached, against 1.0. parsing of chunk_length_kb silently overflows - Key: CASSANDRA-3644 URL: https://issues.apache.org/jira/browse/CASSANDRA-3644 Project: Cassandra Issue Type: Bug Reporter: Peter Schuller Assignee: Peter Schuller Priority: Minor Attachments: CASSANDRA-3644-1.0-v2.txt, CASSANDRA-3644-1.0.txt Not likely to trigger for real values; I noticed because some other bug caused the chunk length setting to be corrupted somehow and take on some huge value having nothing to do with what I asked for in my schema update (not yet identified why; separate issue). -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira
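A bounds-checked parse along these lines (a hypothetical sketch, not the code in the attached patch) shows where the overflow lives: chunk_length_kb is multiplied by 1024 to get bytes, and in Java a large enough value silently wraps a 32-bit int, so the converted value should be range-checked before use:

```python
INT_MAX = 2**31 - 1  # Java's signed 32-bit int upper bound

def parse_chunk_length_kb(value: str) -> int:
    # Convert a chunk_length_kb setting to a length in bytes, rejecting
    # values whose byte count would overflow a signed 32-bit int instead
    # of letting the multiplication silently wrap as Java's int would.
    kb = int(value)
    length_in_bytes = kb * 1024
    if kb <= 0 or length_in_bytes > INT_MAX:
        raise ValueError("chunk_length_kb out of range: %s" % value)
    return length_in_bytes
```

With this check, parse_chunk_length_kb("64") returns 65536, while a corrupted setting such as "4194304" (whose byte count, 2**32, would wrap a Java int to 0) raises instead of silently overflowing.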
[jira] [Updated] (CASSANDRA-3650) CASSANDRA-2335 was properly resolved - artifact:pom doesn't support the groupId attribute
[ https://issues.apache.org/jira/browse/CASSANDRA-3650?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Yaron Y. Goland updated CASSANDRA-3650: --- Description: CASSANDRA-2335 was about a build error that windows users will see saying artifact:pom doesn't support the groupId attribute. The fix for this is described in http://wiki.apache.org/cassandra/RunningCassandraInEclipse#artifact:pom_error and was outlined in CASSANDRA-2335. This bug was originally filed in the mistaken belief that the issue wasn't noted, when it had been. (was: The end of CASSANDRA-2335 points out that the issue can be resolved by installing the latest Maven Ant Tasks jarfile. But nothing is mentioned about this in the build instructions so Windows developers find their builds will fail with the error listed in CASSANDRA-2335.) Summary: CASSANDRA-2335 was properly resolved - artifact:pom doesn't support the groupId attribute (was: CASSANDRA-2335 does not appear to have been resolved) CASSANDRA-2335 was properly resolved - artifact:pom doesn't support the groupId attribute --- Key: CASSANDRA-3650 URL: https://issues.apache.org/jira/browse/CASSANDRA-3650 Project: Cassandra Issue Type: Bug Affects Versions: 1.0.6 Environment: Windows Reporter: Yaron Y. Goland Priority: Minor CASSANDRA-2335 was about a build error that windows users will see saying artifact:pom doesn't support the groupId attribute. The fix for this is described in http://wiki.apache.org/cassandra/RunningCassandraInEclipse#artifact:pom_error and was outlined in CASSANDRA-2335. This bug was originally filed in the mistaken belief that the issue wasn't noted, when it had been. -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Resolved] (CASSANDRA-3570) barrier-of-entry: make ./bin/cassandra -f work out of the box by changing default cassandra.yaml
[ https://issues.apache.org/jira/browse/CASSANDRA-3570?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Peter Schuller resolved CASSANDRA-3570. --- Resolution: Won't Fix WONTFIX due to lack of buy-in. barrier-of-entry: make ./bin/cassandra -f work out of the box by changing default cassandra.yaml Key: CASSANDRA-3570 URL: https://issues.apache.org/jira/browse/CASSANDRA-3570 Project: Cassandra Issue Type: Improvement Reporter: Peter Schuller Assignee: Peter Schuller Priority: Trivial Attachments: CASSANDRA_3570-dbpath.txt This is probably going to be controversial. But how about the attached simple patch to just have ./db exist, and then have Cassandra configured to use that by default? This makes it a lot easier for people to just run Cassandra out of the working copy, whether you are a developer or a user who wants to apply a patch while being assisted by a Cassandra developer. A real deployment with packaging should properly override these paths anyway, and the default /var/lib stuff is pretty useless. Even if you are root on the machine, it is much cleaner to just run self-contained. Yes, I am aware that you can override the configuration, but honestly, that's just painful - especially when switching between various versions of Cassandra. -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Created] (CASSANDRA-3651) Truncate shouldn't rethrow timeouts as UA
Truncate shouldn't rethrow timeouts as UA - Key: CASSANDRA-3651 URL: https://issues.apache.org/jira/browse/CASSANDRA-3651 Project: Cassandra Issue Type: Bug Components: Core Reporter: Brandon Williams Assignee: Brandon Williams Fix For: 1.0.7 Truncate is a very easy operation to timeout, but the timeouts rethrow as UnavailableException which is somewhat confusing. Instead it should throw TimedOutException. -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Commented] (CASSANDRA-3648) Repair should validate checksums before streaming
[ https://issues.apache.org/jira/browse/CASSANDRA-3648?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13173001#comment-13173001 ] Sylvain Lebresne commented on CASSANDRA-3648: - I think that for block checksums we're always fine, at least currently, since even when streaming compressed files we first decompress them before streaming and thus validate the checksums at the same time. For full file hashes, as you said, we won't add a check on the read path. So adding checksum validation before repair doesn't provide bulletproof protection against propagation of corruption. But for the case where the corruption makes the file unreadable, read repair would not propagate that type of corruption (since the read would fail), while streaming would (because it just takes two offsets based on the index file and streams between those two offsets). So at least we could avoid that kind of corruption propagation, even though that is not the worst case for corruption. Repair should validate checksums before streaming - Key: CASSANDRA-3648 URL: https://issues.apache.org/jira/browse/CASSANDRA-3648 Project: Cassandra Issue Type: New Feature Reporter: Jonathan Ellis Priority: Minor Fix For: 1.2 We have block checksums and sha1 full-file hashes, but currently we do not validate these before streaming in a repair. -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira
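As a sketch of what pre-streaming validation could look like (hypothetical block layout; Cassandra's actual on-disk checksum format differs): recompute per-block CRC32s and refuse to stream on any mismatch, so a corrupt region that an offset-to-offset stream would happily send is caught locally instead of being propagated to the peer:

```python
import zlib

def block_checksums(data: bytes, block_size: int):
    # Per-block CRC32s, standing in for the block checksums kept on disk
    return [zlib.crc32(data[i:i + block_size]) & 0xFFFFFFFF
            for i in range(0, len(data), block_size)]

def ok_to_stream(data: bytes, stored_checksums, block_size: int) -> bool:
    # Refuse to stream a file whose blocks no longer match their stored
    # checksums; plain offset-based streaming never looks at the contents
    return block_checksums(data, block_size) == stored_checksums
```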