Repository: cassandra Updated Branches: refs/heads/cassandra-3.0 5289a564c -> 9c9f26382
(Hadoop) fix CIF describeSplits() not handling 0 size estimates patch by Mike Adamson; reviewed by Paulo Motta for CASSANDRA-10600 Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/9c9f2638 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/9c9f2638 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/9c9f2638 Branch: refs/heads/cassandra-3.0 Commit: 9c9f263822ab0c7669f162380af6645ef8309083 Parents: 5289a56 Author: Mike Adamson <madam...@datastax.com> Authored: Tue Oct 27 11:51:57 2015 +0000 Committer: Aleksey Yeschenko <alek...@apache.org> Committed: Thu Nov 5 22:31:29 2015 +0000 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../cassandra/hadoop/cql3/CqlInputFormat.java | 22 ++++++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/9c9f2638/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 1ff2fdb..b469594 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 3.0 + * (Hadoop) fix CIF describeSplits() not handling 0 size estimates (CASSANDRA-10600) * Fix reading of legacy sstables (CASSANDRA-10590) * Use CQL type names in schema metadata tables (CASSANDRA-10365) * Guard batchlog replay against integer division by zero (CASSANDRA-9223) http://git-wip-us.apache.org/repos/asf/cassandra/blob/9c9f2638/src/java/org/apache/cassandra/hadoop/cql3/CqlInputFormat.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/hadoop/cql3/CqlInputFormat.java b/src/java/org/apache/cassandra/hadoop/cql3/CqlInputFormat.java index ec5167b..1b11391 100644 --- a/src/java/org/apache/cassandra/hadoop/cql3/CqlInputFormat.java +++ b/src/java/org/apache/cassandra/hadoop/cql3/CqlInputFormat.java @@ -246,19 +246,29 @@ public class CqlInputFormat extends org.apache.hadoop.mapreduce.InputFormat<Long ResultSet resultSet = session.execute(query, keyspace, table, tokenRange.getStart().toString(), tokenRange.getEnd().toString()); Row row = resultSet.one(); - // If we have no data on this split, return the full split i.e., do not sub-split + + long meanPartitionSize = 0; + long partitionCount = 0; + int splitCount = 0; + + if (row != null) + { + meanPartitionSize = row.getLong("mean_partition_size"); + partitionCount = row.getLong("partitions_count"); + + splitCount = (int)((meanPartitionSize * partitionCount) / splitSize); + } + + // If we have no data on this split or the size estimate is 0, + // return the full split i.e., do not sub-split // Assume smallest granularity of partition count available from CASSANDRA-7688 - if (row == null) + if (splitCount == 0) { Map<TokenRange, Long> wrappedTokenRange = new HashMap<>(); wrappedTokenRange.put(tokenRange, (long) 128); return wrappedTokenRange; } - long meanPartitionSize = row.getLong("mean_partition_size"); - long partitionCount = row.getLong("partitions_count"); - - int splitCount = (int)((meanPartitionSize * partitionCount) / splitSize); List<TokenRange> splitRanges = tokenRange.splitEvenly(splitCount); Map<TokenRange, Long> rangesWithLength = new HashMap<>(); for (TokenRange range : splitRanges)