Author: jbellis
Date: Fri Sep 9 15:46:13 2011
New Revision: 1167254
URL: http://svn.apache.org/viewvc?rev=1167254&view=rev
Log:
cleanup read path (StorageProxy side)
patch by jbellis; reviewed by slebresne for CASSANDRA-3161
Modified:
cassandra/trunk/src/java/org/apache/cassandra/service/RowDigestResolver.java
cassandra/trunk/src/java/org/apache/cassandra/service/RowRepairResolver.java
cassandra/trunk/src/java/org/apache/cassandra/service/StorageProxy.java
Modified:
cassandra/trunk/src/java/org/apache/cassandra/service/RowDigestResolver.java
URL:
http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/service/RowDigestResolver.java?rev=1167254&r1=1167253&r2=1167254&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/service/RowDigestResolver.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/service/RowDigestResolver.java Fri Sep 9 15:46:13 2011
@@ -33,7 +33,10 @@ public class RowDigestResolver extends A
{
super(key, table);
}
-
+
+ /**
+ * Special case of resolve() so that CL.ONE reads never throw DigestMismatchException in the foreground
+ */
public Row getData() throws IOException
{
for (Map.Entry<Message, ReadResponse> entry : replies.entrySet())
@@ -62,14 +65,10 @@ public class RowDigestResolver extends A
logger.debug("resolving " + replies.size() + " responses");
long startTime = System.currentTimeMillis();
- ColumnFamily data = null;
// validate digests against each other; throw immediately on mismatch.
- // also, collects data results into versions/endpoints lists.
- //
- // results are cleared as we process them, to avoid unnecessary duplication of work
- // when resolve() is called a second time for read repair on responses that were not
- // necessary to satisfy ConsistencyLevel.
+ // also extract the data reply, if any.
+ ColumnFamily data = null;
ByteBuffer digest = null;
for (Map.Entry<Message, ReadResponse> entry : replies.entrySet())
{
Modified:
cassandra/trunk/src/java/org/apache/cassandra/service/RowRepairResolver.java
URL:
http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/service/RowRepairResolver.java?rev=1167254&r1=1167253&r2=1167254&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/service/RowRepairResolver.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/service/RowRepairResolver.java Fri Sep 9 15:46:13 2011
@@ -27,6 +27,9 @@ import java.util.Collections;
import java.util.List;
import java.util.Map;
+import com.google.common.base.Function;
+import com.google.common.collect.Iterables;
+
import org.apache.cassandra.db.*;
import org.apache.cassandra.db.columniterator.IdentityQueryFilter;
import org.apache.cassandra.db.filter.QueryFilter;
@@ -59,45 +62,42 @@ public class RowRepairResolver extends A
{
if (logger.isDebugEnabled())
logger.debug("resolving " + replies.size() + " responses");
-
long startTime = System.currentTimeMillis();
- List<ColumnFamily> versions = new ArrayList<ColumnFamily>();
- List<InetAddress> endpoints = new ArrayList<InetAddress>();
-
- // case 1: validate digests against each other; throw immediately on mismatch.
- // also, collects data results into versions/endpoints lists.
- //
- // results are cleared as we process them, to avoid unnecessary duplication of work
- // when resolve() is called a second time for read repair on responses that were not
- // necessary to satisfy ConsistencyLevel.
- for (Map.Entry<Message, ReadResponse> entry : replies.entrySet())
- {
- Message message = entry.getKey();
- ReadResponse response = entry.getValue();
- assert !response.isDigestQuery();
- versions.add(response.row().cf);
- endpoints.add(message.getFrom());
- }
ColumnFamily resolved;
- if (versions.size() > 1)
+ if (replies.size() > 1)
{
- for (ColumnFamily cf : versions)
+ // compute maxLiveColumns to prevent short reads -- see https://issues.apache.org/jira/browse/CASSANDRA-2643
+ for (Map.Entry<Message, ReadResponse> entry : replies.entrySet())
{
+ ReadResponse response = entry.getValue();
+ assert !response.isDigestQuery() : "Received digest response
to repair read from " + entry.getKey().getFrom();
+
+ ColumnFamily cf = response.row().cf;
int liveColumns = cf.getLiveColumnCount();
if (liveColumns > maxLiveColumns)
maxLiveColumns = liveColumns;
}
- resolved = resolveSuperset(versions);
+
+ // merge the row versions
+ resolved = resolveSuperset(Iterables.transform(replies.values(),
new Function<ReadResponse, ColumnFamily>()
+ {
+ public ColumnFamily apply(ReadResponse response)
+ {
+ return response.row().cf;
+ }
+ }));
if (logger.isDebugEnabled())
logger.debug("versions merged");
- // resolved can be null even if versions doesn't have all nulls because of the call to removeDeleted in resolveSuperSet
+
+ // send updates to any replica that was missing part of the full row
+ // (resolved can be null even if versions doesn't have all nulls because of the call to removeDeleted in resolveSuperSet)
if (resolved != null)
- repairResults = scheduleRepairs(resolved, table, key,
versions, endpoints);
+ repairResults = scheduleRepairs(resolved, table, key, replies);
}
else
{
- resolved = versions.get(0);
+ resolved = replies.values().iterator().next().row().cf;
}
if (logger.isDebugEnabled())
@@ -110,13 +110,15 @@ public class RowRepairResolver extends A
* For each row version, compare with resolved (the superset of all row versions);
* if it is missing anything, send a mutation to the endpoint it came from.
*/
- public static List<IAsyncResult> scheduleRepairs(ColumnFamily resolved,
String table, DecoratedKey<?> key, List<ColumnFamily> versions,
List<InetAddress> endpoints)
+ public static List<IAsyncResult> scheduleRepairs(ColumnFamily resolved,
String table, DecoratedKey<?> key, Map<Message,ReadResponse> replies)
{
- List<IAsyncResult> results = new
ArrayList<IAsyncResult>(versions.size());
+ List<IAsyncResult> results = new
ArrayList<IAsyncResult>(replies.size());
- for (int i = 0; i < versions.size(); i++)
+ for (Map.Entry<Message, ReadResponse> entry : replies.entrySet())
{
- ColumnFamily diffCf = ColumnFamily.diff(versions.get(i), resolved);
+ InetAddress from = entry.getKey().getFrom();
+ ColumnFamily cf = entry.getValue().row().cf;
+ ColumnFamily diffCf = ColumnFamily.diff(cf, resolved);
if (diffCf == null) // no repair needs to happen
continue;
@@ -126,21 +128,21 @@ public class RowRepairResolver extends A
Message repairMessage;
try
{
- repairMessage =
rowMutation.getMessage(Gossiper.instance.getVersion(endpoints.get(i)));
+ repairMessage =
rowMutation.getMessage(Gossiper.instance.getVersion(from));
}
catch (IOException e)
{
throw new IOError(e);
}
- results.add(MessagingService.instance().sendRR(repairMessage,
endpoints.get(i)));
+ results.add(MessagingService.instance().sendRR(repairMessage,
from));
}
return results;
}
- static ColumnFamily resolveSuperset(List<ColumnFamily> versions)
+ static ColumnFamily resolveSuperset(Iterable<ColumnFamily> versions)
{
- assert versions.size() > 0;
+ assert Iterables.size(versions) > 0;
ColumnFamily resolved = null;
for (ColumnFamily cf : versions)
Modified:
cassandra/trunk/src/java/org/apache/cassandra/service/StorageProxy.java
URL:
http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/service/StorageProxy.java?rev=1167254&r1=1167253&r2=1167254&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/service/StorageProxy.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/service/StorageProxy.java Fri Sep 9 15:46:13 2011
@@ -573,24 +573,23 @@ public class StorageProxy implements Sto
* 4. If the digests (if any) match the data return the data
* 5. else carry out read repair by getting data from all the nodes.
*/
- private static List<Row> fetchRows(List<ReadCommand> commands,
ConsistencyLevel consistency_level) throws IOException, UnavailableException,
TimeoutException
+ private static List<Row> fetchRows(List<ReadCommand> initialCommands,
ConsistencyLevel consistency_level) throws IOException, UnavailableException,
TimeoutException
{
- List<ReadCallback<Row>> readCallbacks = new
ArrayList<ReadCallback<Row>>();
- List<Row> rows = new ArrayList<Row>();
+ List<Row> rows = new ArrayList<Row>(initialCommands.size());
List<ReadCommand> commandsToRetry = Collections.emptyList();
- List<ReadCommand> repairCommands = Collections.emptyList();
do
{
- readCallbacks.clear();
- List<ReadCommand> commandsToSend = commandsToRetry.isEmpty() ?
commands : commandsToRetry;
+ List<ReadCommand> commands = commandsToRetry.isEmpty() ?
initialCommands : commandsToRetry;
+ ReadCallback<Row>[] readCallbacks = new
ReadCallback[commands.size()];
if (!commandsToRetry.isEmpty())
logger.debug("Retrying {} commands", commandsToRetry.size());
// send out read requests
- for (ReadCommand command : commandsToSend)
+ for (int i = 0; i < commands.size(); i++)
{
+ ReadCommand command = commands.get(i);
assert !command.isDigestQuery();
logger.debug("Command/ConsistencyLevel is {}/{}", command,
consistency_level);
@@ -602,7 +601,7 @@ public class StorageProxy implements Sto
ReadCallback<Row> handler = getReadCallback(resolver, command,
consistency_level, endpoints);
handler.assureSufficientLiveNodes();
assert !handler.endpoints.isEmpty();
- readCallbacks.add(handler);
+ readCallbacks[i] = handler;
// The data-request message is sent to dataPoint, the node
that will actually get the data for us
InetAddress dataPoint = handler.endpoints.get(0);
@@ -643,15 +642,13 @@ public class StorageProxy implements Sto
}
}
- if (repairCommands != Collections.EMPTY_LIST)
- repairCommands.clear();
-
// read results and make a second pass for any digest mismatches
+ List<ReadCommand> repairCommands = null;
List<RepairCallback> repairResponseHandlers = null;
- for (int i = 0; i < commandsToSend.size(); i++)
+ for (int i = 0; i < commands.size(); i++)
{
- ReadCallback<Row> handler = readCallbacks.get(i);
- ReadCommand command = commandsToSend.get(i);
+ ReadCallback<Row> handler = readCallbacks[i];
+ ReadCommand command = commands.get(i);
try
{
long startTime2 = System.currentTimeMillis();
@@ -675,17 +672,17 @@ public class StorageProxy implements Sto
RowRepairResolver resolver = new
RowRepairResolver(command.table, command.key);
RepairCallback repairHandler = new
RepairCallback(resolver, handler.endpoints);
- if (repairCommands == Collections.EMPTY_LIST)
+ if (repairCommands == null)
+ {
repairCommands = new ArrayList<ReadCommand>();
+ repairResponseHandlers = new
ArrayList<RepairCallback>();
+ }
repairCommands.add(command);
+ repairResponseHandlers.add(repairHandler);
MessageProducer producer = new
CachingMessageProducer(command);
for (InetAddress endpoint : handler.endpoints)
MessagingService.instance().sendRR(producer, endpoint,
repairHandler);
-
- if (repairResponseHandlers == null)
- repairResponseHandlers = new
ArrayList<RepairCallback>();
- repairResponseHandlers.add(repairHandler);
}
}