This is an automated email from the ASF dual-hosted git repository.
smiklosovic pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/trunk by this push:
new b11909b611 Make sstabledump possible to show tombstones only
b11909b611 is described below
commit b11909b611de811ed2f030848820a17c77df7013
Author: Stefan Miklosovic <[email protected]>
AuthorDate: Tue Sep 24 19:04:17 2024 +0200
Make sstabledump possible to show tombstones only
patch by Stefan Miklosovic; reviewed by Brad Schoening for CASSANDRA-19939
---
CHANGES.txt | 1 +
.../pages/managing/tools/sstable/sstabledump.adoc | 11 +-
.../apache/cassandra/tools/JsonTransformer.java | 227 +++++++++++++--------
.../org/apache/cassandra/tools/SSTableExport.java | 102 +++++----
.../apache/cassandra/tools/SSTableExportTest.java | 3 +-
5 files changed, 219 insertions(+), 125 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index bb087d07c0..1255333b9c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
5.1
+ * Make sstabledump possible to show tombstones only (CASSANDRA-19939)
* Ensure that RFP queries potentially stale replicas even with only key
columns in the row filter (CASSANDRA-19938)
* Allow nodes to change IP address while upgrading to TCM (CASSANDRA-19921)
* Retain existing keyspace params on system tables after upgrade
(CASSANDRA-19916)
diff --git
a/doc/modules/cassandra/pages/managing/tools/sstable/sstabledump.adoc
b/doc/modules/cassandra/pages/managing/tools/sstable/sstabledump.adoc
index 90f66b8854..df00fac767 100644
--- a/doc/modules/cassandra/pages/managing/tools/sstable/sstabledump.adoc
+++ b/doc/modules/cassandra/pages/managing/tools/sstable/sstabledump.adoc
@@ -17,9 +17,10 @@ sstabledump <options> <sstable file path>
|-d |CQL row per line internal representation
|-e |Enumerate partition keys only
|-k <arg> |Partition key
-|-x <arg> |Excluded partition key(s)
-|-t |Print raw timestamps instead of iso8601 date strings
|-l |Output each row as a separate JSON object
+|-o |Enumerate tombstones only
+|-t |Print raw timestamps instead of iso8601 date strings
+|-x <arg> |Excluded partition key(s)
|===
If necessary, use sstableutil first to find out the sstables used by a
@@ -238,6 +239,12 @@ cat eventlog_dump_2018Jul26_excludekeys
}
....
+== Dump tombstones only
+
+It is possible to display only tombstones since CASSANDRA-19939. You enable
this feature with the `-o` flag. This option
+is useful if you are interested only in tombstones and the output is
very long. This way, you can find tombstones
+faster.
+
== Display raw timestamps
By default, dates are displayed in iso8601 date format. Using the -t
diff --git a/src/java/org/apache/cassandra/tools/JsonTransformer.java
b/src/java/org/apache/cassandra/tools/JsonTransformer.java
index 8debfd3b75..9ffa6be00a 100644
--- a/src/java/org/apache/cassandra/tools/JsonTransformer.java
+++ b/src/java/org/apache/cassandra/tools/JsonTransformer.java
@@ -61,6 +61,7 @@ import org.apache.cassandra.schema.ColumnMetadata;
import org.apache.cassandra.schema.TableMetadata;
import org.apache.cassandra.transport.ProtocolVersion;
import org.apache.cassandra.utils.ByteBufferUtil;
+import org.apache.cassandra.utils.FBUtilities;
import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis;
@@ -83,14 +84,20 @@ public final class JsonTransformer
private boolean rawTime = false;
+ private boolean tombstonesOnly = false;
+
+ private long nowInSeconds;
+
private long currentPosition = 0;
- private JsonTransformer(JsonGenerator json, ISSTableScanner
currentScanner, boolean rawTime, TableMetadata metadata, boolean isJsonLines)
+ private JsonTransformer(JsonGenerator json, ISSTableScanner
currentScanner, boolean rawTime, boolean tombstonesOnly, TableMetadata
metadata, long nowInSeconds, boolean isJsonLines)
{
this.json = json;
this.metadata = metadata;
this.currentScanner = currentScanner;
this.rawTime = rawTime;
+ this.tombstonesOnly = tombstonesOnly;
+ this.nowInSeconds = nowInSeconds;
if (isJsonLines)
{
@@ -107,24 +114,24 @@ public final class JsonTransformer
}
}
- public static void toJson(ISSTableScanner currentScanner,
Stream<UnfilteredRowIterator> partitions, boolean rawTime, TableMetadata
metadata, OutputStream out)
+ public static void toJson(ISSTableScanner currentScanner,
Stream<UnfilteredRowIterator> partitions, boolean rawTime, boolean
tombstonesOnly, TableMetadata metadata, long nowInSeconds, OutputStream out)
throws IOException
{
try (JsonGenerator json = jsonFactory.createGenerator(new
OutputStreamWriter(out, StandardCharsets.UTF_8)))
{
- JsonTransformer transformer = new JsonTransformer(json,
currentScanner, rawTime, metadata, false);
+ JsonTransformer transformer = new JsonTransformer(json,
currentScanner, rawTime, tombstonesOnly, metadata, nowInSeconds, false);
json.writeStartArray();
partitions.forEach(transformer::serializePartition);
json.writeEndArray();
}
}
- public static void toJsonLines(ISSTableScanner currentScanner,
Stream<UnfilteredRowIterator> partitions, boolean rawTime, TableMetadata
metadata, OutputStream out)
- throws IOException
+ public static void toJsonLines(ISSTableScanner currentScanner,
Stream<UnfilteredRowIterator> partitions, boolean rawTime, boolean
tombstonesOnly, TableMetadata metadata, long nowInSeconds, OutputStream out)
+ throws IOException
{
try (JsonGenerator json = jsonFactory.createGenerator(new
OutputStreamWriter(out, StandardCharsets.UTF_8)))
{
- JsonTransformer transformer = new JsonTransformer(json,
currentScanner, rawTime, metadata, true);
+ JsonTransformer transformer = new JsonTransformer(json,
currentScanner, rawTime, tombstonesOnly, metadata, nowInSeconds, true);
partitions.forEach(transformer::serializePartition);
}
}
@@ -133,7 +140,7 @@ public final class JsonTransformer
{
try (JsonGenerator json = jsonFactory.createGenerator(new
OutputStreamWriter(out, StandardCharsets.UTF_8)))
{
- JsonTransformer transformer = new JsonTransformer(json,
currentScanner, rawTime, metadata, false);
+ JsonTransformer transformer = new JsonTransformer(json,
currentScanner, rawTime, false, metadata, FBUtilities.nowInSeconds(), false);
json.writeStartArray();
keys.forEach(transformer::serializePartitionKey);
json.writeEndArray();
@@ -206,49 +213,18 @@ public final class JsonTransformer
{
try
{
- json.writeStartObject();
- json.writeObjectField("table kind", metadata.kind.name());
-
- json.writeFieldName("partition");
- json.writeStartObject();
- json.writeFieldName("key");
- serializePartitionKey(partition.partitionKey());
- json.writeNumberField("position",
this.currentScanner.getCurrentPosition());
-
- if (!partition.partitionLevelDeletion().isLive())
- serializeDeletion(partition.partitionLevelDeletion());
-
- json.writeEndObject();
-
- json.writeFieldName("rows");
- json.writeStartArray();
- updatePosition();
-
- if (partition.staticRow() != null)
+ boolean shouldSerialize = true;
+ if (tombstonesOnly)
{
- if (!partition.staticRow().isEmpty())
- serializeRow(partition.staticRow());
- updatePosition();
- }
+ shouldSerialize = partition.partitionLevelDeletion() != null
&& !partition.partitionLevelDeletion().isLive();
- Unfiltered unfiltered;
- while (partition.hasNext())
- {
- unfiltered = partition.next();
- if (unfiltered instanceof Row)
- {
- serializeRow((Row) unfiltered);
- }
- else if (unfiltered instanceof RangeTombstoneMarker)
- {
- serializeTombstone((RangeTombstoneMarker) unfiltered);
- }
- updatePosition();
+ // check if some row should be printed
+ if (!shouldSerialize)
+ shouldSerialize = containsSerializableRow(partition);
}
- json.writeEndArray();
-
- json.writeEndObject();
+ if (shouldSerialize)
+ serializePartitionInternal(partition);
}
catch (IOException e)
@@ -258,61 +234,146 @@ public final class JsonTransformer
}
}
- private void serializeRow(Row row)
+ private void serializePartitionInternal(UnfilteredRowIterator partition)
throws IOException
{
- try
+ json.writeStartObject();
+ json.writeObjectField("table kind", metadata.kind.name());
+
+ json.writeFieldName("partition");
+ json.writeStartObject();
+ json.writeFieldName("key");
+ serializePartitionKey(partition.partitionKey());
+ json.writeNumberField("position",
this.currentScanner.getCurrentPosition());
+
+ if (!partition.partitionLevelDeletion().isLive())
+ serializeDeletion(partition.partitionLevelDeletion());
+
+ json.writeEndObject();
+
+ json.writeFieldName("rows");
+ json.writeStartArray();
+ updatePosition();
+
+ if (partition.staticRow() != null)
{
- json.writeStartObject();
- String rowType = row.isStatic() ? "static_block" : "row";
- json.writeFieldName("type");
- json.writeString(rowType);
- json.writeNumberField("position", this.currentPosition);
+ if (!partition.staticRow().isEmpty())
+ serializeRow(partition.staticRow());
+ updatePosition();
+ }
- // Only print clustering information for non-static rows.
- if (!row.isStatic())
+ Unfiltered unfiltered;
+ while (partition.hasNext())
+ {
+ unfiltered = partition.next();
+ if (unfiltered instanceof Row)
{
- serializeClustering(row.clustering());
+ serializeRow((Row) unfiltered);
}
+ else if (unfiltered instanceof RangeTombstoneMarker)
+ {
+ serializeTombstone((RangeTombstoneMarker) unfiltered);
+ }
+ updatePosition();
+ }
+
+ json.writeEndArray();
+
+ json.writeEndObject();
+ }
+
+ private void serializeRow(Row row)
+ {
+ try
+ {
+ if (shouldSerializeRow(row))
+ serializeRowInternal(row);
+ }
+ catch (IOException e)
+ {
+ logger.error("Fatal error parsing row.", e);
+ }
+ }
- LivenessInfo liveInfo = row.primaryKeyLivenessInfo();
- if (!liveInfo.isEmpty())
+ private boolean containsSerializableRow(UnfilteredRowIterator partition)
+ {
+ boolean shouldSerialize = false;
+ Unfiltered unfiltered;
+ while (partition.hasNext())
+ {
+ unfiltered = partition.next();
+ if (unfiltered instanceof Row)
{
- objectIndenter.setCompact(false);
- json.writeFieldName("liveness_info");
- objectIndenter.setCompact(true);
- json.writeStartObject();
- json.writeFieldName("tstamp");
- json.writeString(dateString(TimeUnit.MICROSECONDS,
liveInfo.timestamp()));
- if (liveInfo.isExpiring())
+ if (shouldSerializeRow((Row) unfiltered))
{
- json.writeNumberField("ttl", liveInfo.ttl());
- json.writeFieldName("expires_at");
- json.writeString(dateString(TimeUnit.SECONDS,
liveInfo.localExpirationTime()));
- json.writeFieldName("expired");
- json.writeBoolean(liveInfo.localExpirationTime() <
(currentTimeMillis() / 1000));
+ shouldSerialize = true;
+ break;
}
- json.writeEndObject();
- objectIndenter.setCompact(false);
}
-
- // If this is a deletion, indicate that, otherwise write cells.
- if (!row.deletion().isLive())
+ else if (unfiltered instanceof RangeTombstoneMarker)
{
- serializeDeletion(row.deletion().time());
+ shouldSerialize = true;
+ break;
}
- json.writeFieldName("cells");
- json.writeStartArray();
- for (ColumnData cd : row)
+ }
+
+ partition.close();
+
+ return shouldSerialize;
+ }
+
+ private boolean shouldSerializeRow(Row row)
+ {
+ return !tombstonesOnly || row.hasDeletion(nowInSeconds);
+ }
+
+ private void serializeRowInternal(Row row) throws IOException
+ {
+ json.writeStartObject();
+ String rowType = row.isStatic() ? "static_block" : "row";
+ json.writeFieldName("type");
+ json.writeString(rowType);
+ json.writeNumberField("position", this.currentPosition);
+
+ // Only print clustering information for non-static rows.
+ if (!row.isStatic())
+ {
+ serializeClustering(row.clustering());
+ }
+
+ LivenessInfo liveInfo = row.primaryKeyLivenessInfo();
+ if (!liveInfo.isEmpty())
+ {
+ objectIndenter.setCompact(false);
+ json.writeFieldName("liveness_info");
+ objectIndenter.setCompact(true);
+ json.writeStartObject();
+ json.writeFieldName("tstamp");
+ json.writeString(dateString(TimeUnit.MICROSECONDS,
liveInfo.timestamp()));
+ if (liveInfo.isExpiring())
{
- serializeColumnData(cd, liveInfo);
+ json.writeNumberField("ttl", liveInfo.ttl());
+ json.writeFieldName("expires_at");
+ json.writeString(dateString(TimeUnit.SECONDS,
liveInfo.localExpirationTime()));
+ json.writeFieldName("expired");
+ json.writeBoolean(liveInfo.localExpirationTime() <
(currentTimeMillis() / 1000));
}
- json.writeEndArray();
json.writeEndObject();
+ objectIndenter.setCompact(false);
}
- catch (IOException e)
+
+ // If this is a deletion, indicate that, otherwise write cells.
+ if (!row.deletion().isLive())
{
- logger.error("Fatal error parsing row.", e);
+ serializeDeletion(row.deletion().time());
}
+ json.writeFieldName("cells");
+ json.writeStartArray();
+ for (ColumnData cd : row)
+ {
+ serializeColumnData(cd, liveInfo);
+ }
+ json.writeEndArray();
+ json.writeEndObject();
}
private void serializeTombstone(RangeTombstoneMarker tombstone)
diff --git a/src/java/org/apache/cassandra/tools/SSTableExport.java
b/src/java/org/apache/cassandra/tools/SSTableExport.java
index 05ec576553..0181e9f29f 100644
--- a/src/java/org/apache/cassandra/tools/SSTableExport.java
+++ b/src/java/org/apache/cassandra/tools/SSTableExport.java
@@ -36,6 +36,7 @@ import org.apache.commons.cli.PosixParser;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.db.DecoratedKey;
import org.apache.cassandra.db.PartitionPosition;
+import org.apache.cassandra.db.rows.Row;
import org.apache.cassandra.db.rows.UnfilteredRowIterator;
import org.apache.cassandra.dht.AbstractBounds;
import org.apache.cassandra.dht.Bounds;
@@ -67,6 +68,7 @@ public class SSTableExport
private static final String DEBUG_OUTPUT_OPTION = "d";
private static final String EXCLUDE_KEY_OPTION = "x";
private static final String ENUMERATE_KEYS_OPTION = "e";
+ private static final String ENUMERATE_TOMBSTONES_OPTION = "o";
private static final String RAW_TIMESTAMPS = "t";
private static final String PARTITION_JSON_LINES = "l";
@@ -87,16 +89,19 @@ public class SSTableExport
excludeKey.setArgs(500);
options.addOption(excludeKey);
- Option optEnumerate = new Option(ENUMERATE_KEYS_OPTION, false,
"enumerate partition keys only");
+ Option optEnumerate = new Option(ENUMERATE_KEYS_OPTION, false,
"Enumerate partition keys only");
options.addOption(optEnumerate);
+ Option optTombstones = new Option(ENUMERATE_TOMBSTONES_OPTION, false,
"Enumerate tombstones only");
+ options.addOption(optTombstones);
+
Option debugOutput = new Option(DEBUG_OUTPUT_OPTION, false, "CQL row
per line internal representation");
options.addOption(debugOutput);
Option rawTimestamps = new Option(RAW_TIMESTAMPS, false, "Print raw
timestamps instead of iso8601 date strings");
options.addOption(rawTimestamps);
- Option partitionJsonLines= new Option(PARTITION_JSON_LINES, false,
"Output json lines, by partition");
+ Option partitionJsonLines = new Option(PARTITION_JSON_LINES, false,
"Output json lines, by partition");
options.addOption(partitionJsonLines);
}
@@ -160,6 +165,11 @@ public class SSTableExport
System.out);
}
}
+ else if (cmd.hasOption(ENUMERATE_TOMBSTONES_OPTION))
+ {
+ final ISSTableScanner currentScanner = sstable.getScanner();
+ process(currentScanner, Util.iterToStream(currentScanner),
metadata);
+ }
else
{
IPartitioner partitioner = sstable.getPartitioner();
@@ -179,43 +189,9 @@ public class SSTableExport
{
currentScanner = sstable.getScanner();
}
- Stream<UnfilteredRowIterator> partitions =
Util.iterToStream(currentScanner).filter(i ->
- excludes.isEmpty() ||
!excludes.contains(metadata.partitionKeyType.getString(i.partitionKey().getKey()))
- );
- if (cmd.hasOption(DEBUG_OUTPUT_OPTION))
- {
- AtomicLong position = new AtomicLong();
- partitions.forEach(partition ->
- {
- position.set(currentScanner.getCurrentPosition());
-
- if (!partition.partitionLevelDeletion().isLive())
- {
- System.out.println("[" +
metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@" +
- position.get() + " " +
partition.partitionLevelDeletion());
- }
- if (!partition.staticRow().isEmpty())
- {
- System.out.println("[" +
metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@" +
- position.get() + " " +
partition.staticRow().toString(metadata, true));
- }
- partition.forEachRemaining(row ->
- {
- System.out.println(
- "[" +
metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@"
- + position.get() + " " + row.toString(metadata,
false, true));
- position.set(currentScanner.getCurrentPosition());
- });
- });
- }
- else if (cmd.hasOption(PARTITION_JSON_LINES))
- {
- JsonTransformer.toJsonLines(currentScanner, partitions,
cmd.hasOption(RAW_TIMESTAMPS), metadata, System.out);
- }
- else
- {
- JsonTransformer.toJson(currentScanner, partitions,
cmd.hasOption(RAW_TIMESTAMPS), metadata, System.out);
- }
+
+ Stream<UnfilteredRowIterator> partitions =
Util.iterToStream(currentScanner).filter(i -> excludes.isEmpty() ||
!excludes.contains(metadata.partitionKeyType.getString(i.partitionKey().getKey())));
+ process(currentScanner, partitions, metadata);
}
}
catch (IOException e)
@@ -226,6 +202,54 @@ public class SSTableExport
System.exit(0);
}
+ private static void process(ISSTableScanner scanner,
Stream<UnfilteredRowIterator> partitions, TableMetadata metadata) throws
IOException
+ {
+ long nowInSeconds = FBUtilities.nowInSeconds();
+ boolean hasTombstoneOption =
cmd.hasOption(ENUMERATE_TOMBSTONES_OPTION);
+
+ if (cmd.hasOption(DEBUG_OUTPUT_OPTION))
+ {
+ AtomicLong position = new AtomicLong();
+ partitions.forEach(partition ->
+ {
+ position.set(scanner.getCurrentPosition());
+
+ if (!partition.partitionLevelDeletion().isLive())
+ {
+ System.out.println('[' +
metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@" +
+ position.get() + ' ' +
partition.partitionLevelDeletion());
+ }
+ if (!partition.staticRow().isEmpty())
+ {
+ System.out.println('[' +
metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@" +
+ position.get() + ' ' +
partition.staticRow().toString(metadata, true));
+ }
+ partition.forEachRemaining(row ->
+ {
+ boolean shouldPrint = true;
+ if (hasTombstoneOption && row.isRow())
+ shouldPrint = ((Row) row).hasDeletion(nowInSeconds);
+
+ if (shouldPrint)
+ {
+ System.out.println('[' +
metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@"
+ + position.get() + ' ' +
row.toString(metadata, false, true));
+ }
+
+ position.set(scanner.getCurrentPosition());
+ });
+ });
+ }
+ else if (cmd.hasOption(PARTITION_JSON_LINES))
+ {
+ JsonTransformer.toJsonLines(scanner, partitions,
cmd.hasOption(RAW_TIMESTAMPS), hasTombstoneOption, metadata, nowInSeconds,
System.out);
+ }
+ else
+ {
+ JsonTransformer.toJson(scanner, partitions,
cmd.hasOption(RAW_TIMESTAMPS), hasTombstoneOption, metadata, nowInSeconds,
System.out);
+ }
+ }
+
private static void printUsage()
{
String usage = String.format("sstabledump <sstable file path>
<options>%n");
diff --git a/test/unit/org/apache/cassandra/tools/SSTableExportTest.java
b/test/unit/org/apache/cassandra/tools/SSTableExportTest.java
index 2a29664a9a..77144f14c4 100644
--- a/test/unit/org/apache/cassandra/tools/SSTableExportTest.java
+++ b/test/unit/org/apache/cassandra/tools/SSTableExportTest.java
@@ -60,9 +60,10 @@ public class SSTableExportTest extends OfflineToolUtils
String help = "usage: sstabledump <sstable file path> <options>\n" +
"Dump contents of given SSTable to standard output in
JSON format.\n" +
" -d CQL row per line internal
representation\n" +
- " -e enumerate partition keys only\n" +
+ " -e Enumerate partition keys only\n" +
" -k <arg> List of included partition keys\n" +
" -l Output json lines, by partition\n" +
+ " -o Enumerate tombstones only\n" +
" -t Print raw timestamps instead of iso8601
date strings\n" +
" -x <arg> List of excluded partition keys\n";
Assertions.assertThat(tool.getStdout()).isEqualTo(help);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]