Repository: cassandra
Updated Branches:
  refs/heads/trunk 9e3483f84 -> 6dfd11c30
Improve sstablemetadata output Patch by Chris Lohfink; reviewed by marcuse for CASSANDRA-11483 Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/6dfd11c3 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/6dfd11c3 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/6dfd11c3 Branch: refs/heads/trunk Commit: 6dfd11c30a9c85581b77c93cfcdbef37a5d497c6 Parents: 9e3483f Author: Chris Lohfink <[email protected]> Authored: Fri Jul 21 20:04:37 2017 -0700 Committer: Marcus Eriksson <[email protected]> Committed: Tue Aug 8 13:57:43 2017 +0200 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../apache/cassandra/tools/SSTableExport.java | 69 +-- .../cassandra/tools/SSTableMetadataViewer.java | 600 +++++++++++++++---- src/java/org/apache/cassandra/tools/Util.java | 282 +++++++++ .../cassandra/utils/EstimatedHistogram.java | 27 +- 5 files changed, 792 insertions(+), 187 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/6dfd11c3/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 69b8eaa..0ddcf94 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 4.0 + * Improve sstablemetadata output (CASSANDRA-11483) * Support for migrating legacy users to roles has been dropped (CASSANDRA-13371) * Introduce error metrics for repair (CASSANDRA-13387) * Refactoring to primitive functional interfaces in AuthCache (CASSANDRA-13732) http://git-wip-us.apache.org/repos/asf/cassandra/blob/6dfd11c3/src/java/org/apache/cassandra/tools/SSTableExport.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/tools/SSTableExport.java b/src/java/org/apache/cassandra/tools/SSTableExport.java index 5d22462..26dae7f 100644 --- a/src/java/org/apache/cassandra/tools/SSTableExport.java +++ b/src/java/org/apache/cassandra/tools/SSTableExport.java @@ -19,28 +19,33 @@ package org.apache.cassandra.tools; import java.io.File; import java.io.IOException; -import java.util.*; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; import java.util.stream.Stream; -import java.util.stream.StreamSupport; - -import org.apache.commons.cli.*; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.cql3.ColumnIdentifier; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.PartitionPosition; -import org.apache.cassandra.db.SerializationHeader; -import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.db.rows.UnfilteredRowIterator; -import org.apache.cassandra.dht.*; +import org.apache.cassandra.dht.AbstractBounds; +import org.apache.cassandra.dht.Bounds; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.io.sstable.Descriptor; import org.apache.cassandra.io.sstable.ISSTableScanner; import org.apache.cassandra.io.sstable.KeyIterator; import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import 
org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.cli.PosixParser; import org.apache.cassandra.io.sstable.metadata.MetadataComponent; import org.apache.cassandra.io.sstable.metadata.MetadataType; import org.apache.cassandra.schema.TableMetadataRef; @@ -86,48 +91,6 @@ public class SSTableExport } /** - * Construct table schema from info stored in SSTable's Stats.db - * - * @param desc SSTable's descriptor - * @return Restored TableMetadata - * @throws IOException when Stats.db cannot be read - */ - public static TableMetadata metadataFromSSTable(Descriptor desc) throws IOException - { - if (!desc.version.isCompatible()) - throw new IOException("Cannot process old and unsupported SSTable version."); - - EnumSet<MetadataType> types = EnumSet.of(MetadataType.STATS, MetadataType.HEADER); - Map<MetadataType, MetadataComponent> sstableMetadata = desc.getMetadataSerializer().deserialize(desc, types); - SerializationHeader.Component header = (SerializationHeader.Component) sstableMetadata.get(MetadataType.HEADER); - IPartitioner partitioner = FBUtilities.newPartitioner(desc); - - TableMetadata.Builder builder = TableMetadata.builder("keyspace", "table").partitioner(partitioner); - header.getStaticColumns().entrySet().stream() - .forEach(entry -> { - ColumnIdentifier ident = ColumnIdentifier.getInterned(UTF8Type.instance.getString(entry.getKey()), true); - builder.addStaticColumn(ident, entry.getValue()); - }); - header.getRegularColumns().entrySet().stream() - .forEach(entry -> { - ColumnIdentifier ident = ColumnIdentifier.getInterned(UTF8Type.instance.getString(entry.getKey()), true); - builder.addRegularColumn(ident, entry.getValue()); - }); - builder.addPartitionKeyColumn("PartitionKey", header.getKeyType()); - for (int i = 0; i < header.getClusteringTypes().size(); i++) - { - builder.addClusteringColumn("clustering" + (i > 0 ? i : ""), header.getClusteringTypes().get(i)); - } - return builder.build(); - } - - private static <T> Stream<T> iterToStream(Iterator<T> iter) - { - Spliterator<T> splititer = Spliterators.spliteratorUnknownSize(iter, Spliterator.IMMUTABLE); - return StreamSupport.stream(splititer, false); - } - - /** * Given arguments specifying an SSTable, and optionally an output file, export the contents of the SSTable to JSON. 
* * @param args @@ -171,12 +134,12 @@ public class SSTableExport Descriptor desc = Descriptor.fromFilename(ssTableFileName); try { - TableMetadata metadata = metadataFromSSTable(desc); + TableMetadata metadata = Util.metadataFromSSTable(desc); if (cmd.hasOption(ENUMERATE_KEYS_OPTION)) { try (KeyIterator iter = new KeyIterator(desc, metadata)) { - JsonTransformer.keysToJson(null, iterToStream(iter), + JsonTransformer.keysToJson(null, Util.iterToStream(iter), cmd.hasOption(RAW_TIMESTAMPS), metadata, System.out); @@ -202,7 +165,7 @@ public class SSTableExport { currentScanner = sstable.getScanner(); } - Stream<UnfilteredRowIterator> partitions = iterToStream(currentScanner).filter(i -> + Stream<UnfilteredRowIterator> partitions = Util.iterToStream(currentScanner).filter(i -> excludes.isEmpty() || !excludes.contains(metadata.partitionKeyType.getString(i.partitionKey().getKey())) ); if (cmd.hasOption(DEBUG_OUTPUT_OPTION)) http://git-wip-us.apache.org/repos/asf/cassandra/blob/6dfd11c3/src/java/org/apache/cassandra/tools/SSTableMetadataViewer.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/tools/SSTableMetadataViewer.java b/src/java/org/apache/cassandra/tools/SSTableMetadataViewer.java index d240465..1ff2cb6 100755 --- a/src/java/org/apache/cassandra/tools/SSTableMetadataViewer.java +++ b/src/java/org/apache/cassandra/tools/SSTableMetadataViewer.java @@ -17,26 +17,50 @@ */ package org.apache.cassandra.tools; -import java.io.*; +import static org.apache.cassandra.tools.Util.BLUE; +import static org.apache.cassandra.tools.Util.CYAN; +import static org.apache.cassandra.tools.Util.RESET; +import static org.apache.cassandra.tools.Util.WHITE; + +import java.io.DataInputStream; +import java.io.File; +import java.io.IOException; +import java.io.PrintStream; +import java.io.PrintWriter; import java.nio.ByteBuffer; import java.nio.file.Files; import java.util.Arrays; +import java.util.Comparator; import java.util.EnumSet; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.SerializationHeader; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.db.rows.EncodingStats; +import org.apache.cassandra.db.rows.Row; +import org.apache.cassandra.db.rows.Unfiltered; +import org.apache.cassandra.db.rows.UnfilteredRowIterator; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.io.compress.CompressionMetadata; import org.apache.cassandra.io.sstable.Component; import org.apache.cassandra.io.sstable.Descriptor; +import org.apache.cassandra.io.sstable.ISSTableScanner; import org.apache.cassandra.io.sstable.IndexSummary; -import org.apache.cassandra.io.sstable.metadata.*; +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.io.sstable.metadata.CompactionMetadata; +import org.apache.cassandra.io.sstable.metadata.MetadataComponent; +import org.apache.cassandra.io.sstable.metadata.MetadataType; +import org.apache.cassandra.io.sstable.metadata.StatsMetadata; +import org.apache.cassandra.io.sstable.metadata.ValidationMetadata; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.schema.TableMetadataRef; +import org.apache.cassandra.tools.Util.TermHistogram; import 
org.apache.cassandra.utils.FBUtilities; import org.apache.cassandra.utils.Pair; import org.apache.commons.cli.CommandLine; @@ -46,169 +70,411 @@ import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.cli.PosixParser; +import org.joda.time.Duration; +import org.joda.time.format.PeriodFormat; + +import com.google.common.collect.MinMaxPriorityQueue; /** * Shows the contents of sstable metadata */ public class SSTableMetadataViewer { - private static final String GCGS_KEY = "gc_grace_seconds"; + private static final Options options = new Options(); + private static CommandLine cmd; + private static final String COLORS = "c"; + private static final String UNICODE = "u"; + private static final String GCGS_KEY = "g"; + private static final String TIMESTAMP_UNIT = "t"; + private static final String SCAN = "s"; + private static Comparator<ValuedByteBuffer> VCOMP = Comparator.comparingLong(ValuedByteBuffer::getValue).reversed(); - /** - * @param args a list of sstables whose metadata we're interested in - */ - public static void main(String[] args) throws IOException + static { - PrintStream out = System.out; - Option optGcgs = new Option(null, GCGS_KEY, true, "The "+GCGS_KEY+" to use when calculating droppable tombstones"); + DatabaseDescriptor.clientInitialization(); + } - Options options = new Options(); - options.addOption(optGcgs); - CommandLine cmd = null; - CommandLineParser parser = new PosixParser(); - try + boolean color; + boolean unicode; + int gc; + PrintStream out; + String[] files; + TimeUnit tsUnit; + + public SSTableMetadataViewer() + { + this(true, true, 0, TimeUnit.MICROSECONDS, System.out); + } + + public SSTableMetadataViewer(boolean color, boolean unicode, int gc, TimeUnit tsUnit, PrintStream out) + { + this.color = color; + this.tsUnit = tsUnit; + this.unicode = unicode; + this.out = out; + this.gc = gc; + } + + public static String deletion(long time) + { + if (time == 0 || time == Integer.MAX_VALUE) { - cmd = parser.parse(options, args); + return "no tombstones"; } - catch (ParseException e) + return toDateString(time, TimeUnit.SECONDS); + } + + public static String toDateString(long time, TimeUnit unit) + { + if (time == 0) { - printHelp(options, out); + return null; } + return new java.text.SimpleDateFormat("MM/dd/yyyy HH:mm:ss").format(new java.util.Date(unit.toMillis(time))); + } - if (cmd.getArgs().length == 0) + public static String toDurationString(long duration, TimeUnit unit) + { + if (duration == 0) + { + return null; + } + else if (duration == Integer.MAX_VALUE) { - printHelp(options, out); + return "never"; } - int gcgs = Integer.parseInt(cmd.getOptionValue(GCGS_KEY, "0")); - Util.initDatabaseDescriptor(); + return PeriodFormat.getDefault().print(new Duration(unit.toMillis(duration)).toPeriod()); + } - for (String fname : cmd.getArgs()) + public static String toByteString(long bytes) + { + if (bytes == 0) + return null; + else if (bytes < 1024) + return bytes + " B"; + + int exp = (int) (Math.log(bytes) / Math.log(1024)); + char pre = "kMGTP".charAt(exp - 1); + return String.format("%.1f %sB", bytes / Math.pow(1024, exp), pre); + } + + public String scannedOverviewOutput(String key, long value) + { + StringBuilder sb = new StringBuilder(); + if (color) sb.append(CYAN); + sb.append('['); + if (color) sb.append(RESET); + sb.append(key); + if (color) sb.append(CYAN); + sb.append("] "); + if (color) sb.append(RESET); + sb.append(value); + return sb.toString(); + } + + 
private void printScannedOverview(Descriptor descriptor, StatsMetadata stats) throws IOException + { + TableMetadata cfm = Util.metadataFromSSTable(descriptor); + SSTableReader reader = SSTableReader.openNoValidation(descriptor, TableMetadataRef.forOfflineTools(cfm)); + try (ISSTableScanner scanner = reader.getScanner()) { - if (new File(fname).exists()) + long bytes = scanner.getLengthInBytes(); + MinMaxPriorityQueue<ValuedByteBuffer> widestPartitions = MinMaxPriorityQueue + .orderedBy(VCOMP) + .maximumSize(5) + .create(); + MinMaxPriorityQueue<ValuedByteBuffer> largestPartitions = MinMaxPriorityQueue + .orderedBy(VCOMP) + .maximumSize(5) + .create(); + MinMaxPriorityQueue<ValuedByteBuffer> mostTombstones = MinMaxPriorityQueue + .orderedBy(VCOMP) + .maximumSize(5) + .create(); + long partitionCount = 0; + long rowCount = 0; + long tombstoneCount = 0; + long cellCount = 0; + double totalCells = stats.totalColumnsSet; + int lastPercent = 0; + long lastPercentTime = 0; + while (scanner.hasNext()) { - Descriptor descriptor = Descriptor.fromFilename(fname); - Map<MetadataType, MetadataComponent> metadata = descriptor.getMetadataSerializer().deserialize(descriptor, EnumSet.allOf(MetadataType.class)); - ValidationMetadata validation = (ValidationMetadata) metadata.get(MetadataType.VALIDATION); - StatsMetadata stats = (StatsMetadata) metadata.get(MetadataType.STATS); - CompactionMetadata compaction = (CompactionMetadata) metadata.get(MetadataType.COMPACTION); - CompressionMetadata compression = null; - File compressionFile = new File(descriptor.filenameFor(Component.COMPRESSION_INFO)); - if (compressionFile.exists()) - compression = CompressionMetadata.create(fname); - SerializationHeader.Component header = (SerializationHeader.Component) metadata.get(MetadataType.HEADER); - - out.printf("SSTable: %s%n", descriptor); - if (validation != null) - { - out.printf("Partitioner: %s%n", validation.partitioner); - out.printf("Bloom Filter FP chance: %f%n", validation.bloomFilterFPChance); - } - if (stats != null) + try (UnfilteredRowIterator partition = scanner.next()) { - out.printf("Minimum timestamp: %s%n", stats.minTimestamp); - out.printf("Maximum timestamp: %s%n", stats.maxTimestamp); - out.printf("SSTable min local deletion time: %s%n", stats.minLocalDeletionTime); - out.printf("SSTable max local deletion time: %s%n", stats.maxLocalDeletionTime); - out.printf("Compressor: %s%n", compression != null ? 
compression.compressor().getClass().getName() : "-"); - if (compression != null) - out.printf("Compression ratio: %s%n", stats.compressionRatio); - out.printf("TTL min: %s%n", stats.minTTL); - out.printf("TTL max: %s%n", stats.maxTTL); - - if (validation != null && header != null) - printMinMaxToken(descriptor, FBUtilities.newPartitioner(descriptor), header.getKeyType(), out); - - if (header != null && header.getClusteringTypes().size() == stats.minClusteringValues.size()) + + long psize = 0; + long pcount = 0; + int ptombcount = 0; + partitionCount++; + if (!partition.staticRow().isEmpty()) + { + rowCount++; + pcount++; + psize += partition.staticRow().dataSize(); + } + if (!partition.partitionLevelDeletion().isLive()) { - List<AbstractType<?>> clusteringTypes = header.getClusteringTypes(); - List<ByteBuffer> minClusteringValues = stats.minClusteringValues; - List<ByteBuffer> maxClusteringValues = stats.maxClusteringValues; - String[] minValues = new String[clusteringTypes.size()]; - String[] maxValues = new String[clusteringTypes.size()]; - for (int i = 0; i < clusteringTypes.size(); i++) + tombstoneCount++; + ptombcount++; + } + while (partition.hasNext()) + { + Unfiltered unfiltered = partition.next(); + switch (unfiltered.kind()) { - minValues[i] = clusteringTypes.get(i).getString(minClusteringValues.get(i)); - maxValues[i] = clusteringTypes.get(i).getString(maxClusteringValues.get(i)); + case ROW: + rowCount++; + Row row = (Row) unfiltered; + psize += row.dataSize(); + pcount++; + for (org.apache.cassandra.db.rows.Cell cell : row.cells()) + { + cellCount++; + double percentComplete = Math.min(1.0, cellCount / totalCells); + if (lastPercent != (int) (percentComplete * 100) && + (System.currentTimeMillis() - lastPercentTime) > 1000) + { + lastPercentTime = System.currentTimeMillis(); + lastPercent = (int) (percentComplete * 100); + if (color) + out.printf("\r%sAnalyzing SSTable... %s%s %s(%%%s)", BLUE, CYAN, + Util.progress(percentComplete, 30, unicode), + RESET, + (int) (percentComplete * 100)); + else + out.printf("\rAnalyzing SSTable... 
%s (%%%s)", + Util.progress(percentComplete, 30, unicode), + (int) (percentComplete * 100)); + out.flush(); + } + if (cell.isTombstone()) + { + tombstoneCount++; + ptombcount++; + } + } + break; + case RANGE_TOMBSTONE_MARKER: + tombstoneCount++; + ptombcount++; + break; } - out.printf("minClusteringValues: %s%n", Arrays.toString(minValues)); - out.printf("maxClusteringValues: %s%n", Arrays.toString(maxValues)); } - out.printf("Estimated droppable tombstones: %s%n", stats.getEstimatedDroppableTombstoneRatio((int) (System.currentTimeMillis() / 1000) - gcgs)); - out.printf("SSTable Level: %d%n", stats.sstableLevel); - out.printf("Repaired at: %d%n", stats.repairedAt); - out.printf("Pending repair: %s%n", stats.pendingRepair); - out.printf("Replay positions covered: %s%n", stats.commitLogIntervals); - out.printf("totalColumnsSet: %s%n", stats.totalColumnsSet); - out.printf("totalRows: %s%n", stats.totalRows); - out.println("Estimated tombstone drop times:"); - - stats.estimatedTombstoneDropTime.forEach((point, value) -> { - out.printf("%-10s:%10s%n", point, value); - }); - printHistograms(stats, out); - } - if (compaction != null) - { - out.printf("Estimated cardinality: %s%n", compaction.cardinalityEstimator.cardinality()); - } - if (header != null) - { - EncodingStats encodingStats = header.getEncodingStats(); - AbstractType<?> keyType = header.getKeyType(); - List<AbstractType<?>> clusteringTypes = header.getClusteringTypes(); - Map<ByteBuffer, AbstractType<?>> staticColumns = header.getStaticColumns(); - Map<String, String> statics = staticColumns.entrySet().stream() - .collect(Collectors.toMap( - e -> UTF8Type.instance.getString(e.getKey()), - e -> e.getValue().toString())); - Map<ByteBuffer, AbstractType<?>> regularColumns = header.getRegularColumns(); - Map<String, String> regulars = regularColumns.entrySet().stream() - .collect(Collectors.toMap( - e -> UTF8Type.instance.getString(e.getKey()), - e -> e.getValue().toString())); - - out.printf("EncodingStats minTTL: %s%n", encodingStats.minTTL); - out.printf("EncodingStats minLocalDeletionTime: %s%n", encodingStats.minLocalDeletionTime); - out.printf("EncodingStats minTimestamp: %s%n", encodingStats.minTimestamp); - out.printf("KeyType: %s%n", keyType.toString()); - out.printf("ClusteringTypes: %s%n", clusteringTypes.toString()); - out.printf("StaticColumns: {%s}%n", FBUtilities.toString(statics)); - out.printf("RegularColumns: {%s}%n", FBUtilities.toString(regulars)); + + widestPartitions.add(new ValuedByteBuffer(partition.partitionKey().getKey(), pcount)); + largestPartitions.add(new ValuedByteBuffer(partition.partitionKey().getKey(), psize)); + mostTombstones.add(new ValuedByteBuffer(partition.partitionKey().getKey(), ptombcount)); } } - else + + out.printf("\r%80s\r", " "); + field("Size", bytes); + field("Partitions", partitionCount); + field("Rows", rowCount); + field("Tombstones", tombstoneCount); + field("Cells", cellCount); + field("Widest Partitions", ""); + Util.iterToStream(widestPartitions.iterator()).sorted(VCOMP).forEach(p -> + { + out.println(" " + scannedOverviewOutput(cfm.partitionKeyType.getString(p.buffer), p.value)); + }); + field("Largest Partitions", ""); + Util.iterToStream(largestPartitions.iterator()).sorted(VCOMP).forEach(p -> + { + out.print(" "); + out.print(scannedOverviewOutput(cfm.partitionKeyType.getString(p.buffer), p.value)); + if (color) + out.print(WHITE); + out.print(" ("); + out.print(toByteString(p.value)); + out.print(")"); + if (color) + out.print(RESET); + out.println(); + }); + StringBuilder 
tleaders = new StringBuilder(); + Util.iterToStream(mostTombstones.iterator()).sorted(VCOMP).forEach(p -> + { + if (p.value > 0) + { + tleaders.append(" "); + tleaders.append(scannedOverviewOutput(cfm.partitionKeyType.getString(p.buffer), p.value)); + tleaders.append(System.lineSeparator()); + } + }); + String tombstoneLeaders = tleaders.toString(); + if (tombstoneLeaders.length() > 10) { - out.println("No such file: " + fname); + field("Tombstone Leaders", ""); + out.print(tombstoneLeaders); } } + finally + { + reader.selfRef().ensureReleased(); + } } - private static void printHelp(Options options, PrintStream out) + private void printSStableMetadata(String fname, boolean scan) throws IOException { - out.println(); - new HelpFormatter().printHelp("Usage: sstablemetadata [--"+GCGS_KEY+" n] <sstable filenames>", "Dump contents of given SSTable to standard output in JSON format.", options, ""); - System.exit(1); + Descriptor descriptor = Descriptor.fromFilename(fname); + Map<MetadataType, MetadataComponent> metadata = descriptor.getMetadataSerializer() + .deserialize(descriptor, EnumSet.allOf(MetadataType.class)); + ValidationMetadata validation = (ValidationMetadata) metadata.get(MetadataType.VALIDATION); + StatsMetadata stats = (StatsMetadata) metadata.get(MetadataType.STATS); + CompactionMetadata compaction = (CompactionMetadata) metadata.get(MetadataType.COMPACTION); + CompressionMetadata compression = null; + File compressionFile = new File(descriptor.filenameFor(Component.COMPRESSION_INFO)); + if (compressionFile.exists()) + compression = CompressionMetadata.create(fname); + SerializationHeader.Component header = (SerializationHeader.Component) metadata + .get(MetadataType.HEADER); + + field("SSTable", descriptor); + if (scan && descriptor.version.getVersion().compareTo("ma") >= 0) + { + printScannedOverview(descriptor, stats); + } + if (validation != null) + { + field("Partitioner", validation.partitioner); + field("Bloom Filter FP chance", validation.bloomFilterFPChance); + } + if (stats != null) + { + field("Minimum timestamp", stats.minTimestamp, toDateString(stats.minTimestamp, tsUnit)); + field("Maximum timestamp", stats.maxTimestamp, toDateString(stats.maxTimestamp, tsUnit)); + field("SSTable min local deletion time", stats.minLocalDeletionTime, deletion(stats.minLocalDeletionTime)); + field("SSTable max local deletion time", stats.maxLocalDeletionTime, deletion(stats.maxLocalDeletionTime)); + field("Compressor", compression != null ? 
compression.compressor().getClass().getName() : "-"); + if (compression != null) + field("Compression ratio", stats.compressionRatio); + field("TTL min", stats.minTTL, toDurationString(stats.minTTL, TimeUnit.SECONDS)); + field("TTL max", stats.maxTTL, toDurationString(stats.maxTTL, TimeUnit.SECONDS)); + + if (validation != null && header != null) + printMinMaxToken(descriptor, FBUtilities.newPartitioner(descriptor), header.getKeyType()); + + if (header != null && header.getClusteringTypes().size() == stats.minClusteringValues.size()) + { + List<AbstractType<?>> clusteringTypes = header.getClusteringTypes(); + List<ByteBuffer> minClusteringValues = stats.minClusteringValues; + List<ByteBuffer> maxClusteringValues = stats.maxClusteringValues; + String[] minValues = new String[clusteringTypes.size()]; + String[] maxValues = new String[clusteringTypes.size()]; + for (int i = 0; i < clusteringTypes.size(); i++) + { + minValues[i] = clusteringTypes.get(i).getString(minClusteringValues.get(i)); + maxValues[i] = clusteringTypes.get(i).getString(maxClusteringValues.get(i)); + } + field("minClusteringValues", Arrays.toString(minValues)); + field("maxClusteringValues", Arrays.toString(maxValues)); + } + field("Estimated droppable tombstones", + stats.getEstimatedDroppableTombstoneRatio((int) (System.currentTimeMillis() / 1000) - this.gc)); + field("SSTable Level", stats.sstableLevel); + field("Repaired at", stats.repairedAt, toDateString(stats.repairedAt, TimeUnit.MILLISECONDS)); + field("Pending repair", stats.pendingRepair); + field("Replay positions covered", stats.commitLogIntervals); + field("totalColumnsSet", stats.totalColumnsSet); + field("totalRows", stats.totalRows); + field("Estimated tombstone drop times", ""); + + TermHistogram estDropped = new TermHistogram(stats.estimatedTombstoneDropTime, + "Drop Time", + offset -> String.format("%d %s", + offset, + Util.wrapQuiet(toDateString(offset, TimeUnit.SECONDS), + color)), + Object::toString); + estDropped.printHistogram(out, color, unicode); + field("Partition Size", ""); + TermHistogram rowSize = new TermHistogram(stats.estimatedPartitionSize, + "Size (bytes)", + offset -> String.format("%d %s", + offset, + Util.wrapQuiet(toByteString(offset), color)), + Object::toString); + rowSize.printHistogram(out, color, unicode); + field("Column Count", ""); + TermHistogram cellCount = new TermHistogram(stats.estimatedColumnCount, + "Columns", + Object::toString, + Object::toString); + cellCount.printHistogram(out, color, unicode); + } + if (compaction != null) + { + field("Estimated cardinality", compaction.cardinalityEstimator.cardinality()); + } + if (header != null) + { + EncodingStats encodingStats = header.getEncodingStats(); + AbstractType<?> keyType = header.getKeyType(); + List<AbstractType<?>> clusteringTypes = header.getClusteringTypes(); + Map<ByteBuffer, AbstractType<?>> staticColumns = header.getStaticColumns(); + Map<String, String> statics = staticColumns.entrySet().stream() + .collect(Collectors.toMap(e -> UTF8Type.instance.getString(e.getKey()), + e -> e.getValue().toString())); + Map<ByteBuffer, AbstractType<?>> regularColumns = header.getRegularColumns(); + Map<String, String> regulars = regularColumns.entrySet().stream() + .collect(Collectors.toMap(e -> UTF8Type.instance.getString(e.getKey()), + e -> e.getValue().toString())); + + field("EncodingStats minTTL", encodingStats.minTTL, + toDurationString(encodingStats.minTTL, TimeUnit.SECONDS)); + field("EncodingStats minLocalDeletionTime", encodingStats.minLocalDeletionTime, + 
toDateString(encodingStats.minLocalDeletionTime, TimeUnit.MILLISECONDS)); + field("EncodingStats minTimestamp", encodingStats.minTimestamp, + toDateString(encodingStats.minTimestamp, tsUnit)); + field("KeyType", keyType.toString()); + field("ClusteringTypes", clusteringTypes.toString()); + field("StaticColumns", FBUtilities.toString(statics)); + field("RegularColumns", FBUtilities.toString(regulars)); + } + } + + private void field(String field, Object value) + { + field(field, value, null); } - private static void printHistograms(StatsMetadata metadata, PrintStream out) + private void field(String field, Object value, String comment) { - long[] offsets = metadata.estimatedPartitionSize.getBucketOffsets(); - long[] ersh = metadata.estimatedPartitionSize.getBuckets(false); - long[] ecch = metadata.estimatedColumnCount.getBuckets(false); + StringBuilder sb = new StringBuilder(); + if (color) sb.append(BLUE); + sb.append(field); + if (color) sb.append(CYAN); + sb.append(": "); + if (color) sb.append(RESET); + sb.append(value == null? "--" : value.toString()); - out.println(String.format("%-10s%18s%18s", - "Count", "Row Size", "Cell Count")); + if (comment != null) + { + if (color) sb.append(WHITE); + sb.append(" ("); + sb.append(comment); + sb.append(")"); + if (color) sb.append(RESET); + } + this.out.println(sb.toString()); + } - for (int i = 0; i < offsets.length; i++) + private static void printUsage() + { + try (PrintWriter errWriter = new PrintWriter(System.err, true)) { - out.println(String.format("%-10d%18s%18s", - offsets[i], - (i < ersh.length ? ersh[i] : ""), - (i < ecch.length ? ecch[i] : ""))); + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp(errWriter, 120, "sstablemetadata <options> <sstable...>", + String.format("%nDump information about SSTable[s] for Apache Cassandra 3.x%nOptions:"), + options, 2, 1, "", true); + errWriter.println(); } } - private static void printMinMaxToken(Descriptor descriptor, IPartitioner partitioner, AbstractType<?> keyType, PrintStream out) throws IOException + private void printMinMaxToken(Descriptor descriptor, IPartitioner partitioner, AbstractType<?> keyType) + throws IOException { File summariesFile = new File(descriptor.filenameFor(Component.SUMMARY)); if (!summariesFile.exists()) @@ -216,10 +482,90 @@ public class SSTableMetadataViewer try (DataInputStream iStream = new DataInputStream(Files.newInputStream(summariesFile.toPath()))) { - Pair<DecoratedKey, DecoratedKey> firstLast = new IndexSummary.IndexSummarySerializer().deserializeFirstLastKey(iStream, partitioner); - out.printf("First token: %s (key=%s)%n", firstLast.left.getToken(), keyType.getString(firstLast.left.getKey())); - out.printf("Last token: %s (key=%s)%n", firstLast.right.getToken(), keyType.getString(firstLast.right.getKey())); + Pair<DecoratedKey, DecoratedKey> firstLast = new IndexSummary.IndexSummarySerializer() + .deserializeFirstLastKey(iStream, partitioner); + field("First token", firstLast.left.getToken(), keyType.getString(firstLast.left.getKey())); + field("Last token", firstLast.right.getToken(), keyType.getString(firstLast.right.getKey())); } } + /** + * @param args + * a list of sstables whose metadata we're interested in + */ + public static void main(String[] args) throws IOException + { + CommandLineParser parser = new PosixParser(); + + Option disableColors = new Option(COLORS, "colors", false, "Use ANSI color sequences"); + disableColors.setOptionalArg(true); + options.addOption(disableColors); + Option unicode = new Option(UNICODE, 
"unicode", false, "Use unicode to draw histograms and progress bars"); + unicode.setOptionalArg(true); + + options.addOption(unicode); + Option gcgs = new Option(GCGS_KEY, "gc_grace_seconds", true, "Time to use when calculating droppable tombstones"); + gcgs.setOptionalArg(true); + options.addOption(gcgs); + Option tsUnit = new Option(TIMESTAMP_UNIT, "timestamp_unit", true, "Time unit that cell timestamps are written with"); + tsUnit.setOptionalArg(true); + options.addOption(tsUnit); + + Option scanEnabled = new Option(SCAN, "scan", false, + "Full sstable scan for additional details. Only available in 3.0+ sstables. Defaults: false"); + scanEnabled.setOptionalArg(true); + options.addOption(scanEnabled); + try + { + cmd = parser.parse(options, args); + } + catch (ParseException e1) + { + System.err.println(e1.getMessage()); + printUsage(); + System.exit(1); + } + + if (cmd.getArgs().length < 1) + { + System.err.println("You must supply at least one sstable"); + printUsage(); + System.exit(1); + } + boolean enabledColors = cmd.hasOption(COLORS); + boolean enabledUnicode = cmd.hasOption(UNICODE); + boolean fullScan = cmd.hasOption(SCAN); + int gc = Integer.parseInt(cmd.getOptionValue(GCGS_KEY, "0")); + TimeUnit ts = TimeUnit.valueOf(cmd.getOptionValue(TIMESTAMP_UNIT, "MICROSECONDS")); + SSTableMetadataViewer metawriter = new SSTableMetadataViewer(enabledColors, enabledUnicode, gc, ts, System.out); + for (String fname : cmd.getArgs()) + { + File sstable = new File(fname); + if (sstable.exists()) + { + metawriter.printSStableMetadata(sstable.getAbsolutePath(), fullScan); + } + else + { + System.out.println("No such file: " + fname); + } + } + } + + private static class ValuedByteBuffer + { + public long value; + public ByteBuffer buffer; + + public ValuedByteBuffer(ByteBuffer buffer, long value) + { + this.value = value; + this.buffer = buffer; + } + + public long getValue() + { + return value; + } + } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/6dfd11c3/src/java/org/apache/cassandra/tools/Util.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/tools/Util.java b/src/java/org/apache/cassandra/tools/Util.java index 76011a9..7a6f698 100644 --- a/src/java/org/apache/cassandra/tools/Util.java +++ b/src/java/org/apache/cassandra/tools/Util.java @@ -18,11 +18,250 @@ package org.apache.cassandra.tools; +import static java.lang.String.format; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.EnumSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.TreeMap; +import java.util.Map.Entry; +import java.util.function.Function; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.db.SerializationHeader; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.io.sstable.Descriptor; +import org.apache.cassandra.io.sstable.metadata.MetadataComponent; +import org.apache.cassandra.io.sstable.metadata.MetadataType; +import org.apache.cassandra.schema.TableMetadata; +import org.apache.cassandra.utils.EstimatedHistogram; +import org.apache.cassandra.utils.FBUtilities; +import 
org.apache.cassandra.utils.streamhist.TombstoneHistogram; + +import com.google.common.base.Strings; +import com.google.common.collect.Lists; +@SuppressWarnings("serial") public final class Util { + static final String RESET = "\u001B[0m"; + static final String BLUE = "\u001B[34m"; + static final String CYAN = "\u001B[36m"; + static final String WHITE = "\u001B[37m"; + private static final List<String> ANSI_COLORS = Lists.newArrayList(RESET, BLUE, CYAN, WHITE); + + private static final String FULL_BAR_UNICODE = Strings.repeat("\u2593", 30); + private static final String EMPTY_BAR_UNICODE = Strings.repeat("\u2591", 30); + private static final String FULL_BAR_ASCII = Strings.repeat("#", 30); + private static final String EMPTY_BAR_ASCII = Strings.repeat("-", 30); + + private static final TreeMap<Double, String> BARS_UNICODE = new TreeMap<Double, String>() + {{ + this.put(1.0, "\u2589"); // full, actually using 7/8th for bad font impls of fullblock + this.put(7.0 / 8.0, "\u2589"); // 7/8ths left block + this.put(3.0 / 4.0, "\u258A"); // 3/4th block + this.put(5.0 / 8.0, "\u258B"); // 5/8th + this.put(3.0 / 8.0, "\u258D"); // three eighths, skips 1/2 due to font inconsistencies + this.put(1.0 / 4.0, "\u258E"); // 1/4th + this.put(1.0 / 8.0, "\u258F"); // 1/8th + }}; + + private static final TreeMap<Double, String> BARS_ASCII = new TreeMap<Double, String>() + {{ + this.put(1.00, "O"); + this.put(0.75, "o"); + this.put(0.30, "."); + }}; + + private static TreeMap<Double, String> barmap(boolean unicode) + { + return unicode ? BARS_UNICODE : BARS_ASCII; + } + + public static String progress(double percentComplete, int width, boolean unicode) + { + assert percentComplete >= 0 && percentComplete <= 1; + int cols = (int) (percentComplete * width); + return (unicode ? FULL_BAR_UNICODE : FULL_BAR_ASCII).substring(width - cols) + + (unicode ? EMPTY_BAR_UNICODE : EMPTY_BAR_ASCII ).substring(cols); + } + + public static String stripANSI(String string) + { + return ANSI_COLORS.stream().reduce(string, (a, b) -> a.replace(b, "")); + } + + public static int countANSI(String string) + { + return string.length() - stripANSI(string).length(); + } + + public static String wrapQuiet(String toWrap, boolean color) + { + if (Strings.isNullOrEmpty(toWrap)) + { + return ""; + } + StringBuilder sb = new StringBuilder(); + if (color) sb.append(WHITE); + sb.append("("); + sb.append(toWrap); + sb.append(")"); + if (color) sb.append(RESET); + return sb.toString(); + } + + public static class TermHistogram + { + public long max; + public long min; + public double sum; + int maxCountLength = 5; + int maxOffsetLength = 5; + Map<? extends Number, Long> histogram; + Function<Long, String> offsetName; + Function<Long, String> countName; + String title; + + public TermHistogram(Map<? 
extends Number, Long> histogram, + String title, + Function<Long, String> offsetName, + Function<Long, String> countName) + { + this.offsetName = offsetName; + this.countName = countName; + this.histogram = histogram; + this.title = title; + maxOffsetLength = title.length(); + histogram.entrySet().stream().forEach(e -> + { + max = Math.max(max, e.getValue()); + min = Math.min(min, e.getValue()); + sum += e.getValue(); + // find max width, but remove ansi sequences first + maxCountLength = Math.max(maxCountLength, stripANSI(countName.apply(e.getValue())).length()); + maxOffsetLength = Math.max(maxOffsetLength, stripANSI(offsetName.apply(e.getKey().longValue())).length()); + }); + } + + public TermHistogram(TombstoneHistogram histogram, + String title, + Function<Long, String> offsetName, + Function<Long, String> countName) + { + this(new TreeMap<Number, Long>() + { + { + histogram.forEach((point, value) -> { + this.put(point, (long) value); + }); + } + }, title, offsetName, countName); + } + + public TermHistogram(EstimatedHistogram histogram, + String title, + Function<Long, String> offsetName, + Function<Long, String> countName) + { + this(new TreeMap<Number, Long>() + { + { + long[] counts = histogram.getBuckets(false); + long[] offsets = histogram.getBucketOffsets(); + for (int i = 0; i < counts.length; i++) + { + long e = counts[i]; + if (e > 0) + { + put(offsets[i], e); + } + } + } + }, title, offsetName, countName); + } + + public String bar(long count, int length, String color, boolean unicode) + { + if (color == null) color = ""; + StringBuilder sb = new StringBuilder(color); + long barVal = count; + int intWidth = (int) (barVal * 1.0 / max * length); + double remainderWidth = (barVal * 1.0 / max * length) - intWidth; + sb.append(Strings.repeat(barmap(unicode).get(1.0), intWidth)); + + if (barmap(unicode).floorKey(remainderWidth) != null) + sb.append(barmap(unicode).get(barmap(unicode).floorKey(remainderWidth))); + + if(!Strings.isNullOrEmpty(color)) + sb.append(RESET); + + return sb.toString(); + } + + public void printHistogram(PrintStream out, boolean color, boolean unicode) + { + // String.format includes ansi sequences in the count, so need to modify the lengths + int offsetTitleLength = color ? maxOffsetLength + BLUE.length() : maxOffsetLength; + out.printf(" %-" + offsetTitleLength + "s %s %-" + maxCountLength + "s %s %sHistogram%s %n", + color ? BLUE + title : title, + color ? CYAN + "|" + BLUE : "|", + "Count", + wrapQuiet("%", color), + color ? BLUE : "", + color ? RESET : ""); + histogram.entrySet().stream().forEach(e -> + { + String offset = offsetName.apply(e.getKey().longValue()); + long count = e.getValue(); + String histo = bar(count, 30, color? WHITE : null, unicode); + int mol = color ? maxOffsetLength + countANSI(offset) : maxOffsetLength; + int mcl = color ? maxCountLength + countANSI(countName.apply(count)) : maxCountLength; + out.printf(" %-" + mol + "s %s %" + mcl + "s %s %s%n", + offset, + color ? CYAN + "|" + RESET : "|", + countName.apply(count), + wrapQuiet(String.format("%3s", (int) (100 * ((double) count / sum))), color), + histo); + }); + EstimatedHistogram eh = new EstimatedHistogram(165); + for (Entry<? 
extends Number, Long> e : histogram.entrySet()) + { + eh.add(e.getKey().longValue(), e.getValue()); + } + String[] percentiles = new String[]{"50th", "75th", "95th", "98th", "99th", "Min", "Max"}; + long[] data = new long[] + { + eh.percentile(.5), + eh.percentile(.75), + eh.percentile(.95), + eh.percentile(.98), + eh.percentile(.99), + eh.min(), + eh.max(), + }; + out.println((color ? BLUE : "") + " Percentiles" + (color ? RESET : "")); + + for (int i = 0; i < percentiles.length; i++) + { + out.println(format(" %s%-10s%s%s", + (color ? BLUE : ""), + percentiles[i], + (color ? RESET : ""), + offsetName.apply(data[i]))); + } + } + } private Util() { } @@ -54,4 +293,47 @@ public final class Util } } } + + public static <T> Stream<T> iterToStream(Iterator<T> iter) + { + Spliterator<T> splititer = Spliterators.spliteratorUnknownSize(iter, Spliterator.IMMUTABLE); + return StreamSupport.stream(splititer, false); + } + + /** + * Construct table schema from info stored in SSTable's Stats.db + * + * @param desc SSTable's descriptor + * @return Restored CFMetaData + * @throws IOException when Stats.db cannot be read + */ + public static TableMetadata metadataFromSSTable(Descriptor desc) throws IOException + { + if (desc.version.getVersion().compareTo("ma") < 0) + throw new IOException("pre-3.0 SSTable is not supported."); + + EnumSet<MetadataType> types = EnumSet.of(MetadataType.STATS, MetadataType.HEADER); + Map<MetadataType, MetadataComponent> sstableMetadata = desc.getMetadataSerializer().deserialize(desc, types); + SerializationHeader.Component header = (SerializationHeader.Component) sstableMetadata.get(MetadataType.HEADER); + + IPartitioner partitioner = FBUtilities.newPartitioner(desc); + + TableMetadata.Builder builder = TableMetadata.builder("keyspace", "table").partitioner(partitioner); + header.getStaticColumns().entrySet().stream() + .forEach(entry -> { + ColumnIdentifier ident = ColumnIdentifier.getInterned(UTF8Type.instance.getString(entry.getKey()), true); + builder.addStaticColumn(ident, entry.getValue()); + }); + header.getRegularColumns().entrySet().stream() + .forEach(entry -> { + ColumnIdentifier ident = ColumnIdentifier.getInterned(UTF8Type.instance.getString(entry.getKey()), true); + builder.addRegularColumn(ident, entry.getValue()); + }); + builder.addPartitionKeyColumn("PartitionKey", header.getKeyType()); + for (int i = 0; i < header.getClusteringTypes().size(); i++) + { + builder.addClusteringColumn("clustering" + (i > 0 ? i : ""), header.getClusteringTypes().get(i)); + } + return builder.build(); + } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/6dfd11c3/src/java/org/apache/cassandra/utils/EstimatedHistogram.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/EstimatedHistogram.java b/src/java/org/apache/cassandra/utils/EstimatedHistogram.java index 0914a58..f25cc1e 100644 --- a/src/java/org/apache/cassandra/utils/EstimatedHistogram.java +++ b/src/java/org/apache/cassandra/utils/EstimatedHistogram.java @@ -112,11 +112,7 @@ public class EstimatedHistogram return bucketOffsets; } - /** - * Increments the count of the bucket closest to n, rounding UP. 
-     * @param n
-     */
-    public void add(long n)
+    private int findIndex(long n)
     {
         int index = Arrays.binarySearch(bucketOffsets, n);
         if (index < 0)
@@ -124,8 +120,25 @@
             // inexact match, take the first bucket higher than n
             index = -index - 1;
         }
-        // else exact match; we're good
-        buckets.incrementAndGet(index);
+        return index;
+    }
+
+    /**
+     * Increments the count of the bucket closest to n, rounding UP.
+     * @param n
+     */
+    public void add(long n)
+    {
+        buckets.incrementAndGet(findIndex(n));
+    }
+
+    /**
+     * Increments the count of the bucket closest to n by delta, rounding UP.
+     * @param n
+     */
+    public void add(long n, long delta)
+    {
+        buckets.addAndGet(findIndex(n), delta);
     }
 
     /**
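----------------------------------------------------------------------
Usage note (not part of the commit): the rewritten sstablemetadata accepts several new flags: -c/--colors (ANSI color output), -u/--unicode (unicode histograms and progress bars), -g/--gc_grace_seconds (grace period used for the droppable-tombstone estimate), -t/--timestamp_unit (unit the cell timestamps were written with), and -s/--scan (full sstable scan, only for "ma"/3.0+ format sstables). A minimal sketch of driving the tool through its public main(); the wrapper class name and the sstable path are placeholders:

import org.apache.cassandra.tools.SSTableMetadataViewer;

public class SSTableMetadataViewerExample
{
    public static void main(String[] args) throws Exception
    {
        // Equivalent to: sstablemetadata -c -u -s -t MICROSECONDS -g 864000 <sstable>
        SSTableMetadataViewer.main(new String[]
        {
            "-c",                 // ANSI color sequences
            "-u",                 // unicode histograms and progress bars
            "-s",                 // full scan: widest/largest partitions, tombstone leaders
            "-t", "MICROSECONDS", // unit the cell timestamps were written with
            "-g", "864000",       // gc_grace_seconds for the droppable-tombstone ratio
            "/var/lib/cassandra/data/ks/tbl/mc-1-big-Data.db" // placeholder path
        });
    }
}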
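----------------------------------------------------------------------
The histogram rendering added in Util.TermHistogram, together with the new EstimatedHistogram.add(long, long) overload, can also be used on its own. A small self-contained sketch, assuming plain toString() formatting for offsets and counts; the wrapper class name is illustrative:

import java.util.function.Function;

import org.apache.cassandra.tools.Util.TermHistogram;
import org.apache.cassandra.utils.EstimatedHistogram;

public class TermHistogramExample
{
    public static void main(String[] args)
    {
        EstimatedHistogram histogram = new EstimatedHistogram(165);
        // New overload: bump the bucket nearest each point by a delta in one call
        // instead of calling add(point) once per occurrence.
        histogram.add(100, 12);
        histogram.add(4000, 3);
        histogram.add(1000000, 1);

        // Render it the way sstablemetadata now prints "Partition Size" and "Column Count":
        // offsets and counts are passed through the two formatting functions.
        Function<Long, String> plain = Object::toString;
        TermHistogram term = new TermHistogram(histogram, "Size (bytes)", plain, plain);
        term.printHistogram(System.out, false /* color */, false /* unicode */);
    }
}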
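----------------------------------------------------------------------
The schema-reconstruction helper that moved from SSTableExport into Util is what lets the -s scan run without a live schema: it rebuilds a synthetic "keyspace"."table" TableMetadata from the sstable's STATS and HEADER metadata components. A rough sketch of an offline partition count along the same lines as printScannedOverview; the wrapper class is an assumption and args[0] must point at a 3.0+ ("ma" or later) Data.db file:

import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.db.rows.UnfilteredRowIterator;
import org.apache.cassandra.io.sstable.Descriptor;
import org.apache.cassandra.io.sstable.ISSTableScanner;
import org.apache.cassandra.io.sstable.format.SSTableReader;
import org.apache.cassandra.schema.TableMetadata;
import org.apache.cassandra.schema.TableMetadataRef;
import org.apache.cassandra.tools.Util;

public class OfflinePartitionCount
{
    public static void main(String[] args) throws Exception
    {
        // Offline-tool initialization, as in SSTableMetadataViewer's static block.
        DatabaseDescriptor.clientInitialization();

        Descriptor desc = Descriptor.fromFilename(args[0]);
        // Synthetic schema recovered from Stats.db / the serialization header.
        TableMetadata metadata = Util.metadataFromSSTable(desc);
        SSTableReader reader = SSTableReader.openNoValidation(desc, TableMetadataRef.forOfflineTools(metadata));
        try (ISSTableScanner scanner = reader.getScanner())
        {
            long partitions = 0;
            while (scanner.hasNext())
            {
                try (UnfilteredRowIterator partition = scanner.next())
                {
                    partitions++;
                }
            }
            System.out.println("Partitions: " + partitions);
        }
        finally
        {
            reader.selfRef().ensureReleased();
        }
    }
}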
