[
https://issues.apache.org/jira/browse/CASSANDRA-13848?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Kevin Wern updated CASSANDRA-13848:
-----------------------------------
Reviewer: Jeff Jirsa
Status: Patch Available (was: Open)
From 834cab8a0a67dbbefa608ddd47109bb9883025a2 Mon Sep 17 00:00:00 2001
From: Kevin Wern <[email protected]>
Date: Mon, 9 Oct 2017 04:26:25 -0400
Subject: [PATCH] sstabledump: add -l option for jsonl
---
.../apache/cassandra/tools/JsonTransformer.java | 35 +++++++++++++++++-----
.../org/apache/cassandra/tools/SSTableExport.java | 8 +++++
2 files changed, 36 insertions(+), 7 deletions(-)
diff --git a/src/java/org/apache/cassandra/tools/JsonTransformer.java
b/src/java/org/apache/cassandra/tools/JsonTransformer.java
index e6aaf07..0c7ed7e 100644
--- a/src/java/org/apache/cassandra/tools/JsonTransformer.java
+++ b/src/java/org/apache/cassandra/tools/JsonTransformer.java
@@ -56,6 +56,7 @@ import org.codehaus.jackson.JsonGenerator;
import org.codehaus.jackson.impl.Indenter;
import org.codehaus.jackson.util.DefaultPrettyPrinter.NopIndenter;
import org.codehaus.jackson.util.DefaultPrettyPrinter;
+import org.codehaus.jackson.util.MinimalPrettyPrinter;
public final class JsonTransformer
{
@@ -78,17 +79,26 @@ public final class JsonTransformer
private long currentPosition = 0;
- private JsonTransformer(JsonGenerator json, ISSTableScanner
currentScanner, boolean rawTime, TableMetadata metadata)
+ private JsonTransformer(JsonGenerator json, ISSTableScanner
currentScanner, boolean rawTime, TableMetadata metadata, boolean isJsonLines)
{
this.json = json;
this.metadata = metadata;
this.currentScanner = currentScanner;
this.rawTime = rawTime;
- DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter();
- prettyPrinter.indentObjectsWith(objectIndenter);
- prettyPrinter.indentArraysWith(arrayIndenter);
- json.setPrettyPrinter(prettyPrinter);
+ if (isJsonLines)
+ {
+ MinimalPrettyPrinter minimalPrettyPrinter = new
MinimalPrettyPrinter();
+ minimalPrettyPrinter.setRootValueSeparator("\n");
+ json.setPrettyPrinter(minimalPrettyPrinter);
+ }
+ else
+ {
+ DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter();
+ prettyPrinter.indentObjectsWith(objectIndenter);
+ prettyPrinter.indentArraysWith(arrayIndenter);
+ json.setPrettyPrinter(prettyPrinter);
+ }
}
public static void toJson(ISSTableScanner currentScanner,
Stream<UnfilteredRowIterator> partitions, boolean rawTime, TableMetadata
metadata, OutputStream out)
@@ -96,18 +106,28 @@ public final class JsonTransformer
{
try (JsonGenerator json = jsonFactory.createJsonGenerator(new
OutputStreamWriter(out, StandardCharsets.UTF_8)))
{
- JsonTransformer transformer = new JsonTransformer(json,
currentScanner, rawTime, metadata);
+ JsonTransformer transformer = new JsonTransformer(json,
currentScanner, rawTime, metadata, false);
json.writeStartArray();
partitions.forEach(transformer::serializePartition);
json.writeEndArray();
}
}
+ public static void toJsonLines(ISSTableScanner currentScanner,
Stream<UnfilteredRowIterator> partitions, boolean rawTime, TableMetadata
metadata, OutputStream out)
+ throws IOException
+ {
+ try (JsonGenerator json = jsonFactory.createJsonGenerator(new
OutputStreamWriter(out, StandardCharsets.UTF_8)))
+ {
+ JsonTransformer transformer = new JsonTransformer(json,
currentScanner, rawTime, metadata, true);
+ partitions.forEach(transformer::serializePartition);
+ }
+ }
+
public static void keysToJson(ISSTableScanner currentScanner,
Stream<DecoratedKey> keys, boolean rawTime, TableMetadata metadata,
OutputStream out) throws IOException
{
try (JsonGenerator json = jsonFactory.createJsonGenerator(new
OutputStreamWriter(out, StandardCharsets.UTF_8)))
{
- JsonTransformer transformer = new JsonTransformer(json,
currentScanner, rawTime, metadata);
+ JsonTransformer transformer = new JsonTransformer(json,
currentScanner, rawTime, metadata, false);
json.writeStartArray();
keys.forEach(transformer::serializePartitionKey);
json.writeEndArray();
@@ -221,6 +241,7 @@ public final class JsonTransformer
json.writeEndObject();
}
}
+
catch (IOException e)
{
String key =
metadata.partitionKeyType.getString(partition.partitionKey().getKey());
diff --git a/src/java/org/apache/cassandra/tools/SSTableExport.java
b/src/java/org/apache/cassandra/tools/SSTableExport.java
index 95e3ed6..4079ee7 100644
--- a/src/java/org/apache/cassandra/tools/SSTableExport.java
+++ b/src/java/org/apache/cassandra/tools/SSTableExport.java
@@ -62,6 +62,7 @@ public class SSTableExport
private static final String EXCLUDE_KEY_OPTION = "x";
private static final String ENUMERATE_KEYS_OPTION = "e";
private static final String RAW_TIMESTAMPS = "t";
+ private static final String PARTITION_JSON_LINES = "l";
private static final Options options = new Options();
private static CommandLine cmd;
@@ -88,6 +89,9 @@ public class SSTableExport
Option rawTimestamps = new Option(RAW_TIMESTAMPS, false, "Print raw
timestamps instead of iso8601 date strings");
options.addOption(rawTimestamps);
+
+ Option partitionJsonLines= new Option(PARTITION_JSON_LINES, false,
"Output json lines, by partition");
+ options.addOption(partitionJsonLines);
}
/**
@@ -194,6 +198,10 @@ public class SSTableExport
});
});
}
+ else if (cmd.hasOption(PARTITION_JSON_LINES))
+ {
+ JsonTransformer.toJsonLines(currentScanner, partitions,
cmd.hasOption(RAW_TIMESTAMPS), metadata, System.out);
+ }
else
{
JsonTransformer.toJson(currentScanner, partitions,
cmd.hasOption(RAW_TIMESTAMPS), metadata, System.out);
--
2.10.1 (Apple Git-78)
> Allow sstabledump to do a json object per partition to better handle large
> sstables
> -----------------------------------------------------------------------------------
>
> Key: CASSANDRA-13848
> URL: https://issues.apache.org/jira/browse/CASSANDRA-13848
> Project: Cassandra
> Issue Type: New Feature
> Components: Tools
> Reporter: Jeff Jirsa
> Assignee: Kevin Wern
> Priority: Trivial
> Labels: lhf
>
> sstable2json / sstabledump make a huge JSON document of the whole file. For
> very large sstables this makes it impossible to load into memory to do anything
> with it. Allowing users to break it into small JSON objects per partition
> would be useful.
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]