This is an automated email from the ASF dual-hosted git repository.

bankim pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 55cab44  [tools] Kudu table schema in Avro format
55cab44 is described below

commit 55cab441c2d66c92f0c769e98d6a2f079e5563b2
Author: Abhishek Chennaka <[email protected]>
AuthorDate: Tue Jan 4 23:36:07 2022 -0500

    [tools] Kudu table schema in Avro format
    
    This patch introduces a new flag -show_avro_format_schema to the
    existing "kudu table describe" CLI tool to output the table schema in
    Avro format. Other information displayed by the tool, such as owner,
    comments and partitions, is not displayed when this flag is set to
    true. This flag cannot be used in conjunction with other flags.
    
    Change-Id: I98194739e8132f93e916d1a78d338d28dd9e8075
    Reviewed-on: http://gerrit.cloudera.org:8080/18121
    Tested-by: Kudu Jenkins
    Reviewed-by: Bankim Bhavsar <[email protected]>
---
 src/kudu/tools/kudu-admin-test.cc   | 101 ++++++++++++++++++++++++++++++++++++
 src/kudu/tools/tool_action_table.cc |  91 +++++++++++++++++++++++++++++++-
 2 files changed, 190 insertions(+), 2 deletions(-)

diff --git a/src/kudu/tools/kudu-admin-test.cc 
b/src/kudu/tools/kudu-admin-test.cc
index 5f3a5bb..cdd7a6b 100644
--- a/src/kudu/tools/kudu-admin-test.cc
+++ b/src/kudu/tools/kudu-admin-test.cc
@@ -1906,6 +1906,107 @@ TEST_F(AdminCliTest, TestDescribeTable) {
       ")\n"
       "OWNER alice\n"
       "REPLICAS 1");
+
+  s = RunKuduTool({
+                      "table",
+                      "describe",
+                      cluster_->master()->bound_rpc_addr().ToString(),
+                      kAnotherTableId,
+                      "-show_avro_format_schema"
+                  }, &stdout, &stderr);
+  ASSERT_TRUE(s.ok()) << ToolRunInfo(s, stdout, stderr);
+
+  ASSERT_STR_CONTAINS(
+      stdout,
+      Substitute(
+          "{\n"
+          "    \"name\": \"table\",\n"
+          "    \"type\": \"TestAnotherTable\",\n"
+          "    \"namespace\": \"kudu.cluster.$0\",\n"
+          "    \"fields\": [\n"
+          "        {\n"
+          "            \"name\": \"key_hash0\",\n"
+          "            \"type\": \"int\"\n"
+          "        },\n"
+          "        {\n"
+          "            \"name\": \"key_hash1\",\n"
+          "            \"type\": \"int\"\n"
+          "        },\n"
+          "        {\n"
+          "            \"name\": \"key_hash2\",\n"
+          "            \"type\": \"int\"\n"
+          "        },\n"
+          "        {\n"
+          "            \"name\": \"key_range\",\n"
+          "            \"type\": \"int\"\n"
+          "        },\n"
+          "        {\n"
+          "            \"name\": \"int8_val\",\n"
+          "            \"type\": \"int\"\n"
+          "        },\n"
+          "        {\n"
+          "            \"name\": \"int16_val\",\n"
+          "            \"type\": \"int\"\n"
+          "        },\n"
+          "        {\n"
+          "            \"name\": \"int32_val\",\n"
+          "            \"type\": \"int\"\n"
+          "        },\n"
+          "        {\n"
+          "            \"name\": \"int64_val\",\n"
+          "            \"type\": \"long\"\n"
+          "        },\n"
+          "        {\n"
+          "            \"name\": \"timestamp_val\",\n"
+          "            \"type\": [\n"
+          "                {\n"
+          "                    \"type\": \"long\",\n"
+          "                    \"logicalType\": \"time-micros\"\n"
+          "                }\n"
+          "            ]\n"
+          "        },\n"
+          "        {\n"
+          "            \"name\": \"date_val\",\n"
+          "            \"type\": [\n"
+          "                {\n"
+          "                    \"type\": \"int\",\n"
+          "                    \"logicalType\": \"date\"\n"
+          "                }\n"
+          "            ]\n"
+          "        },\n"
+          "        {\n"
+          "            \"name\": \"string_val\",\n"
+          "            \"type\": \"string\"\n"
+          "        },\n"
+          "        {\n"
+          "            \"name\": \"bool_val\",\n"
+          "            \"type\": \"bool\"\n"
+          "        },\n"
+          "        {\n"
+          "            \"name\": \"float_val\",\n"
+          "            \"type\": \"float\"\n"
+          "        },\n"
+          "        {\n"
+          "            \"name\": \"double_val\",\n"
+          "            \"type\": \"double\"\n"
+          "        },\n"
+          "        {\n"
+          "            \"name\": \"binary_val\",\n"
+          "            \"type\": \"bytes\"\n"
+          "        },\n"
+          "        {\n"
+          "            \"name\": \"decimal_val\",\n"
+          "            \"type\": [\n"
+          "                {\n"
+          "                    \"type\": \"bytes\",\n"
+          "                    \"logicalType\": \"decimal\"\n"
+          "                }\n"
+          "            ]\n"
+          "        }\n"
+          "    ]\n"
+          "}\n",
+      client_->cluster_id())
+  );
 }
 
 TEST_F(AdminCliTest, TestDescribeTableNoOwner) {
diff --git a/src/kudu/tools/tool_action_table.cc 
b/src/kudu/tools/tool_action_table.cc
index 91614f9..d8d8dab 100644
--- a/src/kudu/tools/tool_action_table.cc
+++ b/src/kudu/tools/tool_action_table.cc
@@ -39,11 +39,11 @@
 #include "kudu/client/client-internal.h"
 #include "kudu/client/client.h"
 #include "kudu/client/replica_controller-internal.h"
-#include "kudu/client/table_alterer-internal.h"
 #include "kudu/client/scan_batch.h"
 #include "kudu/client/scan_predicate.h"
 #include "kudu/client/schema.h"
 #include "kudu/client/shared_ptr.h" // IWYU pragma: keep
+#include "kudu/client/table_alterer-internal.h"
 #include "kudu/client/value.h"
 #include "kudu/common/partial_row.h"
 #include "kudu/common/partition.h"
@@ -59,6 +59,7 @@
 #include "kudu/tools/tool_action.h"
 #include "kudu/tools/tool_action_common.h"
 #include "kudu/util/jsonreader.h"
+#include "kudu/util/jsonwriter.h"
 #include "kudu/util/status.h"
 #include "kudu/util/string_case.h"
 
@@ -124,6 +125,10 @@ DEFINE_int32(scan_batch_size, -1,
              "means the server-side default is used, where the server-side "
              "default is controlled by the tablet server's "
              "--scanner_default_batch_size_bytes flag.");
+DEFINE_bool(show_avro_format_schema, false,
+            "Display the table schema in avro format. When enabled it only 
outputs the "
+            "table schema in Avro format without any other information like "
+            "partition/owner/comments. It cannot be used in conjunction with 
other flags");
 
 DECLARE_bool(row_count_only);
 DECLARE_bool(show_scanner_stats);
@@ -234,6 +239,84 @@ enum PartitionAction {
   DROP,
 };
 
+Status AddLogicalType(JsonWriter *writer, const char *type, const char 
*logical_type) {  // Writes [{"type": type, "logicalType": logical_type}] to 'writer'.
+  writer->StartArray();  // the object below is wrapped in a one-element array
+  writer->StartObject();
+  writer->String("type");  // key; its value is the underlying type name passed in
+  writer->String(type);
+  writer->String("logicalType");  // key; its value is the logical type name passed in
+  writer->String(logical_type);
+  writer->EndObject();
+  writer->EndArray();
+  return Status::OK();  // this helper has no failure path; it always returns OK
+}
+
+Status PopulateAvroSchema(const string &table_name,
+                          const string &cluster_id,
+                          const KuduSchema &schema) {  // Prints 'schema' as pretty JSON to stdout.
+  std::ostringstream out;  // the JSON is buffered here and printed once at the end
+  JsonWriter writer(&out, JsonWriter::Mode::PRETTY);
+  // Top-level object: "name", "type" and "namespace" entries, then the "fields" array.
+  writer.StartObject();
+  vector<string> json_attributes = {"name", "table", "type", table_name,  // alternating key, value
+                                    "namespace", "kudu.cluster." + cluster_id, 
"fields"};  // NOTE(review): Avro records use "type": "record" and "name": <name> -- confirm
+  for (const string &json: json_attributes) {
+    writer.String(json);
+  }
+  writer.StartArray();  // value of the trailing "fields" key written above
+
+  // One {"name": ..., "type": ...} object per column, in column index order.
+  for (int i = 0; i < schema.num_columns(); i++) {
+    writer.StartObject();
+    writer.String("name");
+    writer.String(schema.Column(i).name());
+    writer.String("type");  // key only; the switch below writes the value
+    switch (schema.Column(i).type()) {
+      case kudu::client::KuduColumnSchema::INT8:
+      case kudu::client::KuduColumnSchema::INT16:
+      case kudu::client::KuduColumnSchema::INT32:
+        writer.String("int");  // INT8, INT16 and INT32 all share "int"
+        break;
+      case kudu::client::KuduColumnSchema::INT64:
+        writer.String("long");
+        break;
+      case kudu::client::KuduColumnSchema::STRING:
+        writer.String("string");
+        break;
+      case kudu::client::KuduColumnSchema::BOOL:
+        writer.String("bool");  // NOTE(review): Avro's primitive type name is "boolean" -- confirm
+        break;
+      case kudu::client::KuduColumnSchema::FLOAT:
+        writer.String("float");
+        break;
+      case kudu::client::KuduColumnSchema::DOUBLE:
+        writer.String("double");
+        break;
+      case kudu::client::KuduColumnSchema::BINARY:
+        writer.String("bytes");
+        break;
+      case kudu::client::KuduColumnSchema::VARCHAR:
+        writer.String("string");  // same output as STRING; no length attribute is written
+        break;
+      // These types are written by AddLogicalType() as [{"type": ..., "logicalType": ...}].
+      case kudu::client::KuduColumnSchema::UNIXTIME_MICROS:
+        RETURN_NOT_OK(AddLogicalType(&writer, "long", "time-micros"));  // NOTE(review): Avro has "timestamp-micros" -- confirm
+        break;
+      case kudu::client::KuduColumnSchema::DATE:
+        RETURN_NOT_OK(AddLogicalType(&writer, "int", "date"));
+        break;
+      case kudu::client::KuduColumnSchema::DECIMAL:
+        RETURN_NOT_OK(AddLogicalType(&writer, "bytes", "decimal"));  // NOTE(review): no precision/scale written -- confirm
+        break;
+    }  // no default case: an unlisted type would leave the "type" key without a value
+    writer.EndObject();
+  }
+  writer.EndArray();
+  writer.EndObject();
+  cout << out.str() << endl;  // emit the finished document on stdout
+  return Status::OK();
+}
+
 Status DeleteTable(const RunnerContext& context) {
   const string& table_name = FindOrDie(context.required_args, kTableNameArg);
   client::sp::shared_ptr<KuduClient> client;
@@ -251,6 +334,10 @@ Status DescribeTable(const RunnerContext& context) {
 
   // The schema.
   const KuduSchema& schema = table->schema();
+  if (FLAGS_show_avro_format_schema) {
+    return PopulateAvroSchema(FindOrDie(context.required_args, kTableNameArg),
+                                         client->cluster_id(), schema);
+  }
   cout << "TABLE " << table_name << " " << schema.ToString() << endl;
 
   // The partition schema with current range partitions.
@@ -285,7 +372,6 @@ Status DescribeTable(const RunnerContext& context) {
 
   // The comment.
   cout << "COMMENT " << table->comment() << endl;
-
   return Status::OK();
 }
 
@@ -1374,6 +1460,7 @@ unique_ptr<Mode> BuildTableMode() {
       .Description("Describe a table")
       .AddRequiredParameter({ kTableNameArg, "Name of the table to describe" })
       .AddOptionalParameter("show_attributes")
+      .AddOptionalParameter("show_avro_format_schema")
       .Build();
 
   unique_ptr<Action> list_tables =

Reply via email to