http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/flag_tags-test.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/flag_tags-test.cc 
b/be/src/kudu/util/flag_tags-test.cc
new file mode 100644
index 0000000..4626d0d
--- /dev/null
+++ b/be/src/kudu/util/flag_tags-test.cc
@@ -0,0 +1,135 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include <gflags/gflags.h>
+#include <gflags/gflags_declare.h>
+
+#include "kudu/gutil/map-util.h"
+#include "kudu/gutil/macros.h"
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/flag_tags.h"
+#include "kudu/util/flags.h"
+#include "kudu/util/logging.h"
+#include "kudu/util/logging_test_util.h"
+#include "kudu/util/test_macros.h"
+#include "kudu/util/test_util.h"
+
+DECLARE_bool(never_fsync);  // NOTE(review): not referenced by the tests visible in this file.
+
+DEFINE_int32(flag_with_no_tags, 0, "test flag that has no tags");  // Deliberately never passed to TAG_FLAG.
+
+DEFINE_int32(flag_with_one_tag, 0, "test flag that has 1 tag");
+TAG_FLAG(flag_with_one_tag, stable);
+
+DEFINE_int32(flag_with_two_tags, 0, "test flag that has 2 tags");
+TAG_FLAG(flag_with_two_tags, evolving);
+TAG_FLAG(flag_with_two_tags, unsafe);
+
+DEFINE_bool(test_unsafe_flag, false, "an unsafe flag");  // Exercised by TestUnlockFlags.
+TAG_FLAG(test_unsafe_flag, unsafe);
+
+DEFINE_bool(test_experimental_flag, false, "an experimental flag");  // Exercised by TestUnlockFlags.
+TAG_FLAG(test_experimental_flag, experimental);
+
+DEFINE_bool(test_sensitive_flag, false, "a sensitive flag");  // Expected to show up redacted in TestSensitiveFlags.
+TAG_FLAG(test_sensitive_flag, sensitive);
+
+using std::string;
+using std::unordered_set;
+
+namespace kudu {
+
+class FlagTagsTest : public KuduTest {};  // Fixture for the tests below; adds nothing on top of KuduTest.
+
+TEST_F(FlagTagsTest, TestTags) {
+  unordered_set<string> actual_tags;  // Reused as the out-parameter of every lookup below.
+  GetFlagTags("flag_with_no_tags", &actual_tags);
+  EXPECT_EQ(0, actual_tags.size());
+  // A single TAG_FLAG use yields exactly that one tag.
+  GetFlagTags("flag_with_one_tag", &actual_tags);
+  EXPECT_EQ(1, actual_tags.size());
+  EXPECT_TRUE(ContainsKey(actual_tags, "stable"));
+  // Two TAG_FLAG uses on the same flag yield both tags.
+  GetFlagTags("flag_with_two_tags", &actual_tags);
+  EXPECT_EQ(2, actual_tags.size());
+  EXPECT_TRUE(ContainsKey(actual_tags, "unsafe"));
+  EXPECT_TRUE(ContainsKey(actual_tags, "evolving"));
+  // A name that was never tagged produces an empty set.
+  GetFlagTags("missing_flag", &actual_tags);
+  EXPECT_EQ(0, actual_tags.size());
+}
+
+TEST_F(FlagTagsTest, TestUnlockFlags) {  // Checks how HandleCommonFlags() treats unsafe/experimental flags.
+  // Setting an unsafe flag without unlocking should crash.
+  {
+    gflags::FlagSaver s;  // Scopes the flag changes made inside this block.
+    gflags::SetCommandLineOption("test_unsafe_flag", "true");
+    ASSERT_DEATH({ HandleCommonFlags(); },
+                 "Flag --test_unsafe_flag is unsafe and unsupported.*"
+                 "Use --unlock_unsafe_flags to proceed");
+  }
+
+  // Setting an unsafe flag with unlocking should proceed with a warning.
+  {
+    StringVectorSink sink;
+    ScopedRegisterSink reg(&sink);
+    gflags::FlagSaver s;
+    gflags::SetCommandLineOption("test_unsafe_flag", "true");
+    gflags::SetCommandLineOption("unlock_unsafe_flags", "true");
+    HandleCommonFlags();
+    ASSERT_EQ(1, sink.logged_msgs().size());
+    ASSERT_STR_CONTAINS(sink.logged_msgs()[0], "Enabled unsafe flag: --test_unsafe_flag");
+  }
+
+  // Setting an experimental flag without unlocking should crash.
+  {
+    gflags::FlagSaver s;
+    gflags::SetCommandLineOption("test_experimental_flag", "true");
+    ASSERT_DEATH({ HandleCommonFlags(); },
+                 "Flag --test_experimental_flag is experimental and unsupported.*"
+                 "Use --unlock_experimental_flags to proceed");
+  }
+
+  // Setting an experimental flag with unlocking should proceed with a warning.
+  {
+    StringVectorSink sink;
+    ScopedRegisterSink reg(&sink);
+    gflags::FlagSaver s;
+    gflags::SetCommandLineOption("test_experimental_flag", "true");
+    gflags::SetCommandLineOption("unlock_experimental_flags", "true");
+    HandleCommonFlags();
+    ASSERT_EQ(1, sink.logged_msgs().size());
+    ASSERT_STR_CONTAINS(sink.logged_msgs()[0],
+                        "Enabled experimental flag: --test_experimental_flag");
+  }
+}
+
+TEST_F(FlagTagsTest, TestSensitiveFlags) {
+  // With log redaction enabled, the flag dump must show a sensitive flag's value redacted.
+  {
+    kudu::g_should_redact = kudu::RedactContext::LOG;  // NOTE(review): not reset by this test.
+    ASSERT_STR_CONTAINS(CommandlineFlagsIntoString(EscapeMode::NONE), strings::Substitute(
+                        "--test_sensitive_flag=$0", kRedactionMessage));
+  }
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/flag_tags.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/flag_tags.cc b/be/src/kudu/util/flag_tags.cc
new file mode 100644
index 0000000..7e11ea2
--- /dev/null
+++ b/be/src/kudu/util/flag_tags.cc
@@ -0,0 +1,91 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/util/flag_tags.h"
+
+#include <map>
+#include <ostream>
+#include <string>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/map-util.h"
+#include "kudu/gutil/singleton.h"
+
+using std::multimap;
+using std::pair;
+using std::string;
+using std::unordered_set;
+using std::vector;
+
+namespace kudu {
+namespace flag_tags_internal {
+
+// Singleton registry storing the set of tags for each flag.
+class FlagTagRegistry {
+ public:
+  static FlagTagRegistry* GetInstance() {
+    return Singleton<FlagTagRegistry>::get();
+  }
+
+  void Tag(const string& name, const string& tag) {  // Records one (flag name, tag) pair.
+    tag_map_.insert(TagMap::value_type(name, tag));
+  }
+
+  void GetTags(const string& name, unordered_set<string>* tags) {
+    tags->clear();  // The output always starts empty, even for an unknown flag name.
+    pair<TagMap::const_iterator, TagMap::const_iterator> range =
+      tag_map_.equal_range(name);
+    for (auto it = range.first; it != range.second; ++it) {
+      if (!InsertIfNotPresent(tags, it->second)) {
+        LOG(DFATAL) << "Flag " << name << " was tagged more than once with the tag '"
+                    << it->second << "'";
+      }
+    }
+  }
+
+ private:
+  friend class Singleton<FlagTagRegistry>;
+  FlagTagRegistry() {}
+
+  typedef multimap<string, string> TagMap;  // A multimap, so one flag name may map to several tags.
+  TagMap tag_map_;
+
+  DISALLOW_COPY_AND_ASSIGN(FlagTagRegistry);
+};
+
+
+FlagTagger::FlagTagger(const char* name, const char* tag) {
+  FlagTagRegistry::GetInstance()->Tag(string(name), string(tag));  // Record the pair in the singleton registry.
+}
+
+// The destructor does nothing; in particular it does not remove the tag that was registered.
+FlagTagger::~FlagTagger() = default;
+
+} // namespace flag_tags_internal
+
+using flag_tags_internal::FlagTagRegistry;
+
+void GetFlagTags(const string& flag_name, unordered_set<string>* tags) {
+  FlagTagRegistry* const registry = FlagTagRegistry::GetInstance();
+  registry->GetTags(flag_name, tags);  // Clears 'tags' first, then fills in the flag's tags.
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/flag_tags.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/flag_tags.h b/be/src/kudu/util/flag_tags.h
new file mode 100644
index 0000000..bf0c400
--- /dev/null
+++ b/be/src/kudu/util/flag_tags.h
@@ -0,0 +1,169 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+// Flag Tags provide a way to attach arbitrary textual tags to gflags in
+// a global registry. Kudu uses the following flag tags:
+//
+// - "stable":
+//         These flags are considered user-facing APIs. Therefore, the
+//         semantics of the flag should not be changed except between major
+//         versions. Similarly, they must not be removed except between major
+//         versions.
+//
+// - "evolving":
+//         These flags are considered user-facing APIs, but are not yet
+//         locked down. For example, they may pertain to a newly introduced
+//         feature that is still being actively developed. These may be changed
+//         between minor versions, but should be suitably release-noted.
+//
+//         This is the default assumed stability level, but can be tagged
+//         if you'd like to make it explicit.
+//
+// - "experimental":
+//         These flags are considered user-facing APIs, but are related to
+//         an experimental feature, or otherwise likely to change or be
+//         removed at any point. Users should not expect any compatibility
+//         of these flags.
+//
+//         Users must pass --unlock_experimental_flags to use any of these
+//         flags.
+//
+// - "hidden":
+//         These flags are for internal use only (e.g. testing) and should
+//         not be included in user-facing documentation.
+//
+// - "advanced":
+//         These flags are for advanced users or debugging purposes. While
+//         they aren't likely to be actively harmful (see "unsafe" below),
+//         they're also likely to be used only rarely and should be relegated
+//         to more detailed sections of documentation.
+//
+// - "unsafe":
+//         These flags are for internal use only (e.g. testing), and changing
+//         them away from the defaults may result in arbitrarily bad things
+//         happening. These flags are automatically excluded from user-facing
+//         documentation even if they are not also marked 'hidden'.
+//
+//         Users must pass --unlock_unsafe_flags to use any of these
+//         flags.
+//
+// - "runtime":
+//         These flags can be safely changed at runtime via an RPC to the
+//         server. Changing a flag at runtime that does not have this tag is allowed
+//         only if the user specifies a "force_unsafe_change" flag in the RPC.
+//
+//         NOTE: because gflags are simple global variables, it's important to
+//         think very carefully before tagging a flag with 'runtime'. In particular,
+//         if a string-type flag is marked 'runtime', you should never access it
+//         using the raw 'FLAGS_foo_bar' name. Instead, you must use the
+//         google::GetCommandLineFlagInfo(...) API to make a copy of the flag value
+//         under a lock. Otherwise, the 'std::string' instance could be mutated
+//         underneath the reader causing a crash.
+//
+//         For primitive-type flags, we assume that reading a variable is atomic.
+//         That is to say that a reader will either see the old value or the new
+//         one, but not some invalid value. However, for the runtime change to
+//         have any effect, you must be sure to use the FLAGS_foo_bar variable directly
+//         rather than initializing some instance variable during program startup.
+//
+// - "sensitive":
+//         The values of these flags are considered sensitive and will be redacted
+//         if redaction is enabled.
+//
+// A given flag may have zero or more tags associated with it. The system does
+// not make any attempt to check integrity of the tags - for example, it allows
+// you to mark a flag as both stable and unstable, even though this makes no
+// real sense. Nevertheless, you should strive to meet the following requirements:
+//
+// - A flag should have no more than one of stable/evolving/experimental
+//   indicating its stability. 'evolving' is considered the default.
+// - A flag should have no more than one of advanced/hidden indicating visibility
+//   in documentation. If neither is specified, the flag will be in the main
+//   section of the documentation.
+// - It is likely that most 'experimental' flags will also be 'advanced' or 'hidden',
+//   and that 'stable' flags are not likely to be 'hidden' or 'unsafe'.
+//
+// To add a tag to a flag, use the TAG_FLAG macro. For example:
+//
+//  DEFINE_bool(sometimes_crash, false, "This flag makes Kudu crash a lot");
+//  TAG_FLAG(sometimes_crash, unsafe);
+//  TAG_FLAG(sometimes_crash, runtime);
+//
+// To fetch the list of tags associated with a flag, use 'GetFlagTags'.
+
+#ifndef KUDU_UTIL_FLAG_TAGS_H
+#define KUDU_UTIL_FLAG_TAGS_H
+
+#include "kudu/gutil/macros.h"
+
+#include <string>
+#include <unordered_set>
+
+namespace kudu {
+
+struct FlagTags {  // Lets TAG_FLAG check at compile time that a tag name exists.
+  enum {
+    stable,        // User-facing API; semantics change only between major versions.
+    evolving,      // User-facing but not yet locked down; the default stability level.
+    experimental,  // May change or vanish at any time; needs --unlock_experimental_flags.
+    hidden,        // Internal use only; left out of user-facing documentation.
+    advanced,      // For advanced users or debugging; belongs in detailed documentation.
+    unsafe,        // Changing it may do arbitrary harm; needs --unlock_unsafe_flags.
+    runtime,       // May be changed safely at runtime via an RPC to the server.
+    sensitive      // Value is redacted when redaction is enabled.
+  };
+};
+
+// Tag the flag 'flag_name' with the given tag 'tag'. Expands to two compile-time
+// checks plus a file-local FlagTagger object that is constructed with both names.
+//
+// This verifies that 'flag_name' is a valid gflag, which must be defined
+// or declared above the use of the TAG_FLAG macro.
+// This also validates that 'tag' is a valid tag as defined in the FlagTags
+// enum above.
+#define TAG_FLAG(flag_name, tag) \
+  COMPILE_ASSERT(sizeof(decltype(FLAGS_##flag_name)), flag_does_not_exist); \
+  COMPILE_ASSERT(sizeof(::kudu::FlagTags::tag), invalid_tag);   \
+  namespace {                                                     \
+    ::kudu::flag_tags_internal::FlagTagger t_##flag_name##_##tag( \
+        AS_STRING(flag_name), AS_STRING(tag));                    \
+  }
+
+// Fetch the set of tags associated with the flag named 'flag_name'.
+//
+// If the flag is invalid or has no tags, sets 'tags' to be empty.
+void GetFlagTags(const std::string& flag_name,
+                 std::unordered_set<std::string>* tags);
+
+// ------------------------------------------------------------
+// Internal implementation details
+// ------------------------------------------------------------
+namespace flag_tags_internal {
+
+class FlagTagger {  // Helper object instantiated by the TAG_FLAG macro.
+ public:
+  FlagTagger(const char* name, const char* tag);  // 'name' is the flag name, 'tag' the tag name.
+  ~FlagTagger();
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(FlagTagger);
+};
+
+} // namespace flag_tags_internal
+
+} // namespace kudu
+#endif /* KUDU_UTIL_FLAG_TAGS_H */

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/flag_validators-test.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/flag_validators-test.cc 
b/be/src/kudu/util/flag_validators-test.cc
new file mode 100644
index 0000000..77efab3
--- /dev/null
+++ b/be/src/kudu/util/flag_validators-test.cc
@@ -0,0 +1,252 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdlib>
+#include <functional>
+#include <map>
+#include <ostream>
+#include <string>
+#include <utility>
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "kudu/gutil/macros.h"
+#include "kudu/util/debug/leakcheck_disabler.h"
+#include "kudu/util/flags.h"
+#include "kudu/util/flag_validators.h"
+#include "kudu/util/test_macros.h"
+#include "kudu/util/test_util.h"
+
+DEFINE_string(grouped_0, "", "First flag to set.");   // Checked together with grouped_1 by CheckGroupedFlags01().
+DEFINE_string(grouped_1, "", "Second flag to set.");
+DEFINE_string(grouped_2, "", "Third flag to set.");   // Checked together with grouped_3 by CheckGroupedFlags23().
+DEFINE_string(grouped_3, "", "Fourth flag to set.");
+
+namespace kudu {
+
+static bool CheckGroupedFlags01() {
+  // The pair is consistent only when both flags are empty or both are non-empty.
+  const bool first_empty = FLAGS_grouped_0.empty();
+  const bool second_empty = FLAGS_grouped_1.empty();
+  if (first_empty == second_empty) {
+    return true;
+  }
+
+  LOG(ERROR) << "--grouped_0 and --grouped_1 must be set as a group";
+  return false;
+}
+GROUP_FLAG_VALIDATOR(test_group_validator01, CheckGroupedFlags01)
+
+static bool CheckGroupedFlags23() {
+  // The pair is consistent only when both flags are empty or both are non-empty.
+  const bool third_empty = FLAGS_grouped_2.empty();
+  const bool fourth_empty = FLAGS_grouped_3.empty();
+  if (third_empty == fourth_empty) {
+    return true;
+  }
+
+  LOG(ERROR) << "--grouped_2 and --grouped_3 must be set as a group";
+  return false;
+}
+GROUP_FLAG_VALIDATOR(test_group_validator23, CheckGroupedFlags23)
+
+class FlagsValidatorsBasicTest : public KuduTest {
+ public:
+  void RunTest(const char** argv, int argc) {  // NOTE(review): no caller is visible in this file.
+    char** casted_argv = const_cast<char**>(argv);
+    // ParseCommandLineFlags() calls exit(1) if it finds inconsistency in flags.
+    ASSERT_EQ(1, ParseCommandLineFlags(&argc, &casted_argv, true));
+  }
+};
+
+TEST_F(FlagsValidatorsBasicTest, Grouped) {
+  const auto& registered = GetFlagValidators();
+  ASSERT_EQ(2, registered.size());  // The two GROUP_FLAG_VALIDATOR uses in this file.
+  const auto entry = registered.find("test_group_validator01");
+  ASSERT_NE(registered.end(), entry);
+  const FlagValidator& check = entry->second;
+  EXPECT_TRUE(check());   // Both flags empty.
+  FLAGS_grouped_0 = "0";
+  EXPECT_FALSE(check());  // Only grouped_0 set.
+  FLAGS_grouped_1 = "1";
+  EXPECT_TRUE(check());   // Both flags set.
+  FLAGS_grouped_0 = "";
+  EXPECT_FALSE(check());  // Only grouped_1 set.
+  FLAGS_grouped_1 = "";
+  EXPECT_TRUE(check());   // Both flags empty again.
+}
+
+class FlagsValidatorsDeathTest : public KuduTest {
+ public:
+  void Run(const char** argv, int argc) {  // Always ends the process; invoked inside EXPECT_EXIT below.
+    debug::ScopedLeakCheckDisabler disabler;
+    char** casted_argv = const_cast<char**>(argv);
+    // ParseCommandLineFlags() calls exit(1) if one of the custom validators
+    // finds inconsistency in flags.
+    ParseCommandLineFlags(&argc, &casted_argv, true);
+    exit(0);  // Reached only when parsing returned, i.e. the flags were accepted.
+  }
+
+  void RunSuccess(const char** argv, int argc) {  // Expects Run() to exit with code 0.
+    EXPECT_EXIT(Run(argv, argc), ::testing::ExitedWithCode(0), ".*");
+  }
+
+  void RunFailure(const char** argv, int argc) {  // Expects exit code 1 and the inconsistency message.
+    EXPECT_EXIT(Run(argv, argc), ::testing::ExitedWithCode(1),
+        ".* Detected inconsistency in command-line flags; exiting");
+  }
+};
+
+TEST_F(FlagsValidatorsDeathTest, GroupedSuccessNoFlags) {
+  const char* program_name_only[] = { "argv_set_0" };  // No flags passed at all.
+  NO_FATALS(RunSuccess(program_name_only, ARRAYSIZE(program_name_only)));
+}
+
+TEST_F(FlagsValidatorsDeathTest, GroupedSuccessSimple) {
+  static const size_t kArgvSize = 1 + 2;  // Program name plus two flags.
+  const char* cases[][kArgvSize] = {
+    {
+      "argv_set_0",  // Both flags of the 0/1 group set.
+      "--grouped_0=first",
+      "--grouped_1=second",
+    },
+    {
+      "argv_set_1",  // Same group, values swapped.
+      "--grouped_0=second",
+      "--grouped_1=first",
+    },
+    {
+      "argv_set_2",  // Both flags of the 0/1 group explicitly empty.
+      "--grouped_0=",
+      "--grouped_1=",
+    },
+    {
+      "argv_set_3",  // Same as above, opposite order.
+      "--grouped_1=",
+      "--grouped_0=",
+    },
+    {
+      "argv_set_4",  // Both flags of the 2/3 group set.
+      "--grouped_2=2",
+      "--grouped_3=3",
+    },
+    {
+      "argv_set_5",  // Both flags of the 2/3 group explicitly empty.
+      "--grouped_3=",
+      "--grouped_2=",
+    },
+  };
+  for (const char** args : cases) {
+    RunSuccess(args, kArgvSize);
+  }
+}
+
+TEST_F(FlagsValidatorsDeathTest, GroupedFailureSimple) {
+  static const size_t kArgvSize = 1 + 1;  // Program name plus one flag.
+  const char* cases[][kArgvSize] = {
+    {
+      "argv_set_0",  // grouped_0 set, grouped_1 left at its default.
+      "--grouped_0=a",
+    },
+    {
+      "argv_set_1",  // grouped_1 set, grouped_0 left at its default.
+      "--grouped_1=b",
+    },
+    {
+      "argv_set_2",  // grouped_2 set, grouped_3 left at its default.
+      "--grouped_2=2",
+    },
+    {
+      "argv_set_3",  // grouped_3 set, grouped_2 left at its default.
+      "--grouped_3=3",
+    },
+  };
+  for (const char** args : cases) {
+    RunFailure(args, kArgvSize);
+  }
+}
+
+// Each case satisfies one group validator and violates the other; all must fail.
+TEST_F(FlagsValidatorsDeathTest, GroupedFailureOneOfTwoValidators) {
+  static const size_t kArgvSize = 1 + 4;  // Program name plus four flags.
+  const char* cases[][kArgvSize] = {
+    {
+      "argv_set_0",  // 0/1 group consistent, 2/3 group broken.
+      "--grouped_0=0",
+      "--grouped_1=1",
+      "--grouped_2=",
+      "--grouped_3=3",
+    },
+    {
+      "argv_set_1",  // Same as above with the groups in the opposite order.
+      "--grouped_2=",
+      "--grouped_3=3",
+      "--grouped_0=0",
+      "--grouped_1=1",
+    },
+    {
+      "argv_set_2",  // 0/1 group broken, 2/3 group consistent.
+      "--grouped_0=0",
+      "--grouped_1=",
+      "--grouped_2=2",
+      "--grouped_3=3",
+    },
+    {
+      "argv_set_3",  // 0/1 group broken, flags listed in reverse order.
+      "--grouped_3=3",
+      "--grouped_2=2",
+      "--grouped_1=1",
+      "--grouped_0=",
+    },
+  };
+  for (const char** args : cases) {
+    RunFailure(args, kArgvSize);
+  }
+}
+
+TEST_F(FlagsValidatorsDeathTest, GroupedFailureWithEmptyValues) {
+  static const size_t kArgvSize = 1 + 2;  // Program name plus two flags.
+  const char* cases[][kArgvSize] = {
+    {
+      "argv_set_0",  // grouped_0 set, grouped_1 explicitly empty.
+      "--grouped_0=a",
+      "--grouped_1=",
+    },
+    {
+      "argv_set_1",  // Same as above, opposite order.
+      "--grouped_1=",
+      "--grouped_0=a",
+    },
+    {
+      "argv_set_2",  // grouped_1 set, grouped_0 explicitly empty.
+      "--grouped_0=",
+      "--grouped_1=b",
+    },
+    {
+      "argv_set_3",  // Same as above, opposite order.
+      "--grouped_1=b",
+      "--grouped_0=",
+    },
+  };
+  for (const char** args : cases) {
+    RunFailure(args, kArgvSize);
+  }
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/flag_validators.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/flag_validators.cc 
b/be/src/kudu/util/flag_validators.cc
new file mode 100644
index 0000000..f90fe2e
--- /dev/null
+++ b/be/src/kudu/util/flag_validators.cc
@@ -0,0 +1,67 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/util/flag_validators.h"
+
+#include <string>
+
+#include "kudu/gutil/map-util.h"
+#include "kudu/gutil/singleton.h"
+
+using std::string;
+
+namespace kudu {
+namespace flag_validation_internal {
+
+// A singleton registry for storing group flag validators.
+class FlagValidatorRegistry {
+ public:
+  static FlagValidatorRegistry* GetInstance() {
+    return Singleton<FlagValidatorRegistry>::get();
+  }
+
+  void Register(const string& name, const FlagValidator& func) {
+    InsertOrDie(&validators_, name, func);  // NOTE(review): presumably fatal on a duplicate name -- confirm in map-util.h.
+  }
+
+  const FlagValidatorsMap& validators() {  // Returns the stored map by reference, without copying.
+    return validators_;
+  }
+
+ private:
+  friend class Singleton<FlagValidatorRegistry>;
+  FlagValidatorRegistry() {}  // Private; Singleton<> is a friend so that it can construct the instance.
+
+  FlagValidatorsMap validators_;
+
+  DISALLOW_COPY_AND_ASSIGN(FlagValidatorRegistry);
+};
+
+
+Registrator::Registrator(const char* name, const FlagValidator& validator) {
+  FlagValidatorRegistry::GetInstance()->Register(string(name), validator);  // Store it in the singleton registry.
+}
+
+} // namespace flag_validation_internal
+
+
+const FlagValidatorsMap& GetFlagValidators() {
+  // The returned reference points into the singleton registry; nothing is copied.
+  return flag_validation_internal::FlagValidatorRegistry::GetInstance()->validators();
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/flag_validators.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/flag_validators.h 
b/be/src/kudu/util/flag_validators.h
new file mode 100644
index 0000000..02cc2dd
--- /dev/null
+++ b/be/src/kudu/util/flag_validators.h
@@ -0,0 +1,102 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "kudu/gutil/macros.h"
+
+#include <functional>
+#include <map>
+#include <string>
+
+namespace kudu {
+
+// The validation function: takes no parameters and returns a boolean. A group
+// validator should return 'true' if validation was successful, or 'false'
+// otherwise.
+typedef std::function<bool(void)> FlagValidator;
+
+// The group validator registry's representation as seen from the outside:
+// the key is the name of the group validator, the value is the validation
+// function.
+typedef std::map<std::string, FlagValidator> FlagValidatorsMap;
+
+// Register a 'group' validator for command-line flags. In contrast with the
+// standard (built-in) gflag validators registered by the DEFINE_validator()
+// macro, group validators are run at a later phase in the context of the main()
+// function. A group validator has a guarantee that all command-line flags have
+// been parsed, individually validated (via standard validators), and their
+// values are already set at the time when the validator runs.
+//
+// The first macro parameter is the name of the validator, the second parameter
+// is the validation function as is. The name must be unique across all
+// registered group validators.
+//
+// The validation function takes no parameters and returns 'true' in case of
+// successful validation, otherwise it returns 'false'. If at least one of the
+// registered group validators returns 'false', exit(1) is called.
+//
+// Usage guideline:
+//
+//   * Use the DEFINE_validator() macro if you need to validate an individual
+//     gflag's value
+//
+//   * Use the GROUP_FLAG_VALIDATOR() macro only if you need to validate a set
+//     of gflag values against one another, having the guarantee that their
+//     values are already set when the validation function runs.
+//
+// Sample usage:
+//
+//  static bool ValidateGroupedFlags() {
+//    bool has_a = !FLAGS_a.empty();
+//    bool has_b = !FLAGS_b.empty();
+//
+//    if (has_a != has_b) {
+//      LOG(ERROR) << "--a and --b must be set as a group";
+//      return false;
+//    }
+//
+//    return true;
+//  }
+//  GROUP_FLAG_VALIDATOR(grouped_flags_validator, ValidateGroupedFlags);
+// Expands to a file-local Registrator object constructed with the name (as a string) and 'func'.
+#define GROUP_FLAG_VALIDATOR(name, func) \
+  namespace {                                               \
+    ::kudu::flag_validation_internal::Registrator v_##name( \
+        AS_STRING(name), (func));                           \
+  }
+
+// Get all registered group flag validators, keyed by validator name.
+const FlagValidatorsMap& GetFlagValidators();
+
+namespace flag_validation_internal {
+
+// This is a utility class which registers a group validator upon instantiation.
+class Registrator {
+ public:
+  // The constructor registers a group validator with the specified name and
+  // the given validation function. The name must be unique among all group
+  // validators.
+  Registrator(const char* name, const FlagValidator& validator);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(Registrator);
+};
+
+} // namespace flag_validation_internal
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/flags-test.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/flags-test.cc b/be/src/kudu/util/flags-test.cc
new file mode 100644
index 0000000..9ebc178
--- /dev/null
+++ b/be/src/kudu/util/flags-test.cc
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <string>
+#include <vector>
+
+#include <gflags/gflags.h>
+#include <gflags/gflags_declare.h>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "kudu/gutil/macros.h"
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/env.h"
+#include "kudu/util/flags.h"
+#include "kudu/util/flag_tags.h"
+#include "kudu/util/logging.h"
+#include "kudu/util/slice.h"
+#include "kudu/util/status.h"
+#include "kudu/util/test_macros.h"
+#include "kudu/util/test_util.h"
+
+// Test gflags: the four string flags cover {flagfile, explicit} x {changed, left at default}.
+DEFINE_string(test_nondefault_ff, "default",
+             "Check if we track non defaults from flagfile");
+DEFINE_string(test_nondefault_explicit, "default",
+             "Check if we track explicitly set non defaults");
+DEFINE_string(test_default_ff, "default",
+             "Check if we track defaults from flagfile");
+DEFINE_string(test_default_explicit, "default",
+             "Check if we track explicitly set defaults");
+DEFINE_bool(test_sensitive_flag, false, "a sensitive flag");  // Used to check redaction in the flag report.
+TAG_FLAG(test_sensitive_flag, sensitive);
+
+DECLARE_bool(never_fsync);  // NOTE(review): not referenced by the test visible in this file.
+
+namespace kudu {
+
+class FlagsTest : public KuduTest {};  // Fixture for the test below; adds nothing on top of KuduTest.
+
+TEST_F(FlagsTest, TestNonDefaultFlags) {
+  // Snapshot the flags before parsing; GetNonDefaultFlags() is given this map below.
+  GFlagsMap default_flags = GetFlagsMap();
+
+  std::string flagfile_path(GetTestPath("test_nondefault_flags"));
+  std::string flagfile_contents = "--test_nondefault_ff=nondefault\n"
+                                  "--test_default_ff=default";
+
+  CHECK_OK(WriteStringToFile(Env::Default(),
+                             Slice(flagfile_contents.data(),
+                                   flagfile_contents.size()),
+                             flagfile_path));
+
+  std::string flagfile_flag = strings::Substitute("--flagfile=$0", flagfile_path);
+  int argc = 4;
+  const char* argv[4] = {
+    "some_executable_file",
+    "--test_nondefault_explicit=nondefault",
+    "--test_default_explicit=default",
+    flagfile_flag.c_str()
+  };
+
+  char** casted_argv = const_cast<char**>(argv);
+  ParseCommandLineFlags(&argc, &casted_argv, true);
+
+  std::vector<const char*> expected_flags = {  // Must appear in the report.
+    "--test_nondefault_explicit=nondefault",
+    "--test_nondefault_ff=nondefault",
+    flagfile_flag.c_str()
+  };
+
+  std::vector<const char*> unexpected_flags = {  // Set to their default value, so must not appear.
+    "--test_default_explicit",
+    "--test_default_ff"
+  };
+
+  // Setting a sensitive flag with non-default value should return
+  // a redacted value.
+  FLAGS_test_sensitive_flag = true;
+  kudu::g_should_redact = kudu::RedactContext::LOG;
+  std::string result = GetNonDefaultFlags(default_flags);
+
+  for (const auto& expected : expected_flags) {
+    ASSERT_STR_CONTAINS(result, expected);
+  }
+
+  for (const auto& unexpected : unexpected_flags) {
+    ASSERT_STR_NOT_CONTAINS(result, unexpected);
+  }
+
+  ASSERT_STR_CONTAINS(result, strings::Substitute("--test_sensitive_flag=$0",
+                                                  kRedactionMessage));
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/flags.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/flags.cc b/be/src/kudu/util/flags.cc
new file mode 100644
index 0000000..047c893
--- /dev/null
+++ b/be/src/kudu/util/flags.cc
@@ -0,0 +1,604 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/util/flags.h"
+
+
+#include <cstdlib>
+#include <functional>
+#include <iostream>
+#include <string>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include <sys/stat.h>
+#include <unistd.h> // IWYU pragma: keep
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <gflags/gflags.h>
+#include <gflags/gflags_declare.h>
+#include <glog/logging.h>
+#ifdef TCMALLOC_ENABLED
+#include <gperftools/heap-profiler.h>
+#endif
+
+#include "kudu/gutil/macros.h"
+#include "kudu/gutil/map-util.h"
+#include "kudu/gutil/stringprintf.h"
+#include "kudu/gutil/strings/join.h"
+#include "kudu/gutil/strings/numbers.h"
+#include "kudu/gutil/strings/split.h"
+#include "kudu/gutil/strings/stringpiece.h"
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/flag_tags.h"
+#include "kudu/util/flag_validators.h"
+#include "kudu/util/logging.h"
+#include "kudu/util/metrics.h"
+#include "kudu/util/os-util.h"
+#include "kudu/util/path_util.h"
+#include "kudu/util/string_case.h"
+#include "kudu/util/url-coding.h"
+#include "kudu/util/version_info.h"
+
+using google::CommandLineFlagInfo;
+
+using std::cout;
+using std::endl;
+using std::string;
+using std::ostringstream;
+using std::unordered_set;
+using std::vector;
+
+using strings::Substitute;
+
+// Because every binary initializes its flags here, we use it as a convenient place
+// to offer some global flags as well.
+//
+// --dump_metrics_json is acted on by HandleCommonFlags(), which writes the
+// metric prototype registry as JSON and exits with status 0.
+DEFINE_bool(dump_metrics_json, false,
+            "Dump a JSON document describing all of the metrics which may be emitted "
+            "by this binary.");
+TAG_FLAG(dump_metrics_json, hidden);
+
+#ifdef TCMALLOC_ENABLED
+// When set, HandleCommonFlags() calls HeapProfilerStart() with the value of
+// --heap_profile_path.
+DEFINE_bool(enable_process_lifetime_heap_profiling, false, "Enables heap "
+    "profiling for the lifetime of the process. Profile output will be stored in the "
+    "directory specified by -heap_profile_path.");
+TAG_FLAG(enable_process_lifetime_heap_profiling, stable);
+TAG_FLAG(enable_process_lifetime_heap_profiling, advanced);
+
+// An empty value is replaced by HandleCommonFlags() with "/tmp/<program>.<pid>".
+DEFINE_string(heap_profile_path, "", "Output path to store heap profiles. If not set "
+    "profiles are stored in /tmp/<process-name>.<pid>.<n>.heap.");
+TAG_FLAG(heap_profile_path, stable);
+TAG_FLAG(heap_profile_path, advanced);
+
+// Copied into tcmalloc's internal FLAGS_tcmalloc_sample_parameter by
+// HandleCommonFlags() unless the TCMALLOC_SAMPLE_PARAMETER environment
+// variable is set.
+DEFINE_int64(heap_sample_every_n_bytes, 0,
+             "Enable heap occupancy sampling. If this flag is set to some positive "
+             "value N, a memory allocation will be sampled approximately every N bytes. "
+             "Lower values of N incur larger overhead but give more accurate results. "
+             "A value such as 524288 (512KB) is a reasonable choice with relatively "
+             "low overhead.");
+TAG_FLAG(heap_sample_every_n_bytes, advanced);
+TAG_FLAG(heap_sample_every_n_bytes, experimental);
+#endif
+
+// When set, HandleCommonFlags() calls DisableCoreDumps().
+DEFINE_bool(disable_core_dumps, false, "Disable core dumps when this process crashes.");
+TAG_FLAG(disable_core_dumps, advanced);
+TAG_FLAG(disable_core_dumps, evolving);
+
+// Octal umask string. Checked by ValidateUmask() and applied to the process
+// by SetUmask(), which also records the parsed value in g_parsed_umask.
+DEFINE_string(umask, "077",
+              "The umask that will be used when creating files and directories. "
+              "Permissions of top-level data directories will also be modified at "
+              "start-up to conform to the given umask. Changing this value may "
+              "enable unauthorized local users to read or modify data stored by Kudu.");
+TAG_FLAG(umask, advanced);
+
+// Validator for --umask: accepts 'value' only if it parses as an octal number
+// and leaves all three owner permission bits (0700) clear. Logs an error and
+// returns false otherwise.
+static bool ValidateUmask(const char* /*flagname*/, const string& value) {
+  uint32_t mask_bits;
+  const bool is_octal = safe_strtou32_base(value.c_str(), &mask_bits, 8);
+  if (!is_octal) {
+    LOG(ERROR) << "Invalid umask: must be an octal string";
+    return false;
+  }
+
+  // A umask that strips owner permissions would make us create files that
+  // we ourselves cannot read back.
+  const bool restricts_owner = (mask_bits & 0700) != 0;
+  if (restricts_owner) {
+    LOG(ERROR) << "Invalid umask value: must not restrict owner permissions";
+    return false;
+  }
+  return true;
+}
+
+DEFINE_validator(umask, &ValidateUmask);
+
+// Passed by CheckFlagsAllowed() to CheckFlagsAndWarn("experimental", ...):
+// when false, any non-default flag tagged 'experimental' makes the process exit.
+DEFINE_bool(unlock_experimental_flags, false,
+            "Unlock flags marked as 'experimental'. These flags are not guaranteed to "
+            "be maintained across releases of Kudu, and may enable features or behavior "
+            "known to be unstable. Use at your own risk.");
+TAG_FLAG(unlock_experimental_flags, advanced);
+TAG_FLAG(unlock_experimental_flags, stable);
+
+// Passed by CheckFlagsAllowed() to CheckFlagsAndWarn("unsafe", ...): when
+// false, any non-default flag tagged 'unsafe' makes the process exit.
+DEFINE_bool(unlock_unsafe_flags, false,
+            "Unlock flags marked as 'unsafe'. These flags are not guaranteed to "
+            "be maintained across releases of Kudu, and enable features or behavior "
+            "known to be unsafe. Use at your own risk.");
+TAG_FLAG(unlock_unsafe_flags, advanced);
+TAG_FLAG(unlock_unsafe_flags, stable);
+
+// Parsed by ValidateRedact(), which translates the value into
+// kudu::g_should_redact.
+DEFINE_string(redact, "all",
+              "Comma-separated list that controls redaction context. Supported options "
+              "are 'all','log', and 'none'. If 'all' is specified, sensitive data "
+              "(sensitive configuration flags and row data) will be redacted from "
+              "the web UI as well as glog and error messages. If 'log' is specified, "
+              "sensitive data will only be redacted from glog and error messages. "
+              "If 'none' is specified, no redaction will occur.");
+TAG_FLAG(redact, advanced);
+TAG_FLAG(redact, evolving);
+
+// Validator for --redact. Parses 'value' case-insensitively and, when it is
+// acceptable, stores the resulting context in kudu::g_should_redact:
+//   "all"                      -> RedactContext::ALL
+//   "none" or ""               -> RedactContext::NONE
+//   comma-separated "log" list -> RedactContext::LOG
+// Anything else logs an error and returns false. The global is written only
+// after the whole value has been accepted, so a rejected value leaves the
+// previously configured redaction context in force.
+static bool ValidateRedact(const char* /*flagname*/, const string& value) {
+  // Flag value is case insensitive.
+  string redact_flags;
+  kudu::ToUpperCase(value, &redact_flags);
+
+  // 'all', 'none', and '' must be specified without any other option.
+  if (redact_flags == "ALL") {
+    kudu::g_should_redact = kudu::RedactContext::ALL;
+    return true;
+  }
+  if (redact_flags == "NONE" || redact_flags.empty()) {
+    kudu::g_should_redact = kudu::RedactContext::NONE;
+    return true;
+  }
+
+  bool redact_log = false;
+  for (const auto& t : strings::Split(redact_flags, ",", strings::SkipEmpty())) {
+    if (t == "LOG") {
+      redact_log = true;
+    } else if (t == "ALL" || t == "NONE") {
+      LOG(ERROR) << "Invalid redaction options: "
+                 << value << ", '" << t << "' must be specified by itself.";
+      return false;
+    } else {
+      LOG(ERROR) << "Invalid redaction context: " << t <<
+                    ". Available types are 'all', 'log', and 'none'.";
+      return false;
+    }
+  }
+
+  // A list made only of separators (e.g. ",") selects nothing, i.e. NONE.
+  kudu::g_should_redact = redact_log ? kudu::RedactContext::LOG
+                                     : kudu::RedactContext::NONE;
+  return true;
+}
+
+DEFINE_validator(redact, &ValidateRedact);
+// Tag a bunch of the flags that we inherit from glog/gflags.
+
+//------------------------------------------------------------
+// GLog flags
+//------------------------------------------------------------
+// Most of these are considered stable. The ones related to email are
+// marked unsafe because sending email inline from a server is a pretty
+// bad idea.
+DECLARE_string(alsologtoemail);
+TAG_FLAG(alsologtoemail, hidden);
+TAG_FLAG(alsologtoemail, unsafe);
+
+// --alsologtostderr is deprecated in favor of --stderrthreshold
+DECLARE_bool(alsologtostderr);
+TAG_FLAG(alsologtostderr, hidden);
+TAG_FLAG(alsologtostderr, runtime);
+
+DECLARE_bool(colorlogtostderr);
+TAG_FLAG(colorlogtostderr, stable);
+TAG_FLAG(colorlogtostderr, runtime);
+
+DECLARE_bool(drop_log_memory);
+TAG_FLAG(drop_log_memory, advanced);
+TAG_FLAG(drop_log_memory, runtime);
+
+DECLARE_string(log_backtrace_at);
+TAG_FLAG(log_backtrace_at, advanced);
+
+DECLARE_string(log_dir);
+TAG_FLAG(log_dir, stable);
+
+DECLARE_string(log_link);
+TAG_FLAG(log_link, stable);
+TAG_FLAG(log_link, advanced);
+
+DECLARE_bool(log_prefix);
+TAG_FLAG(log_prefix, stable);
+TAG_FLAG(log_prefix, advanced);
+TAG_FLAG(log_prefix, runtime);
+
+DECLARE_int32(logbuflevel);
+TAG_FLAG(logbuflevel, advanced);
+TAG_FLAG(logbuflevel, runtime);
+DECLARE_int32(logbufsecs);
+TAG_FLAG(logbufsecs, advanced);
+TAG_FLAG(logbufsecs, runtime);
+
+DECLARE_int32(logemaillevel);
+TAG_FLAG(logemaillevel, hidden);
+TAG_FLAG(logemaillevel, unsafe);
+
+DECLARE_string(logmailer);
+TAG_FLAG(logmailer, hidden);
+
+DECLARE_bool(logtostderr);
+TAG_FLAG(logtostderr, stable);
+TAG_FLAG(logtostderr, runtime);
+
+DECLARE_int32(max_log_size);
+TAG_FLAG(max_log_size, stable);
+TAG_FLAG(max_log_size, runtime);
+
+DECLARE_int32(minloglevel);
+TAG_FLAG(minloglevel, stable);
+TAG_FLAG(minloglevel, advanced);
+TAG_FLAG(minloglevel, runtime);
+
+DECLARE_int32(stderrthreshold);
+TAG_FLAG(stderrthreshold, stable);
+TAG_FLAG(stderrthreshold, advanced);
+TAG_FLAG(stderrthreshold, runtime);
+
+DECLARE_bool(stop_logging_if_full_disk);
+TAG_FLAG(stop_logging_if_full_disk, stable);
+TAG_FLAG(stop_logging_if_full_disk, advanced);
+TAG_FLAG(stop_logging_if_full_disk, runtime);
+
+DECLARE_int32(v);
+TAG_FLAG(v, stable);
+TAG_FLAG(v, advanced);
+TAG_FLAG(v, runtime);
+
+DECLARE_string(vmodule);
+TAG_FLAG(vmodule, stable);
+TAG_FLAG(vmodule, advanced);
+
+DECLARE_bool(symbolize_stacktrace);
+TAG_FLAG(symbolize_stacktrace, stable);
+TAG_FLAG(symbolize_stacktrace, runtime);
+TAG_FLAG(symbolize_stacktrace, advanced);
+
+//------------------------------------------------------------
+// GFlags flags
+//------------------------------------------------------------
+DECLARE_string(flagfile);
+TAG_FLAG(flagfile, stable);
+
+DECLARE_string(fromenv);
+TAG_FLAG(fromenv, stable);
+TAG_FLAG(fromenv, advanced);
+
+DECLARE_string(tryfromenv);
+TAG_FLAG(tryfromenv, stable);
+TAG_FLAG(tryfromenv, advanced);
+
+DECLARE_string(undefok);
+TAG_FLAG(undefok, stable);
+TAG_FLAG(undefok, advanced);
+
+DECLARE_int32(tab_completion_columns);
+TAG_FLAG(tab_completion_columns, stable);
+TAG_FLAG(tab_completion_columns, hidden);
+
+DECLARE_string(tab_completion_word);
+TAG_FLAG(tab_completion_word, stable);
+TAG_FLAG(tab_completion_word, hidden);
+
+DECLARE_bool(help);
+TAG_FLAG(help, stable);
+
+DECLARE_bool(helpfull);
+// We hide -helpfull because it's the same as -help for now.
+TAG_FLAG(helpfull, stable);
+TAG_FLAG(helpfull, hidden);
+
+DECLARE_string(helpmatch);
+TAG_FLAG(helpmatch, stable);
+TAG_FLAG(helpmatch, advanced);
+
+DECLARE_string(helpon);
+TAG_FLAG(helpon, stable);
+TAG_FLAG(helpon, advanced);
+
+DECLARE_bool(helppackage);
+TAG_FLAG(helppackage, stable);
+TAG_FLAG(helppackage, advanced);
+
+DECLARE_bool(helpshort);
+TAG_FLAG(helpshort, stable);
+TAG_FLAG(helpshort, advanced);
+
+DECLARE_bool(helpxml);
+TAG_FLAG(helpxml, stable);
+TAG_FLAG(helpxml, advanced);
+
+DECLARE_bool(version);
+TAG_FLAG(version, stable);
+
+//------------------------------------------------------------
+// TCMalloc flags.
+// These are tricky because tcmalloc doesn't use gflags. So we have to
+// reach into its internal namespace.
+//------------------------------------------------------------
+// TCM_NAMESPACE names the namespace in which the sample-parameter variable is
+// declared below; HandleCommonFlags() assigns to it through this macro.
+#define TCM_NAMESPACE FLAG__namespace_do_not_use_directly_use_DECLARE_int64_instead
+namespace TCM_NAMESPACE {
+extern int64_t FLAGS_tcmalloc_sample_parameter;
+} // namespace TCM_NAMESPACE
+
+namespace kudu {
+
+// Umask parsed from --umask; holds (uint32_t)-1 until SetUmask() stores the parsed value.
+uint32_t g_parsed_umask = -1;
+
+namespace {
+
+// Appends "<tag>txt</tag>" to '*r'. 'txt' is passed through
+// EscapeForHtmlToString() first; 'tag' is emitted verbatim.
+void AppendXMLTag(const char* tag, const string& txt, string* r) {
+  strings::SubstituteAndAppend(r, "<$0>$1</$0>", tag, EscapeForHtmlToString(txt));
+}
+
+// Renders 'flag' as a single "<flag>...</flag>" element containing, in order,
+// its file, name, meaning, default, current, type and comma-joined tags.
+static string DescribeOneFlagInXML(const CommandLineFlagInfo& flag) {
+  unordered_set<string> flag_tags;
+  GetFlagTags(flag.name, &flag_tags);
+  const string joined_tags = JoinStrings(flag_tags, ",");
+
+  // Child elements in output order: element name -> text to escape.
+  const std::pair<const char*, const string*> fields[] = {
+    { "file", &flag.filename },
+    { "name", &flag.name },
+    { "meaning", &flag.description },
+    { "default", &flag.default_value },
+    { "current", &flag.current_value },
+    { "type", &flag.type },
+    { "tags", &joined_tags },
+  };
+
+  string xml("<flag>");
+  for (const auto& field : fields) {
+    AppendXMLTag(field.first, *field.second, &xml);
+  }
+  xml += "</flag>";
+  return xml;
+}
+
+// Writes an XML description of every registered flag to stdout: an
+// <AllFlags> document holding the program name, the usage string, and one
+// <flag> element per flag (see DescribeOneFlagInXML()).
+void DumpFlagsXML() {
+  vector<CommandLineFlagInfo> all_flags;
+  GetAllFlags(&all_flags);
+
+  const string program =
+      EscapeForHtmlToString(BaseName(google::ProgramInvocationShortName()));
+  const string usage = EscapeForHtmlToString(google::ProgramUsage());
+
+  cout << "<?xml version=\"1.0\"?>" << endl;
+  cout << "<AllFlags>" << endl;
+  cout << strings::Substitute("<program>$0</program>", program) << endl;
+  cout << strings::Substitute("<usage>$0</usage>", usage) << endl;
+
+  for (const CommandLineFlagInfo& info : all_flags) {
+    cout << DescribeOneFlagInXML(info) << endl;
+  }
+
+  cout << "</AllFlags>" << endl;
+}
+
+// Check that, if any flags tagged with 'tag' have been specified to
+// non-default values, that 'unlocked' is true. If so (i.e. if the
+// flags have been appropriately unlocked), emits a warning message
+// for each flag and returns false. Otherwise, emits an error message
+// and returns true.
+//
+// The value printed in the warning goes through CheckFlagAndRedact(), so a
+// flag that is also tagged 'sensitive' is not written to the log in the clear
+// while redaction is enabled.
+bool CheckFlagsAndWarn(const string& tag, bool unlocked) {
+  vector<CommandLineFlagInfo> flags;
+  GetAllFlags(&flags);
+
+  int use_count = 0;
+  for (const auto& f : flags) {
+    if (f.is_default) continue;
+    unordered_set<string> tags;
+    GetFlagTags(f.name, &tags);
+    if (!ContainsKey(tags, tag)) continue;
+
+    if (unlocked) {
+      LOG(WARNING) << "Enabled " << tag << " flag: --" << f.name << "="
+                   << CheckFlagAndRedact(f, EscapeMode::NONE);
+    } else {
+      LOG(ERROR) << "Flag --" << f.name << " is " << tag << " and unsupported.";
+      use_count++;
+    }
+  }
+
+  if (!unlocked && use_count > 0) {
+    LOG(ERROR) << use_count << " " << tag << " flag(s) in use.";
+    LOG(ERROR) << "Use --unlock_" << tag << "_flags to proceed at your own risk.";
+    return true;
+  }
+  return false;
+}
+
+// Check that any flags specified on the command line are allowed
+// to be set. This ensures that, if the user is using any unsafe
+// or experimental flags, they have explicitly unlocked them.
+// Exits the process with status 1 if a locked flag is in use.
+void CheckFlagsAllowed() {
+  // Run both checks unconditionally (no short-circuit) so that every
+  // offending flag is reported before exiting.
+  const bool unsafe_blocked =
+      CheckFlagsAndWarn("unsafe", FLAGS_unlock_unsafe_flags);
+  const bool experimental_blocked =
+      CheckFlagsAndWarn("experimental", FLAGS_unlock_experimental_flags);
+  if (unsafe_blocked || experimental_blocked) {
+    exit(1);
+  }
+}
+
+// Run 'late phase' custom validators: these can be run only when all flags are
+// already parsed and individually validated. Every registered validator is
+// invoked even after one has failed; if any returned false, an error is
+// logged and the process exits with status 1.
+void RunCustomValidators() {
+  bool all_consistent = true;
+  for (const auto& entry : GetFlagValidators()) {
+    if (!entry.second()) {
+      all_consistent = false;
+    }
+  }
+  if (all_consistent) {
+    return;
+  }
+  LOG(ERROR) << "Detected inconsistency in command-line flags; exiting";
+  exit(1);
+}
+
+// Parses --umask (octal) into g_parsed_umask and installs it as the process
+// umask. CHECK-fails if the flag value does not parse.
+void SetUmask() {
+  // We already validated with a nice error message using the ValidateUmask
+  // FlagValidator above.
+  CHECK(safe_strtou32_base(FLAGS_umask.c_str(), &g_parsed_umask, 8));
+  uint32_t old_mask = umask(g_parsed_umask);
+  if (old_mask != g_parsed_umask) {
+    VLOG(2) << "Changed umask from " << StringPrintf("%03o", old_mask) << " to "
+            << StringPrintf("%03o", g_parsed_umask);
+  }
+}
+
+} // anonymous namespace
+
+// Returns the value of 'flag' as it should be displayed: the redaction
+// message if the flag is tagged 'sensitive' and KUDU_SHOULD_REDACT() holds,
+// otherwise the flag's current value. With EscapeMode::HTML the result is
+// additionally HTML-escaped.
+string CheckFlagAndRedact(const CommandLineFlagInfo& flag, EscapeMode mode) {
+  unordered_set<string> flag_tags;
+  GetFlagTags(flag.name, &flag_tags);
+
+  // The redaction state is only consulted for flags tagged 'sensitive'.
+  const bool redact = ContainsKey(flag_tags, "sensitive") && KUDU_SHOULD_REDACT();
+
+  string shown;
+  if (redact) {
+    shown = kRedactionMessage;
+  } else {
+    shown = flag.current_value;
+  }
+
+  if (mode != EscapeMode::HTML) {
+    return shown;
+  }
+  return EscapeForHtmlToString(shown);
+}
+
+// Parses the non-help gflags in argc/argv, then runs HandleCommonFlags().
+// Returns whatever google::ParseCommandLineNonHelpFlags() returned.
+int ParseCommandLineFlags(int* argc, char*** argv, bool remove_flags) {
+  // The logbufsecs default is 30 seconds which is a bit too long.
+  // SET_FLAGS_DEFAULT changes the default only, so an explicit --logbufsecs
+  // on the command line still wins.
+  google::SetCommandLineOptionWithMode("logbufsecs", "5",
+                                       google::FlagSettingMode::SET_FLAGS_DEFAULT);
+
+  int ret = google::ParseCommandLineNonHelpFlags(argc, argv, remove_flags);
+  HandleCommonFlags();
+  return ret;
+}
+
+// Acts on the flags shared by every binary, once gflags has parsed them:
+//   * --helpxml, --dump_metrics_json and --version print their output and
+//     exit (status 1, 0 and 0 respectively).
+//   * The gflags help flags are handled, then the locked-flag check and the
+//     custom validators run; either of those may exit(1).
+//   * Core dumps are disabled if requested and the process umask is applied.
+//   * With TCMALLOC_ENABLED, heap profiling and sampling are configured.
+void HandleCommonFlags() {
+  if (FLAGS_helpxml) {
+    DumpFlagsXML();
+    exit(1);
+  } else if (FLAGS_dump_metrics_json) {
+    MetricPrototypeRegistry::get()->WriteAsJson();
+    exit(0);
+  } else if (FLAGS_version) {
+    cout << VersionInfo::GetAllVersionInfo() << endl;
+    exit(0);
+  }
+
+  google::HandleCommandLineHelpFlags();
+  CheckFlagsAllowed();
+  RunCustomValidators();
+
+  if (FLAGS_disable_core_dumps) {
+    DisableCoreDumps();
+  }
+
+  SetUmask();
+
+#ifdef TCMALLOC_ENABLED
+  if (FLAGS_heap_profile_path.empty()) {
+    FLAGS_heap_profile_path = strings::Substitute(
+        "/tmp/$0.$1", google::ProgramInvocationShortName(), getpid());
+  }
+
+  if (FLAGS_enable_process_lifetime_heap_profiling) {
+    HeapProfilerStart(FLAGS_heap_profile_path.c_str());
+  }
+  // Set the internal tcmalloc flag unless it was already set using the built-in
+  // environment-variable-based method. It doesn't appear that this is settable
+  // in any less hacky fashion.
+  if (!getenv("TCMALLOC_SAMPLE_PARAMETER")) {
+    TCM_NAMESPACE::FLAGS_tcmalloc_sample_parameter = FLAGS_heap_sample_every_n_bytes;
+  } else if (!google::GetCommandLineFlagInfoOrDie("heap_sample_every_n_bytes").is_default) {
+    // Name the flag exactly as it is defined so the message can be searched for.
+    LOG(ERROR) << "Heap sampling configured using both --heap_sample_every_n_bytes and "
+               << "TCMALLOC_SAMPLE_PARAMETER. Ignoring command line flag.";
+  }
+#endif
+}
+
+// Returns one "--name=value\n" line per registered flag. Values come from
+// CheckFlagAndRedact(); with EscapeMode::HTML the flag name is HTML-escaped
+// as well.
+string CommandlineFlagsIntoString(EscapeMode mode) {
+  vector<CommandLineFlagInfo> all_flags;
+  GetAllFlags(&all_flags);
+
+  string out;
+  for (const auto& info : all_flags) {
+    out += "--";
+    switch (mode) {
+      case EscapeMode::HTML:
+        out += EscapeForHtmlToString(info.name);
+        break;
+      case EscapeMode::NONE:
+        out += info.name;
+        break;
+    }
+    out += "=";
+    out += CheckFlagAndRedact(info, mode);
+    out += "\n";
+  }
+  return out;
+}
+
+// Returns the flags whose current value differs from the snapshot in
+// 'default_flags', as "--name=value" entries separated by '\n' (no trailing
+// newline). Values go through CheckFlagAndRedact(), so flags tagged as
+// sensitive are redacted when redaction is enabled.
+string GetNonDefaultFlags(const GFlagsMap& default_flags) {
+  ostringstream args;
+  // Tracks whether anything has been emitted yet; asking the stream via
+  // str() would copy the whole buffer for every flag.
+  bool wrote_any = false;
+  vector<CommandLineFlagInfo> flags;
+  GetAllFlags(&flags);
+  for (const auto& flag : flags) {
+    if (flag.is_default) {
+      continue;
+    }
+    // !is_default only means that the flag has been rewritten. It doesn't
+    // mean that this has been done in the command line, or even
+    // that it's truly different from the default value.
+    // Next, we try to check both.
+    const auto default_flag = default_flags.find(flag.name);
+    // it's very unlikely, but still possible that we don't have the flag in defaults
+    if (default_flag != default_flags.end() &&
+        flag.current_value == default_flag->second.current_value) {
+      continue;
+    }
+
+    if (wrote_any) {
+      args << '\n';
+    }
+    wrote_any = true;
+
+    // Redact the flags tagged as sensitive, if redaction is enabled.
+    string flag_value = CheckFlagAndRedact(flag, EscapeMode::NONE);
+    args << "--" << flag.name << '=' << flag_value;
+  }
+  return args.str();
+}
+
+// Returns the info of every currently registered flag, keyed by flag name.
+GFlagsMap GetFlagsMap() {
+  vector<CommandLineFlagInfo> snapshot;
+  GetAllFlags(&snapshot);
+
+  GFlagsMap by_name;
+  for (auto it = snapshot.begin(); it != snapshot.end(); ++it) {
+    // The key is copied from it->name before the entry itself is moved from.
+    by_name.emplace(it->name, std::move(*it));
+  }
+  return by_name;
+}
+
+// Maps 'flag_value' (compared case-insensitively) onto '*tri_state':
+// "required", "optional" or "disabled". Any other value yields
+// Status::InvalidArgument naming 'flag_name' and leaves '*tri_state' untouched.
+Status ParseTriState(const char* flag_name, const std::string& flag_value,
+    TriStateFlag* tri_state) {
+  if (boost::iequals(flag_value, "required")) {
+    *tri_state = TriStateFlag::REQUIRED;
+    return Status::OK();
+  }
+  if (boost::iequals(flag_value, "optional")) {
+    *tri_state = TriStateFlag::OPTIONAL;
+    return Status::OK();
+  }
+  if (boost::iequals(flag_value, "disabled")) {
+    *tri_state = TriStateFlag::DISABLED;
+    return Status::OK();
+  }
+  return Status::InvalidArgument(strings::Substitute(
+        "$0 flag must be one of 'required', 'optional', or 'disabled'",
+        flag_name));
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/flags.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/flags.h b/be/src/kudu/util/flags.h
new file mode 100644
index 0000000..83cb152
--- /dev/null
+++ b/be/src/kudu/util/flags.h
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#ifndef KUDU_UTIL_FLAGS_H
+#define KUDU_UTIL_FLAGS_H
+
+#include <cstdint>
+#include <string>
+#include <unordered_map>
+
+#include "kudu/util/status.h"
+
+namespace google {
+  struct CommandLineFlagInfo;
+}
+
+namespace kudu {
+
+// The umask of the process, set based on the --umask flag during
+// HandleCommonFlags().
+extern uint32_t g_parsed_umask;
+
+// Looks for flags in argv and parses them.  Rearranges argv to put
+// flags first, or removes them entirely if remove_flags is true.
+// If a flag is defined more than once in the command line or flag
+// file, the last definition is used.  Returns the index (into argv)
+// of the first non-flag argument.
+//
+// This is a wrapper around google::ParseCommandLineFlags, but integrates
+// with Kudu flag tags. For example, --helpxml will include the list of
+// tags for each flag. This should be used instead of
+// google::ParseCommandLineFlags in any user-facing binary.
+//
+// See gflags.h for more information.
+int ParseCommandLineFlags(int* argc, char*** argv, bool remove_flags);
+
+// Handle common flags such as -version, -disable_core_dumps, etc.
+// This includes the GFlags common flags such as "-help".
+//
+// Requires that flags have already been parsed using
+// google::ParseCommandLineNonHelpFlags().
+void HandleCommonFlags();
+
+enum class EscapeMode {
+  HTML,
+  NONE
+};
+
+// Stick the flags into a string. If redaction is enabled, the values of
+// flags tagged as sensitive will be redacted. Otherwise, the values
+// will be written to the string as-is. The values will be HTML escaped
+// if EscapeMode is HTML.
+std::string CommandlineFlagsIntoString(EscapeMode mode);
+
+typedef std::unordered_map<std::string, google::CommandLineFlagInfo> GFlagsMap;
+
+// Get all the flags different from their defaults. The output is a nicely
+// formatted string with --flag=value pairs per line. Redact any flags that
+// are tagged as sensitive, if redaction is enabled.
+std::string GetNonDefaultFlags(const GFlagsMap& default_flags);
+
+GFlagsMap GetFlagsMap();
+
+enum class TriStateFlag {
+  DISABLED,
+  OPTIONAL,
+  REQUIRED,
+};
+
+Status ParseTriState(const char* flag_name, const std::string& flag_value,
+    TriStateFlag* tri_state);
+
+// Returns the current value of 'flag' for display. If the flag is tagged
+// 'sensitive' and redaction is enabled, the redaction message is returned
+// in place of the value. The result is HTML-escaped when 'mode' is
+// EscapeMode::HTML.
+std::string CheckFlagAndRedact(const google::CommandLineFlagInfo& flag, EscapeMode mode);
+
+} // namespace kudu
+#endif /* KUDU_UTIL_FLAGS_H */

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/group_varint-inl.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/group_varint-inl.h b/be/src/kudu/util/group_varint-inl.h
new file mode 100644
index 0000000..27e289f
--- /dev/null
+++ b/be/src/kudu/util/group_varint-inl.h
@@ -0,0 +1,294 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#ifndef KUDU_UTIL_GROUP_VARINT_INL_H
+#define KUDU_UTIL_GROUP_VARINT_INL_H
+
+#include <emmintrin.h>
+#ifdef __linux__
+#include <endian.h>
+#endif
+#include <smmintrin.h>
+#include <tmmintrin.h>
+#include <xmmintrin.h>
+
+#include <cstdint>
+#include <cstring>
+
+#include <boost/utility/binary.hpp>
+#include <boost/parameter/name.hpp>
+#include <boost/preprocessor/arithmetic/dec.hpp>
+#include <boost/preprocessor/arithmetic/inc.hpp>
+#include <boost/preprocessor/control/iif.hpp>
+#include <boost/preprocessor/control/while.hpp>
+#include <boost/preprocessor/list/fold_left.hpp>
+#include <boost/preprocessor/logical/bitand.hpp>
+#include <boost/preprocessor/logical/bool.hpp>
+#include <boost/preprocessor/logical/compl.hpp>
+#include <boost/preprocessor/seq/elem.hpp>
+#include <boost/preprocessor/seq/fold_left.hpp>
+#include <boost/preprocessor/seq/size.hpp>
+#include <boost/preprocessor/tuple/elem.hpp>
+#include <boost/preprocessor/variadic/elem.hpp>
+#include <glog/logging.h>
+
+#ifndef __linux__
+#include "kudu/gutil/port.h"
+#endif
+#include "kudu/util/faststring.h"
+
+namespace kudu {
+namespace coding {
+
+extern bool SSE_TABLE_INITTED;
+extern uint8_t SSE_TABLE[256 * 16] __attribute__((aligned(16)));
+extern uint8_t VARINT_SELECTOR_LENGTHS[256];
+
+const uint32_t MASKS[4] = { 0xff, 0xffff, 0xffffff, 0xffffffff };
+
+
+// Returns how many bytes (1-4) are needed to hold 'i' once its leading
+// all-zero bytes are dropped. Zero still takes one byte.
+inline size_t CalcRequiredBytes32(uint32_t i) {
+  // __builtin_clz(0) is undefined; OR-ing in the lowest bit sidesteps that
+  // without changing the answer for any other input.
+  const int leading_zero_bytes = __builtin_clz(i | 1) / 8;
+  return sizeof(uint32_t) - leading_zero_bytes;
+}
+
+// Decode a set of 4 group-varint encoded integers from the given pointer.
+//
+// Each integer is fetched with a full 4-byte load and then masked down to
+// its encoded width, so up to 3 readable bytes must follow the last encoded
+// integer in 'src'.
+//
+// Returns a pointer following the last decoded integer.
+inline const uint8_t *DecodeGroupVarInt32(
+  const uint8_t *src,
+  uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
+
+  // The selector byte packs four 2-bit fields, 'a' in the top two bits down
+  // to 'd' in the bottom two, each holding "encoded length - 1".
+  const uint8_t selector = *src++;
+  const uint8_t a_sel = (selector >> 6) & 0x3;
+  const uint8_t b_sel = (selector >> 4) & 0x3;
+  const uint8_t c_sel = (selector >> 2) & 0x3;
+  const uint8_t d_sel = selector & 0x3;
+
+  // Load through memcpy rather than dereferencing a cast pointer: 'src' has
+  // no particular alignment, and the fixed-size copy is the well-defined way
+  // to spell an unaligned 32-bit load.
+  uint32_t word;
+
+  memcpy(&word, src, sizeof(word));
+  *a = word & MASKS[a_sel];
+  src += a_sel + 1;
+
+  memcpy(&word, src, sizeof(word));
+  *b = word & MASKS[b_sel];
+  src += b_sel + 1;
+
+  memcpy(&word, src, sizeof(word));
+  *c = word & MASKS[c_sel];
+  src += c_sel + 1;
+
+  memcpy(&word, src, sizeof(word));
+  *d = word & MASKS[d_sel];
+  src += d_sel + 1;
+
+  return src;
+}
+
+// Returns the total encoded size of the group starting at 'src': the
+// VARINT_SELECTOR_LENGTHS entry for its selector byte, plus one for the
+// selector byte itself.
+inline size_t DecodeGroupVarInt32_GetGroupSize(const uint8_t *src) {
+  const uint8_t selector = *src;
+  return VARINT_SELECTOR_LENGTHS[selector] + 1;
+}
+
+// Decode a set of 4 group-varint encoded integers from the given pointer.
+//
+// Unlike DecodeGroupVarInt32(), this reads only the bytes that belong to the
+// group from 'src': they are copied into a local scratch buffer and decoded
+// from there.
+//
+// Returns a pointer following the last decoded integer.
+inline const uint8_t *DecodeGroupVarInt32_SlowButSafe(
+  const uint8_t *src,
+  uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
+
+  // VARINT_SELECTOR_LENGTHS[] isn't initialized until SSE_TABLE_INITTED is true
+  DCHECK(SSE_TABLE_INITTED);
+
+  const size_t total_len = DecodeGroupVarInt32_GetGroupSize(src);
+
+  // 17 bytes = selector + 4 integers of at most 4 bytes. Zero-filled so that
+  // the 4-byte loads in DecodeGroupVarInt32() never touch uninitialized
+  // bytes past 'total_len'.
+  uint8_t safe_buf[17] = {0};
+  memcpy(safe_buf, src, total_len);
+  DecodeGroupVarInt32(safe_buf, a, b, c, d);
+  return src + total_len;
+}
+
+
+inline void DoExtractM128(__m128i results,
+                          uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
+#define SSE_USE_EXTRACT_PS
+#ifdef SSE_USE_EXTRACT_PS
+  // Copies the four 32-bit lanes of 'results' into *a..*d (lane 0 -> *a).
+  // _mm_extract_ps turns into extractps, which is slightly faster
+  // than _mm_extract_epi32 (which turns into pextrd): apparently
+  // pextrd involves one more micro-op than extractps.
+  //
+  // A uint32 cfile macro-benchmark is about 3% faster with this code path.
+  *a = _mm_extract_ps((__v4sf)results, 0);
+  *b = _mm_extract_ps((__v4sf)results, 1);
+  *c = _mm_extract_ps((__v4sf)results, 2);
+  *d = _mm_extract_ps((__v4sf)results, 3);
+#else
+  *a = _mm_extract_epi32(results, 0);
+  *b = _mm_extract_epi32(results, 1);
+  *c = _mm_extract_epi32(results, 2);
+  *d = _mm_extract_epi32(results, 3);
+#endif
+}
+
+// Decodes a group of 4 group-varint encoded integers like
+// DecodeGroupVarInt32(), but uses SSE so may be faster.
+// TODO: remove this and just automatically pick the right implementation at
+// runtime.
+//
+// NOTE: the src buffer must have at least 17 bytes remaining in it (the
+// selector byte plus one 16-byte load), so this code path is not usable at
+// the end of a block.
+//
+// Returns a pointer following the last decoded integer.
+inline const uint8_t *DecodeGroupVarInt32_SSE(
+  const uint8_t *src,
+  uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
+
+  DCHECK(SSE_TABLE_INITTED);
+
+  uint8_t sel_byte = *src++;
+  // Each selector value owns one 16-byte shuffle mask in SSE_TABLE.
+  __m128i shuffle_mask = _mm_load_si128(
+    reinterpret_cast<const __m128i *>(&SSE_TABLE[sel_byte * 16]));
+  __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
+
+  __m128i results = _mm_shuffle_epi8(data, shuffle_mask);
+
+  // It would look like the following would be most efficient,
+  // since it turns into a single movdqa instruction:
+  //   *reinterpret_cast<__m128i *>(ret) = results;
+  // (where ret is an aligned array of ints, which the user must pass)
+  // but it is actually slower than the below alternatives by a
+  // good amount -- even though these result in more instructions.
+  DoExtractM128(results, a, b, c, d);
+  src += VARINT_SELECTOR_LENGTHS[sel_byte];
+
+  return src;
+}
+
+// Optimized function which decodes a group of uint32s from 'src' into 'ret',
+// which should have enough space for 4 uint32s. During decoding, adds 'add'
+// to the vector in parallel.
+//
+// NOTE: the src buffer must have at least 17 bytes remaining in it, so this
+// code path is not usable at the end of a block.
+inline const uint8_t *DecodeGroupVarInt32_SSE_Add(
+  const uint8_t *src,
+  uint32_t *ret,
+  __m128i add) {
+
+  DCHECK(SSE_TABLE_INITTED);
+
+  uint8_t sel_byte = *src++;
+  __m128i shuffle_mask = _mm_load_si128(
+    reinterpret_cast<__m128i *>(&SSE_TABLE[sel_byte * 16]));
+  __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
+
+  __m128i decoded_deltas = _mm_shuffle_epi8(data, shuffle_mask);
+  __m128i results = _mm_add_epi32(decoded_deltas, add);
+
+  DoExtractM128(results, &ret[0], &ret[1], &ret[2], &ret[3]);
+
+  src += VARINT_SELECTOR_LENGTHS[sel_byte];
+  return src;
+}
+
+
+// Append a, b, c, d to 's' as one group: a selector byte, then 1-4 bytes per integer.
+inline void AppendGroupVarInt32(
+  faststring *s,
+  uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+
+  uint8_t a_tag = CalcRequiredBytes32(a) - 1;
+  uint8_t b_tag = CalcRequiredBytes32(b) - 1;
+  uint8_t c_tag = CalcRequiredBytes32(c) - 1;
+  uint8_t d_tag = CalcRequiredBytes32(d) - 1;
+
+  uint8_t prefix_byte =
+    (a_tag << 6) |
+    (b_tag << 4) |
+    (c_tag << 2) |
+    (d_tag);
+
+  uint8_t size = 1 +
+    a_tag + 1 +
+    b_tag + 1 +
+    c_tag + 1 +
+    d_tag + 1;
+
+  size_t old_size = s->size();
+
+  // Reserving 4 extra bytes means we can use simple 4-byte stores instead
+  // of variable copies here: each store is partly overwritten by the next
+  // one, and the last may hang off the end into the "empty" area, which
+  // is OK because the final resize() below chops it back off.
+  s->resize(old_size + size + 4);
+  uint8_t *ptr = &((*s)[old_size]);
+
+#if __BYTE_ORDER != __LITTLE_ENDIAN
+#error dont support big endian currently
+#endif
+
+  *ptr++ = prefix_byte;
+  memcpy(ptr, &a, 4);
+  ptr += a_tag + 1;
+  memcpy(ptr, &b, 4);
+  ptr += b_tag + 1;
+  memcpy(ptr, &c, 4);
+  ptr += c_tag + 1;
+  memcpy(ptr, &d, 4);
+
+  s->resize(old_size + size);
+}
+
+// Append a sequence of uint32s encoded using group-varint.
+//
+// 'frame_of_reference' is also subtracted from each integer
+// before encoding.
+//
+// If frame_of_reference is greater than any element in the array,
+// results are undefined.
+//
+// The integers are written in groups of four. If 'size' is not a multiple
+// of four, the final group is padded with zeros, so the encoded data does
+// not by itself record how many integers were appended.
+//
+// For best performance, users should already have reserved adequate
+// space in 's' (CalcRequiredBytes32 can be handy here)
+inline void AppendGroupVarInt32Sequence(faststring *s, uint32_t frame_of_reference,
+                                        uint32_t *ints, size_t size) {
+  uint32_t *p = ints;
+
+  // Full groups of four.
+  for (; size >= 4; size -= 4, p += 4) {
+    AppendGroupVarInt32(s,
+                        p[0] - frame_of_reference,
+                        p[1] - frame_of_reference,
+                        p[2] - frame_of_reference,
+                        p[3] - frame_of_reference);
+  }
+
+  if (size == 0) {
+    return;
+  }
+
+  // 1-3 leftover integers: pad the last group with zeros.
+  uint32_t trailer[4] = {0, 0, 0, 0};
+  for (size_t i = 0; i < size; i++) {
+    trailer[i] = p[i] - frame_of_reference;
+  }
+  AppendGroupVarInt32(s, trailer[0], trailer[1], trailer[2], trailer[3]);
+}
+
+
+} // namespace coding
+} // namespace kudu
+
+#endif

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/group_varint-test.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/group_varint-test.cc b/be/src/kudu/util/group_varint-test.cc
new file mode 100644
index 0000000..983fb0f
--- /dev/null
+++ b/be/src/kudu/util/group_varint-test.cc
@@ -0,0 +1,144 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#ifdef NDEBUG
+#include <vector>
+#endif
+
+#include <gtest/gtest.h>
+
+#include "kudu/util/faststring.h"
+#include "kudu/util/group_varint-inl.h"
+#ifdef NDEBUG
+#include "kudu/util/stopwatch.h"
+#endif
+
+namespace kudu {
+namespace coding {
+
+extern void DumpSSETable();
+
+// Round-trips four ints through group-varint: encodes them, decodes the
+// bytes with either the SSE decoder ('use_sse' true) or the plain one, and
+// checks that the same values come back and that the decoder's returned
+// pointer lands exactly at the end of the encoded group.
+static void DoTestRoundTripGVI32(
+  uint32_t a, uint32_t b, uint32_t c, uint32_t d,
+  bool use_sse = false) {
+  faststring buf;
+  AppendGroupVarInt32(&buf, a, b, c, d);
+  int real_size = buf.size();
+
+  // The implementations actually read past the group varint, so pad the
+  // buffer to make sure they are not reading uninitialized memory. The SSE
+  // implementation uses 128-bit reads and the non-SSE one uses 32-bit reads.
+  const int padding = use_sse ? 16 : 4;
+  buf.append(std::string(padding, 'x'));
+
+  uint32_t ret[4];
+  const uint8_t *end = use_sse
+      ? DecodeGroupVarInt32_SSE(buf.data(), &ret[0], &ret[1], &ret[2], &ret[3])
+      : DecodeGroupVarInt32(buf.data(), &ret[0], &ret[1], &ret[2], &ret[3]);
+
+  ASSERT_EQ(a, ret[0]);
+  ASSERT_EQ(b, ret[1]);
+  ASSERT_EQ(c, ret[2]);
+  ASSERT_EQ(d, ret[3]);
+  ASSERT_EQ(end, buf.data() + real_size);
+}
+
+
+// Dumps the SSE table to the log, then round-trips a few groups through the
+// SSE decoder.
+TEST(TestGroupVarInt, TestSSETable) {
+  DumpSSETable();
+
+  // Encode one all-zero group; the encoded bytes are not inspected.
+  faststring buf;
+  AppendGroupVarInt32(&buf, 0, 0, 0, 0);
+
+  const uint32_t kGroups[][4] = {
+    {0, 0, 0, 0},
+    {1, 2, 3, 4},
+    {1, 2000, 3, 200000},
+  };
+  for (const auto &group : kGroups) {
+    DoTestRoundTripGVI32(group[0], group[1], group[2], group[3], true);
+  }
+}
+
+// Checks the exact bytes that AppendGroupVarInt32 produces for a few groups:
+// the prefix byte first, then the encoded integers.
+TEST(TestGroupVarInt, TestGroupVarInt) {
+  faststring buf;
+  AppendGroupVarInt32(&buf, 0, 0, 0, 0);
+  ASSERT_EQ(5UL, buf.size());
+  ASSERT_EQ(0, memcmp("\x00\x00\x00\x00\x00", buf.data(), 5));
+  buf.clear();
+
+  // All 1-byte
+  AppendGroupVarInt32(&buf, 1, 2, 3, 254);
+  ASSERT_EQ(5UL, buf.size());
+  ASSERT_EQ(0, memcmp("\x00\x01\x02\x03\xfe", buf.data(), 5));
+  buf.clear();
+
+  // Mixed 1-byte and 2-byte
+  AppendGroupVarInt32(&buf, 256, 2, 3, 65535);
+  ASSERT_EQ(7UL, buf.size());
+  ASSERT_EQ(BOOST_BINARY(01 00 00 01), buf.at(0));
+
+  // The two 2-byte fields start at byte offsets 1 and 5 of the buffer, which
+  // are not guaranteed to be 2-byte aligned. Copy them out with memcpy
+  // instead of dereferencing a uint16_t pointer into the byte buffer, which
+  // would be a misaligned (and aliasing-violating) read.
+  uint16_t first = 0;
+  uint16_t last = 0;
+  memcpy(&first, &buf[1], sizeof(first));
+  memcpy(&last, &buf[5], sizeof(last));
+
+  ASSERT_EQ(256, first);
+  ASSERT_EQ(2, *reinterpret_cast<const uint8_t *>(&buf[3]));
+  ASSERT_EQ(3, *reinterpret_cast<const uint8_t *>(&buf[4]));
+  ASSERT_EQ(65535, last);
+}
+
+
+// Round-trips groups through encode/decode using the non-SSE decoder.
+TEST(TestGroupVarInt, TestRoundTrip) {
+  // A few fixed groups first.
+  const uint32_t kGroups[][4] = {
+    {0, 0, 0, 0},
+    {1, 2, 3, 4},
+    {1, 2000, 3, 200000},
+  };
+  for (const auto &group : kGroups) {
+    DoTestRoundTripGVI32(group[0], group[1], group[2], group[3]);
+  }
+
+  // Then 10000 groups whose values come from random().
+  int remaining = 10000;
+  while (remaining-- > 0) {
+    DoTestRoundTripGVI32(random(), random(), random(), random());
+  }
+}
+
+#ifdef NDEBUG
+// Times 100 passes of encoding the integers 0..999999 as a single
+// group-varint sequence with a frame of reference of 0.
+TEST(TestGroupVarInt, EncodingBenchmark) {
+  const int n_ints = 1000000;
+
+  // Input: the integers 0, 1, 2, ... in ascending order.
+  std::vector<uint32_t> ints(n_ints);
+  for (int i = 0; i < n_ints; i++) {
+    ints[i] = i;
+  }
+
+  // conservative reservation
+  faststring s;
+  s.reserve(ints.size() * 4);
+
+  LOG_TIMING(INFO, "Benchmark") {
+    int passes_left = 100;
+    while (passes_left-- > 0) {
+      // Start each pass from an empty buffer.
+      s.clear();
+      AppendGroupVarInt32Sequence(&s, 0, ints.data(), n_ints);
+    }
+  }
+}
+#endif
+} // namespace coding
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/group_varint.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/group_varint.cc b/be/src/kudu/util/group_varint.cc
new file mode 100644
index 0000000..47fbeb4
--- /dev/null
+++ b/be/src/kudu/util/group_varint.cc
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <cstring>
+#include <ostream>
+#include <string>
+
+#include <boost/utility/binary.hpp>
+#include <glog/logging.h>
+
+#include "kudu/util/group_varint-inl.h"
+#include "kudu/util/hexdump.h"
+#include "kudu/util/slice.h"
+
+namespace kudu {
+namespace coding {
+
+// Set to true by InitializeSSETables() once both tables below are filled in.
+bool SSE_TABLE_INITTED = false;
+// 256 entries of 16 bytes each, one entry per possible value of the
+// selector byte. Filled in by InitializeSSETables().
+uint8_t SSE_TABLE[256 * 16] __attribute__((aligned(16)));
+// For each possible selector byte value, the sum of the byte counts of the
+// four integers it describes (as accumulated in InitializeSSETables()).
+uint8_t VARINT_SELECTOR_LENGTHS[256];
+
+// Fills in SSE_TABLE and VARINT_SELECTOR_LENGTHS, then sets
+// SSE_TABLE_INITTED. Declared with the 'constructor' attribute.
+//
+// Each byte value i is split into four 2-bit selectors, most significant
+// pair first; selector + 1 is the number of bytes taken by the corresponding
+// integer. The 16-byte table entry for i has one 4-byte lane per integer:
+// the first (selector + 1) bytes of the lane hold consecutive offsets,
+// counted from 0 across the whole group, and the rest of the lane stays 0xff.
+//
+//   00000000 ->
+//   00 ff ff ff  01 ff ff ff  02 ff ff ff  03 ff ff ff
+//
+//   01000100 ->
+//   00 01 ff ff  02 ff ff ff  03 04 ff ff  05 ff ff ff
+__attribute__((constructor))
+static void InitializeSSETables() {
+  // Start with every byte of every entry set to 0xff.
+  memset(SSE_TABLE, 0xff, sizeof(SSE_TABLE));
+
+  for (int i = 0; i < 256; i++) {
+    uint8_t *entry = &SSE_TABLE[i * 16];
+    uint8_t offset = 0;
+
+    for (int j = 0; j < 4; j++) {
+      // Selector j occupies bits (7 - 2j) and (6 - 2j) of i.
+      const int selector = (i >> (6 - 2 * j)) & 0x3;
+      const int num_bytes = selector + 1;
+      uint8_t *lane = entry + 4 * j;
+
+      for (int k = 0; k < num_bytes; k++) {
+        lane[k] = offset++;
+      }
+    }
+
+    // 'offset' now equals the total byte count of the four integers.
+    VARINT_SELECTOR_LENGTHS[i] = offset;
+  }
+
+  SSE_TABLE_INITTED = true;
+}
+
+// Logs a hex dump of the whole of SSE_TABLE at INFO level.
+void DumpSSETable() {
+  const Slice table_bytes(SSE_TABLE, sizeof(SSE_TABLE));
+  LOG(INFO) << "SSE table:\n" << kudu::HexDump(table_bytes);
+}
+
+
+
+} // namespace coding
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/hash_util-test.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/hash_util-test.cc 
b/be/src/kudu/util/hash_util-test.cc
new file mode 100644
index 0000000..4e40dd5
--- /dev/null
+++ b/be/src/kudu/util/hash_util-test.cc
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+
+#include <gtest/gtest.h>
+
+#include "kudu/util/hash_util.h"
+
+namespace kudu {
+
+// Checks that Murmur2 Hash64 returns the expected values for known inputs.
+// These tests are duplicated on the Java side to ensure that hash
+// computations are stable across both platforms.
+TEST(HashUtilTest, TestMurmur2Hash64) {
+  struct Case {
+    const char* input;
+    int len;
+    uint64_t seed;
+    uint64_t expected;
+  };
+  const Case kCases[] = {
+    { "ab", 2, 0, 7115271465109541368ULL },
+    { "abcdefg", 7, 0, 2601573339036254301ULL },
+    { "quick brown fox", 15, 42, 3575930248840144026ULL },
+  };
+
+  for (const Case& c : kCases) {
+    uint64_t hash = HashUtil::MurmurHash2_64(c.input, c.len, c.seed);
+    ASSERT_EQ(c.expected, hash);
+  }
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/hash_util.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/hash_util.h b/be/src/kudu/util/hash_util.h
new file mode 100644
index 0000000..d3a513b
--- /dev/null
+++ b/be/src/kudu/util/hash_util.h
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef KUDU_UTIL_HASH_UTIL_H
+#define KUDU_UTIL_HASH_UTIL_H
+
+#include <stdint.h>
+#include <string.h>
+
+#include "kudu/gutil/port.h"
+
+namespace kudu {
+
+/// Utility class to compute hash values.
+class HashUtil {
+ public:
+
+  /// Multiplier and right-shift amount used by the mixing steps of
+  /// MurmurHash2_64().
+  static const uint64_t MURMUR_PRIME = 0xc6a4a7935bd1e995;
+  static const int MURMUR_R = 47;
+
+  /// Murmur2 hash implementation returning 64-bit hashes.
+  ///
+  /// Hashes the 'len' bytes starting at 'input', mixed with 'seed', and
+  /// returns the 64-bit result. 'input' may have any alignment. 'len' is
+  /// expected to be non-negative.
+  ///
+  /// The body is consumed as whole 8-byte words in the host's byte order,
+  /// followed by a tail of up to 7 bytes.
+  ATTRIBUTE_NO_SANITIZE_INTEGER
+  static uint64_t MurmurHash2_64(const void* input, int len, uint64_t seed) {
+    uint64_t h = seed ^ (len * MURMUR_PRIME);
+
+    const uint8_t* data = static_cast<const uint8_t*>(input);
+    const uint8_t* end = data + (len / sizeof(uint64_t)) * sizeof(uint64_t);
+
+    for (; data != end; data += sizeof(uint64_t)) {
+      // Load the word with memcpy rather than through a uint64_t pointer:
+      // 'input' is an arbitrary byte buffer, so it need not be 8-byte
+      // aligned, and a direct uint64_t dereference would be undefined.
+      uint64_t k;
+      memcpy(&k, data, sizeof(k));
+
+      k *= MURMUR_PRIME;
+      k ^= k >> MURMUR_R;
+      k *= MURMUR_PRIME;
+      h ^= k;
+      h *= MURMUR_PRIME;
+    }
+
+    // Fold in the remaining (len & 7) bytes. Every case deliberately falls
+    // through to the ones below it, so each tail byte is mixed in exactly
+    // once before the final multiply.
+    const uint8_t* data2 = data;
+    switch (len & 7) {
+      case 7: h ^= static_cast<uint64_t>(data2[6]) << 48;  // fallthrough
+      case 6: h ^= static_cast<uint64_t>(data2[5]) << 40;  // fallthrough
+      case 5: h ^= static_cast<uint64_t>(data2[4]) << 32;  // fallthrough
+      case 4: h ^= static_cast<uint64_t>(data2[3]) << 24;  // fallthrough
+      case 3: h ^= static_cast<uint64_t>(data2[2]) << 16;  // fallthrough
+      case 2: h ^= static_cast<uint64_t>(data2[1]) << 8;   // fallthrough
+      case 1: h ^= static_cast<uint64_t>(data2[0]);
+              h *= MURMUR_PRIME;
+    }
+
+    // Final avalanche.
+    h ^= h >> MURMUR_R;
+    h *= MURMUR_PRIME;
+    h ^= h >> MURMUR_R;
+    return h;
+  }
+};
+
+} // namespace kudu
+#endif

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/hdr_histogram-test.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/hdr_histogram-test.cc 
b/be/src/kudu/util/hdr_histogram-test.cc
new file mode 100644
index 0000000..5d51e98
--- /dev/null
+++ b/be/src/kudu/util/hdr_histogram-test.cc
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+
+#include <gtest/gtest.h>
+
+#include "kudu/util/hdr_histogram.h"
+#include "kudu/util/test_util.h"
+#include "kudu/util/test_macros.h"
+
+namespace kudu {
+
+// Passed as the second constructor argument of every HdrHistogram built in
+// these tests (presumably the number of significant digits -- confirm
+// against hdr_histogram.h).
+static const int kSigDigits = 2;
+
+// Fixture for the HdrHistogram tests; adds nothing on top of KuduTest.
+class HdrHistogramTest : public KuduTest {
+};
+
+// Records a few values and checks the count reported for each value's
+// bucket, then the total sum of everything recorded.
+TEST_F(HdrHistogramTest, SimpleTest) {
+  uint64_t highest = 10000LU;
+  HdrHistogram hist(highest, kSigDigits);
+
+  // The value 1: once through Increment, then three more through
+  // IncrementBy.
+  ASSERT_EQ(0, hist.CountInBucketForValue(1));
+  hist.Increment(1);
+  ASSERT_EQ(1, hist.CountInBucketForValue(1));
+  hist.IncrementBy(1, 3);
+  ASSERT_EQ(4, hist.CountInBucketForValue(1));
+
+  // 10 and 20 are each expected to be counted on their own.
+  hist.Increment(10);
+  ASSERT_EQ(1, hist.CountInBucketForValue(10));
+  hist.Increment(20);
+  ASSERT_EQ(1, hist.CountInBucketForValue(20));
+
+  // 1000 and 1001 are expected to be counted together.
+  ASSERT_EQ(0, hist.CountInBucketForValue(1000));
+  hist.Increment(1000);
+  hist.Increment(1001);
+  ASSERT_EQ(2, hist.CountInBucketForValue(1000));
+
+  // Sum of every recorded value; the value 1 was recorded 1 + 3 times.
+  const int expected_total = 1 + 1 * 3 + 10 + 20 + 1000 + 1001;
+  ASSERT_EQ(expected_total, hist.TotalSum());
+}
+
+// Records 100 values through IncrementWithExpectedInterval, every tenth of
+// which is ten times the expected interval, and checks the resulting bucket
+// counts at the normal value and at every multiple of the interval up to
+// the large value.
+TEST_F(HdrHistogramTest, TestCoordinatedOmission) {
+  uint64_t interval = 1000;
+  int loop_iters = 100;
+  int64_t normal_value = 10;
+  HdrHistogram hist(1000000LU, kSigDigits);
+
+  for (int i = 1; i <= loop_iters; i++) {
+    // Simulate a periodic "large value" that would exhibit coordinated
+    // omission were this loop to sleep on 'interval'.
+    int64_t value;
+    if (i % normal_value == 0) {
+      value = interval * 10;
+    } else {
+      value = normal_value;
+    }
+
+    hist.IncrementWithExpectedInterval(value, interval);
+  }
+
+  // Number of iterations that recorded the large value.
+  const int64_t num_large = loop_iters / normal_value;
+
+  ASSERT_EQ(loop_iters - num_large,
+            hist.CountInBucketForValue(normal_value));
+
+  // Every multiple of 'interval' from 1x to 10x is expected to hold one
+  // count per large value.
+  for (int step = 1; step <= 10; step++) {
+    int i = interval * step;
+    ASSERT_EQ(num_large, hist.CountInBucketForValue(i));
+  }
+}
+
+// Expected aggregate statistics for the data set that load_percentiles()
+// records: each term of the sum is value * count.
+static const int kExpectedSum =
+  10 * 80 + 100 * 10 + 1000 * 5 + 10000 * 3 + 100000 * 1 + 1000000 * 1;
+// Largest recorded value.
+static const int kExpectedMax = 1000000;
+// Total number of recorded samples (80 + 10 + 5 + 3 + 1 + 1).
+static const int kExpectedCount = 100;
+// Smallest recorded value.
+static const int kExpectedMin = 10;
+// Records a fixed data set into 'hist': each value below is recorded
+// 'count' times through IncrementBy.
+static void load_percentiles(HdrHistogram* hist) {
+  struct Sample {
+    int value;
+    int count;
+  };
+  const Sample kSamples[] = {
+    {10, 80},
+    {100, 10},
+    {1000, 5},
+    {10000, 3},
+    {100000, 1},
+    {1000000, 1},
+  };
+
+  for (const Sample& sample : kSamples) {
+    hist->IncrementBy(sample.value, sample.count);
+  }
+}
+
+// Checks the statistics of a histogram that was filled by
+// load_percentiles(): min, max, sum, mean, count, and the values reported
+// at several percentiles.
+//
+// 'specified_max' is the highest value the histogram was constructed with;
+// the 99th percentile and above are expected to report
+// LowestEquivalentValue(specified_max).
+static void validate_percentiles(HdrHistogram* hist, uint64_t specified_max) {
+  double expected_mean =
+    static_cast<double>(kExpectedSum) / (80 + 10 + 5 + 3 + 1 + 1);
+
+  ASSERT_EQ(kExpectedMin, hist->MinValue());
+  ASSERT_EQ(kExpectedMax, hist->MaxValue());
+  ASSERT_EQ(kExpectedSum, hist->TotalSum());
+  ASSERT_NEAR(expected_mean, hist->MeanValue(), 0.001);
+  ASSERT_EQ(kExpectedCount, hist->TotalCount());
+
+  // 80 of the 100 samples are 10 and the next 10 are 100.
+  ASSERT_EQ(10, hist->ValueAtPercentile(80));
+  // The expected value here is the recorded value 100. It is spelled as a
+  // literal because it is a sample value, not the sample count, even though
+  // kExpectedCount happens to be 100 as well.
+  ASSERT_EQ(100, hist->ValueAtPercentile(90));
+
+  ASSERT_EQ(hist->LowestEquivalentValue(specified_max),
+            hist->ValueAtPercentile(99));
+  ASSERT_EQ(hist->LowestEquivalentValue(specified_max),
+            hist->ValueAtPercentile(99.99));
+  ASSERT_EQ(hist->LowestEquivalentValue(specified_max),
+            hist->ValueAtPercentile(100));
+}
+
+// Validates the percentile data set on a histogram and on a second
+// histogram constructed from it, then checks that both report the same
+// total sum.
+TEST_F(HdrHistogramTest, PercentileAndCopyTest) {
+  uint64_t specified_max = 10000;
+
+  HdrHistogram original(specified_max, kSigDigits);
+  load_percentiles(&original);
+  NO_FATALS(validate_percentiles(&original, specified_max));
+
+  // The copy must pass the same validation as the histogram it came from.
+  HdrHistogram duplicate(original);
+  NO_FATALS(validate_percentiles(&duplicate, specified_max));
+
+  ASSERT_EQ(original.TotalSum(), duplicate.TotalSum());
+}
+
+} // namespace kudu

Reply via email to