http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/02f3e3fc/be/src/gutil/strings/util.cc ---------------------------------------------------------------------- diff --git a/be/src/gutil/strings/util.cc b/be/src/gutil/strings/util.cc index 8447662..c16d9b2 100644 --- a/be/src/gutil/strings/util.cc +++ b/be/src/gutil/strings/util.cc @@ -5,7 +5,7 @@ // TODO(user): visit each const_cast. Some of them are no longer necessary // because last Single Unix Spec and grte v2 are more const-y. -#include "gutil/strings/util.h" +#include "kudu/gutil/strings/util.h" #include <assert.h> #include <stdarg.h> @@ -25,11 +25,12 @@ using std::string; using std::vector; #include <glog/logging.h> -#include "gutil/logging-inl.h" -#include "gutil/strings/ascii_ctype.h" -#include "gutil/strings/numbers.h" -#include "gutil/strings/stringpiece.h" -#include "gutil/stl_util.h" // for string_as_array, STLAppendToString +#include "kudu/gutil/logging-inl.h" +#include "kudu/gutil/strings/ascii_ctype.h" +#include "kudu/gutil/strings/numbers.h" +#include "kudu/gutil/strings/stringpiece.h" +#include "kudu/gutil/stl_util.h" // for string_as_array, STLAppendToString +#include "kudu/gutil/utf/utf.h" #ifdef OS_WINDOWS #ifdef min // windows.h defines this to something silly @@ -55,9 +56,9 @@ char* strnstr(const char* haystack, const char* needle, } size_t needle_len = strlen(needle); char* where; - while ((where = strnchr(haystack, *needle, haystack_len)) != NULL) { + while ((where = strnchr(haystack, *needle, haystack_len)) != nullptr) { if (where - haystack + needle_len > haystack_len) { - return NULL; + return nullptr; } if (strncmp(where, needle, needle_len) == 0) { return where; @@ -65,18 +66,18 @@ char* strnstr(const char* haystack, const char* needle, haystack_len -= where + 1 - haystack; haystack = where + 1; } - return NULL; + return nullptr; } const char* strnprefix(const char* haystack, int haystack_size, const char* needle, int needle_size) { if (needle_size > haystack_size) { - return NULL; + return nullptr; } else { if (strncmp(haystack, needle, needle_size) == 0) { return haystack + needle_size; } else { - return NULL; + return nullptr; } } } @@ -84,12 +85,12 @@ const char* strnprefix(const char* haystack, int haystack_size, const char* strncaseprefix(const char* haystack, int haystack_size, const char* needle, int needle_size) { if (needle_size > haystack_size) { - return NULL; + return nullptr; } else { if (strncasecmp(haystack, needle, needle_size) == 0) { return haystack + needle_size; } else { - return NULL; + return nullptr; } } } @@ -102,20 +103,20 @@ char* strcasesuffix(char* str, const char* suffix) { if (lenstr >= lensuffix && 0 == strcasecmp(strbeginningoftheend, suffix)) { return (strbeginningoftheend); } else { - return (NULL); + return (nullptr); } } const char* strnsuffix(const char* haystack, int haystack_size, const char* needle, int needle_size) { if (needle_size > haystack_size) { - return NULL; + return nullptr; } else { const char* start = haystack + haystack_size - needle_size; if (strncmp(start, needle, needle_size) == 0) { return start; } else { - return NULL; + return nullptr; } } } @@ -123,20 +124,20 @@ const char* strnsuffix(const char* haystack, int haystack_size, const char* strncasesuffix(const char* haystack, int haystack_size, const char* needle, int needle_size) { if (needle_size > haystack_size) { - return NULL; + return nullptr; } else { const char* start = haystack + haystack_size - needle_size; if (strncasecmp(start, needle, needle_size) == 0) { return start; } else { - return NULL; + return nullptr; } } } char* strchrnth(const char* str, const char& c, int n) { - if (str == NULL) - return NULL; + if (str == nullptr) + return nullptr; if (n <= 0) return const_cast<char*>(str); const char* sp; @@ -148,18 +149,18 @@ char* strchrnth(const char* str, const char& c, int n) { break; } } - return (k < n) ? NULL : const_cast<char*>(sp); + return (k < n) ? nullptr : const_cast<char*>(sp); } char* AdjustedLastPos(const char* str, char separator, int n) { - if ( str == NULL ) - return NULL; - const char* pos = NULL; + if ( str == nullptr ) + return nullptr; + const char* pos = nullptr; if ( n > 0 ) pos = strchrnth(str, separator, n); // if n <= 0 or separator appears fewer than n times, get the last occurrence - if ( pos == NULL) + if ( pos == nullptr) pos = strrchr(str, separator); return const_cast<char*>(pos); } @@ -237,7 +238,7 @@ void StringReplace(const StringPiece& s, const StringPiece& oldsub, int GlobalReplaceSubstring(const StringPiece& substring, const StringPiece& replacement, string* s) { - CHECK(s != NULL); + CHECK(s != nullptr); if (s->empty() || substring.empty()) return 0; string tmp; @@ -305,7 +306,7 @@ char *gstrcasestr(const char* haystack, const char* needle) { do { do { if ((sc = *haystack++) == 0) - return NULL; + return nullptr; } while (ascii_tolower(sc) != c); } while (strncasecmp(haystack, needle, len) != 0); haystack--; @@ -332,7 +333,7 @@ const char *gstrncasestr(const char* haystack, const char* needle, size_t len) { do { if (len-- <= needle_len || 0 == (sc = *haystack++)) - return NULL; + return nullptr; } while (ascii_tolower(sc) != c); } while (strncasecmp(haystack, needle, needle_len) != 0); haystack--; @@ -360,22 +361,22 @@ char *gstrncasestr_split(const char* str, const char* prefix, char non_alpha, const char* suffix, size_t n) { - int prelen = prefix == NULL ? 0 : strlen(prefix); - int suflen = suffix == NULL ? 0 : strlen(suffix); + int prelen = prefix == nullptr ? 0 : strlen(prefix); + int suflen = suffix == nullptr ? 0 : strlen(suffix); // adjust the string and its length to avoid unnessary searching. // an added benefit is to avoid unnecessary range checks in the if // statement in the inner loop. - if (suflen + prelen >= n) return NULL; + if (suflen + prelen >= n) return nullptr; str += prelen; n -= prelen; n -= suflen; - const char* where = NULL; + const char* where = nullptr; // for every occurance of non_alpha in the string ... while ((where = static_cast<const char*>( - memchr(str, non_alpha, n))) != NULL) { + memchr(str, non_alpha, n))) != nullptr) { // ... test whether it is followed by suffix and preceded by prefix if ((!suflen || strncasecmp(where + 1, suffix, suflen) == 0) && (!prelen || strncasecmp(where - prelen, prefix, prelen) == 0)) { @@ -386,7 +387,7 @@ char *gstrncasestr_split(const char* str, str = where + 1; } - return NULL; + return nullptr; } // ---------------------------------------------------------------------- @@ -413,7 +414,7 @@ char *strcasestr_alnum(const char *haystack, const char *needle) { // Skip non-alnums at beginning while ( !ascii_isalnum(*haystack) ) if ( *haystack++ == '\0' ) - return NULL; + return nullptr; haystack_ptr = haystack; while ( *needle_ptr != '\0' ) { @@ -424,7 +425,7 @@ char *strcasestr_alnum(const char *haystack, const char *needle) { while ( !ascii_isalnum(*haystack_ptr) ) if ( *haystack_ptr++ == '\0' ) - return NULL; + return nullptr; if ( ascii_tolower(*needle_ptr) == ascii_tolower(*haystack_ptr) ) { // Case-insensitive match - advance @@ -435,7 +436,7 @@ char *strcasestr_alnum(const char *haystack, const char *needle) { haystack++; while ( !ascii_isalnum(*haystack) ) if ( *haystack++ == '\0' ) - return NULL; + return nullptr; haystack_ptr = haystack; needle_ptr = needle; } @@ -476,7 +477,7 @@ int CountSubstring(StringPiece text, StringPiece substring) { const char* strstr_delimited(const char* haystack, const char* needle, char delim) { - if (!needle || !haystack) return NULL; + if (!needle || !haystack) return nullptr; if (*needle == '\0') return haystack; int needle_len = strlen(needle); @@ -503,12 +504,12 @@ const char* strstr_delimited(const char* haystack, // No match. Consume non-delimiter characters until we run out of them. while (*haystack != delim) { - if (*haystack == '\0') return NULL; + if (*haystack == '\0') return nullptr; ++haystack; } } LOG(FATAL) << "Unreachable statement"; - return NULL; + return nullptr; } @@ -522,8 +523,8 @@ char* gstrsep(char** stringp, const char* delim) { int c, sc; char *tok; - if ((s = *stringp) == NULL) - return NULL; + if ((s = *stringp) == nullptr) + return nullptr; tok = s; while (true) { @@ -532,7 +533,7 @@ char* gstrsep(char** stringp, const char* delim) { do { if ((sc = *spanp++) == c) { if (c == 0) - s = NULL; + s = nullptr; else s[-1] = 0; *stringp = s; @@ -541,7 +542,7 @@ char* gstrsep(char** stringp, const char* delim) { } while (sc != 0); } - return NULL; /* should not happen */ + return nullptr; /* should not happen */ } void FastStringAppend(string* s, const char* data, int len) { @@ -592,7 +593,7 @@ char* FastTimeToBuffer(time_t s, char* buffer) { } struct tm tm; - if (PortableSafeGmtime(&s, &tm) == NULL) { + if (PortableSafeGmtime(&s, &tm) == nullptr) { // Error message must fit in 30-char buffer. memcpy(buffer, "Invalid:", sizeof("Invalid:")); FastInt64ToBufferLeft(s, buffer+strlen(buffer)); @@ -677,17 +678,17 @@ char* FastTimeToBuffer(time_t s, char* buffer) { // and didn't want to (or cannot) modify the string // ---------------------------------------------------------------------- char* strdup_with_new(const char* the_string) { - if (the_string == NULL) - return NULL; + if (the_string == nullptr) + return nullptr; else return strndup_with_new(the_string, strlen(the_string)); } char* strndup_with_new(const char* the_string, int max_length) { - if (the_string == NULL) - return NULL; + if (the_string == nullptr) + return nullptr; - char* result = new char[max_length + 1]; + auto result = new char[max_length + 1]; result[max_length] = '\0'; // terminate the string because strncpy might not return strncpy(result, the_string, max_length); } @@ -709,17 +710,17 @@ char* strndup_with_new(const char* the_string, int max_length) { // Precondition: (end_ptr != NULL) // ---------------------------------------------------------------------- const char* ScanForFirstWord(const char* the_string, const char** end_ptr) { - CHECK(end_ptr != NULL) << ": precondition violated"; + CHECK(end_ptr != nullptr) << ": precondition violated"; - if (the_string == NULL) // empty string - return NULL; + if (the_string == nullptr) // empty string + return nullptr; const char* curr = the_string; while ((*curr != '\0') && ascii_isspace(*curr)) // skip initial spaces ++curr; if (*curr == '\0') // no valid word found - return NULL; + return nullptr; // else has a valid word const char* first_word = curr; @@ -744,7 +745,7 @@ const char *AdvanceIdentifier(const char *str) { // We could have used ascii_isalpha and ascii_isalnum. char ch = *str++; if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_')) - return NULL; + return nullptr; while (true) { ch = *str; if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || @@ -766,6 +767,146 @@ bool IsIdentifier(const char *str) { return end && *end == '\0'; } +static bool IsWildcard(Rune character) { + return character == '*' || character == '?'; +} + +// Move the strings pointers to the point where they start to differ. +template <typename CHAR, typename NEXT> +static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, + const CHAR** string, const CHAR* string_end, + NEXT next) { + const CHAR* escape = nullptr; + while (*pattern != pattern_end && *string != string_end) { + if (!escape && IsWildcard(**pattern)) { + // We don't want to match wildcard here, except if it's escaped. + return; + } + + // Check if the escapement char is found. If so, skip it and move to the + // next character. + if (!escape && **pattern == '\\') { + escape = *pattern; + next(pattern, pattern_end); + continue; + } + + // Check if the chars match, if so, increment the ptrs. + const CHAR* pattern_next = *pattern; + const CHAR* string_next = *string; + Rune pattern_char = next(&pattern_next, pattern_end); + if (pattern_char == next(&string_next, string_end) && + pattern_char != Runeerror && + pattern_char <= Runemax) { + *pattern = pattern_next; + *string = string_next; + } else { + // Uh ho, it did not match, we are done. If the last char was an + // escapement, that means that it was an error to advance the ptr here, + // let's put it back where it was. This also mean that the MatchPattern + // function will return false because if we can't match an escape char + // here, then no one will. + if (escape) { + *pattern = escape; + } + return; + } + + escape = nullptr; + } +} + +template <typename CHAR, typename NEXT> +static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) { + while (*pattern != end) { + if (!IsWildcard(**pattern)) + return; + next(pattern, end); + } +} + +template <typename CHAR, typename NEXT> +static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end, + const CHAR* pattern, const CHAR* pattern_end, + int depth, + NEXT next) { + const int kMaxDepth = 16; + if (depth > kMaxDepth) + return false; + + // Eat all the matching chars. + EatSameChars(&pattern, pattern_end, &eval, eval_end, next); + + // If the string is empty, then the pattern must be empty too, or contains + // only wildcards. + if (eval == eval_end) { + EatWildcard(&pattern, pattern_end, next); + return pattern == pattern_end; + } + + // Pattern is empty but not string, this is not a match. + if (pattern == pattern_end) + return false; + + // If this is a question mark, then we need to compare the rest with + // the current string or the string with one character eaten. + const CHAR* next_pattern = pattern; + next(&next_pattern, pattern_end); + if (pattern[0] == '?') { + if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, + depth + 1, next)) + return true; + const CHAR* next_eval = eval; + next(&next_eval, eval_end); + if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, + depth + 1, next)) + return true; + } + + // This is a *, try to match all the possible substrings with the remainder + // of the pattern. + if (pattern[0] == '*') { + // Collapse duplicate wild cards (********** into *) so that the + // method does not recurse unnecessarily. http://crbug.com/52839 + EatWildcard(&next_pattern, pattern_end, next); + + while (eval != eval_end) { + if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, + depth + 1, next)) + return true; + eval++; + } + + // We reached the end of the string, let see if the pattern contains only + // wildcards. + if (eval == eval_end) { + EatWildcard(&pattern, pattern_end, next); + if (pattern != pattern_end) + return false; + return true; + } + } + + return false; +} + +struct NextCharUTF8 { + Rune operator()(const char** p, const char* end) { + Rune c; + int offset = charntorune(&c, *p, static_cast<int>(end - *p)); + *p += offset; + return c; + } +}; + +bool MatchPattern(const StringPiece& eval, + const StringPiece& pattern) { + return MatchPatternT(eval.data(), eval.data() + eval.size(), + pattern.data(), pattern.data() + pattern.size(), + 0, NextCharUTF8()); +} + + // ---------------------------------------------------------------------- // FindTagValuePair @@ -783,7 +924,7 @@ bool FindTagValuePair(const char* arg_str, char tag_value_separator, char **tag, int *tag_len, char **value, int *value_len) { char* in_str = const_cast<char*>(arg_str); // For msvc8. - if (in_str == NULL) + if (in_str == nullptr) return false; char tv_sep_or_term[3] = {tag_value_separator, string_terminal, '\0'}; char attr_sep_or_term[3] = {attribute_separator, string_terminal, '\0'}; @@ -791,20 +932,20 @@ bool FindTagValuePair(const char* arg_str, char tag_value_separator, // Look for beginning of tag *tag = strpbrk(in_str, attr_sep_or_term); // If string_terminal is '\0', strpbrk won't find it but return null. - if (*tag == NULL || **tag == string_terminal) + if (*tag == nullptr || **tag == string_terminal) *tag = in_str; else (*tag)++; // Move past separator // Now look for value... char *tv_sep_pos = strpbrk(*tag, tv_sep_or_term); - if (tv_sep_pos == NULL || *tv_sep_pos == string_terminal) + if (tv_sep_pos == nullptr || *tv_sep_pos == string_terminal) return false; // ...and end of value char *attr_sep_pos = strpbrk(tv_sep_pos, attr_sep_or_term); *tag_len = tv_sep_pos - *tag; *value = tv_sep_pos + 1; - if (attr_sep_pos != NULL) + if (attr_sep_pos != nullptr) *value_len = attr_sep_pos - *value; else *value_len = strlen(*value); @@ -858,7 +999,7 @@ void InsertString(string *const s, tmp.reserve(s_len + separator_len * num_indices); vector<uint32>::const_iterator const ind_end(indices.end()); - vector<uint32>::const_iterator ind_pos(indices.begin()); + auto ind_pos(indices.begin()); uint32 last_pos(0); while (ind_pos != ind_end) { @@ -951,8 +1092,8 @@ StringPiece FindEol(StringPiece s) { // return true if string s contains only whitespace characters //------------------------------------------------------------------------ bool OnlyWhitespace(const StringPiece& s) { - for ( int i = 0; i < s.size(); ++i ) { - if ( !ascii_isspace(s[i]) ) return false; + for (const auto& c : s) { + if ( !ascii_isspace(c) ) return false; } return true; } @@ -1051,3 +1192,27 @@ bool GetlineFromStdioFile(FILE* file, string* str, char delim) { str->push_back(c); } } + +namespace { + +template <typename CHAR> +size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { + for (size_t i = 0; i < dst_size; ++i) { + if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. + return i; + } + + // We were left off at dst_size. We over copied 1 byte. Null terminate. + if (dst_size != 0) + dst[dst_size - 1] = 0; + + // Count the rest of the |src|, and return it's length in characters. + while (src[dst_size]) ++dst_size; + return dst_size; +} + +} // namespace + +size_t strings::strlcpy(char* dst, const char* src, size_t dst_size) { + return lcpyT<char>(dst, src, dst_size); +}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/02f3e3fc/be/src/gutil/strings/util.h ---------------------------------------------------------------------- diff --git a/be/src/gutil/strings/util.h b/be/src/gutil/strings/util.h index 562e497..59db97d 100644 --- a/be/src/gutil/strings/util.h +++ b/be/src/gutil/strings/util.h @@ -42,9 +42,9 @@ using std::string; #include <vector> using std::vector; -#include "gutil/integral_types.h" -#include "gutil/port.h" -#include "gutil/strings/stringpiece.h" +#include "kudu/gutil/integral_types.h" +#include "kudu/gutil/port.h" +#include "kudu/gutil/strings/stringpiece.h" // Newer functions. @@ -179,6 +179,14 @@ inline bool HasSuffixString(const StringPiece& str, return str.ends_with(suffix); } +// Returns true if the string passed in matches the pattern. The pattern +// string can contain wildcards like * and ? +// The backslash character (\) is an escape character for * and ? +// We limit the patterns to having a max of 16 * or ? characters. +// ? matches 0 or 1 character, while * matches 0 or more characters. +bool MatchPattern(const StringPiece& string, + const StringPiece& pattern); + // Returns where suffix begins in str, or NULL if str doesn't end with suffix. inline char* strsuffix(char* str, const char* suffix) { const int lenstr = strlen(str); @@ -337,6 +345,18 @@ inline char* safestrncpy(char* dest, const char* src, size_t n) { return dest; } +namespace strings { + +// BSD-style safe and consistent string copy functions. +// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. +// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as +// long as |dst_size| is not 0. Returns the length of |src| in characters. +// If the return value is >= dst_size, then the output was truncated. +// NOTE: All sizes are in number of characters, NOT in bytes. +size_t strlcpy(char* dst, const char* src, size_t dst_size); + +} // namespace strings + // Replaces the first occurrence (if replace_all is false) or all occurrences // (if replace_all is true) of oldsub in s with newsub. In the second version, // *res must be distinct from all the other arguments. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/02f3e3fc/be/src/gutil/strtoint.cc ---------------------------------------------------------------------- diff --git a/be/src/gutil/strtoint.cc b/be/src/gutil/strtoint.cc index 9df29a3..bb96a57 100644 --- a/be/src/gutil/strtoint.cc +++ b/be/src/gutil/strtoint.cc @@ -5,8 +5,8 @@ // #include <errno.h> -#include "gutil/port.h" -#include "gutil/strtoint.h" +#include "kudu/gutil/port.h" +#include "kudu/gutil/strtoint.h" // Replacement strto[u]l functions that have identical overflow and underflow // characteristics for both ILP-32 and LP-64 platforms, including errno http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/02f3e3fc/be/src/gutil/strtoint.h ---------------------------------------------------------------------- diff --git a/be/src/gutil/strtoint.h b/be/src/gutil/strtoint.h index 581ebf9..b581385 100644 --- a/be/src/gutil/strtoint.h +++ b/be/src/gutil/strtoint.h @@ -33,9 +33,9 @@ #include <stdlib.h> // For strtol* functions. #include <string> using std::string; -#include "gutil/integral_types.h" -#include "gutil/macros.h" -#include "gutil/port.h" +#include "kudu/gutil/integral_types.h" +#include "kudu/gutil/macros.h" +#include "kudu/gutil/port.h" // Adapter functions for handling overflow and errno. int32 strto32_adapter(const char *nptr, char **endptr, int base); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/02f3e3fc/be/src/gutil/synchronization_profiling.h ---------------------------------------------------------------------- diff --git a/be/src/gutil/synchronization_profiling.h b/be/src/gutil/synchronization_profiling.h index 3292787..f008871 100644 --- a/be/src/gutil/synchronization_profiling.h +++ b/be/src/gutil/synchronization_profiling.h @@ -35,7 +35,7 @@ #ifndef BASE_AUXILIARY_SYNCHRONIZATION_PROFILING_H_ #define BASE_AUXILIARY_SYNCHRONIZATION_PROFILING_H_ -#include "gutil/basictypes.h" +#include "kudu/gutil/basictypes.h" namespace gutil { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/02f3e3fc/be/src/gutil/sysinfo.cc ---------------------------------------------------------------------- diff --git a/be/src/gutil/sysinfo.cc b/be/src/gutil/sysinfo.cc index f9232d1..6006b6d 100644 --- a/be/src/gutil/sysinfo.cc +++ b/be/src/gutil/sysinfo.cc @@ -53,10 +53,10 @@ #include <shlwapi.h> // for SHGetValueA() #include <tlhelp32.h> // for Module32First() #endif -#include "gutil/dynamic_annotations.h" // for RunningOnValgrind -#include "gutil/macros.h" -#include "gutil/sysinfo.h" -#include "gutil/walltime.h" +#include "kudu/gutil/dynamic_annotations.h" // for RunningOnValgrind +#include "kudu/gutil/macros.h" +#include "kudu/gutil/sysinfo.h" +#include "kudu/gutil/walltime.h" #include <glog/logging.h> // This isn't in the 'base' namespace in tcmallc. But, tcmalloc @@ -74,6 +74,7 @@ namespace base { static double cpuinfo_cycles_per_second = 1.0; // 0.0 might be dangerous static int cpuinfo_num_cpus = 1; // Conservative guess +static int cpuinfo_max_cpu_index = -1; void SleepForNanoseconds(int64_t nanoseconds) { // Sleep for nanosecond duration @@ -144,6 +145,25 @@ static bool ReadIntFromFile(const char *file, int *value) { return false; } +static int ReadMaxCPUIndex() { + char buf[1024]; + CHECK(SlurpSmallTextFile("/sys/devices/system/cpu/present", buf, arraysize(buf))); + + // On a single-core machine, 'buf' will contain the string '0' with a newline. + if (strcmp(buf, "0\n") == 0) { + return 0; + } + + // On multi-core, it will have a CPU range like '0-7'. + CHECK_EQ(0, memcmp(buf, "0-", 2)) << "bad list of possible CPUs: " << buf; + + char* max_cpu_str = &buf[2]; + char* err; + int val = strtol(max_cpu_str, &err, 10); + CHECK(*err == '\n' || *err == '\0') << "unable to parse max CPU index from: " << buf; + return val; +} + #endif // WARNING: logging calls back to InitializeSystemInfo() so it must @@ -286,6 +306,7 @@ static void InitializeSystemInfo() { if (num_cpus > 0) { cpuinfo_num_cpus = num_cpus; } + cpuinfo_max_cpu_index = ReadMaxCPUIndex(); #elif defined __FreeBSD__ // For this sysctl to work, the machine must be configured without @@ -364,6 +385,13 @@ static void InitializeSystemInfo() { // Generic cycles per second counter cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); #endif + + // On platforms where we can't determine the max CPU index, just use the + // number of CPUs. This might break if CPUs are taken offline, but + // better than a wild guess. + if (cpuinfo_max_cpu_index < 0) { + cpuinfo_max_cpu_index = cpuinfo_num_cpus - 1; + } } double CyclesPerSecond(void) { @@ -376,4 +404,9 @@ int NumCPUs(void) { return cpuinfo_num_cpus; } +int MaxCPUIndex(void) { + InitializeSystemInfo(); + return cpuinfo_max_cpu_index; +} + } // namespace base http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/02f3e3fc/be/src/gutil/sysinfo.h ---------------------------------------------------------------------- diff --git a/be/src/gutil/sysinfo.h b/be/src/gutil/sysinfo.h index 1ce95d0..ec3abe7 100644 --- a/be/src/gutil/sysinfo.h +++ b/be/src/gutil/sysinfo.h @@ -41,6 +41,10 @@ namespace base { // value of sched_getcpu(). extern int NumCPUs(); +// Return the maximum CPU index that may be returned by sched_getcpu(). For example, on +// an 8-core machine, this will return '7' even if some of the CPUs have been disabled. +extern int MaxCPUIndex(); + void SleepForNanoseconds(int64_t nanoseconds); void SleepForMilliseconds(int64_t milliseconds); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/02f3e3fc/be/src/gutil/template_util.h ---------------------------------------------------------------------- diff --git a/be/src/gutil/template_util.h b/be/src/gutil/template_util.h index 35b1fa0..aebfa16 100644 --- a/be/src/gutil/template_util.h +++ b/be/src/gutil/template_util.h @@ -59,6 +59,11 @@ struct big_ { char dummy[2]; }; +// Types YesType and NoType are guaranteed such that sizeof(YesType) < +// sizeof(NoType). +typedef small_ YesType; +typedef big_ NoType; + // Identity metafunction. template <class T> struct identity_ { @@ -88,6 +93,16 @@ typedef integral_constant<bool, false> false_type; typedef true_type true_; typedef false_type false_; +template <class T> struct is_non_const_reference : false_type {}; +template <class T> struct is_non_const_reference<T&> : true_type {}; +template <class T> struct is_non_const_reference<const T&> : false_type {}; + +template <class T> struct is_const : false_type {}; +template <class T> struct is_const<const T> : true_type {}; + +template <class T> struct is_void : false_type {}; +template <> struct is_void<void> : true_type {}; + // if_ is a templatized conditional statement. // if_<cond, A, B> is a compile time evaluation of cond. // if_<>::type contains A if cond is true, B otherwise. @@ -127,6 +142,22 @@ template<typename A, typename B> struct or_ : public integral_constant<bool, (A::value || B::value)> { }; +// Used to determine if a type is a struct/union/class. Inspired by Boost's +// is_class type_trait implementation. +struct IsClassHelper { + template <typename C> + static YesType Test(void(C::*)(void)); + + template <typename C> + static NoType Test(...); +}; + +template <typename T> +struct is_class + : integral_constant<bool, + sizeof(IsClassHelper::Test<T>(0)) == + sizeof(YesType)> { +}; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/02f3e3fc/be/src/gutil/threading/thread_collision_warner.cc ---------------------------------------------------------------------- diff --git a/be/src/gutil/threading/thread_collision_warner.cc b/be/src/gutil/threading/thread_collision_warner.cc new file mode 100644 index 0000000..89270d8 --- /dev/null +++ b/be/src/gutil/threading/thread_collision_warner.cc @@ -0,0 +1,82 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "kudu/gutil/threading/thread_collision_warner.h" + +#include <glog/logging.h> + +#include "kudu/gutil/linux_syscall_support.h" + +namespace base { + +void DCheckAsserter::warn() { + LOG(FATAL) << "Thread Collision"; +} + +#if 0 +// Original source from Chromium -- we didn't import their threading library +// into Cloudera source as of yet + +static subtle::Atomic32 CurrentThread() { + const PlatformThreadId current_thread_id = PlatformThread::CurrentId(); + // We need to get the thread id into an atomic data type. This might be a + // truncating conversion, but any loss-of-information just increases the + // chance of a fault negative, not a false positive. + const subtle::Atomic32 atomic_thread_id = + static_cast<subtle::Atomic32>(current_thread_id); + + return atomic_thread_id; +} +#else + +static subtle::Atomic64 CurrentThread() { +#if defined(__APPLE__) + uint64_t tid; + CHECK_EQ(0, pthread_threadid_np(NULL, &tid)); + return tid; +#elif defined(__linux__) + return syscall(__NR_gettid); +#endif +} + +#endif + +void ThreadCollisionWarner::EnterSelf() { + // If the active thread is 0 then I'll write the current thread ID + // if two or more threads arrive here only one will succeed to + // write on valid_thread_id_ the current thread ID. + subtle::Atomic64 current_thread_id = CurrentThread(); + + int64_t previous_value = subtle::NoBarrier_CompareAndSwap(&valid_thread_id_, + 0, + current_thread_id); + if (previous_value != 0 && previous_value != current_thread_id) { + // gotcha! a thread is trying to use the same class and that is + // not current thread. + asserter_->warn(); + } + + subtle::NoBarrier_AtomicIncrement(&counter_, 1); +} + +void ThreadCollisionWarner::Enter() { + subtle::Atomic64 current_thread_id = CurrentThread(); + + if (subtle::NoBarrier_CompareAndSwap(&valid_thread_id_, + 0, + current_thread_id) != 0) { + // gotcha! another thread is trying to use the same class. + asserter_->warn(); + } + + subtle::NoBarrier_AtomicIncrement(&counter_, 1); +} + +void ThreadCollisionWarner::Leave() { + if (subtle::Barrier_AtomicIncrement(&counter_, -1) == 0) { + subtle::NoBarrier_Store(&valid_thread_id_, 0); + } +} + +} // namespace base http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/02f3e3fc/be/src/gutil/threading/thread_collision_warner.h ---------------------------------------------------------------------- diff --git a/be/src/gutil/threading/thread_collision_warner.h b/be/src/gutil/threading/thread_collision_warner.h new file mode 100644 index 0000000..d59ea67 --- /dev/null +++ b/be/src/gutil/threading/thread_collision_warner.h @@ -0,0 +1,248 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_THREADING_THREAD_COLLISION_WARNER_H_ +#define BASE_THREADING_THREAD_COLLISION_WARNER_H_ + +#include <memory> + +#include "kudu/gutil/atomicops.h" +#include "kudu/gutil/port.h" +#include "kudu/gutil/macros.h" + +#ifndef BASE_EXPORT +#define BASE_EXPORT +#endif + +// A helper class alongside macros to be used to verify assumptions about thread +// safety of a class. +// +// Example: Queue implementation non thread-safe but still usable if clients +// are synchronized somehow. +// +// In this case the macro DFAKE_SCOPED_LOCK has to be +// used, it checks that if a thread is inside the push/pop then +// noone else is still inside the pop/push +// +// class NonThreadSafeQueue { +// public: +// ... +// void push(int) { DFAKE_SCOPED_LOCK(push_pop_); ... } +// int pop() { DFAKE_SCOPED_LOCK(push_pop_); ... } +// ... +// private: +// DFAKE_MUTEX(push_pop_); +// }; +// +// +// Example: Queue implementation non thread-safe but still usable if clients +// are synchronized somehow, it calls a method to "protect" from +// a "protected" method +// +// In this case the macro DFAKE_SCOPED_RECURSIVE_LOCK +// has to be used, it checks that if a thread is inside the push/pop +// then noone else is still inside the pop/push +// +// class NonThreadSafeQueue { +// public: +// void push(int) { +// DFAKE_SCOPED_LOCK(push_pop_); +// ... +// } +// int pop() { +// DFAKE_SCOPED_RECURSIVE_LOCK(push_pop_); +// bar(); +// ... +// } +// void bar() { DFAKE_SCOPED_RECURSIVE_LOCK(push_pop_); ... } +// ... +// private: +// DFAKE_MUTEX(push_pop_); +// }; +// +// +// Example: Queue implementation not usable even if clients are synchronized, +// so only one thread in the class life cycle can use the two members +// push/pop. +// +// In this case the macro DFAKE_SCOPED_LOCK_THREAD_LOCKED pins the +// specified +// critical section the first time a thread enters push or pop, from +// that time on only that thread is allowed to execute push or pop. +// +// class NonThreadSafeQueue { +// public: +// ... +// void push(int) { DFAKE_SCOPED_LOCK_THREAD_LOCKED(push_pop_); ... } +// int pop() { DFAKE_SCOPED_LOCK_THREAD_LOCKED(push_pop_); ... } +// ... +// private: +// DFAKE_MUTEX(push_pop_); +// }; +// +// +// Example: Class that has to be contructed/destroyed on same thread, it has +// a "shareable" method (with external synchronization) and a not +// shareable method (even with external synchronization). +// +// In this case 3 Critical sections have to be defined +// +// class ExoticClass { +// public: +// ExoticClass() { DFAKE_SCOPED_LOCK_THREAD_LOCKED(ctor_dtor_); ... } +// ~ExoticClass() { DFAKE_SCOPED_LOCK_THREAD_LOCKED(ctor_dtor_); ... } +// +// void Shareable() { DFAKE_SCOPED_LOCK(shareable_section_); ... } +// void NotShareable() { DFAKE_SCOPED_LOCK_THREAD_LOCKED(ctor_dtor_); ... } +// ... +// private: +// DFAKE_MUTEX(ctor_dtor_); +// DFAKE_MUTEX(shareable_section_); +// }; + + +#if !defined(NDEBUG) + +// Defines a class member that acts like a mutex. It is used only as a +// verification tool. +#define DFAKE_MUTEX(obj) \ + mutable base::ThreadCollisionWarner obj +// Asserts the call is never called simultaneously in two threads. Used at +// member function scope. +#define DFAKE_SCOPED_LOCK(obj) \ + base::ThreadCollisionWarner::ScopedCheck s_check_##obj(&obj) +// Asserts the call is never called simultaneously in two threads. Used at +// member function scope. Same as DFAKE_SCOPED_LOCK but allows recursive locks. +#define DFAKE_SCOPED_RECURSIVE_LOCK(obj) \ + base::ThreadCollisionWarner::ScopedRecursiveCheck sr_check_##obj(&obj) +// Asserts the code is always executed in the same thread. +#define DFAKE_SCOPED_LOCK_THREAD_LOCKED(obj) \ + base::ThreadCollisionWarner::Check check_##obj(&obj) + +#else + +#define DFAKE_MUTEX(obj) typedef void InternalFakeMutexType##obj +#define DFAKE_SCOPED_LOCK(obj) ((void)0) +#define DFAKE_SCOPED_RECURSIVE_LOCK(obj) ((void)0) +#define DFAKE_SCOPED_LOCK_THREAD_LOCKED(obj) ((void)0) + +#endif + +namespace base { + +// The class ThreadCollisionWarner uses an Asserter to notify the collision +// AsserterBase is the interfaces and DCheckAsserter is the default asserter +// used. During the unit tests is used another class that doesn't "DCHECK" +// in case of collision (check thread_collision_warner_unittests.cc) +struct BASE_EXPORT AsserterBase { + virtual ~AsserterBase() {} + virtual void warn() = 0; +}; + +struct BASE_EXPORT DCheckAsserter : public AsserterBase { + virtual ~DCheckAsserter() {} + virtual void warn() OVERRIDE; +}; + +class BASE_EXPORT ThreadCollisionWarner { + public: + // The parameter asserter is there only for test purpose + explicit ThreadCollisionWarner(AsserterBase* asserter = new DCheckAsserter()) + : valid_thread_id_(0), + counter_(0), + asserter_(asserter) {} + + ~ThreadCollisionWarner() { + delete asserter_; + } + + // This class is meant to be used through the macro + // DFAKE_SCOPED_LOCK_THREAD_LOCKED + // it doesn't leave the critical section, as opposed to ScopedCheck, + // because the critical section being pinned is allowed to be used only + // from one thread + class BASE_EXPORT Check { + public: + explicit Check(ThreadCollisionWarner* warner) + : warner_(warner) { + warner_->EnterSelf(); + } + + ~Check() {} + + private: + ThreadCollisionWarner* warner_; + + DISALLOW_COPY_AND_ASSIGN(Check); + }; + + // This class is meant to be used through the macro + // DFAKE_SCOPED_LOCK + class BASE_EXPORT ScopedCheck { + public: + explicit ScopedCheck(ThreadCollisionWarner* warner) + : warner_(warner) { + warner_->Enter(); + } + + ~ScopedCheck() { + warner_->Leave(); + } + + private: + ThreadCollisionWarner* warner_; + + DISALLOW_COPY_AND_ASSIGN(ScopedCheck); + }; + + // This class is meant to be used through the macro + // DFAKE_SCOPED_RECURSIVE_LOCK + class BASE_EXPORT ScopedRecursiveCheck { + public: + explicit ScopedRecursiveCheck(ThreadCollisionWarner* warner) + : warner_(warner) { + warner_->EnterSelf(); + } + + ~ScopedRecursiveCheck() { + warner_->Leave(); + } + + private: + ThreadCollisionWarner* warner_; + + DISALLOW_COPY_AND_ASSIGN(ScopedRecursiveCheck); + }; + + private: + // This method stores the current thread identifier and does a DCHECK + // if a another thread has already done it, it is safe if same thread + // calls this multiple time (recursion allowed). + void EnterSelf(); + + // Same as EnterSelf but recursion is not allowed. + void Enter(); + + // Removes the thread_id stored in order to allow other threads to + // call EnterSelf or Enter. + void Leave(); + + // This stores the thread id that is inside the critical section, if the + // value is 0 then no thread is inside. + volatile subtle::Atomic64 valid_thread_id_; + + // Counter to trace how many time a critical section was "pinned" + // (when allowed) in order to unpin it when counter_ reaches 0. + volatile subtle::Atomic64 counter_; + + // Here only for class unit tests purpose, during the test I need to not + // DCHECK but notify the collision with something else. + AsserterBase* asserter_; + + DISALLOW_COPY_AND_ASSIGN(ThreadCollisionWarner); +}; + +} // namespace base + +#endif // BASE_THREADING_THREAD_COLLISION_WARNER_H_ http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/02f3e3fc/be/src/gutil/type_traits.h ---------------------------------------------------------------------- diff --git a/be/src/gutil/type_traits.h b/be/src/gutil/type_traits.h index e408fd0..a4e874f 100644 --- a/be/src/gutil/type_traits.h +++ b/be/src/gutil/type_traits.h @@ -63,7 +63,7 @@ using std::make_pair; using std::pair; // For pair -#include "gutil/template_util.h" // For true_type and false_type +#include "kudu/gutil/template_util.h" // For true_type and false_type namespace base { @@ -116,6 +116,10 @@ template<> struct is_integral<__wchar_t> : true_type { }; #else template<> struct is_integral<wchar_t> : true_type { }; #endif +#if defined(__APPLE__) +template<> struct is_integral<int64_t> : true_type { }; +template<> struct is_integral<uint64_t> : true_type { }; +#endif template<> struct is_integral<short> : true_type { }; template<> struct is_integral<unsigned short> : true_type { }; template<> struct is_integral<int> : true_type { }; @@ -350,10 +354,10 @@ struct is_convertible // these types are PODs, for human use. They may be made more contentful // later. The typedef is just to make it legal to put a semicolon after // these macros. -#define DECLARE_POD(TypeName) typedef int Dummy_Type_For_DECLARE_POD +#define DECLARE_POD(TypeName) typedef int Dummy_Type_For_DECLARE_POD ATTRIBUTE_UNUSED #define DECLARE_NESTED_POD(TypeName) DECLARE_POD(TypeName) #define PROPAGATE_POD_FROM_TEMPLATE_ARGUMENT(TemplateName) \ - typedef int Dummy_Type_For_PROPAGATE_POD_FROM_TEMPLATE_ARGUMENT -#define ENFORCE_POD(TypeName) typedef int Dummy_Type_For_ENFORCE_POD + typedef int Dummy_Type_For_PROPAGATE_POD_FROM_TEMPLATE_ARGUMENT ATTRIBUTE_UNUSED +#define ENFORCE_POD(TypeName) typedef int Dummy_Type_For_ENFORCE_POD ATTRIBUTE_UNUSED #endif // BASE_TYPE_TRAITS_H_ http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/02f3e3fc/be/src/gutil/utf/rune.c ---------------------------------------------------------------------- diff --git a/be/src/gutil/utf/rune.c b/be/src/gutil/utf/rune.c new file mode 100644 index 0000000..1e7299c --- /dev/null +++ b/be/src/gutil/utf/rune.c @@ -0,0 +1,350 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "gutil/utf/utf.h" +#include "gutil/utf/utfdef.h" + +enum +{ + Bit1 = 7, + Bitx = 6, + Bit2 = 5, + Bit3 = 4, + Bit4 = 3, + Bit5 = 2, + + T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ + Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ + T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ + T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ + T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ + T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ + + Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ + Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ + Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ + Rune4 = (1<<(Bit4+3*Bitx))-1, + /* 0001 1111 1111 1111 1111 1111 */ + + Maskx = (1<<Bitx)-1, /* 0011 1111 */ + Testx = Maskx ^ 0xFF, /* 1100 0000 */ + + Bad = Runeerror, +}; + +/* + * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24 + * This is a slower but "safe" version of the old chartorune + * that works on strings that are not necessarily null-terminated. + * + * If you know for sure that your string is null-terminated, + * chartorune will be a bit faster. + * + * It is guaranteed not to attempt to access "length" + * past the incoming pointer. This is to avoid + * possible access violations. If the string appears to be + * well-formed but incomplete (i.e., to get the whole Rune + * we'd need to read past str+length) then we'll set the Rune + * to Bad and return 0. + * + * Note that if we have decoding problems for other + * reasons, we return 1 instead of 0. + */ +int +charntorune(Rune *rune, const char *str, int length) +{ + int c, c1, c2, c3; + long l; + + /* When we're not allowed to read anything */ + if(length <= 0) { + goto badlen; + } + + /* + * one character sequence (7-bit value) + * 00000-0007F => T1 + */ + c = *(uchar*)str; + if(c < Tx) { + *rune = c; + return 1; + } + + // If we can't read more than one character we must stop + if(length <= 1) { + goto badlen; + } + + /* + * two character sequence (11-bit value) + * 0080-07FF => T2 Tx + */ + c1 = *(uchar*)(str+1) ^ Tx; + if(c1 & Testx) + goto bad; + if(c < T3) { + if(c < T2) + goto bad; + l = ((c << Bitx) | c1) & Rune2; + if(l <= Rune1) + goto bad; + *rune = l; + return 2; + } + + // If we can't read more than two characters we must stop + if(length <= 2) { + goto badlen; + } + + /* + * three character sequence (16-bit value) + * 0800-FFFF => T3 Tx Tx + */ + c2 = *(uchar*)(str+2) ^ Tx; + if(c2 & Testx) + goto bad; + if(c < T4) { + l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; + if(l <= Rune2) + goto bad; + *rune = l; + return 3; + } + + if (length <= 3) + goto badlen; + + /* + * four character sequence (21-bit value) + * 10000-1FFFFF => T4 Tx Tx Tx + */ + c3 = *(uchar*)(str+3) ^ Tx; + if (c3 & Testx) + goto bad; + if (c < T5) { + l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; + if (l <= Rune3) + goto bad; + *rune = l; + return 4; + } + + // Support for 5-byte or longer UTF-8 would go here, but + // since we don't have that, we'll just fall through to bad. + + /* + * bad decoding + */ +bad: + *rune = Bad; + return 1; +badlen: + *rune = Bad; + return 0; + +} + + +/* + * This is the older "unsafe" version, which works fine on + * null-terminated strings. + */ +int +chartorune(Rune *rune, const char *str) +{ + int c, c1, c2, c3; + long l; + + /* + * one character sequence + * 00000-0007F => T1 + */ + c = *(uchar*)str; + if(c < Tx) { + *rune = c; + return 1; + } + + /* + * two character sequence + * 0080-07FF => T2 Tx + */ + c1 = *(uchar*)(str+1) ^ Tx; + if(c1 & Testx) + goto bad; + if(c < T3) { + if(c < T2) + goto bad; + l = ((c << Bitx) | c1) & Rune2; + if(l <= Rune1) + goto bad; + *rune = l; + return 2; + } + + /* + * three character sequence + * 0800-FFFF => T3 Tx Tx + */ + c2 = *(uchar*)(str+2) ^ Tx; + if(c2 & Testx) + goto bad; + if(c < T4) { + l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; + if(l <= Rune2) + goto bad; + *rune = l; + return 3; + } + + /* + * four character sequence (21-bit value) + * 10000-1FFFFF => T4 Tx Tx Tx + */ + c3 = *(uchar*)(str+3) ^ Tx; + if (c3 & Testx) + goto bad; + if (c < T5) { + l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; + if (l <= Rune3) + goto bad; + *rune = l; + return 4; + } + + /* + * Support for 5-byte or longer UTF-8 would go here, but + * since we don't have that, we'll just fall through to bad. + */ + + /* + * bad decoding + */ +bad: + *rune = Bad; + return 1; +} + +int +isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) { + *consumed = charntorune(rune, str, length); + return *rune != Runeerror || *consumed == 3; +} + +int +runetochar(char *str, const Rune *rune) +{ + /* Runes are signed, so convert to unsigned for range check. */ + unsigned long c; + + /* + * one character sequence + * 00000-0007F => 00-7F + */ + c = *rune; + if(c <= Rune1) { + str[0] = c; + return 1; + } + + /* + * two character sequence + * 0080-07FF => T2 Tx + */ + if(c <= Rune2) { + str[0] = T2 | (c >> 1*Bitx); + str[1] = Tx | (c & Maskx); + return 2; + } + + /* + * If the Rune is out of range, convert it to the error rune. + * Do this test here because the error rune encodes to three bytes. + * Doing it earlier would duplicate work, since an out of range + * Rune wouldn't have fit in one or two bytes. + */ + if (c > Runemax) + c = Runeerror; + + /* + * three character sequence + * 0800-FFFF => T3 Tx Tx + */ + if (c <= Rune3) { + str[0] = T3 | (c >> 2*Bitx); + str[1] = Tx | ((c >> 1*Bitx) & Maskx); + str[2] = Tx | (c & Maskx); + return 3; + } + + /* + * four character sequence (21-bit value) + * 10000-1FFFFF => T4 Tx Tx Tx + */ + str[0] = T4 | (c >> 3*Bitx); + str[1] = Tx | ((c >> 2*Bitx) & Maskx); + str[2] = Tx | ((c >> 1*Bitx) & Maskx); + str[3] = Tx | (c & Maskx); + return 4; +} + +int +runelen(Rune rune) +{ + char str[10]; + + return runetochar(str, &rune); +} + +int +runenlen(const Rune *r, int nrune) +{ + int nb, c; + + nb = 0; + while(nrune--) { + c = *r++; + if (c <= Rune1) + nb++; + else if (c <= Rune2) + nb += 2; + else if (c <= Rune3) + nb += 3; + else /* assert(c <= Rune4) */ + nb += 4; + } + return nb; +} + +int +fullrune(const char *str, int n) +{ + if (n > 0) { + int c = *(uchar*)str; + if (c < Tx) + return 1; + if (n > 1) { + if (c < T3) + return 1; + if (n > 2) { + if (c < T4 || n > 3) + return 1; + } + } + } + return 0; +} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/02f3e3fc/be/src/gutil/utf/rune.cc ---------------------------------------------------------------------- diff --git a/be/src/gutil/utf/rune.cc b/be/src/gutil/utf/rune.cc deleted file mode 100644 index 908a9e0..0000000 --- a/be/src/gutil/utf/rune.cc +++ /dev/null @@ -1,354 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY - * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include <stdarg.h> -#include <string.h> -#include "gutil/utf/utf.h" -#include "gutil/utf/utfdef.h" - -extern "C" { - -enum -{ - Bit1 = 7, - Bitx = 6, - Bit2 = 5, - Bit3 = 4, - Bit4 = 3, - Bit5 = 2, - - T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ - Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ - T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ - T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ - T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ - T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ - - Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ - Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ - Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ - Rune4 = (1<<(Bit4+3*Bitx))-1, - /* 0001 1111 1111 1111 1111 1111 */ - - Maskx = (1<<Bitx)-1, /* 0011 1111 */ - Testx = Maskx ^ 0xFF, /* 1100 0000 */ - - Bad = Runeerror, -}; - -/* - * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24 - * This is a slower but "safe" version of the old chartorune - * that works on strings that are not necessarily null-terminated. - * - * If you know for sure that your string is null-terminated, - * chartorune will be a bit faster. - * - * It is guaranteed not to attempt to access "length" - * past the incoming pointer. This is to avoid - * possible access violations. If the string appears to be - * well-formed but incomplete (i.e., to get the whole Rune - * we'd need to read past str+length) then we'll set the Rune - * to Bad and return 0. - * - * Note that if we have decoding problems for other - * reasons, we return 1 instead of 0. - */ -int -charntorune(Rune *rune, const char *str, int length) -{ - int c, c1, c2, c3; - long l; - - /* When we're not allowed to read anything */ - if(length <= 0) { - goto badlen; - } - - /* - * one character sequence (7-bit value) - * 00000-0007F => T1 - */ - c = *(uchar*)str; - if(c < Tx) { - *rune = c; - return 1; - } - - // If we can't read more than one character we must stop - if(length <= 1) { - goto badlen; - } - - /* - * two character sequence (11-bit value) - * 0080-07FF => T2 Tx - */ - c1 = *(uchar*)(str+1) ^ Tx; - if(c1 & Testx) - goto bad; - if(c < T3) { - if(c < T2) - goto bad; - l = ((c << Bitx) | c1) & Rune2; - if(l <= Rune1) - goto bad; - *rune = l; - return 2; - } - - // If we can't read more than two characters we must stop - if(length <= 2) { - goto badlen; - } - - /* - * three character sequence (16-bit value) - * 0800-FFFF => T3 Tx Tx - */ - c2 = *(uchar*)(str+2) ^ Tx; - if(c2 & Testx) - goto bad; - if(c < T4) { - l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; - if(l <= Rune2) - goto bad; - *rune = l; - return 3; - } - - if (length <= 3) - goto badlen; - - /* - * four character sequence (21-bit value) - * 10000-1FFFFF => T4 Tx Tx Tx - */ - c3 = *(uchar*)(str+3) ^ Tx; - if (c3 & Testx) - goto bad; - if (c < T5) { - l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; - if (l <= Rune3) - goto bad; - *rune = l; - return 4; - } - - // Support for 5-byte or longer UTF-8 would go here, but - // since we don't have that, we'll just fall through to bad. - - /* - * bad decoding - */ -bad: - *rune = Bad; - return 1; -badlen: - *rune = Bad; - return 0; - -} - - -/* - * This is the older "unsafe" version, which works fine on - * null-terminated strings. - */ -int -chartorune(Rune *rune, const char *str) -{ - int c, c1, c2, c3; - long l; - - /* - * one character sequence - * 00000-0007F => T1 - */ - c = *(uchar*)str; - if(c < Tx) { - *rune = c; - return 1; - } - - /* - * two character sequence - * 0080-07FF => T2 Tx - */ - c1 = *(uchar*)(str+1) ^ Tx; - if(c1 & Testx) - goto bad; - if(c < T3) { - if(c < T2) - goto bad; - l = ((c << Bitx) | c1) & Rune2; - if(l <= Rune1) - goto bad; - *rune = l; - return 2; - } - - /* - * three character sequence - * 0800-FFFF => T3 Tx Tx - */ - c2 = *(uchar*)(str+2) ^ Tx; - if(c2 & Testx) - goto bad; - if(c < T4) { - l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; - if(l <= Rune2) - goto bad; - *rune = l; - return 3; - } - - /* - * four character sequence (21-bit value) - * 10000-1FFFFF => T4 Tx Tx Tx - */ - c3 = *(uchar*)(str+3) ^ Tx; - if (c3 & Testx) - goto bad; - if (c < T5) { - l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; - if (l <= Rune3) - goto bad; - *rune = l; - return 4; - } - - /* - * Support for 5-byte or longer UTF-8 would go here, but - * since we don't have that, we'll just fall through to bad. - */ - - /* - * bad decoding - */ -bad: - *rune = Bad; - return 1; -} - -int -isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) { - *consumed = charntorune(rune, str, length); - return *rune != Runeerror || *consumed == 3; -} - -int -runetochar(char *str, const Rune *rune) -{ - /* Runes are signed, so convert to unsigned for range check. */ - unsigned long c; - - /* - * one character sequence - * 00000-0007F => 00-7F - */ - c = *rune; - if(c <= Rune1) { - str[0] = c; - return 1; - } - - /* - * two character sequence - * 0080-07FF => T2 Tx - */ - if(c <= Rune2) { - str[0] = T2 | (c >> 1*Bitx); - str[1] = Tx | (c & Maskx); - return 2; - } - - /* - * If the Rune is out of range, convert it to the error rune. - * Do this test here because the error rune encodes to three bytes. - * Doing it earlier would duplicate work, since an out of range - * Rune wouldn't have fit in one or two bytes. - */ - if (c > Runemax) - c = Runeerror; - - /* - * three character sequence - * 0800-FFFF => T3 Tx Tx - */ - if (c <= Rune3) { - str[0] = T3 | (c >> 2*Bitx); - str[1] = Tx | ((c >> 1*Bitx) & Maskx); - str[2] = Tx | (c & Maskx); - return 3; - } - - /* - * four character sequence (21-bit value) - * 10000-1FFFFF => T4 Tx Tx Tx - */ - str[0] = T4 | (c >> 3*Bitx); - str[1] = Tx | ((c >> 2*Bitx) & Maskx); - str[2] = Tx | ((c >> 1*Bitx) & Maskx); - str[3] = Tx | (c & Maskx); - return 4; -} - -int -runelen(Rune rune) -{ - char str[10]; - - return runetochar(str, &rune); -} - -int -runenlen(const Rune *r, int nrune) -{ - int nb, c; - - nb = 0; - while(nrune--) { - c = *r++; - if (c <= Rune1) - nb++; - else if (c <= Rune2) - nb += 2; - else if (c <= Rune3) - nb += 3; - else /* assert(c <= Rune4) */ - nb += 4; - } - return nb; -} - -int -fullrune(const char *str, int n) -{ - if (n > 0) { - int c = *(uchar*)str; - if (c < Tx) - return 1; - if (n > 1) { - if (c < T3) - return 1; - if (n > 2) { - if (c < T4 || n > 3) - return 1; - } - } - } - return 0; -} - -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/02f3e3fc/be/src/gutil/walltime.cc ---------------------------------------------------------------------- diff --git a/be/src/gutil/walltime.cc b/be/src/gutil/walltime.cc index 04d7f4b..89a805e 100644 --- a/be/src/gutil/walltime.cc +++ b/be/src/gutil/walltime.cc @@ -1,16 +1,21 @@ // Copyright 2012 Google Inc. All Rights Reserved. // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. // // Author: [email protected] (Tomasz Kaftal) // @@ -19,7 +24,7 @@ #define _GNU_SOURCE // Linux wants that for strptime in time.h #endif -#include "gutil/walltime.h" +#include "kudu/gutil/walltime.h" #include <stdio.h> #include <string.h> @@ -68,7 +73,7 @@ static void StringAppendStrftime(string* dst, int length = sizeof(space); for (int sanity = 0; sanity < 5; ++sanity) { length *= 2; - char* buf = new char[length]; + auto buf = new char[length]; result = strftime(buf, length, format, tm); if ((result >= 0) && (result < length)) { @@ -136,7 +141,7 @@ bool WallTime_Parse_Timezone(const char* time_spec, memset(&split_time, 0, sizeof(split_time)); } const char* parsed = strptime(time_spec, format, &split_time); - if (parsed == NULL) return false; + if (parsed == nullptr) return false; // If format ends with "%S", match fractional seconds double fraction = 0.0; @@ -166,6 +171,18 @@ bool WallTime_Parse_Timezone(const char* time_spec, return true; } +WallTime WallTime_Now() { +#if defined(__APPLE__) + mach_timespec_t ts; + walltime_internal::GetCurrentTime(&ts); + return ts.tv_sec + ts.tv_nsec / static_cast<double>(1e9); +#else + timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + return ts.tv_sec + ts.tv_nsec / static_cast<double>(1e9); +#endif // defined(__APPLE__) +} + void StringAppendStrftime(string* dst, const char* format, time_t when, @@ -173,9 +190,9 @@ void StringAppendStrftime(string* dst, struct tm tm; bool conversion_error; if (local) { - conversion_error = (localtime_r(&when, &tm) == NULL); + conversion_error = (localtime_r(&when, &tm) == nullptr); } else { - conversion_error = (gmtime_r(&when, &tm) == NULL); + conversion_error = (gmtime_r(&when, &tm) == nullptr); } if (conversion_error) { // If we couldn't convert the time, don't append anything. @@ -186,6 +203,6 @@ void StringAppendStrftime(string* dst, string LocalTimeAsString() { string ret; - StringAppendStrftime(&ret, "%Y-%m-%d %H:%M:%S %Z", time(NULL), true); + StringAppendStrftime(&ret, "%Y-%m-%d %H:%M:%S %Z", time(nullptr), true); return ret; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/02f3e3fc/be/src/gutil/walltime.h ---------------------------------------------------------------------- diff --git a/be/src/gutil/walltime.h b/be/src/gutil/walltime.h index 2f04ebe..e6a1294 100644 --- a/be/src/gutil/walltime.h +++ b/be/src/gutil/walltime.h @@ -1,21 +1,27 @@ // Copyright 2012 Google Inc. All Rights Reserved. // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. #ifndef GUTIL_WALLTIME_H_ #define GUTIL_WALLTIME_H_ #include <sys/time.h> +#include <glog/logging.h> #include <string> using std::string; @@ -24,17 +30,10 @@ using std::string; #include <mach/mach.h> #include <mach/mach_time.h> -#include "gutil/once.h" +#include "kudu/gutil/once.h" #endif // defined(__APPLE__) -#include "common/logging.h" -#include "gutil/integral_types.h" - -#define NANOS_PER_SEC 1000000000ll -#define NANOS_PER_MICRO 1000ll -#define MICROS_PER_SEC 1000000ll -#define MICROS_PER_MILLI 1000ll -#define MILLIS_PER_SEC 1000ll +#include "kudu/gutil/integral_types.h" typedef double WallTime; @@ -58,8 +57,10 @@ bool WallTime_Parse_Timezone(const char* time_spec, bool local, WallTime* result); +// Return current time in seconds as a WallTime. +WallTime WallTime_Now(); + typedef int64 MicrosecondsInt64; -typedef int64 NanosecondsInt64; namespace walltime_internal { @@ -79,7 +80,7 @@ inline void GetCurrentTime(mach_timespec_t* ts) { inline MicrosecondsInt64 GetCurrentTimeMicros() { mach_timespec_t ts; GetCurrentTime(&ts); - return ts.tv_sec * MICROS_PER_SEC + ts.tv_nsec / NANOS_PER_MICRO; + return ts.tv_sec * 1e6 + ts.tv_nsec / 1e3; } inline int64_t GetMonoTimeNanos() { @@ -94,7 +95,7 @@ inline int64_t GetMonoTimeNanos() { } inline MicrosecondsInt64 GetMonoTimeMicros() { - return GetMonoTimeNanos() / NANOS_PER_MICRO; + return GetMonoTimeNanos() / 1e3; } inline MicrosecondsInt64 GetThreadCpuTimeMicros() { @@ -120,8 +121,7 @@ inline MicrosecondsInt64 GetThreadCpuTimeMicros() { return 0; } - return thread_info_data.user_time.seconds * MICROS_PER_SEC + - thread_info_data.user_time.microseconds; + return thread_info_data.user_time.seconds * 1e6 + thread_info_data.user_time.microseconds; } #else @@ -129,20 +129,13 @@ inline MicrosecondsInt64 GetThreadCpuTimeMicros() { inline MicrosecondsInt64 GetClockTimeMicros(clockid_t clock) { timespec ts; clock_gettime(clock, &ts); - return ts.tv_sec * MICROS_PER_SEC + ts.tv_nsec / NANOS_PER_MICRO; -} - -inline NanosecondsInt64 GetClockTimeNanos(clockid_t clock) { - timespec ts; - clock_gettime(clock, &ts); - return ts.tv_sec * NANOS_PER_SEC + ts.tv_nsec; + return ts.tv_sec * 1e6 + ts.tv_nsec / 1e3; } #endif // defined(__APPLE__) } // namespace walltime_internal - // Returns the time since the Epoch measured in microseconds. inline MicrosecondsInt64 GetCurrentTimeMicros() { #if defined(__APPLE__) @@ -152,15 +145,6 @@ inline MicrosecondsInt64 GetCurrentTimeMicros() { #endif // defined(__APPLE__) } -// Returns the time since the Epoch measured in microseconds. -inline NanosecondsInt64 GetMonoTimeNanos() { -#if defined(__APPLE__) - return walltime_internal::GetMonoTimeNanos(); -#else - return walltime_internal::GetClockTimeNanos(CLOCK_MONOTONIC); -#endif // defined(__APPLE__) -} - // Returns the time since some arbitrary reference point, measured in microseconds. // Guaranteed to be monotonic (and therefore useful for measuring intervals) inline MicrosecondsInt64 GetMonoTimeMicros() { @@ -191,5 +175,5 @@ class CycleClock { CycleClock(); }; -#include "gutil/cycleclock-inl.h" // inline method bodies +#include "kudu/gutil/cycleclock-inl.h" // inline method bodies #endif // GUTIL_WALLTIME_H_
