This is an automated email from the ASF dual-hosted git repository. adar pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit aea815f1fe21c23284592fa365d7fa7fc1dbdd93 Author: Volodymyr Verovkin <[email protected]> AuthorDate: Mon Oct 14 01:37:28 2019 -0700 [KUDU-2632] Add a DATE type backed by INT32 (Part 1, C++ client) This adds a new DATE type, represented by an INT32 and that should store the number of days from the Unix epoch, January 1, 1970. Range: 0001-01-01 to 9999-12-31 or -719162 to 2932896 (days; computed with mktime()/(24*60*60), timezone = GMT, daylight saving = 0) Range validation is done in KuduPartialRow::SetDate() function. Change-Id: I1d803b6eb573a0b36c99c5a2012f12319a548986 Reviewed-on: http://gerrit.cloudera.org:8080/14427 Tested-by: Kudu Jenkins Reviewed-by: Adar Dembo <[email protected]> Reviewed-by: Alexey Serbin <[email protected]> --- src/kudu/client/client-unittest.cc | 2 + src/kudu/client/predicate-test.cc | 22 +++++++ src/kudu/client/scan_batch.cc | 14 +++++ src/kudu/client/scan_batch.h | 4 +- src/kudu/client/schema.cc | 4 ++ src/kudu/client/schema.h | 3 +- src/kudu/client/value.cc | 1 + src/kudu/common/common.proto | 1 + src/kudu/common/key_util.cc | 3 +- src/kudu/common/partial_row.cc | 47 ++++++++++++-- src/kudu/common/partial_row.h | 10 ++- src/kudu/common/partition.cc | 14 +++++ src/kudu/common/row.h | 6 ++ src/kudu/common/types-test.cc | 32 +++++++++- src/kudu/common/types.cc | 20 +++++- src/kudu/common/types.h | 39 ++++++++++-- src/kudu/hms/hms_catalog-test.cc | 1 + src/kudu/hms/hms_catalog.cc | 1 + src/kudu/integration-tests/all_types-itest.cc | 88 +++++++++++++++++++++++++++ src/kudu/integration-tests/data_gen_util.cc | 3 + src/kudu/integration-tests/hms_itest-base.cc | 1 + src/kudu/tools/kudu-admin-test.cc | 3 + src/kudu/tools/tool_action_perf.cc | 3 + src/kudu/tools/tool_action_table.cc | 10 +++ 24 files changed, 312 insertions(+), 20 deletions(-) diff --git a/src/kudu/client/client-unittest.cc b/src/kudu/client/client-unittest.cc index da58bfa..ca52595 100644 --- a/src/kudu/client/client-unittest.cc +++ b/src/kudu/client/client-unittest.cc @@ 
-267,6 +267,7 @@ TEST(ClientUnitTest, TestKuduSchemaToString) { b2.AddColumn("k1")->Type(KuduColumnSchema::INT32)->NotNull(); b2.AddColumn("k2")->Type(KuduColumnSchema::UNIXTIME_MICROS)->NotNull(); b2.AddColumn("k3")->Type(KuduColumnSchema::INT8)->NotNull(); + b2.AddColumn("date_val")->Type(KuduColumnSchema::DATE)->NotNull(); b2.AddColumn("dec_val")->Type(KuduColumnSchema::DECIMAL)->Nullable()->Precision(9)->Scale(2); b2.AddColumn("int_val")->Type(KuduColumnSchema::INT32)->NotNull(); b2.AddColumn("string_val")->Type(KuduColumnSchema::STRING)->Nullable(); @@ -279,6 +280,7 @@ TEST(ClientUnitTest, TestKuduSchemaToString) { " k1 INT32 NOT NULL,\n" " k2 UNIXTIME_MICROS NOT NULL,\n" " k3 INT8 NOT NULL,\n" + " date_val DATE NOT NULL,\n" " dec_val DECIMAL(9, 2) NULLABLE,\n" " int_val INT32 NOT NULL,\n" " string_val STRING NULLABLE,\n" diff --git a/src/kudu/client/predicate-test.cc b/src/kudu/client/predicate-test.cc index dd0928f..0b85d7a 100644 --- a/src/kudu/client/predicate-test.cc +++ b/src/kudu/client/predicate-test.cc @@ -17,10 +17,12 @@ #include <algorithm> #include <cstdint> +#include <initializer_list> #include <limits> #include <memory> #include <ostream> #include <string> +#include <utility> #include <vector> #include <glog/logging.h> @@ -740,6 +742,26 @@ TEST_F(PredicateTest, TestTimestampPredicates) { CheckIntPredicates<int64_t>(table); } +TEST_F(PredicateTest, TestDatePredicates) { + shared_ptr<KuduTable> table = CreateAndOpenTable(KuduColumnSchema::DATE); + shared_ptr<KuduSession> session = CreateSession(); + + int i = 0; + for (int32_t value : CreateIntValues<int32_t>()) { + unique_ptr<KuduInsert> insert(table->NewInsert()); + ASSERT_OK(insert->mutable_row()->SetInt64("key", i++)); + ASSERT_OK(insert->mutable_row()->SetDate("value", value)); + ASSERT_OK(session->Apply(insert.release())); + } + unique_ptr<KuduInsert> null_insert(table->NewInsert()); + ASSERT_OK(null_insert->mutable_row()->SetInt64("key", i++)); + 
ASSERT_OK(null_insert->mutable_row()->SetNull("value")); + ASSERT_OK(session->Apply(null_insert.release())); + ASSERT_OK(session->Flush()); + + CheckIntPredicates<int32_t>(table); +} + TEST_F(PredicateTest, TestFloatPredicates) { shared_ptr<KuduTable> table = CreateAndOpenTable(KuduColumnSchema::FLOAT); shared_ptr<KuduSession> session = CreateSession(); diff --git a/src/kudu/client/scan_batch.cc b/src/kudu/client/scan_batch.cc index ec5b155..324d01b 100644 --- a/src/kudu/client/scan_batch.cc +++ b/src/kudu/client/scan_batch.cc @@ -144,6 +144,10 @@ Status KuduScanBatch::RowPtr::GetUnixTimeMicros(const Slice& col_name, int64_t* return Get<TypeTraits<UNIXTIME_MICROS> >(col_name, val); } +Status KuduScanBatch::RowPtr::GetDate(const Slice& col_name, int32_t* days_since_unix_epoch) const { + return Get<TypeTraits<DATE> >(col_name, days_since_unix_epoch); +} + Status KuduScanBatch::RowPtr::GetFloat(const Slice& col_name, float* val) const { return Get<TypeTraits<FLOAT> >(col_name, val); } @@ -194,6 +198,10 @@ Status KuduScanBatch::RowPtr::GetUnixTimeMicros(int col_idx, int64_t* val) const return Get<TypeTraits<UNIXTIME_MICROS> >(col_idx, val); } +Status KuduScanBatch::RowPtr::GetDate(int col_idx, int32_t* days_since_unix_epoch) const { + return Get<TypeTraits<DATE> >(col_idx, days_since_unix_epoch); +} + Status KuduScanBatch::RowPtr::GetFloat(int col_idx, float* val) const { return Get<TypeTraits<FLOAT> >(col_idx, val); } @@ -273,6 +281,9 @@ Status KuduScanBatch::RowPtr::Get<TypeTraits<UNIXTIME_MICROS> >( const Slice& col_name, int64_t* val) const; template +Status KuduScanBatch::RowPtr::Get<TypeTraits<DATE> >(const Slice& col_name, int32_t* val) const; + +template Status KuduScanBatch::RowPtr::Get<TypeTraits<FLOAT> >(const Slice& col_name, float* val) const; template @@ -309,6 +320,9 @@ template Status KuduScanBatch::RowPtr::Get<TypeTraits<UNIXTIME_MICROS> >(int col_idx, int64_t* val) const; template +Status KuduScanBatch::RowPtr::Get<TypeTraits<DATE> >(int col_idx, 
int32_t* val) const; + +template Status KuduScanBatch::RowPtr::Get<TypeTraits<FLOAT> >(int col_idx, float* val) const; template diff --git a/src/kudu/client/scan_batch.h b/src/kudu/client/scan_batch.h index 41e381c..2889138 100644 --- a/src/kudu/client/scan_batch.h +++ b/src/kudu/client/scan_batch.h @@ -207,7 +207,8 @@ class KUDU_EXPORT KuduScanBatch::RowPtr { Status GetInt32(const Slice& col_name, int32_t* val) const WARN_UNUSED_RESULT; Status GetInt64(const Slice& col_name, int64_t* val) const WARN_UNUSED_RESULT; Status GetUnixTimeMicros(const Slice& col_name, int64_t* micros_since_utc_epoch) - const WARN_UNUSED_RESULT; + const WARN_UNUSED_RESULT; + Status GetDate(const Slice& col_name, int32_t* days_since_unix_epoch) const WARN_UNUSED_RESULT; Status GetFloat(const Slice& col_name, float* val) const WARN_UNUSED_RESULT; Status GetDouble(const Slice& col_name, double* val) const WARN_UNUSED_RESULT; @@ -242,6 +243,7 @@ class KUDU_EXPORT KuduScanBatch::RowPtr { Status GetInt32(int col_idx, int32_t* val) const WARN_UNUSED_RESULT; Status GetInt64(int col_idx, int64_t* val) const WARN_UNUSED_RESULT; Status GetUnixTimeMicros(int col_idx, int64_t* micros_since_utc_epoch) const WARN_UNUSED_RESULT; + Status GetDate(int col_idx, int32_t* days_since_unix_epoch) const WARN_UNUSED_RESULT; Status GetFloat(int col_idx, float* val) const WARN_UNUSED_RESULT; Status GetDouble(int col_idx, double* val) const WARN_UNUSED_RESULT; diff --git a/src/kudu/client/schema.cc b/src/kudu/client/schema.cc index 6170fa6..7ca7d74 100644 --- a/src/kudu/client/schema.cc +++ b/src/kudu/client/schema.cc @@ -126,6 +126,7 @@ kudu::DataType ToInternalDataType(KuduColumnSchema::DataType type, case KuduColumnSchema::INT32: return kudu::INT32; case KuduColumnSchema::INT64: return kudu::INT64; case KuduColumnSchema::UNIXTIME_MICROS: return kudu::UNIXTIME_MICROS; + case KuduColumnSchema::DATE: return kudu::DATE; case KuduColumnSchema::FLOAT: return kudu::FLOAT; case KuduColumnSchema::DOUBLE: return 
kudu::DOUBLE; case KuduColumnSchema::VARCHAR: return kudu::VARCHAR; @@ -153,6 +154,7 @@ KuduColumnSchema::DataType FromInternalDataType(kudu::DataType type) { case kudu::INT32: return KuduColumnSchema::INT32; case kudu::INT64: return KuduColumnSchema::INT64; case kudu::UNIXTIME_MICROS: return KuduColumnSchema::UNIXTIME_MICROS; + case kudu::DATE: return KuduColumnSchema::DATE; case kudu::FLOAT: return KuduColumnSchema::FLOAT; case kudu::DOUBLE: return KuduColumnSchema::DOUBLE; case kudu::VARCHAR: return KuduColumnSchema::VARCHAR; @@ -679,6 +681,8 @@ string KuduColumnSchema::DataTypeToString(DataType type) { return "BINARY"; case UNIXTIME_MICROS: return "UNIXTIME_MICROS"; + case DATE: + return "DATE"; case DECIMAL: return "DECIMAL"; case VARCHAR: diff --git a/src/kudu/client/schema.h b/src/kudu/client/schema.h index 23133d6..eb0dcb1 100644 --- a/src/kudu/client/schema.h +++ b/src/kudu/client/schema.h @@ -224,7 +224,8 @@ class KUDU_EXPORT KuduColumnSchema { UNIXTIME_MICROS = 9, DECIMAL = 10, VARCHAR = 11, - TIMESTAMP = UNIXTIME_MICROS //!< deprecated, use UNIXTIME_MICROS + TIMESTAMP = UNIXTIME_MICROS, //!< deprecated, use UNIXTIME_MICROS + DATE = 12 }; /// @param [in] type diff --git a/src/kudu/client/value.cc b/src/kudu/client/value.cc index 51a8832..39042d5 100644 --- a/src/kudu/client/value.cc +++ b/src/kudu/client/value.cc @@ -128,6 +128,7 @@ Status KuduValue::Data::CheckTypeAndGetPointer(const string& col_name, case kudu::INT16: case kudu::INT32: case kudu::INT64: + case kudu::DATE: case kudu::UNIXTIME_MICROS: RETURN_NOT_OK(CheckAndPointToInt(col_name, ti->size(), val_void)); break; diff --git a/src/kudu/common/common.proto b/src/kudu/common/common.proto index 8fb6a4a..ff3ab01 100644 --- a/src/kudu/common/common.proto +++ b/src/kudu/common/common.proto @@ -57,6 +57,7 @@ enum DataType { DECIMAL128 = 17; IS_DELETED = 18; // virtual column; not a real data type VARCHAR = 19; + DATE = 20; } enum EncodingType { diff --git a/src/kudu/common/key_util.cc 
b/src/kudu/common/key_util.cc index ec98dec..ac4b8b8 100644 --- a/src/kudu/common/key_util.cc +++ b/src/kudu/common/key_util.cc @@ -17,7 +17,6 @@ #include "kudu/common/key_util.h" -#include <cmath> #include <cstring> #include <iterator> #include <limits> @@ -358,6 +357,7 @@ bool IncrementCell(const ColumnSchema& col, void* cell_ptr, Arena* arena) { HANDLE_TYPE(INT8); HANDLE_TYPE(INT16); HANDLE_TYPE(INT32); + HANDLE_TYPE(DATE); HANDLE_TYPE(UNIXTIME_MICROS); HANDLE_TYPE(INT64); HANDLE_TYPE(INT128); @@ -388,6 +388,7 @@ bool TryDecrementCell(const ColumnSchema &col, void *cell_ptr) { HANDLE_TYPE(INT8); HANDLE_TYPE(INT16); HANDLE_TYPE(INT32); + HANDLE_TYPE(DATE); HANDLE_TYPE(UNIXTIME_MICROS); HANDLE_TYPE(INT64); HANDLE_TYPE(INT128); diff --git a/src/kudu/common/partial_row.cc b/src/kudu/common/partial_row.cc index 2467423..e72b9d6 100644 --- a/src/kudu/common/partial_row.cc +++ b/src/kudu/common/partial_row.cc @@ -46,6 +46,17 @@ using strings::Substitute; namespace kudu { +namespace { +Status CheckDateValueInRange(int col_idx, int32_t val, const Schema& schema) { + if (PREDICT_FALSE(!DataTypeTraits<DATE>::IsValidValue(val))) { + const ColumnSchema& col = schema.column(col_idx); + return Status::InvalidArgument( + Substitute("value $0 out of range for date column '$1'", val, col.name())); + } + return Status::OK(); +} +} // anonymous namespace + KuduPartialRow::KuduPartialRow(const Schema* schema) : schema_(schema) { DCHECK(schema_->initialized()); @@ -198,6 +209,10 @@ Status KuduPartialRow::Set(int32_t column_idx, const uint8_t* val) { RETURN_NOT_OK(SetUnixTimeMicros(column_idx, *reinterpret_cast<const int64_t*>(val))); break; } + case DATE: { + RETURN_NOT_OK(SetDate(column_idx, *reinterpret_cast<const int32_t*>(val))); + break; + } case DECIMAL32: { RETURN_NOT_OK(Set<TypeTraits<DECIMAL32> >(column_idx, *reinterpret_cast<const int32_t*>(val))); @@ -268,8 +283,11 @@ Status KuduPartialRow::SetInt32(const Slice& col_name, int32_t val) { Status 
KuduPartialRow::SetInt64(const Slice& col_name, int64_t val) { return Set<TypeTraits<INT64> >(col_name, val); } -Status KuduPartialRow::SetUnixTimeMicros(const Slice& col_name, int64_t val) { - return Set<TypeTraits<UNIXTIME_MICROS> >(col_name, val); +Status KuduPartialRow::SetUnixTimeMicros(const Slice& col_name, int64_t micros_since_utc_epoch) { + return Set<TypeTraits<UNIXTIME_MICROS> >(col_name, micros_since_utc_epoch); +} +Status KuduPartialRow::SetDate(const Slice& col_name, int32_t days_since_unix_epoch) { + return Set<TypeTraits<DATE> >(col_name, days_since_unix_epoch); } Status KuduPartialRow::SetFloat(const Slice& col_name, float val) { return Set<TypeTraits<FLOAT> >(col_name, val); @@ -297,8 +315,12 @@ Status KuduPartialRow::SetInt32(int col_idx, int32_t val) { Status KuduPartialRow::SetInt64(int col_idx, int64_t val) { return Set<TypeTraits<INT64> >(col_idx, val); } -Status KuduPartialRow::SetUnixTimeMicros(int col_idx, int64_t val) { - return Set<TypeTraits<UNIXTIME_MICROS> >(col_idx, val); +Status KuduPartialRow::SetUnixTimeMicros(int col_idx, int64_t micros_since_utc_epoch) { + return Set<TypeTraits<UNIXTIME_MICROS> >(col_idx, micros_since_utc_epoch); +} +Status KuduPartialRow::SetDate(int col_idx, int32_t days_since_unix_epoch) { + RETURN_NOT_OK(CheckDateValueInRange(col_idx, days_since_unix_epoch, *schema_)); + return Set<TypeTraits<DATE> >(col_idx, days_since_unix_epoch); } Status KuduPartialRow::SetFloat(int col_idx, float val) { return Set<TypeTraits<FLOAT> >(col_idx, val); @@ -516,6 +538,11 @@ Status KuduPartialRow::Set<TypeTraits<UNIXTIME_MICROS> >( bool owned); template +Status KuduPartialRow::Set<TypeTraits<DATE> >(int col_idx, + const TypeTraits<DATE>::cpp_type& val, + bool owned); + +template Status KuduPartialRow::Set<TypeTraits<STRING> >(int col_idx, const TypeTraits<STRING>::cpp_type& val, bool owned); @@ -588,6 +615,11 @@ Status KuduPartialRow::Set<TypeTraits<UNIXTIME_MICROS> >( bool owned); template +Status 
KuduPartialRow::Set<TypeTraits<DATE> >(const Slice& col_name, + const TypeTraits<DATE>::cpp_type& val, + bool owned); + +template Status KuduPartialRow::Set<TypeTraits<FLOAT> >(const Slice& col_name, const TypeTraits<FLOAT>::cpp_type& val, bool owned); @@ -679,6 +711,10 @@ Status KuduPartialRow::GetUnixTimeMicros(const Slice& col_name, int64_t* micros_since_utc_epoch) const { return Get<TypeTraits<UNIXTIME_MICROS> >(col_name, micros_since_utc_epoch); } +Status KuduPartialRow::GetDate(const Slice& col_name, + int32_t* days_since_unix_epoch) const { + return Get<TypeTraits<DATE> >(col_name, days_since_unix_epoch); +} Status KuduPartialRow::GetFloat(const Slice& col_name, float* val) const { return Get<TypeTraits<FLOAT> >(col_name, val); } @@ -724,6 +760,9 @@ Status KuduPartialRow::GetInt64(int col_idx, int64_t* val) const { Status KuduPartialRow::GetUnixTimeMicros(int col_idx, int64_t* micros_since_utc_epoch) const { return Get<TypeTraits<UNIXTIME_MICROS> >(col_idx, micros_since_utc_epoch); } +Status KuduPartialRow::GetDate(int col_idx, int32_t* days_since_unix_epoch) const { + return Get<TypeTraits<DATE> >(col_idx, days_since_unix_epoch); +} Status KuduPartialRow::GetFloat(int col_idx, float* val) const { return Get<TypeTraits<FLOAT> >(col_idx, val); } diff --git a/src/kudu/common/partial_row.h b/src/kudu/common/partial_row.h index d2fc631..e0b4b64 100644 --- a/src/kudu/common/partial_row.h +++ b/src/kudu/common/partial_row.h @@ -110,6 +110,8 @@ class KUDU_EXPORT KuduPartialRow { Status SetInt64(const Slice& col_name, int64_t val) WARN_UNUSED_RESULT; Status SetUnixTimeMicros(const Slice& col_name, int64_t micros_since_utc_epoch) WARN_UNUSED_RESULT; + Status SetDate(const Slice& col_name, + int32_t days_since_unix_epoch) WARN_UNUSED_RESULT; Status SetFloat(const Slice& col_name, float val) WARN_UNUSED_RESULT; Status SetDouble(const Slice& col_name, double val) WARN_UNUSED_RESULT; @@ -141,6 +143,7 @@ class KUDU_EXPORT KuduPartialRow { Status SetInt32(int col_idx, 
int32_t val) WARN_UNUSED_RESULT; Status SetInt64(int col_idx, int64_t val) WARN_UNUSED_RESULT; Status SetUnixTimeMicros(int col_idx, int64_t micros_since_utc_epoch) WARN_UNUSED_RESULT; + Status SetDate(int col_idx, int32_t days_since_unix_epoch) WARN_UNUSED_RESULT; Status SetFloat(int col_idx, float val) WARN_UNUSED_RESULT; Status SetDouble(int col_idx, double val) WARN_UNUSED_RESULT; @@ -446,9 +449,9 @@ class KUDU_EXPORT KuduPartialRow { Status GetInt16(const Slice& col_name, int16_t* val) const WARN_UNUSED_RESULT; Status GetInt32(const Slice& col_name, int32_t* val) const WARN_UNUSED_RESULT; Status GetInt64(const Slice& col_name, int64_t* val) const WARN_UNUSED_RESULT; - Status GetUnixTimeMicros(const Slice& col_name, - int64_t* micros_since_utc_epoch) const WARN_UNUSED_RESULT; - + Status GetUnixTimeMicros(const Slice& col_name, int64_t* micros_since_utc_epoch) + const WARN_UNUSED_RESULT; + Status GetDate(const Slice& col_name, int32_t* days_since_unix_epoch) const WARN_UNUSED_RESULT; Status GetFloat(const Slice& col_name, float* val) const WARN_UNUSED_RESULT; Status GetDouble(const Slice& col_name, double* val) const WARN_UNUSED_RESULT; #if KUDU_INT128_SUPPORTED @@ -483,6 +486,7 @@ class KUDU_EXPORT KuduPartialRow { Status GetInt32(int col_idx, int32_t* val) const WARN_UNUSED_RESULT; Status GetInt64(int col_idx, int64_t* val) const WARN_UNUSED_RESULT; Status GetUnixTimeMicros(int col_idx, int64_t* micros_since_utc_epoch) const WARN_UNUSED_RESULT; + Status GetDate(int col_idx, int32_t* days_since_unix_epoch) const WARN_UNUSED_RESULT; Status GetFloat(int col_idx, float* val) const WARN_UNUSED_RESULT; Status GetDouble(int col_idx, double* val) const WARN_UNUSED_RESULT; diff --git a/src/kudu/common/partition.cc b/src/kudu/common/partition.cc index 571ec4a..0797a98 100644 --- a/src/kudu/common/partition.cc +++ b/src/kudu/common/partition.cc @@ -27,6 +27,7 @@ #include <vector> #include <glog/logging.h> +#include <google/protobuf/stubs/port.h> #include 
"kudu/common/common.pb.h" #include "kudu/common/key_encoder.h" @@ -1013,6 +1014,9 @@ namespace { case UNIXTIME_MICROS: RETURN_NOT_OK(row->SetInt64(idx, INT64_MIN + 1)); break; + case DATE: + RETURN_NOT_OK(row->SetDate(idx, DataTypeTraits<DATE>::kMinValue + 1)); + break; case VARCHAR: RETURN_NOT_OK(row->SetVarchar(idx, Slice("\0", 1))); break; @@ -1092,6 +1096,16 @@ namespace { } break; } + case DATE: { + int32_t value; + RETURN_NOT_OK(row->GetDate(idx, &value)); + if (value < DataTypeTraits<DATE>::kMaxValue) { + RETURN_NOT_OK(row->SetDate(idx, value + 1)); + } else { + *success = false; + } + break; + } case DECIMAL32: case DECIMAL64: case DECIMAL128: { diff --git a/src/kudu/common/row.h b/src/kudu/common/row.h index 3ffffcd..5ea8850 100644 --- a/src/kudu/common/row.h +++ b/src/kudu/common/row.h @@ -676,6 +676,12 @@ class RowBuilder { Advance(); } + void AddDate(int32_t days_since_unix_epoch) { + CheckNextType(DATE); + *reinterpret_cast<int32_t *>(&buf_[byte_idx_]) = days_since_unix_epoch; + Advance(); + } + void AddUint64(uint64_t val) { CheckNextType(UINT64); *reinterpret_cast<uint64_t *>(&buf_[byte_idx_]) = val; diff --git a/src/kudu/common/types-test.cc b/src/kudu/common/types-test.cc index 2d3e9a8..805abe8 100644 --- a/src/kudu/common/types-test.cc +++ b/src/kudu/common/types-test.cc @@ -15,8 +15,11 @@ // specific language governing permissions and limitations // under the License. 
-#include <cstdint> +#include "kudu/common/types.h" + #include <cmath> +#include <cstdint> +#include <limits> #include <string> #include <tuple> // IWYU pragma: keep #include <vector> @@ -25,7 +28,6 @@ #include <gtest/gtest.h> #include "kudu/common/common.pb.h" -#include "kudu/common/types.h" #include "kudu/gutil/mathlimits.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/util/slice.h" @@ -40,7 +42,31 @@ using std::vector; namespace kudu { -class TestTypes : public KuduTest {}; +class TestTypes : public KuduTest { + protected: + static void TestDateToString(const string& expected, int32_t date) { + const TypeInfo* info = GetTypeInfo(DATE); + string result; + info->AppendDebugStringForValue(&date, &result); + ASSERT_EQ(expected, result); + } +}; + +TEST_F(TestTypes, TestDatePrinting) { + TestDateToString("1-01-01", *DataTypeTraits<DATE>::min_value()); + TestDateToString("9999-12-31", *DataTypeTraits<DATE>::max_value()); + TestDateToString("1970-01-01", 0); + TestDateToString("1942-08-16", -10000); + TestDateToString("1997-05-19", 10000); + TestDateToString("value -2147483648 out of range for DATE type", + std::numeric_limits<int32_t>::min()); + TestDateToString("value 2147483647 out of range for DATE type", + std::numeric_limits<int32_t>::max()); + TestDateToString("value -719163 out of range for DATE type", + *DataTypeTraits<DATE>::min_value() - 1); + TestDateToString("value 2932897 out of range for DATE type", + *DataTypeTraits<DATE>::max_value() + 1); +} TEST_F(TestTypes, TestTimestampPrinting) { const TypeInfo* info = GetTypeInfo(UNIXTIME_MICROS); diff --git a/src/kudu/common/types.cc b/src/kudu/common/types.cc index 7572f69..df57db4 100644 --- a/src/kudu/common/types.cc +++ b/src/kudu/common/types.cc @@ -21,6 +21,8 @@ #include <unordered_map> #include "kudu/gutil/singleton.h" +#include "kudu/gutil/strings/substitute.h" +#include "kudu/gutil/walltime.h" #include "kudu/util/logging.h" using std::string; @@ -29,8 +31,10 @@ using std::unordered_map; 
namespace kudu { +using strings::Substitute; + template<typename TypeTraitsClass> -TypeInfo::TypeInfo(TypeTraitsClass t) +TypeInfo::TypeInfo(TypeTraitsClass /*t*/) : type_(TypeTraitsClass::type), physical_type_(TypeTraitsClass::physical_type), name_(TypeTraitsClass::name()), @@ -79,6 +83,7 @@ class TypeInfoResolver { AddMapping<UINT64>(); AddMapping<INT64>(); AddMapping<UNIXTIME_MICROS>(); + AddMapping<DATE>(); AddMapping<STRING>(); AddMapping<BOOL>(); AddMapping<FLOAT>(); @@ -109,4 +114,17 @@ const TypeInfo* GetTypeInfo(DataType type) { return Singleton<TypeInfoResolver>::get()->GetTypeInfo(type); } +void DataTypeTraits<DATE>::AppendDebugStringForValue(const void* val, string* str) { + constexpr static const char* kDateFormat = "%F"; // the ISO 8601 date format + static constexpr time_t kSecondsInDay = 24 * 60 * 60; + + int32_t days_since_unix_epoch = *reinterpret_cast<const int32_t*>(val); + if (IsValidValue(days_since_unix_epoch)) { + time_t seconds = static_cast<time_t>(days_since_unix_epoch) * kSecondsInDay; + StringAppendStrftime(str, kDateFormat, seconds, false); + } else { + str->append(Substitute("value $0 out of range for DATE type", days_since_unix_epoch)); + } +} + } // namespace kudu diff --git a/src/kudu/common/types.h b/src/kudu/common/types.h index e77671e..d13c0c8 100644 --- a/src/kudu/common/types.h +++ b/src/kudu/common/types.h @@ -547,12 +547,12 @@ struct DataTypeTraits<STRING> : public DerivedTypeTraits<BINARY>{ } }; -static const char* kDateFormat = "%Y-%m-%dT%H:%M:%S"; -static const char* kDateMicrosAndTzFormat = "%s.%06dZ"; template<> struct DataTypeTraits<UNIXTIME_MICROS> : public DerivedTypeTraits<INT64>{ - static const int US_TO_S = 1000L * 1000L; + static const int kMicrosInSecond = 1000L * 1000L; + constexpr static const char* kDateFormat = "%Y-%m-%dT%H:%M:%S"; + constexpr static const char* kDateMicrosAndTzFormat = "%s.%06dZ"; static const char* name() { return "unixtime_micros"; @@ -560,13 +560,13 @@ struct 
DataTypeTraits<UNIXTIME_MICROS> : public DerivedTypeTraits<INT64>{ static void AppendDebugStringForValue(const void* val, std::string* str) { int64_t timestamp_micros = *reinterpret_cast<const int64_t *>(val); - time_t secs_since_epoch = timestamp_micros / US_TO_S; + time_t secs_since_epoch = timestamp_micros / kMicrosInSecond; // If the time is negative we need to take into account that any microseconds // will actually decrease the time in seconds by one. - int remaining_micros = timestamp_micros % US_TO_S; + int remaining_micros = static_cast<int>(timestamp_micros % kMicrosInSecond); if (remaining_micros < 0) { secs_since_epoch--; - remaining_micros = US_TO_S - std::abs(remaining_micros); + remaining_micros = kMicrosInSecond - std::abs(remaining_micros); } struct tm tm_info; gmtime_r(&secs_since_epoch, &tm_info); @@ -579,6 +579,31 @@ struct DataTypeTraits<UNIXTIME_MICROS> : public DerivedTypeTraits<INT64>{ }; template<> +struct DataTypeTraits<DATE> : public DerivedTypeTraits<INT32>{ + static constexpr int32_t kMinValue = -719162; // mktime(0001-01-01) + static constexpr int32_t kMaxValue = 2932896; // mktime(9999-12-31) + typedef int32_t cpp_type; + + static const char* name() { + return "date"; + } + + static void AppendDebugStringForValue(const void* val, std::string* str); + + static const cpp_type* min_value() { + static int32_t value = kMinValue; + return &value; + } + static const cpp_type* max_value() { + static int32_t value = kMaxValue; + return &value; + } + static bool IsValidValue(int32_t val) { + return val >= kMinValue && val <= kMaxValue; + } +}; + +template<> struct DataTypeTraits<DECIMAL32> : public DerivedTypeTraits<INT32>{ static const char* name() { return "decimal"; @@ -700,6 +725,7 @@ class Variant { case UINT16: numeric_.u16 = *static_cast<const uint16_t *>(value); break; + case DATE: case DECIMAL32: case INT32: numeric_.i32 = *static_cast<const int32_t *>(value); @@ -781,6 +807,7 @@ class Variant { case UINT8: return &(numeric_.u8); case 
INT16: return &(numeric_.i16); case UINT16: return &(numeric_.u16); + case DATE: case DECIMAL32: case INT32: return &(numeric_.i32); case UINT32: return &(numeric_.u32); diff --git a/src/kudu/hms/hms_catalog-test.cc b/src/kudu/hms/hms_catalog-test.cc index 45c9732..b2d48b7 100644 --- a/src/kudu/hms/hms_catalog-test.cc +++ b/src/kudu/hms/hms_catalog-test.cc @@ -195,6 +195,7 @@ class HmsCatalogTest : public KuduTest { b.AddColumn("int32_val", DataType::INT32); b.AddColumn("int64_val", DataType::INT64); b.AddColumn("timestamp_val", DataType::UNIXTIME_MICROS); + b.AddColumn("date_val", DataType::DATE); b.AddColumn("string_val", DataType::STRING); b.AddColumn("bool_val", DataType::BOOL); b.AddColumn("float_val", DataType::FLOAT); diff --git a/src/kudu/hms/hms_catalog.cc b/src/kudu/hms/hms_catalog.cc index a01c4da..2f08fba 100644 --- a/src/kudu/hms/hms_catalog.cc +++ b/src/kudu/hms/hms_catalog.cc @@ -333,6 +333,7 @@ string column_to_field_type(const ColumnSchema& column) { case VARCHAR: return Substitute("varchar($0)", column.type_attributes().length); case UNIXTIME_MICROS: return "timestamp"; + case DATE: return "date"; default: LOG(FATAL) << "unhandled column type: " << column.TypeToString(); } __builtin_unreachable(); diff --git a/src/kudu/integration-tests/all_types-itest.cc b/src/kudu/integration-tests/all_types-itest.cc index 201d813..aa242d7 100644 --- a/src/kudu/integration-tests/all_types-itest.cc +++ b/src/kudu/integration-tests/all_types-itest.cc @@ -20,6 +20,7 @@ #include <functional> #include <ostream> #include <string> +#include <utility> #include <vector> #include <gflags/gflags.h> @@ -247,12 +248,88 @@ struct IntKeysTestSetup { int rows_per_tablet_; }; +struct DateKeysTestSetup { + + DateKeysTestSetup() + : min_value(DataTypeTraits<DATE>::kMinValue), + max_rows_(DataTypeTraits<DATE>::kMaxValue - min_value), + increment_(max_rows_ / kNumTablets), + rows_per_tablet_(std::min(increment_, FLAGS_num_rows_per_tablet)) { + } + + void 
AddKeyColumnsToSchema(KuduSchemaBuilder* builder) const { + auto column_spec = builder->AddColumn("key"); + column_spec->Type(KuduColumnSchema::DATE) + ->NotNull()->PrimaryKey(); + } + + vector<const KuduPartialRow*> GenerateSplitRows(const KuduSchema& schema) const { + vector<int> splits; + splits.reserve(kNumTablets - 1); + for (int64_t i = 1; i < kNumTablets; i++) { + splits.push_back(min_value + i * increment_); + } + vector<const KuduPartialRow*> rows; + for (int val : splits) { + KuduPartialRow* row = schema.NewRow(); + CHECK_OK(row->SetDate(0, val)); + rows.push_back(row); + } + return rows; + } + + Status GenerateRowKey(KuduInsert* insert, int split_idx, int row_idx) const { + int val = min_value + (split_idx * increment_) + row_idx; + return insert->mutable_row()->SetDate(0, val); + } + + Status VerifyIntRowKey(int val, int split_idx, int row_idx) const { + int expected = min_value + (split_idx * increment_) + row_idx; + if (val != expected) { + return Status::Corruption(strings::Substitute("Keys didn't match. 
Expected: $0 Got: $1", + expected, val)); + } + return Status::OK(); + } + + Status VerifyRowKey(const KuduRowResult& result, int split_idx, int row_idx) const { + int val; + RETURN_NOT_OK(result.GetDate(0, &val)); + return VerifyIntRowKey(val, split_idx, row_idx); + } + + Status VerifyRowKeyRaw(const uint8_t* raw_key, int split_idx, int row_idx) const { + int val = UnalignedLoad<int32_t>(raw_key); + return VerifyIntRowKey(val, split_idx, row_idx); + } + + int GetRowsPerTablet() const { + return rows_per_tablet_; + } + + int GetMaxRows() const { + return max_rows_; + } + + vector<string> GetKeyColumns() const { + vector<string> key_col; + key_col.emplace_back("key"); + return key_col; + } + + int min_value; + int max_rows_; + int increment_; + int rows_per_tablet_; +}; + struct ExpectedVals { int8_t expected_int8_val; int16_t expected_int16_val; int32_t expected_int32_val; int64_t expected_int64_val; int64_t expected_timestamp_val; + int32_t expected_date_val; string slice_content; Slice expected_slice_val; Slice expected_binary_val; @@ -285,6 +362,7 @@ class AllTypesItest : public KuduTest { builder.AddColumn("int32_val")->Type(KuduColumnSchema::INT32); builder.AddColumn("int64_val")->Type(KuduColumnSchema::INT64); builder.AddColumn("timestamp_val")->Type(KuduColumnSchema::UNIXTIME_MICROS); + builder.AddColumn("date_val")->Type(KuduColumnSchema::DATE); builder.AddColumn("string_val")->Type(KuduColumnSchema::STRING); builder.AddColumn("varchar_val")->Type(KuduColumnSchema::VARCHAR)->Length(kMaxVarcharLength); builder.AddColumn("bool_val")->Type(KuduColumnSchema::BOOL); @@ -359,6 +437,7 @@ class AllTypesItest : public KuduTest { RETURN_NOT_OK(row->SetInt32("int32_val", int_val)); RETURN_NOT_OK(row->SetInt64("int64_val", int_val)); RETURN_NOT_OK(row->SetUnixTimeMicros("timestamp_val", int_val)); + RETURN_NOT_OK(row->SetDate("date_val", int_val)); string content = strings::Substitute("hello $0", int_val); Slice slice_val(content); 
RETURN_NOT_OK(row->SetStringCopy("string_val", slice_val)); @@ -402,6 +481,7 @@ class AllTypesItest : public KuduTest { projection->push_back("int32_val"); projection->push_back("int64_val"); projection->push_back("timestamp_val"); + projection->push_back("date_val"); projection->push_back("string_val"); projection->push_back("binary_val"); projection->push_back("varchar_val"); @@ -421,6 +501,7 @@ class AllTypesItest : public KuduTest { vals.expected_int32_val = static_cast<int32_t>(expected_int_val); vals.expected_int64_val = expected_int_val; vals.expected_timestamp_val = expected_int_val; + vals.expected_date_val = static_cast<int32_t>(expected_int_val); vals.slice_content = strings::Substitute("hello $0", expected_int_val); vals.expected_slice_val = Slice(vals.slice_content); vals.expected_varchar_val = Slice(vals.slice_content); @@ -452,6 +533,9 @@ class AllTypesItest : public KuduTest { int64_t timestamp_val; ASSERT_OK(row.GetUnixTimeMicros("timestamp_val", &timestamp_val)); ASSERT_EQ(timestamp_val, vals.expected_timestamp_val); + int32_t date_val; + ASSERT_OK(row.GetDate("date_val", &date_val)); + ASSERT_EQ(date_val, vals.expected_date_val); Slice string_val; ASSERT_OK(row.GetString("string_val", &string_val)); ASSERT_EQ(string_val, vals.expected_slice_val); @@ -567,6 +651,7 @@ typedef ::testing::Types<IntKeysTestSetup<KeyTypeWrapper<INT8> >, IntKeysTestSetup<KeyTypeWrapper<DECIMAL64> >, IntKeysTestSetup<KeyTypeWrapper<DECIMAL128> >, IntKeysTestSetup<KeyTypeWrapper<UNIXTIME_MICROS> >, + DateKeysTestSetup, SliceKeysTestSetup<KeyTypeWrapper<STRING> >, SliceKeysTestSetup<KeyTypeWrapper<BINARY> > > KeyTypes; @@ -663,6 +748,9 @@ TYPED_TEST(AllTypesItest, TestTimestampPadding) { case KuduColumnSchema::UNIXTIME_MICROS: ASSERT_EQ(*reinterpret_cast<const int64_t*>(row_data), vals.expected_timestamp_val); break; + case KuduColumnSchema::DATE: + ASSERT_EQ(*reinterpret_cast<const int32_t*>(row_data), vals.expected_date_val); + break; case KuduColumnSchema::STRING: 
ASSERT_EQ(*reinterpret_cast<const Slice*>(row_data), vals.expected_slice_val); break; diff --git a/src/kudu/integration-tests/data_gen_util.cc b/src/kudu/integration-tests/data_gen_util.cc index 8f5d886..98a6d2f 100644 --- a/src/kudu/integration-tests/data_gen_util.cc +++ b/src/kudu/integration-tests/data_gen_util.cc @@ -66,6 +66,9 @@ void WriteValueToColumn(const client::KuduSchema& schema, case client::KuduColumnSchema::UNIXTIME_MICROS: CHECK_OK(row->SetUnixTimeMicros(col_idx, value)); break; + case client::KuduColumnSchema::DATE: + CHECK_OK(row->SetDate(col_idx, value)); + break; case client::KuduColumnSchema::DECIMAL: CHECK_OK(row->SetUnscaledDecimal(col_idx, value)); break; diff --git a/src/kudu/integration-tests/hms_itest-base.cc b/src/kudu/integration-tests/hms_itest-base.cc index 46c265b..4660de9 100644 --- a/src/kudu/integration-tests/hms_itest-base.cc +++ b/src/kudu/integration-tests/hms_itest-base.cc @@ -86,6 +86,7 @@ Status HmsITestBase::CreateKuduTable(const string& database_name, b.AddColumn("int32_val")->Type(KuduColumnSchema::INT32); b.AddColumn("int64_val")->Type(KuduColumnSchema::INT64); b.AddColumn("timestamp_val")->Type(KuduColumnSchema::UNIXTIME_MICROS); + b.AddColumn("date_val")->Type(KuduColumnSchema::DATE); b.AddColumn("string_val")->Type(KuduColumnSchema::STRING); b.AddColumn("bool_val")->Type(KuduColumnSchema::BOOL); b.AddColumn("float_val")->Type(KuduColumnSchema::FLOAT); diff --git a/src/kudu/tools/kudu-admin-test.cc b/src/kudu/tools/kudu-admin-test.cc index 6d8f3c6..bb68170 100644 --- a/src/kudu/tools/kudu-admin-test.cc +++ b/src/kudu/tools/kudu-admin-test.cc @@ -1783,6 +1783,7 @@ TEST_F(AdminCliTest, TestDescribeTable) { ->Compression(KuduColumnStorageAttributes::CompressionType::ZLIB) ->Default(KuduValue::FromInt(123)); builder.AddColumn("timestamp_val")->Type(KuduColumnSchema::UNIXTIME_MICROS); + builder.AddColumn("date_val")->Type(KuduColumnSchema::DATE); builder.AddColumn("string_val")->Type(KuduColumnSchema::STRING) 
->Encoding(KuduColumnStorageAttributes::EncodingType::PREFIX_ENCODING) ->Default(KuduValue::CopyString(Slice("hello"))); @@ -1845,6 +1846,7 @@ TEST_F(AdminCliTest, TestDescribeTable) { " int32_val INT32 NULLABLE,\n" " int64_val INT64 NULLABLE,\n" " timestamp_val UNIXTIME_MICROS NULLABLE,\n" + " date_val DATE NULLABLE,\n" " string_val STRING NULLABLE,\n" " bool_val BOOL NULLABLE,\n" " float_val FLOAT NULLABLE,\n" @@ -1885,6 +1887,7 @@ TEST_F(AdminCliTest, TestDescribeTable) { " int32_val INT32 NULLABLE BIT_SHUFFLE LZ4 - -,\n" " int64_val INT64 NULLABLE AUTO_ENCODING ZLIB 123 123,\n" " timestamp_val UNIXTIME_MICROS NULLABLE AUTO_ENCODING DEFAULT_COMPRESSION - -,\n" + " date_val DATE NULLABLE AUTO_ENCODING DEFAULT_COMPRESSION - -,\n" " string_val STRING NULLABLE PREFIX_ENCODING DEFAULT_COMPRESSION \"hello\" \"hello\",\n" " bool_val BOOL NULLABLE AUTO_ENCODING DEFAULT_COMPRESSION false false,\n" " float_val FLOAT NULLABLE AUTO_ENCODING DEFAULT_COMPRESSION - -,\n" diff --git a/src/kudu/tools/tool_action_perf.cc b/src/kudu/tools/tool_action_perf.cc index ef8b2c1..c48bb08 100644 --- a/src/kudu/tools/tool_action_perf.cc +++ b/src/kudu/tools/tool_action_perf.cc @@ -467,6 +467,9 @@ Status GenerateRowData(Generator* gen, KuduPartialRow* row, case UNIXTIME_MICROS: RETURN_NOT_OK(row->SetUnixTimeMicros(idx, gen->Next<int64_t>())); break; + case DATE: + RETURN_NOT_OK(row->SetDate(idx, gen->Next<int32_t>())); + break; case FLOAT: RETURN_NOT_OK(row->SetFloat(idx, gen->Next<float>())); break; diff --git a/src/kudu/tools/tool_action_table.cc b/src/kudu/tools/tool_action_table.cc index c72e4ed..b20b7ff 100644 --- a/src/kudu/tools/tool_action_table.cc +++ b/src/kudu/tools/tool_action_table.cc @@ -273,6 +273,7 @@ Status LocateRow(const RunnerContext& context) { case KuduColumnSchema::INT16: case KuduColumnSchema::INT32: case KuduColumnSchema::INT64: + case KuduColumnSchema::DATE: case KuduColumnSchema::UNIXTIME_MICROS: { int64_t value; RETURN_NOT_OK_PREPEND( @@ -563,6 +564,14 @@ Status 
ConvertToKuduPartialRow( RETURN_NOT_OK(range_bound_partial_row->SetInt64(col_name, value)); break; } + case KuduColumnSchema::DATE: { + int32_t value; + RETURN_NOT_OK_PREPEND( + reader.ExtractInt32(values[i], /*field=*/nullptr, &value), + error_msg); + RETURN_NOT_OK(range_bound_partial_row->SetDate(col_name, value)); + break; + } case KuduColumnSchema::UNIXTIME_MICROS: { int64_t value; RETURN_NOT_OK_PREPEND( @@ -715,6 +724,7 @@ Status ParseValueOfType(const string& default_value, case KuduColumnSchema::DataType::INT16: case KuduColumnSchema::DataType::INT32: case KuduColumnSchema::DataType::INT64: + case KuduColumnSchema::DataType::DATE: case KuduColumnSchema::DataType::UNIXTIME_MICROS: { int64_t int_value; RETURN_NOT_OK_PREPEND(
