This is an automated email from the ASF dual-hosted git repository.
bkietz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 3752e34 ARROW-8727: [C++] Don't require stack allocation of any object to use StringConverter, hide behind ParseValue function
3752e34 is described below
commit 3752e34adf2984c613ea13718937452fcd146fce
Author: Wes McKinney <[email protected]>
AuthorDate: Fri May 8 16:24:28 2020 -0400
ARROW-8727: [C++] Don't require stack allocation of any object to use StringConverter, hide behind ParseValue function
Using non-stateful functions or functors is more natural for template-based
code generation of e.g. kernels.
Closes #7120 from wesm/string-converter-static
Authored-by: Wes McKinney <[email protected]>
Signed-off-by: Benjamin Kietzman <[email protected]>
---
cpp/src/arrow/c/bridge.cc | 3 +-
cpp/src/arrow/compute/kernels/cast.cc | 13 +-
cpp/src/arrow/csv/converter.cc | 8 +-
cpp/src/arrow/filesystem/hdfs.cc | 10 +-
cpp/src/arrow/ipc/json_internal.cc | 6 +-
cpp/src/arrow/ipc/json_simple.cc | 9 +-
cpp/src/arrow/json/converter.cc | 36 ++-
cpp/src/arrow/python/deserialize.cc | 5 +-
cpp/src/arrow/scalar.cc | 15 +-
cpp/src/arrow/util/decimal.cc | 6 +-
cpp/src/arrow/util/uri.cc | 3 +-
cpp/src/arrow/util/value_parsing.cc | 44 ++-
cpp/src/arrow/util/value_parsing.h | 133 ++++-----
cpp/src/arrow/util/value_parsing_benchmark.cc | 6 +-
cpp/src/arrow/util/value_parsing_test.cc | 407 ++++++++++++--------------
15 files changed, 337 insertions(+), 367 deletions(-)
diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index d1dc77c..44b2b76 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -652,9 +652,8 @@ class FormatStringParser {
template <typename IntType = int32_t>
Result<IntType> ParseInt(util::string_view v) {
using ArrowIntType = typename CTypeTraits<IntType>::ArrowType;
- internal::StringConverter<ArrowIntType> converter;
IntType value;
- if (!converter(v.data(), v.size(), &value)) {
+ if (!internal::ParseValue<ArrowIntType>(v.data(), v.size(), &value)) {
return Invalid();
}
return value;
diff --git a/cpp/src/arrow/compute/kernels/cast.cc b/cpp/src/arrow/compute/kernels/cast.cc
index f023133..ab11a6c 100644
--- a/cpp/src/arrow/compute/kernels/cast.cc
+++ b/cpp/src/arrow/compute/kernels/cast.cc
@@ -1073,15 +1073,13 @@ struct CastFunctor<
typename TypeTraits<I>::ArrayType input_array(input.Copy());
auto out_data = output->GetMutableValues<out_type>(1);
- internal::StringConverter<O> converter;
-
for (int64_t i = 0; i < input.length; ++i, ++out_data) {
if (input_array.IsNull(i)) {
continue;
}
auto str = input_array.GetView(i);
- if (!converter(str.data(), str.length(), out_data)) {
+ if (!internal::ParseValue<O>(str.data(), str.length(), out_data)) {
ctx->SetStatus(Status::Invalid("Failed to cast String '", str, "' into
",
output->type->ToString()));
return;
@@ -1100,7 +1098,6 @@ struct CastFunctor<BooleanType, I,
enable_if_t<is_string_like_type<I>::value>> {
typename TypeTraits<I>::ArrayType input_array(input.Copy());
internal::FirstTimeBitmapWriter writer(output->buffers[1]->mutable_data(),
output->offset, input.length);
- internal::StringConverter<BooleanType> converter;
for (int64_t i = 0; i < input.length; ++i) {
if (input_array.IsNull(i)) {
@@ -1110,7 +1107,7 @@ struct CastFunctor<BooleanType, I,
enable_if_t<is_string_like_type<I>::value>> {
bool value;
auto str = input_array.GetView(i);
- if (!converter(str.data(), str.length(), &value)) {
+ if (!internal::ParseValue<BooleanType>(str.data(), str.length(),
&value)) {
ctx->SetStatus(Status::Invalid("Failed to cast String '",
input_array.GetString(i), "' into ",
output->type->ToString()));
@@ -1139,15 +1136,15 @@ struct CastFunctor<TimestampType, I,
enable_if_t<is_string_like_type<I>::value>>
typename TypeTraits<I>::ArrayType input_array(input.Copy());
auto out_data = output->GetMutableValues<out_type>(1);
- internal::StringConverter<TimestampType> converter(output->type);
+
+ const TimeUnit::type unit = checked_cast<const
TimestampType&>(*output->type).unit();
for (int64_t i = 0; i < input.length; ++i, ++out_data) {
if (input_array.IsNull(i)) {
continue;
}
-
const auto str = input_array.GetView(i);
- if (!converter(str.data(), str.length(), out_data)) {
+ if (!internal::ParseTimestampISO8601(str.data(), str.length(), unit,
out_data)) {
ctx->SetStatus(Status::Invalid("Failed to cast String '", str, "' into
",
output->type->ToString()));
return;
diff --git a/cpp/src/arrow/csv/converter.cc b/cpp/src/arrow/csv/converter.cc
index 0bc325d..837ba80 100644
--- a/cpp/src/arrow/csv/converter.cc
+++ b/cpp/src/arrow/csv/converter.cc
@@ -40,7 +40,6 @@ namespace arrow {
namespace csv {
using internal::checked_cast;
-using internal::StringConverter;
using internal::Trie;
using internal::TrieBuilder;
@@ -349,10 +348,9 @@ class NumericConverter : public ConcreteConverter {
Result<std::shared_ptr<Array>> Convert(const BlockParser& parser,
int32_t col_index) override {
using BuilderType = typename TypeTraits<T>::BuilderType;
- using value_type = typename StringConverter<T>::value_type;
+ using value_type = typename T::c_type;
BuilderType builder(type_, pool_);
- StringConverter<T> converter;
auto visit = [&](const uint8_t* data, uint32_t size, bool quoted) ->
Status {
// XXX should quoted values be allowed at all?
@@ -364,8 +362,8 @@ class NumericConverter : public ConcreteConverter {
if (!std::is_same<BooleanType, T>::value) {
TrimWhiteSpace(&data, &size);
}
- if (ARROW_PREDICT_FALSE(
- !converter(reinterpret_cast<const char*>(data), size, &value))) {
+ if (ARROW_PREDICT_FALSE(!internal::ParseValue<T>(
+ reinterpret_cast<const char*>(data), size, &value))) {
return GenericConversionError(type_, data, size);
}
builder.UnsafeAppend(value);
diff --git a/cpp/src/arrow/filesystem/hdfs.cc b/cpp/src/arrow/filesystem/hdfs.cc
index aa52f2b..0d5a60a 100644
--- a/cpp/src/arrow/filesystem/hdfs.cc
+++ b/cpp/src/arrow/filesystem/hdfs.cc
@@ -31,6 +31,7 @@
namespace arrow {
+using internal::ParseValue;
using internal::Uri;
namespace fs {
@@ -312,9 +313,8 @@ Result<HdfsOptions> HdfsOptions::FromUri(const Uri& uri) {
auto it = options_map.find("replication");
if (it != options_map.end()) {
const auto& v = it->second;
- ::arrow::internal::StringConverter<Int16Type> converter;
int16_t replication;
- if (!converter(v.data(), v.size(), &replication)) {
+ if (!ParseValue<Int16Type>(v.data(), v.size(), &replication)) {
return Status::Invalid("Invalid value for option 'replication': '", v,
"'");
}
options.ConfigureReplication(replication);
@@ -324,9 +324,8 @@ Result<HdfsOptions> HdfsOptions::FromUri(const Uri& uri) {
it = options_map.find("buffer_size");
if (it != options_map.end()) {
const auto& v = it->second;
- ::arrow::internal::StringConverter<Int32Type> converter;
int32_t buffer_size;
- if (!converter(v.data(), v.size(), &buffer_size)) {
+ if (!ParseValue<Int32Type>(v.data(), v.size(), &buffer_size)) {
return Status::Invalid("Invalid value for option 'buffer_size': '", v,
"'");
}
options.ConfigureBufferSize(buffer_size);
@@ -336,9 +335,8 @@ Result<HdfsOptions> HdfsOptions::FromUri(const Uri& uri) {
it = options_map.find("default_block_size");
if (it != options_map.end()) {
const auto& v = it->second;
- ::arrow::internal::StringConverter<Int64Type> converter;
int64_t default_block_size;
- if (!converter(v.data(), v.size(), &default_block_size)) {
+ if (!ParseValue<Int64Type>(v.data(), v.size(), &default_block_size)) {
return Status::Invalid("Invalid value for option 'default_block_size':
'", v, "'");
}
options.ConfigureBlockSize(default_block_size);
diff --git a/cpp/src/arrow/ipc/json_internal.cc b/cpp/src/arrow/ipc/json_internal.cc
index c771593..69ae80d 100644
--- a/cpp/src/arrow/ipc/json_internal.cc
+++ b/cpp/src/arrow/ipc/json_internal.cc
@@ -58,6 +58,7 @@ constexpr char kYearMonth[] = "YEAR_MONTH";
class MemoryPool;
using internal::checked_cast;
+using internal::ParseValue;
namespace ipc {
namespace internal {
@@ -1351,6 +1352,7 @@ class ArrayReader {
template <typename T>
Status GetIntArray(const RjArray& json_array, const int32_t length,
std::shared_ptr<Buffer>* out) {
+ using ArrowType = typename CTypeTraits<T>::ArrowType;
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBuffer(length * sizeof(T),
pool_));
T* values = reinterpret_cast<T*>(buffer->mutable_data());
@@ -1367,11 +1369,11 @@ class ArrayReader {
} else {
// Read 64-bit integers as strings, as JSON numbers cannot represent
// them exactly.
- ::arrow::internal::StringConverter<typename CTypeTraits<T>::ArrowType>
converter;
+
for (int i = 0; i < length; ++i) {
const rj::Value& val = json_array[i];
DCHECK(val.IsString());
- if (!converter(val.GetString(), val.GetStringLength(), &values[i])) {
+ if (!ParseValue<ArrowType>(val.GetString(), val.GetStringLength(),
&values[i])) {
return Status::Invalid("Failed to parse integer: '",
std::string(val.GetString(),
val.GetStringLength()),
"'");
diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc
index 52ec527..32a86ba 100644
--- a/cpp/src/arrow/ipc/json_simple.cc
+++ b/cpp/src/arrow/ipc/json_simple.cc
@@ -39,6 +39,9 @@
namespace rj = arrow::rapidjson;
namespace arrow {
+
+using internal::ParseValue;
+
namespace ipc {
namespace internal {
namespace json {
@@ -319,7 +322,7 @@ class DecimalConverter final : public
ConcreteConverter<DecimalConverter> {
class TimestampConverter final : public ConcreteConverter<TimestampConverter> {
public:
explicit TimestampConverter(const std::shared_ptr<DataType>& type)
- : from_string_(type) {
+ : parse_ctx_{checked_cast<const TimestampType&>(*type).unit()} {
this->type_ = type;
builder_ = std::make_shared<TimestampBuilder>(type, default_memory_pool());
}
@@ -335,7 +338,7 @@ class TimestampConverter final : public
ConcreteConverter<TimestampConverter> {
RETURN_NOT_OK(ConvertNumber<Int64Type>(json_obj, *this->type_, &value));
} else if (json_obj.IsString()) {
auto view = util::string_view(json_obj.GetString(),
json_obj.GetStringLength());
- if (!from_string_(view.data(), view.size(), &value)) {
+ if (!ParseValue<TimestampType>(view.data(), view.size(), &value,
&parse_ctx_)) {
return Status::Invalid("couldn't parse timestamp from ", view);
}
} else {
@@ -347,7 +350,7 @@ class TimestampConverter final : public
ConcreteConverter<TimestampConverter> {
std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
private:
- ::arrow::internal::StringConverter<TimestampType> from_string_;
+ ::arrow::internal::ParseTimestampContext parse_ctx_;
std::shared_ptr<TimestampBuilder> builder_;
};
diff --git a/cpp/src/arrow/json/converter.cc b/cpp/src/arrow/json/converter.cc
index 0872bb7..2346e12 100644
--- a/cpp/src/arrow/json/converter.cc
+++ b/cpp/src/arrow/json/converter.cc
@@ -103,12 +103,38 @@ class BooleanConverter : public PrimitiveConverter {
};
template <typename T>
-class NumericConverter : public PrimitiveConverter {
+struct ParserAdapter {
+ using value_type = typename T::c_type;
+
+ void InitParser(const DataType& type) {}
+
+ bool ConvertOne(const char* s, size_t length, value_type* out) {
+ return internal::ParseValue<T>(s, length, out);
+ }
+};
+
+template <>
+struct ParserAdapter<TimestampType> {
+ void InitParser(const DataType& type) {
+ this->unit = internal::checked_cast<const TimestampType&>(type).unit();
+ }
+
+ bool ConvertOne(const char* s, size_t length, int64_t* out) {
+ return internal::ParseTimestampISO8601(s, length, unit, out);
+ }
+
+ TimeUnit::type unit;
+};
+
+template <typename T>
+class NumericConverter : public PrimitiveConverter, public ParserAdapter<T> {
public:
- using value_type = typename internal::StringConverter<T>::value_type;
+ using value_type = typename T::c_type;
NumericConverter(MemoryPool* pool, const std::shared_ptr<DataType>& type)
- : PrimitiveConverter(pool, type), convert_one_(type) {}
+ : PrimitiveConverter(pool, type) {
+ ParserAdapter<T>::InitParser(*type);
+ }
Status Convert(const std::shared_ptr<Array>& in, std::shared_ptr<Array>*
out) override {
if (in->type_id() == Type::NA) {
@@ -122,7 +148,7 @@ class NumericConverter : public PrimitiveConverter {
auto visit_valid = [&](string_view repr) {
value_type value;
- if (!convert_one_(repr.data(), repr.size(), &value)) {
+ if (!ParserAdapter<T>::ConvertOne(repr.data(), repr.size(), &value)) {
return GenericConversionError(*out_type_, ", couldn't parse:", repr);
}
@@ -138,8 +164,6 @@ class NumericConverter : public PrimitiveConverter {
RETURN_NOT_OK(VisitDictionaryEntries(dict_array, visit_valid, visit_null));
return builder.Finish(out);
}
-
- internal::StringConverter<T> convert_one_;
};
template <typename DateTimeType>
diff --git a/cpp/src/arrow/python/deserialize.cc b/cpp/src/arrow/python/deserialize.cc
index 418eb87..a39d1d0 100644
--- a/cpp/src/arrow/python/deserialize.cc
+++ b/cpp/src/arrow/python/deserialize.cc
@@ -50,7 +50,7 @@
namespace arrow {
using internal::checked_cast;
-using internal::StringConverter;
+using internal::ParseValue;
namespace py {
@@ -232,10 +232,9 @@ Status GetPythonTypes(const UnionArray& data,
std::vector<int8_t>* result) {
ARROW_CHECK(result != nullptr);
auto type = data.type();
for (int i = 0; i < type->num_children(); ++i) {
- StringConverter<Int8Type> converter;
int8_t tag = 0;
const std::string& data = type->child(i)->name();
- if (!converter(data.c_str(), data.size(), &tag)) {
+ if (!ParseValue<Int8Type>(data.c_str(), data.size(), &tag)) {
return Status::SerializationError("Cannot convert string: \"",
type->child(i)->name(), "\" to
int8_t");
}
diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc
index 23234a0..a4fb02b 100644
--- a/cpp/src/arrow/scalar.cc
+++ b/cpp/src/arrow/scalar.cc
@@ -220,16 +220,25 @@ std::string Scalar::ToString() const {
}
struct ScalarParseImpl {
- template <typename T, typename Converter = internal::StringConverter<T>,
- typename Value = typename Converter::value_type>
+ // XXX Use of detail here not ideal
+ template <typename T,
+ typename Value = typename
internal::detail::StringConverter<T>::value_type>
Status Visit(const T& t) {
Value value;
- if (!Converter{type_}(s_.data(), s_.size(), &value)) {
+ if (!internal::ParseValue<T>(s_.data(), s_.size(), &value)) {
return Status::Invalid("error parsing '", s_, "' as scalar of type ", t);
}
return Finish(std::move(value));
}
+ Status Visit(const TimestampType& t) {
+ int64_t value;
+ if (!internal::ParseTimestampISO8601(s_.data(), s_.size(), t.unit(),
&value)) {
+ return Status::Invalid("error parsing '", s_, "' as scalar of type ", t);
+ }
+ return Finish(value);
+ }
+
Status Visit(const BinaryType&) { return FinishWithBuffer(); }
Status Visit(const LargeBinaryType&) { return FinishWithBuffer(); }
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index bda0ea3..354f979 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -180,12 +180,11 @@ static constexpr int64_t kPowersOfTen[kInt64DecimalDigits
+ 1] = {1LL,
// the appropriate power of 10 necessary to add source parsed as uint64 and
// then adds the parsed value of source.
static inline void ShiftAndAdd(const char* data, size_t length, Decimal128*
out) {
- internal::StringConverter<Int64Type> converter;
for (size_t posn = 0; posn < length;) {
const size_t group_size = std::min(kInt64DecimalDigits, length - posn);
const int64_t multiple = kPowersOfTen[group_size];
int64_t chunk = 0;
- ARROW_CHECK(converter(data + posn, group_size, &chunk));
+ ARROW_CHECK(internal::ParseValue<Int64Type>(data + posn, group_size,
&chunk));
*out *= multiple;
*out += chunk;
@@ -275,8 +274,7 @@ bool ParseDecimalComponents(const char* s, size_t size,
DecimalComponents* out)
++pos;
}
out->has_exponent = true;
- internal::StringConverter<Int32Type> exponent_converter;
- return exponent_converter(s + pos, size - pos, &(out->exponent));
+ return internal::ParseValue<Int32Type>(s + pos, size - pos,
&(out->exponent));
}
return pos == size;
}
diff --git a/cpp/src/arrow/util/uri.cc b/cpp/src/arrow/util/uri.cc
index a9bf0c5..b116a13 100644
--- a/cpp/src/arrow/util/uri.cc
+++ b/cpp/src/arrow/util/uri.cc
@@ -255,9 +255,8 @@ Status Uri::Parse(const std::string& uri_string) {
// Parse port number
auto port_text = TextRangeToView(impl_->uri_.portText);
if (port_text.size()) {
- StringConverter<UInt16Type> port_converter;
uint16_t port_num;
- if (!port_converter(port_text.data(), port_text.size(), &port_num)) {
+ if (!ParseValue<UInt16Type>(port_text.data(), port_text.size(),
&port_num)) {
return Status::Invalid("Invalid port number '", port_text, "' in URI '",
uri_string,
"'");
}
diff --git a/cpp/src/arrow/util/value_parsing.cc b/cpp/src/arrow/util/value_parsing.cc
index 0fe7e62..008590d 100644
--- a/cpp/src/arrow/util/value_parsing.cc
+++ b/cpp/src/arrow/util/value_parsing.cc
@@ -24,9 +24,10 @@
namespace arrow {
namespace internal {
+namespace detail {
-struct StringToFloatConverter::Impl {
- Impl()
+struct StringToFloatConverterImpl {
+ StringToFloatConverterImpl()
: main_converter_(flags_, main_junk_value_, main_junk_value_, "inf",
"nan"),
fallback_converter_(flags_, fallback_junk_value_,
fallback_junk_value_, "inf",
"nan") {}
@@ -43,23 +44,18 @@ struct StringToFloatConverter::Impl {
util::double_conversion::StringToDoubleConverter fallback_converter_;
};
-constexpr int StringToFloatConverter::Impl::flags_;
-constexpr double StringToFloatConverter::Impl::main_junk_value_;
-constexpr double StringToFloatConverter::Impl::fallback_junk_value_;
+static const StringToFloatConverterImpl g_string_to_float;
-StringToFloatConverter::StringToFloatConverter() : impl_(new Impl()) {}
-
-StringToFloatConverter::~StringToFloatConverter() {}
-
-bool StringToFloatConverter::StringToFloat(const char* s, size_t length,
float* out) {
+bool StringToFloat(const char* s, size_t length, float* out) {
int processed_length;
float v;
- v = impl_->main_converter_.StringToFloat(s, static_cast<int>(length),
- &processed_length);
- if (ARROW_PREDICT_FALSE(v == static_cast<float>(impl_->main_junk_value_))) {
- v = impl_->fallback_converter_.StringToFloat(s, static_cast<int>(length),
- &processed_length);
- if (ARROW_PREDICT_FALSE(v ==
static_cast<float>(impl_->fallback_junk_value_))) {
+ v = g_string_to_float.main_converter_.StringToFloat(s,
static_cast<int>(length),
+ &processed_length);
+ if (ARROW_PREDICT_FALSE(v ==
static_cast<float>(g_string_to_float.main_junk_value_))) {
+ v = g_string_to_float.fallback_converter_.StringToFloat(s,
static_cast<int>(length),
+ &processed_length);
+ if (ARROW_PREDICT_FALSE(v ==
+
static_cast<float>(g_string_to_float.fallback_junk_value_))) {
return false;
}
}
@@ -67,15 +63,15 @@ bool StringToFloatConverter::StringToFloat(const char* s,
size_t length, float*
return true;
}
-bool StringToFloatConverter::StringToFloat(const char* s, size_t length,
double* out) {
+bool StringToFloat(const char* s, size_t length, double* out) {
int processed_length;
double v;
- v = impl_->main_converter_.StringToDouble(s, static_cast<int>(length),
- &processed_length);
- if (ARROW_PREDICT_FALSE(v == impl_->main_junk_value_)) {
- v = impl_->fallback_converter_.StringToDouble(s, static_cast<int>(length),
- &processed_length);
- if (ARROW_PREDICT_FALSE(v == impl_->fallback_junk_value_)) {
+ v = g_string_to_float.main_converter_.StringToDouble(s,
static_cast<int>(length),
+ &processed_length);
+ if (ARROW_PREDICT_FALSE(v == g_string_to_float.main_junk_value_)) {
+ v = g_string_to_float.fallback_converter_.StringToDouble(s,
static_cast<int>(length),
+
&processed_length);
+ if (ARROW_PREDICT_FALSE(v == g_string_to_float.fallback_junk_value_)) {
return false;
}
}
@@ -83,6 +79,8 @@ bool StringToFloatConverter::StringToFloat(const char* s,
size_t length, double*
return true;
}
+} // namespace detail
+
// ----------------------------------------------------------------------
// strptime-like parsing
diff --git a/cpp/src/arrow/util/value_parsing.h b/cpp/src/arrow/util/value_parsing.h
index d797891..1f6c80e 100644
--- a/cpp/src/arrow/util/value_parsing.h
+++ b/cpp/src/arrow/util/value_parsing.h
@@ -55,26 +55,16 @@ class ARROW_EXPORT TimestampParser {
namespace internal {
-/// \brief A class providing conversion from strings to some Arrow data types
-///
-/// Conversion is triggered by calling operator(). It returns true on
-/// success, false on failure.
-///
-/// The class may have a non-trivial construction cost in some cases,
-/// so it's recommended to use a single instance many times, if doing bulk
-/// conversion. Instances of this class are not guaranteed to be thread-safe.
-///
+namespace detail {
+
template <typename ARROW_TYPE, typename Enable = void>
-class StringConverter;
+struct StringConverter;
template <>
-class StringConverter<BooleanType> {
- public:
- explicit StringConverter(const std::shared_ptr<DataType>& = NULLPTR) {}
-
+struct StringConverter<BooleanType> {
using value_type = bool;
- bool operator()(const char* s, size_t length, value_type* out) {
+ static bool Convert(const char* s, size_t length, value_type* out) {
if (length == 1) {
// "0" or "1"?
if (s[0] == '0') {
@@ -109,45 +99,32 @@ class StringConverter<BooleanType> {
// - https://github.com/google/double-conversion [used here]
// - https://github.com/achan001/dtoa-fast
-class ARROW_EXPORT StringToFloatConverter {
- public:
- StringToFloatConverter();
- ~StringToFloatConverter();
-
- bool StringToFloat(const char* s, size_t length, float* out);
- bool StringToFloat(const char* s, size_t length, double* out);
+ARROW_EXPORT
+bool StringToFloat(const char* s, size_t length, float* out);
- protected:
- struct Impl;
- std::unique_ptr<Impl> impl_;
-};
+ARROW_EXPORT
+bool StringToFloat(const char* s, size_t length, double* out);
-template <class ARROW_TYPE>
-class StringToFloatConverterMixin : public StringToFloatConverter {
- public:
- using value_type = typename ARROW_TYPE::c_type;
-
- explicit StringToFloatConverterMixin(const std::shared_ptr<DataType>& =
NULLPTR) {}
+template <>
+struct StringConverter<FloatType> {
+ using value_type = float;
- bool operator()(const char* s, size_t length, value_type* out) {
+ static bool Convert(const char* s, size_t length, value_type* out) {
return ARROW_PREDICT_TRUE(StringToFloat(s, length, out));
}
};
template <>
-class StringConverter<FloatType> : public
StringToFloatConverterMixin<FloatType> {
- using StringToFloatConverterMixin<FloatType>::StringToFloatConverterMixin;
-};
+struct StringConverter<DoubleType> {
+ using value_type = double;
-template <>
-class StringConverter<DoubleType> : public
StringToFloatConverterMixin<DoubleType> {
- using StringToFloatConverterMixin<DoubleType>::StringToFloatConverterMixin;
+ static bool Convert(const char* s, size_t length, value_type* out) {
+ return ARROW_PREDICT_TRUE(StringToFloat(s, length, out));
+ }
};
// NOTE: HalfFloatType would require a half<->float conversion library
-namespace detail {
-
inline uint8_t ParseDecimalDigit(char c) { return static_cast<uint8_t>(c -
'0'); }
#define PARSE_UNSIGNED_ITERATION(C_TYPE) \
@@ -261,17 +238,11 @@ inline bool ParseUnsigned(const char* s, size_t length,
uint64_t* out) {
#undef PARSE_UNSIGNED_ITERATION
#undef PARSE_UNSIGNED_ITERATION_LAST
-} // namespace detail
-
template <class ARROW_TYPE>
-class StringToUnsignedIntConverterMixin {
- public:
+struct StringToUnsignedIntConverterMixin {
using value_type = typename ARROW_TYPE::c_type;
- explicit StringToUnsignedIntConverterMixin(const std::shared_ptr<DataType>&
= NULLPTR) {
- }
-
- bool operator()(const char* s, size_t length, value_type* out) {
+ static bool Convert(const char* s, size_t length, value_type* out) {
if (ARROW_PREDICT_FALSE(length == 0)) {
return false;
}
@@ -280,39 +251,39 @@ class StringToUnsignedIntConverterMixin {
length--;
s++;
}
- return detail::ParseUnsigned(s, length, out);
+ return ParseUnsigned(s, length, out);
}
};
template <>
-class StringConverter<UInt8Type> : public
StringToUnsignedIntConverterMixin<UInt8Type> {
+struct StringConverter<UInt8Type> : public
StringToUnsignedIntConverterMixin<UInt8Type> {
using
StringToUnsignedIntConverterMixin<UInt8Type>::StringToUnsignedIntConverterMixin;
};
template <>
-class StringConverter<UInt16Type> : public
StringToUnsignedIntConverterMixin<UInt16Type> {
+struct StringConverter<UInt16Type>
+ : public StringToUnsignedIntConverterMixin<UInt16Type> {
using
StringToUnsignedIntConverterMixin<UInt16Type>::StringToUnsignedIntConverterMixin;
};
template <>
-class StringConverter<UInt32Type> : public
StringToUnsignedIntConverterMixin<UInt32Type> {
+struct StringConverter<UInt32Type>
+ : public StringToUnsignedIntConverterMixin<UInt32Type> {
using
StringToUnsignedIntConverterMixin<UInt32Type>::StringToUnsignedIntConverterMixin;
};
template <>
-class StringConverter<UInt64Type> : public
StringToUnsignedIntConverterMixin<UInt64Type> {
+struct StringConverter<UInt64Type>
+ : public StringToUnsignedIntConverterMixin<UInt64Type> {
using
StringToUnsignedIntConverterMixin<UInt64Type>::StringToUnsignedIntConverterMixin;
};
template <class ARROW_TYPE>
-class StringToSignedIntConverterMixin {
- public:
+struct StringToSignedIntConverterMixin {
using value_type = typename ARROW_TYPE::c_type;
using unsigned_type = typename std::make_unsigned<value_type>::type;
- explicit StringToSignedIntConverterMixin(const std::shared_ptr<DataType>& =
NULLPTR) {}
-
- bool operator()(const char* s, size_t length, value_type* out) {
+ static bool Convert(const char* s, size_t length, value_type* out) {
static constexpr unsigned_type max_positive =
static_cast<unsigned_type>(std::numeric_limits<value_type>::max());
// Assuming two's complement
@@ -335,7 +306,7 @@ class StringToSignedIntConverterMixin {
length--;
s++;
}
- if (!ARROW_PREDICT_TRUE(detail::ParseUnsigned(s, length,
&unsigned_value))) {
+ if (!ARROW_PREDICT_TRUE(ParseUnsigned(s, length, &unsigned_value))) {
return false;
}
if (negative) {
@@ -357,29 +328,27 @@ class StringToSignedIntConverterMixin {
};
template <>
-class StringConverter<Int8Type> : public
StringToSignedIntConverterMixin<Int8Type> {
+struct StringConverter<Int8Type> : public
StringToSignedIntConverterMixin<Int8Type> {
using
StringToSignedIntConverterMixin<Int8Type>::StringToSignedIntConverterMixin;
};
template <>
-class StringConverter<Int16Type> : public
StringToSignedIntConverterMixin<Int16Type> {
+struct StringConverter<Int16Type> : public
StringToSignedIntConverterMixin<Int16Type> {
using
StringToSignedIntConverterMixin<Int16Type>::StringToSignedIntConverterMixin;
};
template <>
-class StringConverter<Int32Type> : public
StringToSignedIntConverterMixin<Int32Type> {
+struct StringConverter<Int32Type> : public
StringToSignedIntConverterMixin<Int32Type> {
using
StringToSignedIntConverterMixin<Int32Type>::StringToSignedIntConverterMixin;
};
template <>
-class StringConverter<Int64Type> : public
StringToSignedIntConverterMixin<Int64Type> {
+struct StringConverter<Int64Type> : public
StringToSignedIntConverterMixin<Int64Type> {
using
StringToSignedIntConverterMixin<Int64Type>::StringToSignedIntConverterMixin;
};
// Inline-able ISO-8601 parser
-namespace detail {
-
using ts_type = TimestampType::c_type;
template <class TimePoint>
@@ -484,6 +453,14 @@ static inline bool ParseHH_MM_SS(const char* s,
std::chrono::duration<ts_type>*
} // namespace detail
+/// \brief Attempt to convert a string to the primitive type corresponding to
+/// an Arrow data type
+template <typename T, typename ParseContext = void>
+inline bool ParseValue(const char* s, size_t length, typename T::c_type* out,
+ const ParseContext* ctx = NULLPTR) {
+ return detail::StringConverter<T>::Convert(s, length, out);
+}
+
static inline bool ParseTimestampISO8601(const char* s, size_t length,
TimeUnit::type unit,
TimestampType::c_type* out) {
@@ -584,22 +561,16 @@ static inline bool ParseTimestampStrptime(const char*
buf, size_t length,
return true;
}
-// A StringConverter that parses ISO8601 at a fixed unit
-template <>
-class StringConverter<TimestampType> {
- public:
- using value_type = TimestampType::c_type;
-
- explicit StringConverter(const std::shared_ptr<DataType>& type)
- : unit_(checked_cast<TimestampType*>(type.get())->unit()) {}
-
- bool operator()(const char* s, size_t length, value_type* out) {
- return ParseTimestampISO8601(s, length, unit_, out);
- }
-
- private:
- const TimeUnit::type unit_;
+/// \brief Parsing options for timestamps
+struct ParseTimestampContext {
+ TimeUnit::type unit;
};
+template <>
+inline bool ParseValue<TimestampType, ParseTimestampContext>(
+ const char* s, size_t length, int64_t* out, const ParseTimestampContext*
ctx) {
+ return ParseTimestampISO8601(s, length, ctx->unit, out);
+}
+
} // namespace internal
} // namespace arrow
diff --git a/cpp/src/arrow/util/value_parsing_benchmark.cc b/cpp/src/arrow/util/value_parsing_benchmark.cc
index f09c016..c113c24 100644
--- a/cpp/src/arrow/util/value_parsing_benchmark.cc
+++ b/cpp/src/arrow/util/value_parsing_benchmark.cc
@@ -107,13 +107,12 @@ static std::vector<c_float> MakeFloats(int32_t num_items)
{
template <typename ARROW_TYPE, typename C_TYPE = typename ARROW_TYPE::c_type>
static void IntegerParsing(benchmark::State& state) { // NOLINT non-const
reference
auto strings = MakeIntStrings<C_TYPE>(1000);
- StringConverter<ARROW_TYPE> converter;
while (state.KeepRunning()) {
C_TYPE total = 0;
for (const auto& s : strings) {
C_TYPE value;
- if (!converter(s.data(), s.length(), &value)) {
+ if (!ParseValue<ARROW_TYPE>(s.data(), s.length(), &value)) {
std::cerr << "Conversion failed for '" << s << "'";
std::abort();
}
@@ -127,13 +126,12 @@ static void IntegerParsing(benchmark::State& state) { //
NOLINT non-const refer
template <typename ARROW_TYPE, typename C_TYPE = typename ARROW_TYPE::c_type>
static void FloatParsing(benchmark::State& state) { // NOLINT non-const
reference
auto strings = MakeFloatStrings(1000);
- StringConverter<ARROW_TYPE> converter;
while (state.KeepRunning()) {
C_TYPE total = 0;
for (const auto& s : strings) {
C_TYPE value;
- if (!converter(s.data(), s.length(), &value)) {
+ if (!ParseValue<ARROW_TYPE>(s.data(), s.length(), &value)) {
std::cerr << "Conversion failed for '" << s << "'";
std::abort();
}
diff --git a/cpp/src/arrow/util/value_parsing_test.cc b/cpp/src/arrow/util/value_parsing_test.cc
index 830ba67..2574401 100644
--- a/cpp/src/arrow/util/value_parsing_test.cc
+++ b/cpp/src/arrow/util/value_parsing_test.cc
@@ -27,58 +27,53 @@
namespace arrow {
namespace internal {
-template <typename ConverterType, typename C_TYPE>
-void AssertConversion(ConverterType& converter, const std::string& s, C_TYPE
expected) {
- typename ConverterType::value_type out;
- ASSERT_TRUE(converter(s.data(), s.length(), &out))
+template <typename T, typename Context = void>
+void AssertConversion(const std::string& s, typename T::c_type expected,
+ const Context* ctx = NULLPTR) {
+ typename T::c_type out;
+ ASSERT_TRUE(ParseValue<T>(s.data(), s.length(), &out, ctx))
<< "Conversion failed for '" << s << "' (expected to return " <<
expected << ")";
ASSERT_EQ(out, expected) << "Conversion failed for '" << s << "'";
}
-template <typename ConverterType>
-void AssertConversionFails(ConverterType& converter, const std::string& s) {
- typename ConverterType::value_type out;
- ASSERT_FALSE(converter(s.data(), s.length(), &out))
+template <typename T, typename Context = void>
+void AssertConversionFails(const std::string& s, const Context* ctx = NULLPTR)
{
+ typename T::c_type out;
+ ASSERT_FALSE(ParseValue<T>(s.data(), s.length(), &out, ctx))
<< "Conversion should have failed for '" << s << "' (returned " << out
<< ")";
}
TEST(StringConversion, ToBoolean) {
- StringConverter<BooleanType> converter;
-
- AssertConversion(converter, "true", true);
- AssertConversion(converter, "tRuE", true);
- AssertConversion(converter, "FAlse", false);
- AssertConversion(converter, "false", false);
- AssertConversion(converter, "1", true);
- AssertConversion(converter, "0", false);
-
- AssertConversionFails(converter, "");
+ AssertConversion<BooleanType>("true", true);
+ AssertConversion<BooleanType>("tRuE", true);
+ AssertConversion<BooleanType>("FAlse", false);
+ AssertConversion<BooleanType>("false", false);
+ AssertConversion<BooleanType>("1", true);
+ AssertConversion<BooleanType>("0", false);
+
+ AssertConversionFails<BooleanType>("");
}
TEST(StringConversion, ToFloat) {
- StringConverter<FloatType> converter;
-
- AssertConversion(converter, "1.5", 1.5f);
- AssertConversion(converter, "0", 0.0f);
+ AssertConversion<FloatType>("1.5", 1.5f);
+ AssertConversion<FloatType>("0", 0.0f);
// XXX ASSERT_EQ doesn't distinguish signed zeros
- AssertConversion(converter, "-0.0", -0.0f);
- AssertConversion(converter, "-1e20", -1e20f);
+ AssertConversion<FloatType>("-0.0", -0.0f);
+ AssertConversion<FloatType>("-1e20", -1e20f);
- AssertConversionFails(converter, "");
- AssertConversionFails(converter, "e");
+ AssertConversionFails<FloatType>("");
+ AssertConversionFails<FloatType>("e");
}
TEST(StringConversion, ToDouble) {
- StringConverter<DoubleType> converter;
-
- AssertConversion(converter, "1.5", 1.5);
- AssertConversion(converter, "0", 0);
+ AssertConversion<DoubleType>("1.5", 1.5);
+ AssertConversion<DoubleType>("0", 0);
// XXX ASSERT_EQ doesn't distinguish signed zeros
- AssertConversion(converter, "-0.0", -0.0);
- AssertConversion(converter, "-1e100", -1e100);
+ AssertConversion<DoubleType>("-0.0", -0.0);
+ AssertConversion<DoubleType>("-1e100", -1e100);
- AssertConversionFails(converter, "");
- AssertConversionFails(converter, "e");
+ AssertConversionFails<DoubleType>("");
+ AssertConversionFails<DoubleType>("e");
}
#if !defined(_WIN32) || defined(NDEBUG)
@@ -87,274 +82,256 @@ TEST(StringConversion, ToFloatLocale) {
// French locale uses the comma as decimal point
LocaleGuard locale_guard("fr_FR.UTF-8");
- StringConverter<FloatType> converter;
- AssertConversion(converter, "1.5", 1.5f);
+ AssertConversion<FloatType>("1.5", 1.5f);
}
TEST(StringConversion, ToDoubleLocale) {
// French locale uses the comma as decimal point
LocaleGuard locale_guard("fr_FR.UTF-8");
- StringConverter<DoubleType> converter;
- AssertConversion(converter, "1.5", 1.5f);
+ AssertConversion<DoubleType>("1.5", 1.5f);
}
#endif // _WIN32
TEST(StringConversion, ToInt8) {
- StringConverter<Int8Type> converter;
-
- AssertConversion(converter, "0", 0);
- AssertConversion(converter, "127", 127);
- AssertConversion(converter, "0127", 127);
- AssertConversion(converter, "-128", -128);
- AssertConversion(converter, "-00128", -128);
+ AssertConversion<Int8Type>("0", 0);
+ AssertConversion<Int8Type>("127", 127);
+ AssertConversion<Int8Type>("0127", 127);
+ AssertConversion<Int8Type>("-128", -128);
+ AssertConversion<Int8Type>("-00128", -128);
// Non-representable values
- AssertConversionFails(converter, "128");
- AssertConversionFails(converter, "-129");
+ AssertConversionFails<Int8Type>("128");
+ AssertConversionFails<Int8Type>("-129");
- AssertConversionFails(converter, "");
- AssertConversionFails(converter, "-");
- AssertConversionFails(converter, "0.0");
- AssertConversionFails(converter, "e");
+ AssertConversionFails<Int8Type>("");
+ AssertConversionFails<Int8Type>("-");
+ AssertConversionFails<Int8Type>("0.0");
+ AssertConversionFails<Int8Type>("e");
}
TEST(StringConversion, ToUInt8) {
- StringConverter<UInt8Type> converter;
-
- AssertConversion(converter, "0", 0);
- AssertConversion(converter, "26", 26);
- AssertConversion(converter, "255", 255);
- AssertConversion(converter, "0255", 255);
+ AssertConversion<UInt8Type>("0", 0);
+ AssertConversion<UInt8Type>("26", 26);
+ AssertConversion<UInt8Type>("255", 255);
+ AssertConversion<UInt8Type>("0255", 255);
// Non-representable values
- AssertConversionFails(converter, "-1");
- AssertConversionFails(converter, "256");
- AssertConversionFails(converter, "260");
- AssertConversionFails(converter, "1234");
-
- AssertConversionFails(converter, "");
- AssertConversionFails(converter, "-");
- AssertConversionFails(converter, "0.0");
- AssertConversionFails(converter, "e");
+ AssertConversionFails<UInt8Type>("-1");
+ AssertConversionFails<UInt8Type>("256");
+ AssertConversionFails<UInt8Type>("260");
+ AssertConversionFails<UInt8Type>("1234");
+
+ AssertConversionFails<UInt8Type>("");
+ AssertConversionFails<UInt8Type>("-");
+ AssertConversionFails<UInt8Type>("0.0");
+ AssertConversionFails<UInt8Type>("e");
}
TEST(StringConversion, ToInt16) {
- StringConverter<Int16Type> converter;
-
- AssertConversion(converter, "0", 0);
- AssertConversion(converter, "32767", 32767);
- AssertConversion(converter, "032767", 32767);
- AssertConversion(converter, "-32768", -32768);
- AssertConversion(converter, "-0032768", -32768);
+ AssertConversion<Int16Type>("0", 0);
+ AssertConversion<Int16Type>("32767", 32767);
+ AssertConversion<Int16Type>("032767", 32767);
+ AssertConversion<Int16Type>("-32768", -32768);
+ AssertConversion<Int16Type>("-0032768", -32768);
// Non-representable values
- AssertConversionFails(converter, "32768");
- AssertConversionFails(converter, "-32769");
+ AssertConversionFails<Int16Type>("32768");
+ AssertConversionFails<Int16Type>("-32769");
- AssertConversionFails(converter, "");
- AssertConversionFails(converter, "-");
- AssertConversionFails(converter, "0.0");
- AssertConversionFails(converter, "e");
+ AssertConversionFails<Int16Type>("");
+ AssertConversionFails<Int16Type>("-");
+ AssertConversionFails<Int16Type>("0.0");
+ AssertConversionFails<Int16Type>("e");
}
TEST(StringConversion, ToUInt16) {
- StringConverter<UInt16Type> converter;
-
- AssertConversion(converter, "0", 0);
- AssertConversion(converter, "6660", 6660);
- AssertConversion(converter, "65535", 65535);
- AssertConversion(converter, "065535", 65535);
+ AssertConversion<UInt16Type>("0", 0);
+ AssertConversion<UInt16Type>("6660", 6660);
+ AssertConversion<UInt16Type>("65535", 65535);
+ AssertConversion<UInt16Type>("065535", 65535);
// Non-representable values
- AssertConversionFails(converter, "-1");
- AssertConversionFails(converter, "65536");
- AssertConversionFails(converter, "123456");
-
- AssertConversionFails(converter, "");
- AssertConversionFails(converter, "-");
- AssertConversionFails(converter, "0.0");
- AssertConversionFails(converter, "e");
+ AssertConversionFails<UInt16Type>("-1");
+ AssertConversionFails<UInt16Type>("65536");
+ AssertConversionFails<UInt16Type>("123456");
+
+ AssertConversionFails<UInt16Type>("");
+ AssertConversionFails<UInt16Type>("-");
+ AssertConversionFails<UInt16Type>("0.0");
+ AssertConversionFails<UInt16Type>("e");
}
TEST(StringConversion, ToInt32) {
- StringConverter<Int32Type> converter;
-
- AssertConversion(converter, "0", 0);
- AssertConversion(converter, "2147483647", 2147483647);
- AssertConversion(converter, "02147483647", 2147483647);
- AssertConversion(converter, "-2147483648", -2147483648LL);
- AssertConversion(converter, "-002147483648", -2147483648LL);
+ AssertConversion<Int32Type>("0", 0);
+ AssertConversion<Int32Type>("2147483647", 2147483647);
+ AssertConversion<Int32Type>("02147483647", 2147483647);
+ AssertConversion<Int32Type>("-2147483648", -2147483648LL);
+ AssertConversion<Int32Type>("-002147483648", -2147483648LL);
// Non-representable values
- AssertConversionFails(converter, "2147483648");
- AssertConversionFails(converter, "-2147483649");
+ AssertConversionFails<Int32Type>("2147483648");
+ AssertConversionFails<Int32Type>("-2147483649");
- AssertConversionFails(converter, "");
- AssertConversionFails(converter, "-");
- AssertConversionFails(converter, "0.0");
- AssertConversionFails(converter, "e");
+ AssertConversionFails<Int32Type>("");
+ AssertConversionFails<Int32Type>("-");
+ AssertConversionFails<Int32Type>("0.0");
+ AssertConversionFails<Int32Type>("e");
}
TEST(StringConversion, ToUInt32) {
- StringConverter<UInt32Type> converter;
-
- AssertConversion(converter, "0", 0);
- AssertConversion(converter, "432198765", 432198765UL);
- AssertConversion(converter, "4294967295", 4294967295UL);
- AssertConversion(converter, "04294967295", 4294967295UL);
+ AssertConversion<UInt32Type>("0", 0);
+ AssertConversion<UInt32Type>("432198765", 432198765UL);
+ AssertConversion<UInt32Type>("4294967295", 4294967295UL);
+ AssertConversion<UInt32Type>("04294967295", 4294967295UL);
// Non-representable values
- AssertConversionFails(converter, "-1");
- AssertConversionFails(converter, "4294967296");
- AssertConversionFails(converter, "12345678901");
-
- AssertConversionFails(converter, "");
- AssertConversionFails(converter, "-");
- AssertConversionFails(converter, "0.0");
- AssertConversionFails(converter, "e");
+ AssertConversionFails<UInt32Type>("-1");
+ AssertConversionFails<UInt32Type>("4294967296");
+ AssertConversionFails<UInt32Type>("12345678901");
+
+ AssertConversionFails<UInt32Type>("");
+ AssertConversionFails<UInt32Type>("-");
+ AssertConversionFails<UInt32Type>("0.0");
+ AssertConversionFails<UInt32Type>("e");
}
TEST(StringConversion, ToInt64) {
- StringConverter<Int64Type> converter;
-
- AssertConversion(converter, "0", 0);
- AssertConversion(converter, "9223372036854775807", 9223372036854775807LL);
- AssertConversion(converter, "09223372036854775807", 9223372036854775807LL);
- AssertConversion(converter, "-9223372036854775808", -9223372036854775807LL - 1);
- AssertConversion(converter, "-009223372036854775808", -9223372036854775807LL - 1);
+ AssertConversion<Int64Type>("0", 0);
+ AssertConversion<Int64Type>("9223372036854775807", 9223372036854775807LL);
+ AssertConversion<Int64Type>("09223372036854775807", 9223372036854775807LL);
+ AssertConversion<Int64Type>("-9223372036854775808", -9223372036854775807LL - 1);
+ AssertConversion<Int64Type>("-009223372036854775808", -9223372036854775807LL - 1);
// Non-representable values
- AssertConversionFails(converter, "9223372036854775808");
- AssertConversionFails(converter, "-9223372036854775809");
+ AssertConversionFails<Int64Type>("9223372036854775808");
+ AssertConversionFails<Int64Type>("-9223372036854775809");
- AssertConversionFails(converter, "");
- AssertConversionFails(converter, "-");
- AssertConversionFails(converter, "0.0");
- AssertConversionFails(converter, "e");
+ AssertConversionFails<Int64Type>("");
+ AssertConversionFails<Int64Type>("-");
+ AssertConversionFails<Int64Type>("0.0");
+ AssertConversionFails<Int64Type>("e");
}
TEST(StringConversion, ToUInt64) {
- StringConverter<UInt64Type> converter;
-
- AssertConversion(converter, "0", 0);
- AssertConversion(converter, "18446744073709551615", 18446744073709551615ULL);
+ AssertConversion<UInt64Type>("0", 0);
+ AssertConversion<UInt64Type>("18446744073709551615", 18446744073709551615ULL);
// Non-representable values
- AssertConversionFails(converter, "-1");
- AssertConversionFails(converter, "18446744073709551616");
+ AssertConversionFails<UInt64Type>("-1");
+ AssertConversionFails<UInt64Type>("18446744073709551616");
- AssertConversionFails(converter, "");
- AssertConversionFails(converter, "-");
- AssertConversionFails(converter, "0.0");
- AssertConversionFails(converter, "e");
+ AssertConversionFails<UInt64Type>("");
+ AssertConversionFails<UInt64Type>("-");
+ AssertConversionFails<UInt64Type>("0.0");
+ AssertConversionFails<UInt64Type>("e");
}
TEST(StringConversion, ToTimestampDate_ISO8601) {
{
- StringConverter<TimestampType> converter(timestamp(TimeUnit::SECOND));
+ ParseTimestampContext ctx{TimeUnit::SECOND};
- AssertConversion(converter, "1970-01-01", 0);
- AssertConversion(converter, "1989-07-14", 616377600);
- AssertConversion(converter, "2000-02-29", 951782400);
- AssertConversion(converter, "3989-07-14", 63730281600LL);
- AssertConversion(converter, "1900-02-28", -2203977600LL);
+ AssertConversion<TimestampType>("1970-01-01", 0, &ctx);
+ AssertConversion<TimestampType>("1989-07-14", 616377600, &ctx);
+ AssertConversion<TimestampType>("2000-02-29", 951782400, &ctx);
+ AssertConversion<TimestampType>("3989-07-14", 63730281600LL, &ctx);
+ AssertConversion<TimestampType>("1900-02-28", -2203977600LL, &ctx);
- AssertConversionFails(converter, "");
- AssertConversionFails(converter, "1970");
- AssertConversionFails(converter, "19700101");
- AssertConversionFails(converter, "1970/01/01");
- AssertConversionFails(converter, "1970-01-01 ");
- AssertConversionFails(converter, "1970-01-01Z");
+ AssertConversionFails<TimestampType>("", &ctx);
+ AssertConversionFails<TimestampType>("1970", &ctx);
+ AssertConversionFails<TimestampType>("19700101", &ctx);
+ AssertConversionFails<TimestampType>("1970/01/01", &ctx);
+ AssertConversionFails<TimestampType>("1970-01-01 ", &ctx);
+ AssertConversionFails<TimestampType>("1970-01-01Z", &ctx);
// Invalid dates
- AssertConversionFails(converter, "1970-00-01");
- AssertConversionFails(converter, "1970-13-01");
- AssertConversionFails(converter, "1970-01-32");
- AssertConversionFails(converter, "1970-02-29");
- AssertConversionFails(converter, "2100-02-29");
+ AssertConversionFails<TimestampType>("1970-00-01", &ctx);
+ AssertConversionFails<TimestampType>("1970-13-01", &ctx);
+ AssertConversionFails<TimestampType>("1970-01-32", &ctx);
+ AssertConversionFails<TimestampType>("1970-02-29", &ctx);
+ AssertConversionFails<TimestampType>("2100-02-29", &ctx);
}
{
- StringConverter<TimestampType> converter(timestamp(TimeUnit::MILLI));
+ ParseTimestampContext ctx{TimeUnit::MILLI};
- AssertConversion(converter, "1970-01-01", 0);
- AssertConversion(converter, "1989-07-14", 616377600000LL);
- AssertConversion(converter, "3989-07-14", 63730281600000LL);
- AssertConversion(converter, "1900-02-28", -2203977600000LL);
+ AssertConversion<TimestampType>("1970-01-01", 0, &ctx);
+ AssertConversion<TimestampType>("1989-07-14", 616377600000LL, &ctx);
+ AssertConversion<TimestampType>("3989-07-14", 63730281600000LL, &ctx);
+ AssertConversion<TimestampType>("1900-02-28", -2203977600000LL, &ctx);
}
{
- StringConverter<TimestampType> converter(timestamp(TimeUnit::MICRO));
+ ParseTimestampContext ctx{TimeUnit::MICRO};
- AssertConversion(converter, "1970-01-01", 0);
- AssertConversion(converter, "1989-07-14", 616377600000000LL);
- AssertConversion(converter, "3989-07-14", 63730281600000000LL);
- AssertConversion(converter, "1900-02-28", -2203977600000000LL);
+ AssertConversion<TimestampType>("1970-01-01", 0, &ctx);
+ AssertConversion<TimestampType>("1989-07-14", 616377600000000LL, &ctx);
+ AssertConversion<TimestampType>("3989-07-14", 63730281600000000LL, &ctx);
+ AssertConversion<TimestampType>("1900-02-28", -2203977600000000LL, &ctx);
}
{
- StringConverter<TimestampType> converter(timestamp(TimeUnit::NANO));
+ ParseTimestampContext ctx{TimeUnit::NANO};
- AssertConversion(converter, "1970-01-01", 0);
- AssertConversion(converter, "1989-07-14", 616377600000000000LL);
- AssertConversion(converter, "2018-11-13", 1542067200000000000LL);
- AssertConversion(converter, "1900-02-28", -2203977600000000000LL);
+ AssertConversion<TimestampType>("1970-01-01", 0, &ctx);
+ AssertConversion<TimestampType>("1989-07-14", 616377600000000000LL, &ctx);
+ AssertConversion<TimestampType>("2018-11-13", 1542067200000000000LL, &ctx);
+ AssertConversion<TimestampType>("1900-02-28", -2203977600000000000LL, &ctx);
}
}
TEST(StringConversion, ToTimestampDateTime_ISO8601) {
{
- StringConverter<TimestampType> converter(timestamp(TimeUnit::SECOND));
-
- AssertConversion(converter, "1970-01-01 00:00:00", 0);
- AssertConversion(converter, "2018-11-13 17", 1542128400);
- AssertConversion(converter, "2018-11-13T17", 1542128400);
- AssertConversion(converter, "2018-11-13 17Z", 1542128400);
- AssertConversion(converter, "2018-11-13T17Z", 1542128400);
- AssertConversion(converter, "2018-11-13 17:11", 1542129060);
- AssertConversion(converter, "2018-11-13T17:11", 1542129060);
- AssertConversion(converter, "2018-11-13 17:11Z", 1542129060);
- AssertConversion(converter, "2018-11-13T17:11Z", 1542129060);
- AssertConversion(converter, "2018-11-13 17:11:10", 1542129070);
- AssertConversion(converter, "2018-11-13T17:11:10", 1542129070);
- AssertConversion(converter, "2018-11-13 17:11:10Z", 1542129070);
- AssertConversion(converter, "2018-11-13T17:11:10Z", 1542129070);
- AssertConversion(converter, "1900-02-28 12:34:56", -2203932304LL);
+ ParseTimestampContext ctx{TimeUnit::SECOND};
+
+ AssertConversion<TimestampType>("1970-01-01 00:00:00", 0, &ctx);
+ AssertConversion<TimestampType>("2018-11-13 17", 1542128400, &ctx);
+ AssertConversion<TimestampType>("2018-11-13T17", 1542128400, &ctx);
+ AssertConversion<TimestampType>("2018-11-13 17Z", 1542128400, &ctx);
+ AssertConversion<TimestampType>("2018-11-13T17Z", 1542128400, &ctx);
+ AssertConversion<TimestampType>("2018-11-13 17:11", 1542129060, &ctx);
+ AssertConversion<TimestampType>("2018-11-13T17:11", 1542129060, &ctx);
+ AssertConversion<TimestampType>("2018-11-13 17:11Z", 1542129060, &ctx);
+ AssertConversion<TimestampType>("2018-11-13T17:11Z", 1542129060, &ctx);
+ AssertConversion<TimestampType>("2018-11-13 17:11:10", 1542129070, &ctx);
+ AssertConversion<TimestampType>("2018-11-13T17:11:10", 1542129070, &ctx);
+ AssertConversion<TimestampType>("2018-11-13 17:11:10Z", 1542129070, &ctx);
+ AssertConversion<TimestampType>("2018-11-13T17:11:10Z", 1542129070, &ctx);
+ AssertConversion<TimestampType>("1900-02-28 12:34:56", -2203932304LL, &ctx);
// Invalid dates
- AssertConversionFails(converter, "1970-02-29 00:00:00");
- AssertConversionFails(converter, "2100-02-29 00:00:00");
+ AssertConversionFails<TimestampType>("1970-02-29 00:00:00", &ctx);
+ AssertConversionFails<TimestampType>("2100-02-29 00:00:00", &ctx);
// Invalid times
- AssertConversionFails(converter, "1970-01-01 24");
- AssertConversionFails(converter, "1970-01-01 00:60");
- AssertConversionFails(converter, "1970-01-01 00,00");
- AssertConversionFails(converter, "1970-01-01 24:00:00");
- AssertConversionFails(converter, "1970-01-01 00:60:00");
- AssertConversionFails(converter, "1970-01-01 00:00:60");
- AssertConversionFails(converter, "1970-01-01 00:00,00");
- AssertConversionFails(converter, "1970-01-01 00,00:00");
+ AssertConversionFails<TimestampType>("1970-01-01 24", &ctx);
+ AssertConversionFails<TimestampType>("1970-01-01 00:60", &ctx);
+ AssertConversionFails<TimestampType>("1970-01-01 00,00", &ctx);
+ AssertConversionFails<TimestampType>("1970-01-01 24:00:00", &ctx);
+ AssertConversionFails<TimestampType>("1970-01-01 00:60:00", &ctx);
+ AssertConversionFails<TimestampType>("1970-01-01 00:00:60", &ctx);
+ AssertConversionFails<TimestampType>("1970-01-01 00:00,00", &ctx);
+ AssertConversionFails<TimestampType>("1970-01-01 00,00:00", &ctx);
}
{
- StringConverter<TimestampType> converter(timestamp(TimeUnit::MILLI));
+ ParseTimestampContext ctx{TimeUnit::MILLI};
- AssertConversion(converter, "2018-11-13 17:11:10", 1542129070000LL);
- AssertConversion(converter, "2018-11-13T17:11:10Z", 1542129070000LL);
- AssertConversion(converter, "3989-07-14T11:22:33Z", 63730322553000LL);
- AssertConversion(converter, "1900-02-28 12:34:56", -2203932304000LL);
+ AssertConversion<TimestampType>("2018-11-13 17:11:10", 1542129070000LL, &ctx);
+ AssertConversion<TimestampType>("2018-11-13T17:11:10Z", 1542129070000LL, &ctx);
+ AssertConversion<TimestampType>("3989-07-14T11:22:33Z", 63730322553000LL, &ctx);
+ AssertConversion<TimestampType>("1900-02-28 12:34:56", -2203932304000LL, &ctx);
}
{
- StringConverter<TimestampType> converter(timestamp(TimeUnit::MICRO));
+ ParseTimestampContext ctx{TimeUnit::MICRO};
- AssertConversion(converter, "2018-11-13 17:11:10", 1542129070000000LL);
- AssertConversion(converter, "2018-11-13T17:11:10Z", 1542129070000000LL);
- AssertConversion(converter, "3989-07-14T11:22:33Z", 63730322553000000LL);
- AssertConversion(converter, "1900-02-28 12:34:56", -2203932304000000LL);
+ AssertConversion<TimestampType>("2018-11-13 17:11:10", 1542129070000000LL, &ctx);
+ AssertConversion<TimestampType>("2018-11-13T17:11:10Z", 1542129070000000LL, &ctx);
+ AssertConversion<TimestampType>("3989-07-14T11:22:33Z", 63730322553000000LL, &ctx);
+ AssertConversion<TimestampType>("1900-02-28 12:34:56", -2203932304000000LL, &ctx);
}
{
- StringConverter<TimestampType> converter(timestamp(TimeUnit::NANO));
+ ParseTimestampContext ctx{TimeUnit::NANO};
- AssertConversion(converter, "2018-11-13 17:11:10", 1542129070000000000LL);
- AssertConversion(converter, "2018-11-13T17:11:10Z", 1542129070000000000LL);
- AssertConversion(converter, "1900-02-28 12:34:56", -2203932304000000000LL);
+ AssertConversion<TimestampType>("2018-11-13 17:11:10", 1542129070000000000LL, &ctx);
+ AssertConversion<TimestampType>("2018-11-13T17:11:10Z", 1542129070000000000LL, &ctx);
+ AssertConversion<TimestampType>("1900-02-28 12:34:56", -2203932304000000000LL, &ctx);
}
}