This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new b15ac656 refactor: use std::from_chars instead of stoi/stoll/stod
(#556)
b15ac656 is described below
commit b15ac656cb20788a2103a3321c45f6676958ba69
Author: Zehua Zou <[email protected]>
AuthorDate: Mon Feb 9 12:45:11 2026 +0800
refactor: use std::from_chars instead of stoi/stoll/stod (#556)
---
src/iceberg/avro/avro_schema_util.cc | 9 ++++-----
src/iceberg/avro/avro_writer.cc | 2 +-
src/iceberg/json_serde.cc | 12 +++++++++---
src/iceberg/metrics_config.cc | 11 ++++++-----
src/iceberg/parquet/parquet_schema_util.cc | 13 +++++--------
src/iceberg/parquet/parquet_writer.cc | 2 +-
src/iceberg/result.h | 4 ++--
src/iceberg/snapshot.cc | 4 ++--
src/iceberg/test/update_properties_test.cc | 2 +-
src/iceberg/transform.cc | 4 +++-
src/iceberg/update/snapshot_update.cc | 12 ++++++------
src/iceberg/update/update_properties.cc | 2 +-
src/iceberg/util/config.h | 10 ++++++----
src/iceberg/util/decimal.cc | 9 ++++-----
src/iceberg/util/string_util.h | 18 ++++++++++++------
15 files changed, 63 insertions(+), 51 deletions(-)
diff --git a/src/iceberg/avro/avro_schema_util.cc
b/src/iceberg/avro/avro_schema_util.cc
index b19ffce7..75db6d8d 100644
--- a/src/iceberg/avro/avro_schema_util.cc
+++ b/src/iceberg/avro/avro_schema_util.cc
@@ -18,7 +18,6 @@
*/
#include <format>
-#include <mutex>
#include <sstream>
#include <string_view>
@@ -31,13 +30,12 @@
#include <avro/ValidSchema.hh>
#include "iceberg/avro/avro_constants.h"
-#include "iceberg/avro/avro_register.h"
#include "iceberg/avro/avro_schema_util_internal.h"
#include "iceberg/metadata_columns.h"
#include "iceberg/name_mapping.h"
#include "iceberg/schema.h"
#include "iceberg/schema_util_internal.h"
-#include "iceberg/util/formatter.h"
+#include "iceberg/util/formatter.h" // IWYU pragma: keep
#include "iceberg/util/macros.h"
#include "iceberg/util/string_util.h"
#include "iceberg/util/visit_type.h"
@@ -471,7 +469,7 @@ Result<int32_t> GetId(const ::avro::NodePtr& node, const
std::string& attr_name,
return InvalidSchema("Missing avro attribute: {}", attr_name);
}
- return StringUtils::ParseInt<int32_t>(id_str.value());
+ return StringUtils::ParseNumber<int32_t>(id_str.value());
}
Result<int32_t> GetElementId(const ::avro::NodePtr& node) {
@@ -729,7 +727,8 @@ Result<FieldProjection> ProjectMap(const MapType& map_type,
const auto& expected_value_field = map_type.value();
FieldProjection result;
- int32_t avro_key_id, avro_value_id;
+ int32_t avro_key_id;
+ int32_t avro_value_id;
::avro::NodePtr map_node;
if (avro_node->type() == ::avro::AVRO_MAP) {
diff --git a/src/iceberg/avro/avro_writer.cc b/src/iceberg/avro/avro_writer.cc
index 91b2a93f..350530b3 100644
--- a/src/iceberg/avro/avro_writer.cc
+++ b/src/iceberg/avro/avro_writer.cc
@@ -76,7 +76,7 @@ Result<std::optional<int32_t>> ParseCodecLevel(const
WriterProperties& propertie
if (level_str.empty()) {
return std::nullopt;
}
- ICEBERG_ASSIGN_OR_RAISE(auto level,
StringUtils::ParseInt<int32_t>(level_str));
+ ICEBERG_ASSIGN_OR_RAISE(auto level,
StringUtils::ParseNumber<int32_t>(level_str));
return level;
}
diff --git a/src/iceberg/json_serde.cc b/src/iceberg/json_serde.cc
index 004e8b3f..7d6c9ee2 100644
--- a/src/iceberg/json_serde.cc
+++ b/src/iceberg/json_serde.cc
@@ -47,6 +47,7 @@
#include "iceberg/util/formatter.h" // IWYU pragma: keep
#include "iceberg/util/json_util_internal.h"
#include "iceberg/util/macros.h"
+#include "iceberg/util/string_util.h"
#include "iceberg/util/timepoint.h"
namespace iceberg {
@@ -497,15 +498,20 @@ Result<std::unique_ptr<Type>> TypeFromJson(const
nlohmann::json& json) {
std::regex fixed_regex(R"(fixed\[\s*(\d+)\s*\])");
std::smatch match;
if (std::regex_match(type_str, match, fixed_regex)) {
- return std::make_unique<FixedType>(std::stoi(match[1].str()));
+ ICEBERG_ASSIGN_OR_RAISE(auto length,
+
StringUtils::ParseNumber<int32_t>(match[1].str()));
+ return std::make_unique<FixedType>(length);
}
return JsonParseError("Invalid fixed type: {}", type_str);
} else if (type_str.starts_with("decimal")) {
std::regex decimal_regex(R"(decimal\(\s*(\d+)\s*,\s*(\d+)\s*\))");
std::smatch match;
if (std::regex_match(type_str, match, decimal_regex)) {
- return std::make_unique<DecimalType>(std::stoi(match[1].str()),
- std::stoi(match[2].str()));
+ ICEBERG_ASSIGN_OR_RAISE(auto precision,
+
StringUtils::ParseNumber<int32_t>(match[1].str()));
+ ICEBERG_ASSIGN_OR_RAISE(auto scale,
+
StringUtils::ParseNumber<int32_t>(match[2].str()));
+ return std::make_unique<DecimalType>(precision, scale);
}
return JsonParseError("Invalid decimal type: {}", type_str);
} else {
diff --git a/src/iceberg/metrics_config.cc b/src/iceberg/metrics_config.cc
index 8e2c7262..e378640e 100644
--- a/src/iceberg/metrics_config.cc
+++ b/src/iceberg/metrics_config.cc
@@ -19,7 +19,6 @@
#include "iceberg/metrics_config.h"
-#include <charconv>
#include <string>
#include <unordered_map>
@@ -29,6 +28,8 @@
#include "iceberg/table.h"
#include "iceberg/table_properties.h"
#include "iceberg/util/checked_cast.h"
+#include "iceberg/util/macros.h"
+#include "iceberg/util/string_util.h"
#include "iceberg/util/type_util.h"
namespace iceberg {
@@ -84,12 +85,12 @@ Result<MetricsMode>
MetricsMode::FromString(std::string_view mode) {
}
if (StringUtils::StartsWithIgnoreCase(mode, kTruncatePrefix) &&
mode.ends_with(")")) {
- int32_t length;
- auto [ptr, ec] = std::from_chars(mode.data() + 9 /* "truncate(" length */,
- mode.data() + mode.size() - 1, length);
- if (ec != std::errc{}) {
+ auto res = StringUtils::ParseNumber<int32_t>(
+ mode.substr(9 /* "truncate(" length */, mode.size() - 10));
+ if (!res.has_value()) {
return InvalidArgument("Invalid truncate mode: {}", mode);
}
+ int32_t length = res.value();
if (length == kDefaultTruncateLength) {
return kDefaultMetricsMode;
}
diff --git a/src/iceberg/parquet/parquet_schema_util.cc
b/src/iceberg/parquet/parquet_schema_util.cc
index 088c15c0..e9574a48 100644
--- a/src/iceberg/parquet/parquet_schema_util.cc
+++ b/src/iceberg/parquet/parquet_schema_util.cc
@@ -17,8 +17,6 @@
* under the License.
*/
-#include <charconv>
-
#include <arrow/extension_type.h>
#include <arrow/type.h>
#include <arrow/type_fwd.h>
@@ -32,8 +30,9 @@
#include "iceberg/result.h"
#include "iceberg/schema_util_internal.h"
#include "iceberg/util/checked_cast.h"
-#include "iceberg/util/formatter.h"
+#include "iceberg/util/formatter.h" // IWYU pragma: keep
#include "iceberg/util/macros.h"
+#include "iceberg/util/string_util.h"
namespace iceberg::parquet {
@@ -49,13 +48,11 @@ std::optional<int32_t> FieldIdFromMetadata(
return std::nullopt;
}
std::string field_id_str = metadata->value(key);
- int32_t field_id = -1;
- auto [_, ec] = std::from_chars(field_id_str.data(),
- field_id_str.data() + field_id_str.size(),
field_id);
- if (ec != std::errc() || field_id < 0) {
+ auto res = StringUtils::ParseNumber<int32_t>(field_id_str);
+ if (!res.has_value() || res.value() < 0) {
return std::nullopt;
}
- return field_id;
+ return res.value();
}
std::optional<int32_t> GetFieldId(const ::parquet::arrow::SchemaField&
parquet_field) {
diff --git a/src/iceberg/parquet/parquet_writer.cc
b/src/iceberg/parquet/parquet_writer.cc
index bac25c9e..c874665a 100644
--- a/src/iceberg/parquet/parquet_writer.cc
+++ b/src/iceberg/parquet/parquet_writer.cc
@@ -70,7 +70,7 @@ Result<std::optional<int32_t>> ParseCodecLevel(const
WriterProperties& propertie
if (level_str.empty()) {
return std::nullopt;
}
- ICEBERG_ASSIGN_OR_RAISE(auto level,
StringUtils::ParseInt<int32_t>(level_str));
+ ICEBERG_ASSIGN_OR_RAISE(auto level,
StringUtils::ParseNumber<int32_t>(level_str));
return level;
}
diff --git a/src/iceberg/result.h b/src/iceberg/result.h
index 223352f6..76550870 100644
--- a/src/iceberg/result.h
+++ b/src/iceberg/result.h
@@ -88,8 +88,8 @@ using Status = Result<void>;
return std::unexpected<Error>( \
{ErrorKind::k##name, std::format(fmt, std::forward<Args>(args)...)}); \
} \
- inline auto name(const std::string& message) -> std::unexpected<Error> { \
- return std::unexpected<Error>({ErrorKind::k##name, message}); \
+ inline auto name(std::string message) -> std::unexpected<Error> { \
+ return std::unexpected<Error>({ErrorKind::k##name, std::move(message)}); \
}
DEFINE_ERROR_FUNCTION(AlreadyExists)
diff --git a/src/iceberg/snapshot.cc b/src/iceberg/snapshot.cc
index cdbdd452..1b3182fd 100644
--- a/src/iceberg/snapshot.cc
+++ b/src/iceberg/snapshot.cc
@@ -167,7 +167,7 @@ Result<std::optional<int64_t>> Snapshot::FirstRowId() const
{
return std::nullopt;
}
- return StringUtils::ParseInt<int64_t>(it->second);
+ return StringUtils::ParseNumber<int64_t>(it->second);
}
Result<std::optional<int64_t>> Snapshot::AddedRows() const {
@@ -176,7 +176,7 @@ Result<std::optional<int64_t>> Snapshot::AddedRows() const {
return std::nullopt;
}
- return StringUtils::ParseInt<int64_t>(it->second);
+ return StringUtils::ParseNumber<int64_t>(it->second);
}
bool Snapshot::Equals(const Snapshot& other) const {
diff --git a/src/iceberg/test/update_properties_test.cc
b/src/iceberg/test/update_properties_test.cc
index 59fa1d8d..88655815 100644
--- a/src/iceberg/test/update_properties_test.cc
+++ b/src/iceberg/test/update_properties_test.cc
@@ -107,7 +107,7 @@ TEST_F(UpdatePropertiesTest,
UpgradeFormatVersionInvalidString) {
auto result = update->Apply();
EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument));
- EXPECT_THAT(result, HasErrorMessage("Failed to parse integer from string"));
+ EXPECT_THAT(result, HasErrorMessage("invalid argument"));
}
TEST_F(UpdatePropertiesTest, UpgradeFormatVersionOutOfRange) {
diff --git a/src/iceberg/transform.cc b/src/iceberg/transform.cc
index 004111c3..c210f9ed 100644
--- a/src/iceberg/transform.cc
+++ b/src/iceberg/transform.cc
@@ -31,6 +31,7 @@
#include "iceberg/util/checked_cast.h"
#include "iceberg/util/macros.h"
#include "iceberg/util/projection_util_internal.h"
+#include "iceberg/util/string_util.h"
#include "iceberg/util/transform_util.h"
namespace iceberg {
@@ -514,7 +515,8 @@ Result<std::shared_ptr<Transform>>
TransformFromString(std::string_view transfor
std::smatch match;
if (std::regex_match(str, match, param_regex)) {
const std::string type_str = match[1];
- const int32_t param = std::stoi(match[2]);
+ ICEBERG_ASSIGN_OR_RAISE(const auto param,
+ StringUtils::ParseNumber<int32_t>(match[2].str()));
if (type_str == kBucketName) {
return Transform::Bucket(param);
diff --git a/src/iceberg/update/snapshot_update.cc
b/src/iceberg/update/snapshot_update.cc
index 38c5129f..4865278f 100644
--- a/src/iceberg/update/snapshot_update.cc
+++ b/src/iceberg/update/snapshot_update.cc
@@ -49,19 +49,19 @@ Status UpdateTotal(std::unordered_map<std::string,
std::string>& summary,
auto total_it = previous_summary.find(total_property);
if (total_it != previous_summary.end()) {
ICEBERG_ASSIGN_OR_RAISE(auto new_total,
- StringUtils::ParseInt<int64_t>(total_it->second));
+
StringUtils::ParseNumber<int64_t>(total_it->second));
auto added_it = summary.find(added_property);
if (new_total >= 0 && added_it != summary.end()) {
ICEBERG_ASSIGN_OR_RAISE(auto added_value,
-
StringUtils::ParseInt<int64_t>(added_it->second));
+
StringUtils::ParseNumber<int64_t>(added_it->second));
new_total += added_value;
}
auto deleted_it = summary.find(deleted_property);
if (new_total >= 0 && deleted_it != summary.end()) {
ICEBERG_ASSIGN_OR_RAISE(auto deleted_value,
-
StringUtils::ParseInt<int64_t>(deleted_it->second));
+
StringUtils::ParseNumber<int64_t>(deleted_it->second));
new_total -= deleted_value;
}
@@ -276,9 +276,9 @@ Result<SnapshotUpdate::ApplyResult> SnapshotUpdate::Apply()
{
auto added_records_it = summary.find(SnapshotSummaryFields::kAddedRecords);
auto replaced_records_it =
summary.find(SnapshotSummaryFields::kDeletedRecords);
if (added_records_it != summary.cend() && replaced_records_it !=
summary.cend()) {
- ICEBERG_ASSIGN_OR_RAISE(auto added_records,
-
StringUtils::ParseInt<int64_t>(added_records_it->second));
- ICEBERG_ASSIGN_OR_RAISE(auto replaced_records,
StringUtils::ParseInt<int64_t>(
+ ICEBERG_ASSIGN_OR_RAISE(auto added_records,
StringUtils::ParseNumber<int64_t>(
+
added_records_it->second));
+ ICEBERG_ASSIGN_OR_RAISE(auto replaced_records,
StringUtils::ParseNumber<int64_t>(
replaced_records_it->second));
ICEBERG_PRECHECK(
added_records <= replaced_records,
diff --git a/src/iceberg/update/update_properties.cc
b/src/iceberg/update/update_properties.cc
index fe49df81..e6fe7a60 100644
--- a/src/iceberg/update/update_properties.cc
+++ b/src/iceberg/update/update_properties.cc
@@ -85,7 +85,7 @@ Result<UpdateProperties::ApplyResult>
UpdateProperties::Apply() {
auto iter = new_properties.find(TableProperties::kFormatVersion.key());
if (iter != new_properties.end()) {
ICEBERG_ASSIGN_OR_RAISE(auto parsed_version,
- StringUtils::ParseInt<int32_t>(iter->second));
+ StringUtils::ParseNumber<int32_t>(iter->second));
if (parsed_version > TableMetadata::kSupportedTableFormatVersion) {
return InvalidArgument(
diff --git a/src/iceberg/util/config.h b/src/iceberg/util/config.h
index df92561b..089c78f5 100644
--- a/src/iceberg/util/config.h
+++ b/src/iceberg/util/config.h
@@ -24,6 +24,8 @@
#include <unordered_map>
#include "iceberg/exception.h"
+#include "iceberg/util/macros.h"
+#include "iceberg/util/string_util.h"
namespace iceberg {
namespace internal {
@@ -50,10 +52,10 @@ U DefaultFromString(const std::string& val) {
return val;
} else if constexpr (std::is_same_v<U, bool>) {
return val == "true";
- } else if constexpr (std::is_signed_v<U> && std::is_integral_v<U>) {
- return static_cast<U>(std::stoll(val));
- } else if constexpr (std::is_floating_point_v<U>) {
- return static_cast<U>(std::stod(val));
+ } else if constexpr ((std::is_signed_v<U> && std::is_integral_v<U>) ||
+ std::is_floating_point_v<U>) {
+ ICEBERG_ASSIGN_OR_THROW(auto res, StringUtils::ParseNumber<U>(val));
+ return res;
} else {
throw IcebergError(
std::format("Explicit from_str() is required for {}",
typeid(U).name()));
diff --git a/src/iceberg/util/decimal.cc b/src/iceberg/util/decimal.cc
index 433d3586..aeb7c614 100644
--- a/src/iceberg/util/decimal.cc
+++ b/src/iceberg/util/decimal.cc
@@ -39,6 +39,7 @@
#include "iceberg/result.h"
#include "iceberg/util/int128.h"
#include "iceberg/util/macros.h"
+#include "iceberg/util/string_util.h"
namespace iceberg {
@@ -201,13 +202,11 @@ inline void ShiftAndAdd(std::string_view input,
uint128_t& out) {
for (size_t pos = 0; pos < input.size();) {
const size_t group_size = std::min(kInt64DecimalDigits, input.size() -
pos);
const uint64_t multiple = kUInt64PowersOfTen[group_size];
- uint64_t value = 0;
- auto [_, ec] =
- std::from_chars(input.data() + pos, input.data() + pos + group_size,
value);
- ICEBERG_DCHECK(ec == std::errc(), "Failed to parse digits in ShiftAndAdd");
+ auto res = StringUtils::ParseNumber<uint64_t>(input.substr(pos,
group_size));
+ ICEBERG_DCHECK(res.has_value(), "Failed to parse digits in ShiftAndAdd");
- out = out * multiple + value;
+ out = out * multiple + res.value();
pos += group_size;
}
}
diff --git a/src/iceberg/util/string_util.h b/src/iceberg/util/string_util.h
index dfedb4a7..3528204e 100644
--- a/src/iceberg/util/string_util.h
+++ b/src/iceberg/util/string_util.h
@@ -25,6 +25,7 @@
#include <string>
#include <string_view>
#include <typeinfo>
+#include <utility>
#include "iceberg/iceberg_export.h"
#include "iceberg/result.h"
@@ -67,17 +68,22 @@ class ICEBERG_EXPORT StringUtils {
}
template <typename T>
- static Result<T> ParseInt(std::string_view str) {
+ requires std::is_arithmetic_v<T> && (!std::same_as<T, bool>)
+ static Result<T> ParseNumber(std::string_view str) {
T value = 0;
auto [ptr, ec] = std::from_chars(str.data(), str.data() + str.size(),
value);
- if (ec == std::errc::invalid_argument) [[unlikely]] {
- return InvalidArgument("Failed to parse integer from string '{}':
invalid argument",
- str);
- } else if (ec == std::errc::result_out_of_range) [[unlikely]] {
+ if (ec == std::errc()) [[likely]] {
+ return value;
+ }
+ if (ec == std::errc::invalid_argument) {
+ return InvalidArgument("Failed to parse {} from string '{}': invalid
argument",
+ typeid(T).name(), str);
+ }
+ if (ec == std::errc::result_out_of_range) {
return InvalidArgument("Failed to parse {} from string '{}': value out
of range",
typeid(T).name(), str);
}
- return value;
+ std::unreachable();
}
};