danepitkin commented on code in PR #361:
URL: https://github.com/apache/arrow-nanoarrow/pull/361#discussion_r1454071863
##########
src/nanoarrow/utils.c:
##########
@@ -223,3 +223,186 @@ struct ArrowBufferAllocator ArrowBufferDeallocator(
allocator.private_data = private_data;
return allocator;
}
+
+static const int kInt32DecimalDigits = 9;
+
+static const uint64_t kUInt32PowersOfTen[] = {
+ 1ULL, 10ULL, 100ULL, 1000ULL, 10000ULL,
+ 100000ULL, 1000000ULL, 10000000ULL, 100000000ULL, 1000000000ULL};
+
+// Adapted from Arrow C++ to use 32-bit words for better C portability
+//
https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.cc#L524-L544
Review Comment:
You can add a git hash in place of `main` if you'd like. (Press 'y' after
opening the above link)
It also might be nice to copy the comment over as well if applicable.
##########
src/nanoarrow/nanoarrow.h:
##########
@@ -280,6 +284,14 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum
ArrowType storage_type);
/// \brief Create a string view from a null-terminated string
static inline struct ArrowStringView ArrowCharView(const char* value);
+/// \brief Sets the integer value of an ArrowDecimal from a string
+ArrowErrorCode ArrowDecimalSetIntString(struct ArrowDecimal* decimal,
+ struct ArrowStringView value);
+
+/// \brief Get the integer value of an ArrowDecimal as string
+ArrowErrorCode ArrowDecimalAppendIntStringToBuffer(const struct ArrowDecimal*
decimal,
+ struct ArrowBuffer* buffer);
+
Review Comment:
IMO the function names are a bit confusing. Maybe something like
`ArrowDecimalSetFromString`? I think the `IntString` terminology is the part
that is hard for me to parse.
##########
src/nanoarrow/utils.c:
##########
@@ -223,3 +223,186 @@ struct ArrowBufferAllocator ArrowBufferDeallocator(
allocator.private_data = private_data;
return allocator;
}
+
+static const int kInt32DecimalDigits = 9;
+
+static const uint64_t kUInt32PowersOfTen[] = {
+ 1ULL, 10ULL, 100ULL, 1000ULL, 10000ULL,
+ 100000ULL, 1000000ULL, 10000000ULL, 100000000ULL, 1000000000ULL};
+
+// Adapted from Arrow C++ to use 32-bit words for better C portability
+//
https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.cc#L524-L544
+static void ShiftAndAdd(struct ArrowStringView value, uint32_t* out, int64_t
out_size) {
+ // We use strtoll for parsing, which needs input that is null-terminated
+ char chunk_string[16];
+
+ for (int64_t posn = 0; posn < value.size_bytes;) {
+ int64_t remaining = value.size_bytes - posn;
+
+ int64_t group_size;
+ if (remaining > kInt32DecimalDigits) {
+ group_size = kInt32DecimalDigits;
+ } else {
+ group_size = remaining;
+ }
+
+ const uint64_t multiple = kUInt32PowersOfTen[group_size];
+
+ memcpy(chunk_string, value.data + posn, group_size);
+ chunk_string[group_size] = '\0';
+ uint32_t chunk = (uint32_t)strtoll(chunk_string, NULL, 10);
+
+ for (int64_t i = 0; i < out_size; i++) {
+ uint64_t tmp = out[i];
+ tmp *= multiple;
+ tmp += chunk;
+ out[i] = (uint32_t)(tmp & 0xFFFFFFFFULL);
+ chunk = (uint32_t)(tmp >> 32);
+ }
+ posn += group_size;
+ }
+}
+
+ArrowErrorCode ArrowDecimalSetIntString(struct ArrowDecimal* decimal,
+ struct ArrowStringView value) {
+ // Check for sign
+ int is_negative = value.data[0] == '-';
+ int has_sign = is_negative || value.data[0] == '+';
+ value.data += has_sign;
+ value.size_bytes -= has_sign;
+
+ // Check all characters are digits that are not the negative sign
+ for (int64_t i = 0; i < value.size_bytes; i++) {
+ char c = value.data[i];
+ if (c < '0' || c > '9') {
+ return EINVAL;
+ }
Review Comment:
this might be an unhelpful guess at micro-optimization.. but if you are
trying to avoid branching, you can set a bitmask here and check for failure
outside of the loop. e.g. `is_invalid &= c < '0' || c > '9';`. Maybe the
compiler would optimize this for us? I wouldn't know.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]