bkietz commented on code in PR #14602:
URL: https://github.com/apache/arrow/pull/14602#discussion_r1018189597
##########
python/pyarrow/src/arrow/python/datetime.cc:
##########
@@ -38,41 +39,27 @@ namespace internal {
namespace {
-// Same as Regex '([+-])(0[0-9]|1[0-9]|2[0-3]):([0-5][0-9])$'.
-// GCC 4.9 doesn't support regex, so handcode until support for it
-// is dropped.
bool MatchFixedOffset(const std::string& tz, std::string_view* sign,
std::string_view* hour, std::string_view* minute) {
+ static const std::regex regex("^([+-])(0[0-9]|1[0-9]|2[0-3]):([0-5][0-9])$");
if (tz.size() < 5) {
return false;
}
- const char* iter = tz.data();
- if (*iter == '+' || *iter == '-') {
- *sign = std::string_view(iter, 1);
- iter++;
- if (tz.size() < 6) {
- return false;
- }
- }
- if ((((*iter == '0' || *iter == '1') && *(iter + 1) >= '0' && *(iter + 1) <=
'9') ||
- (*iter == '2' && *(iter + 1) >= '0' && *(iter + 1) <= '3'))) {
- *hour = std::string_view(iter, 2);
- iter += 2;
- } else {
- return false;
- }
- if (*iter != ':') {
+ std::smatch match;
+ if (!std::regex_match(tz, match, regex)) {
return false;
}
- iter++;
+ DCHECK_EQ(match.size(), 4); // match #0 is the whole matched sequence
- if (*iter >= '0' && *iter <= '5' && *(iter + 1) >= '0' && *(iter + 1) <=
'9') {
- *minute = std::string_view(iter, 2);
- iter += 2;
- } else {
- return false;
- }
- return iter == (tz.data() + tz.size());
+ auto match_view = [&](int match_number) {
+ return std::string_view(&tz[match.position(match_number)],
+ match.length(match_number));
+ };
Review Comment:
I can't improve on this by much. `std::regex` really expects you to want a
string.
```c++
bool MatchFixedOffset(std::string_view tz, std::string_view* sign,
std::string_view* hour, std::string_view* minute) {
static const std::regex
regex("^([+-])(0[0-9]|1[0-9]|2[0-3]):([0-5][0-9])$");
if (tz.size() < 5) {
return false;
}
std::match_results<decltype(target.begin())> match;
if (!std::regex_match(tz.begin(), tz.end(), match, regex)) {
return false;
}
DCHECK_EQ(match.size(), 4); // match #0 is the whole matched sequence
auto match_view = [&](int i) {
return tz.substr(match.position(i), match.length(i));
};
*sign = match_view(1);
*hour = match_view(2);
*minute = match_view(3);
return true;
}
```
OTOH, this might be a common enough problem to make a generic helper
function for:
```c++
// in util/string.h or util/regex.h
bool Match(const std::regex& regex, std::string_view target,
std::initializer_list<std::string_view*> out_views) {
std::match_results<decltype(target.begin())> match;
if (!std::regex_match(target.begin(), target.end(), match, regex)) {
return false;
}
DCHECK_EQ(match.size(), out_views.size() + 1); // match #0 is the whole
matched sequence
size_t i = 1;
for (std::string_view* out_view : out_views) {
*out_view = target.substr(match.position(i), match.length(i));
++i;
}
return true;
}
bool MatchFixedOffset(const std::string& tz, std::string_view* sign,
std::string_view* hour, std::string_view* minute) {
if (tz.size() < 5) {
return false;
}
static const std::regex
regex("^([+-])(0[0-9]|1[0-9]|2[0-3]):([0-5][0-9])$");
return Match(tz, regex, {sign, hour, minute});
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]