pitrou commented on a change in pull request #9367:
URL: https://github.com/apache/arrow/pull/9367#discussion_r568015264



##########
File path: cpp/src/parquet/metadata.h
##########
@@ -58,16 +58,6 @@ class PARQUET_EXPORT ApplicationVersion {
   static const ApplicationVersion& PARQUET_816_FIXED_VERSION();
   static const ApplicationVersion& PARQUET_CPP_FIXED_STATS_VERSION();
   static const ApplicationVersion& PARQUET_MR_FIXED_STATS_VERSION();
-  // Regular expression for the version format
-  // major . minor . patch unknown - prerelease.x + build info
-  // Eg: 1.5.0ab-cdh5.5.0+cd
-  static constexpr char const* VERSION_FORMAT =
-      "^(\\d+)\\.(\\d+)\\.(\\d+)([^-+]*)?(?:-([^+]*))?(?:\\+(.*))?$";
-  // Regular expression for the application format
-  // application_name version VERSION_FORMAT (build build_name)
-  // Eg: parquet-cpp version 1.5.0ab-xyz5.5.0+cd (build abcd)
-  static constexpr char const* APPLICATION_FORMAT =
-      
"(.*?)\\s*(?:(version\\s*(?:([^(]*?)\\s*(?:\\(\\s*build\\s*([^)]*?)\\s*\\))?)?)?)";
 

Review comment:
       Does the comment "TODO (majetideepak): Implement support for 
pre_release" below need to be removed?

##########
File path: cpp/src/parquet/metadata.cc
##########
@@ -949,43 +910,283 @@ ApplicationVersion::ApplicationVersion(std::string 
application, int major, int m
                                        int patch)
     : application_(std::move(application)), version{major, minor, patch, "", 
"", ""} {}
 
-ApplicationVersion::ApplicationVersion(const std::string& created_by) {
-  // Use singletons to compile only once (ARROW-9863)
-  static regex app_regex{ApplicationVersion::APPLICATION_FORMAT};
-  static regex ver_regex{ApplicationVersion::VERSION_FORMAT};
-  smatch app_matches;
-  smatch ver_matches;
-
-  std::string created_by_lower = created_by;
-  std::transform(created_by_lower.begin(), created_by_lower.end(),
-                 created_by_lower.begin(), ::tolower);
-
-  bool app_success = regex_match(created_by_lower, app_matches, app_regex);
-  bool ver_success = false;
-  std::string version_str;
-
-  if (app_success && app_matches.size() >= 4) {
-    // first match is the entire string. sub-matches start from second.
-    application_ = app_matches[1];
-    version_str = app_matches[3];
-    build_ = app_matches[4];
-    ver_success = regex_match(version_str, ver_matches, ver_regex);
-  } else {
-    application_ = "unknown";
-  }
-
-  if (ver_success && ver_matches.size() >= 7) {
-    version.major = atoi(ver_matches[1].str().c_str());
-    version.minor = atoi(ver_matches[2].str().c_str());
-    version.patch = atoi(ver_matches[3].str().c_str());
-    version.unknown = ver_matches[4].str();
-    version.pre_release = ver_matches[5].str();
-    version.build_info = ver_matches[6].str();
-  } else {
-    version.major = 0;
-    version.minor = 0;
-    version.patch = 0;
+namespace {
+// Parse the application version format and set parsed values to
+// ApplicationVersion.
+//
+// The application version format must be compatible parquet-mr's
+// one. See also:
+//   * 
https://github.com/apache/parquet-mr/blob/master/parquet-common/src/main/java/org/apache/parquet/VersionParser.java
+//   * 
https://github.com/apache/parquet-mr/blob/master/parquet-common/src/main/java/org/apache/parquet/SemanticVersion.java
+//
+// The application version format:
+//   "${APPLICATION_NAME}"
+//   "${APPLICATION_NAME} version ${VERSION}"
+//   "${APPLICATION_NAME} version ${VERSION} (build ${BUILD_NAME})"
+//
+// Eg:
+//   parquet-cpp
+//   parquet-cpp version 1.5.0ab-xyz5.5.0+cd
+//   parquet-cpp version 1.5.0ab-xyz5.5.0+cd (build abcd)
+//
+// The VERSION format:
+//   "${MAJOR}.${MINOR}.${PATCH}"
+//   "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}"
+//   "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}-${PRE_RELEASE}"
+//   "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}-${PRE_RELEASE}+${BUILD_INFO}"
+//   "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}+${BUILD_INFO}"
+//   "${MAJOR}.${MINOR}.${PATCH}-${PRE_RELEASE}"
+//   "${MAJOR}.${MINOR}.${PATCH}-${PRE_RELEASE}+${BUILD_INFO}"
+//   "${MAJOR}.${MINOR}.${PATCH}+${BUILD_INFO}"
+//
+// Eg:
+//   1.5.0
+//   1.5.0ab
+//   1.5.0ab-cdh5.5.0
+//   1.5.0ab-cdh5.5.0+cd
+//   1.5.0ab+cd
+//   1.5.0-cdh5.5.0
+//   1.5.0-cdh5.5.0+cd
+//   1.5.0+cd
+class ApplicationVersionParser {
+ public:
+  ApplicationVersionParser(const std::string& created_by,
+                           ApplicationVersion& application_version)
+      : created_by_(created_by),
+        application_version_(application_version),
+        digit_("0123456789") {}
+
+  void Parse() {
+    application_version_.application_ = "unknown";
+    application_version_.version = {0, 0, 0, "", "", ""};
+
+    if (!ParseApplicationName()) {
+      return;
+    }
+    if (!ParseVersion()) {
+      return;
+    }
+    if (!ParseBuildName()) {
+      return;
+    }
+  }
+
+ private:
+  void RemovePrecedingSpaces(const std::string& string, size_t& start,
+                             const size_t& end) {
+    while (start < end && string[start] == ' ') {

Review comment:
       Note that `\s` in a regex may match other whitespace characters. But 
they're unlikely to appear in the `created_by` field anyway.

##########
File path: cpp/src/parquet/metadata.cc
##########
@@ -949,43 +910,283 @@ ApplicationVersion::ApplicationVersion(std::string 
application, int major, int m
                                        int patch)
     : application_(std::move(application)), version{major, minor, patch, "", 
"", ""} {}
 
-ApplicationVersion::ApplicationVersion(const std::string& created_by) {
-  // Use singletons to compile only once (ARROW-9863)
-  static regex app_regex{ApplicationVersion::APPLICATION_FORMAT};
-  static regex ver_regex{ApplicationVersion::VERSION_FORMAT};
-  smatch app_matches;
-  smatch ver_matches;
-
-  std::string created_by_lower = created_by;
-  std::transform(created_by_lower.begin(), created_by_lower.end(),
-                 created_by_lower.begin(), ::tolower);
-
-  bool app_success = regex_match(created_by_lower, app_matches, app_regex);
-  bool ver_success = false;
-  std::string version_str;
-
-  if (app_success && app_matches.size() >= 4) {
-    // first match is the entire string. sub-matches start from second.
-    application_ = app_matches[1];
-    version_str = app_matches[3];
-    build_ = app_matches[4];
-    ver_success = regex_match(version_str, ver_matches, ver_regex);
-  } else {
-    application_ = "unknown";
-  }
-
-  if (ver_success && ver_matches.size() >= 7) {
-    version.major = atoi(ver_matches[1].str().c_str());
-    version.minor = atoi(ver_matches[2].str().c_str());
-    version.patch = atoi(ver_matches[3].str().c_str());
-    version.unknown = ver_matches[4].str();
-    version.pre_release = ver_matches[5].str();
-    version.build_info = ver_matches[6].str();
-  } else {
-    version.major = 0;
-    version.minor = 0;
-    version.patch = 0;
+namespace {
+// Parse the application version format and set parsed values to
+// ApplicationVersion.
+//
+// The application version format must be compatible parquet-mr's
+// one. See also:
+//   * 
https://github.com/apache/parquet-mr/blob/master/parquet-common/src/main/java/org/apache/parquet/VersionParser.java
+//   * 
https://github.com/apache/parquet-mr/blob/master/parquet-common/src/main/java/org/apache/parquet/SemanticVersion.java
+//
+// The application version format:
+//   "${APPLICATION_NAME}"
+//   "${APPLICATION_NAME} version ${VERSION}"
+//   "${APPLICATION_NAME} version ${VERSION} (build ${BUILD_NAME})"
+//
+// Eg:
+//   parquet-cpp
+//   parquet-cpp version 1.5.0ab-xyz5.5.0+cd
+//   parquet-cpp version 1.5.0ab-xyz5.5.0+cd (build abcd)
+//
+// The VERSION format:
+//   "${MAJOR}.${MINOR}.${PATCH}"
+//   "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}"
+//   "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}-${PRE_RELEASE}"
+//   "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}-${PRE_RELEASE}+${BUILD_INFO}"
+//   "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}+${BUILD_INFO}"
+//   "${MAJOR}.${MINOR}.${PATCH}-${PRE_RELEASE}"
+//   "${MAJOR}.${MINOR}.${PATCH}-${PRE_RELEASE}+${BUILD_INFO}"
+//   "${MAJOR}.${MINOR}.${PATCH}+${BUILD_INFO}"
+//
+// Eg:
+//   1.5.0
+//   1.5.0ab
+//   1.5.0ab-cdh5.5.0
+//   1.5.0ab-cdh5.5.0+cd
+//   1.5.0ab+cd
+//   1.5.0-cdh5.5.0
+//   1.5.0-cdh5.5.0+cd
+//   1.5.0+cd
+class ApplicationVersionParser {
+ public:
+  ApplicationVersionParser(const std::string& created_by,
+                           ApplicationVersion& application_version)
+      : created_by_(created_by),
+        application_version_(application_version),
+        digit_("0123456789") {}
+
+  void Parse() {
+    application_version_.application_ = "unknown";
+    application_version_.version = {0, 0, 0, "", "", ""};
+
+    if (!ParseApplicationName()) {
+      return;
+    }
+    if (!ParseVersion()) {
+      return;
+    }
+    if (!ParseBuildName()) {
+      return;
+    }
+  }
+
+ private:
+  void RemovePrecedingSpaces(const std::string& string, size_t& start,
+                             const size_t& end) {
+    while (start < end && string[start] == ' ') {
+      ++start;
+    }
+  }
+
+  void RemoveTrailingSpaces(const std::string& string, const size_t& start, 
size_t& end) {
+    while (start < (end - 1) && (end - 1) < string.size() && string[end - 1] 
== ' ') {
+      --end;
+    }
   }
+
+  bool ParseApplicationName() {
+    std::string version_mark(" version ");
+    auto version_mark_position = created_by_.find(version_mark);
+    size_t application_name_end;
+    // No VERSION and BUILD_NAME.
+    if (version_mark_position == std::string::npos) {
+      version_start_ = std::string::npos;
+      application_name_end = created_by_.size();
+    } else {
+      version_start_ = version_mark_position + version_mark.size();
+      application_name_end = version_mark_position;
+    }
+
+    size_t application_name_start = 0;
+    RemovePrecedingSpaces(created_by_, application_name_start, 
application_name_end);
+    RemoveTrailingSpaces(created_by_, application_name_start, 
application_name_end);
+    application_version_.application_ = created_by_.substr(
+        application_name_start, application_name_end - application_name_start);
+
+    return true;
+  }
+
+  bool ParseVersion() {
+    // No VERSION.
+    if (version_start_ == std::string::npos) {
+      return true;
+    }
+
+    RemovePrecedingSpaces(created_by_, version_start_, created_by_.size());
+    version_end_ = created_by_.find(" (", version_start_);
+    // No BUILD_NAME.
+    if (version_end_ == std::string::npos) {
+      version_end_ = created_by_.size();
+    }
+    RemoveTrailingSpaces(created_by_, version_start_, version_end_);
+    // No VERSION.
+    if (version_start_ == version_end_) {
+      return false;
+    }
+    version_string_ = created_by_.substr(version_start_, version_end_ - 
version_start_);
+
+    if (!ParseVersionMajor()) {
+      return false;
+    }
+    if (!ParseVersionMinor()) {
+      return false;
+    }
+    if (!ParseVersionPatch()) {
+      return false;
+    }
+    if (!ParseVersionUnknown()) {
+      return false;
+    }
+    if (!ParseVersionPreRelease()) {
+      return false;
+    }
+    if (!ParseVersionBuildInfo()) {
+      return false;
+    }
+
+    return true;
+  }
+
+  bool ParseVersionMajor() {
+    size_t version_major_start = 0;
+    auto version_major_end = version_string_.find_first_not_of(digit_);
+    // No ".".
+    if (version_major_end == std::string::npos ||
+        version_string_[version_major_end] != '.') {
+      return false;
+    }
+    // No MAJOR.
+    if (version_major_end == version_major_start) {
+      return false;
+    }
+    auto version_major_string = version_string_.substr(
+        version_major_start, version_major_end - version_major_start);
+    application_version_.version.major = atoi(version_major_string.c_str());
+    version_parsing_position_ = version_major_end + 1;  // +1 is for '.'.
+    return true;
+  }
+
+  bool ParseVersionMinor() {
+    auto version_minor_start = version_parsing_position_;
+    auto version_minor_end =
+        version_string_.find_first_not_of(digit_, version_minor_start);
+    if (version_minor_end == std::string::npos ||

Review comment:
       Same here if `npos`.

##########
File path: cpp/src/parquet/metadata.cc
##########
@@ -949,43 +910,283 @@ ApplicationVersion::ApplicationVersion(std::string 
application, int major, int m
                                        int patch)
     : application_(std::move(application)), version{major, minor, patch, "", 
"", ""} {}
 
-ApplicationVersion::ApplicationVersion(const std::string& created_by) {
-  // Use singletons to compile only once (ARROW-9863)
-  static regex app_regex{ApplicationVersion::APPLICATION_FORMAT};
-  static regex ver_regex{ApplicationVersion::VERSION_FORMAT};
-  smatch app_matches;
-  smatch ver_matches;
-
-  std::string created_by_lower = created_by;
-  std::transform(created_by_lower.begin(), created_by_lower.end(),
-                 created_by_lower.begin(), ::tolower);
-
-  bool app_success = regex_match(created_by_lower, app_matches, app_regex);
-  bool ver_success = false;
-  std::string version_str;
-
-  if (app_success && app_matches.size() >= 4) {
-    // first match is the entire string. sub-matches start from second.
-    application_ = app_matches[1];
-    version_str = app_matches[3];
-    build_ = app_matches[4];
-    ver_success = regex_match(version_str, ver_matches, ver_regex);
-  } else {
-    application_ = "unknown";
-  }
-
-  if (ver_success && ver_matches.size() >= 7) {
-    version.major = atoi(ver_matches[1].str().c_str());
-    version.minor = atoi(ver_matches[2].str().c_str());
-    version.patch = atoi(ver_matches[3].str().c_str());
-    version.unknown = ver_matches[4].str();
-    version.pre_release = ver_matches[5].str();
-    version.build_info = ver_matches[6].str();
-  } else {
-    version.major = 0;
-    version.minor = 0;
-    version.patch = 0;
+namespace {
+// Parse the application version format and set parsed values to
+// ApplicationVersion.
+//
+// The application version format must be compatible parquet-mr's
+// one. See also:
+//   * 
https://github.com/apache/parquet-mr/blob/master/parquet-common/src/main/java/org/apache/parquet/VersionParser.java
+//   * 
https://github.com/apache/parquet-mr/blob/master/parquet-common/src/main/java/org/apache/parquet/SemanticVersion.java
+//
+// The application version format:
+//   "${APPLICATION_NAME}"
+//   "${APPLICATION_NAME} version ${VERSION}"
+//   "${APPLICATION_NAME} version ${VERSION} (build ${BUILD_NAME})"
+//
+// Eg:
+//   parquet-cpp
+//   parquet-cpp version 1.5.0ab-xyz5.5.0+cd
+//   parquet-cpp version 1.5.0ab-xyz5.5.0+cd (build abcd)
+//
+// The VERSION format:
+//   "${MAJOR}.${MINOR}.${PATCH}"
+//   "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}"
+//   "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}-${PRE_RELEASE}"
+//   "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}-${PRE_RELEASE}+${BUILD_INFO}"
+//   "${MAJOR}.${MINOR}.${PATCH}${UNKNOWN}+${BUILD_INFO}"
+//   "${MAJOR}.${MINOR}.${PATCH}-${PRE_RELEASE}"
+//   "${MAJOR}.${MINOR}.${PATCH}-${PRE_RELEASE}+${BUILD_INFO}"
+//   "${MAJOR}.${MINOR}.${PATCH}+${BUILD_INFO}"
+//
+// Eg:
+//   1.5.0
+//   1.5.0ab
+//   1.5.0ab-cdh5.5.0
+//   1.5.0ab-cdh5.5.0+cd
+//   1.5.0ab+cd
+//   1.5.0-cdh5.5.0
+//   1.5.0-cdh5.5.0+cd
+//   1.5.0+cd
+class ApplicationVersionParser {
+ public:
+  ApplicationVersionParser(const std::string& created_by,
+                           ApplicationVersion& application_version)
+      : created_by_(created_by),
+        application_version_(application_version),
+        digit_("0123456789") {}
+
+  void Parse() {
+    application_version_.application_ = "unknown";
+    application_version_.version = {0, 0, 0, "", "", ""};
+
+    if (!ParseApplicationName()) {
+      return;
+    }
+    if (!ParseVersion()) {
+      return;
+    }
+    if (!ParseBuildName()) {
+      return;
+    }
+  }
+
+ private:
+  void RemovePrecedingSpaces(const std::string& string, size_t& start,
+                             const size_t& end) {
+    while (start < end && string[start] == ' ') {
+      ++start;
+    }
+  }
+
+  void RemoveTrailingSpaces(const std::string& string, const size_t& start, 
size_t& end) {
+    while (start < (end - 1) && (end - 1) < string.size() && string[end - 1] 
== ' ') {
+      --end;
+    }
   }
+
+  bool ParseApplicationName() {
+    std::string version_mark(" version ");
+    auto version_mark_position = created_by_.find(version_mark);
+    size_t application_name_end;
+    // No VERSION and BUILD_NAME.
+    if (version_mark_position == std::string::npos) {
+      version_start_ = std::string::npos;
+      application_name_end = created_by_.size();
+    } else {
+      version_start_ = version_mark_position + version_mark.size();
+      application_name_end = version_mark_position;
+    }
+
+    size_t application_name_start = 0;
+    RemovePrecedingSpaces(created_by_, application_name_start, 
application_name_end);
+    RemoveTrailingSpaces(created_by_, application_name_start, 
application_name_end);
+    application_version_.application_ = created_by_.substr(
+        application_name_start, application_name_end - application_name_start);
+
+    return true;
+  }
+
+  bool ParseVersion() {
+    // No VERSION.
+    if (version_start_ == std::string::npos) {
+      return true;
+    }
+
+    RemovePrecedingSpaces(created_by_, version_start_, created_by_.size());
+    version_end_ = created_by_.find(" (", version_start_);
+    // No BUILD_NAME.
+    if (version_end_ == std::string::npos) {
+      version_end_ = created_by_.size();
+    }
+    RemoveTrailingSpaces(created_by_, version_start_, version_end_);
+    // No VERSION.
+    if (version_start_ == version_end_) {
+      return false;
+    }
+    version_string_ = created_by_.substr(version_start_, version_end_ - 
version_start_);
+
+    if (!ParseVersionMajor()) {
+      return false;
+    }
+    if (!ParseVersionMinor()) {
+      return false;
+    }
+    if (!ParseVersionPatch()) {
+      return false;
+    }
+    if (!ParseVersionUnknown()) {
+      return false;
+    }
+    if (!ParseVersionPreRelease()) {
+      return false;
+    }
+    if (!ParseVersionBuildInfo()) {
+      return false;
+    }
+
+    return true;
+  }
+
+  bool ParseVersionMajor() {
+    size_t version_major_start = 0;
+    auto version_major_end = version_string_.find_first_not_of(digit_);
+    // No ".".
+    if (version_major_end == std::string::npos ||

Review comment:
       If it's `npos`, then we can still parse the major number, no?

##########
File path: cpp/src/parquet/metadata_test.cc
##########
@@ -342,5 +342,123 @@ TEST(ApplicationVersion, Basics) {
       version1.HasCorrectStatistics(Type::BYTE_ARRAY, stats_int, 
SortOrder::UNSIGNED));
 }
 
+TEST(ApplicationVersion, Empty) {
+  ApplicationVersion version("");
+
+  ASSERT_EQ("", version.application_);
+  ASSERT_EQ("", version.build_);
+  ASSERT_EQ(0, version.version.major);
+  ASSERT_EQ(0, version.version.minor);
+  ASSERT_EQ(0, version.version.patch);
+  ASSERT_EQ("", version.version.unknown);
+  ASSERT_EQ("", version.version.pre_release);
+  ASSERT_EQ("", version.version.build_info);
+}
+
+TEST(ApplicationVersion, VersionEmpty) {
+  ApplicationVersion version("parquet-mr version ");
+
+  ASSERT_EQ("parquet-mr", version.application_);
+  ASSERT_EQ("", version.build_);
+  ASSERT_EQ(0, version.version.major);
+  ASSERT_EQ(0, version.version.minor);
+  ASSERT_EQ(0, version.version.patch);
+  ASSERT_EQ("", version.version.unknown);
+  ASSERT_EQ("", version.version.pre_release);
+  ASSERT_EQ("", version.version.build_info);
+}
+
+TEST(ApplicationVersion, VersionNoMajor) {
+  ApplicationVersion version("parquet-mr version .");
+
+  ASSERT_EQ("parquet-mr", version.application_);
+  ASSERT_EQ("", version.build_);
+  ASSERT_EQ(0, version.version.major);
+  ASSERT_EQ(0, version.version.minor);
+  ASSERT_EQ(0, version.version.patch);
+  ASSERT_EQ("", version.version.unknown);
+  ASSERT_EQ("", version.version.pre_release);
+  ASSERT_EQ("", version.version.build_info);
+}
+
+TEST(ApplicationVersion, VersionNoMinor) {
+  ApplicationVersion version("parquet-mr version 1.");
+
+  ASSERT_EQ("parquet-mr", version.application_);
+  ASSERT_EQ("", version.build_);
+  ASSERT_EQ(1, version.version.major);
+  ASSERT_EQ(0, version.version.minor);
+  ASSERT_EQ(0, version.version.patch);
+  ASSERT_EQ("", version.version.unknown);
+  ASSERT_EQ("", version.version.pre_release);
+  ASSERT_EQ("", version.version.build_info);
+}
+
+TEST(ApplicationVersion, VersionNoPatch) {
+  ApplicationVersion version("parquet-mr version 1.7.");
+
+  ASSERT_EQ("parquet-mr", version.application_);
+  ASSERT_EQ("", version.build_);
+  ASSERT_EQ(1, version.version.major);
+  ASSERT_EQ(7, version.version.minor);
+  ASSERT_EQ(0, version.version.patch);
+  ASSERT_EQ("", version.version.unknown);
+  ASSERT_EQ("", version.version.pre_release);
+  ASSERT_EQ("", version.version.build_info);
+}
+
+TEST(ApplicationVersion, VersionNoUnknown) {
+  ApplicationVersion version("parquet-mr version 1.7.9-cdh5.5.0+cd");
+
+  ASSERT_EQ("parquet-mr", version.application_);
+  ASSERT_EQ("", version.build_);
+  ASSERT_EQ(1, version.version.major);
+  ASSERT_EQ(7, version.version.minor);
+  ASSERT_EQ(9, version.version.patch);
+  ASSERT_EQ("", version.version.unknown);
+  ASSERT_EQ("cdh5.5.0", version.version.pre_release);
+  ASSERT_EQ("cd", version.version.build_info);
+}
+
+TEST(ApplicationVersion, VersionNoPreRelease) {
+  ApplicationVersion version("parquet-mr version 1.7.9ab+cd");
+
+  ASSERT_EQ("parquet-mr", version.application_);
+  ASSERT_EQ("", version.build_);
+  ASSERT_EQ(1, version.version.major);
+  ASSERT_EQ(7, version.version.minor);
+  ASSERT_EQ(9, version.version.patch);
+  ASSERT_EQ("ab", version.version.unknown);
+  ASSERT_EQ("", version.version.pre_release);
+  ASSERT_EQ("cd", version.version.build_info);
+}
+
+TEST(ApplicationVersion, VersionNoUnknownNoPreRelease) {
+  ApplicationVersion version("parquet-mr version 1.7.9+cd");
+
+  ASSERT_EQ("parquet-mr", version.application_);
+  ASSERT_EQ("", version.build_);
+  ASSERT_EQ(1, version.version.major);
+  ASSERT_EQ(7, version.version.minor);
+  ASSERT_EQ(9, version.version.patch);
+  ASSERT_EQ("", version.version.unknown);
+  ASSERT_EQ("", version.version.pre_release);
+  ASSERT_EQ("cd", version.version.build_info);
+}
+
+TEST(ApplicationVersion, FullWithSpaces) {
+  ApplicationVersion version(
+      " parquet-mr  version  1.5.3ab-cdh5.5.0+cd  (build  abcd ) ");
+
+  ASSERT_EQ("parquet-mr", version.application_);
+  ASSERT_EQ("abcd", version.build_);
+  ASSERT_EQ(1, version.version.major);
+  ASSERT_EQ(5, version.version.minor);
+  ASSERT_EQ(3, version.version.patch);
+  ASSERT_EQ("ab", version.version.unknown);
+  ASSERT_EQ("cdh5.5.0", version.version.pre_release);
+  ASSERT_EQ("cd", version.version.build_info);
+}
+

Review comment:
       Can you add a test with a malformed version string?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to