xkrogen commented on a change in pull request #31203:
URL: https://github.com/apache/spark/pull/31203#discussion_r563867053



##########
File path: core/src/test/scala/org/apache/spark/util/VersionUtilsSuite.scala
##########
@@ -98,4 +98,21 @@ class VersionUtilsSuite extends SparkFunSuite {
       }
     }
   }
+
+  test("SPARK-33212: retrieve major/minor/patch version parts") {
+    assert(VersionUtils.majorMinorPatchVersion("3.2.2").contains((3, 2, 2)))
+    assert(VersionUtils.majorMinorPatchVersion("3.2.2.4").contains((3, 2, 2)))
+    assert(VersionUtils.majorMinorPatchVersion("3.2.2-SNAPSHOT").contains((3, 
2, 2)))
+    assert(VersionUtils.majorMinorPatchVersion("3.2.2.4XXX").contains((3, 2, 
2)))
+    assert(VersionUtils.majorMinorPatchVersion("3.2").contains((3, 2, 0)))
+    assert(VersionUtils.majorMinorPatchVersion("3").contains((3, 0, 0)))
+
+    // illegal cases
+    assert(VersionUtils.majorMinorPatchVersion("ABC").isEmpty)
+    assert(VersionUtils.majorMinorPatchVersion("3X").isEmpty)
+    assert(VersionUtils.majorMinorPatchVersion("3.2-SNAPSHOT").isEmpty)
+    assert(VersionUtils.majorMinorPatchVersion("3.2ABC").isEmpty)
+    assert(VersionUtils.majorMinorPatchVersion("3-ABC").isEmpty)
+    assert(VersionUtils.majorMinorPatchVersion("3.2.4XYZ").isEmpty)

Review comment:
       You can consider simplifying this a little like:
   ```
   Seq("ABC", "3X", ...).foreach { versionStr =>
     assert(VersionUtils.majorMinorPatchVersion(versionStr).isEmpty, s"version 
$versionStr")
   }
   ```

##########
File path: 
sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala
##########
@@ -68,4 +69,22 @@ class HadoopVersionInfoSuite extends SparkFunSuite {
       Utils.deleteRecursively(ivyPath)
     }
   }
+
+  test("SPARK-32212: test supportHadoopShadedClient()") {
+    assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.2"))
+    assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.3"))
+    assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.2.1"))
+    assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.2-XYZ"))
+    assert(IsolatedClientLoader.supportHadoopShadedClient("3.2.2.4-SNAPSHOT"))

Review comment:
       Maybe consider shortening like:
   ```
   Seq("3.2.2", "3.2.3", ...).foreach { versionStr =>
     assert(IsolatedClientLoader.supportHadoopShadedClient(versionStr), 
s"version $versionStr")
   }
   ```
   (and same for the negative cases)

##########
File path: core/src/main/scala/org/apache/spark/util/VersionUtils.scala
##########
@@ -63,4 +64,37 @@ private[spark] object VersionUtils {
           s" version string, but it could not find the major and minor version 
numbers.")
     }
   }
+
+  /**
+   * Retrieves the major, minor and patch parts from the input `version`. 
Returns `None` if the
+   * input is not of a valid format.

Review comment:
       I think we should mention that minor/patch versions are filled in as 0 
if they're not found. This is different from the behavior of other methods in 
this class (e.g. majorMinor will give an error if minor is not present)

##########
File path: 
sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
##########
@@ -88,31 +88,40 @@ private[hive] object IsolatedClientLoader extends Logging {
       barrierPrefixes = barrierPrefixes)
   }
 
-  def hiveVersion(version: String): HiveVersion = version match {
-    case "12" | "0.12" | "0.12.0" => hive.v12
-    case "13" | "0.13" | "0.13.0" | "0.13.1" => hive.v13
-    case "14" | "0.14" | "0.14.0" => hive.v14
-    case "1.0" | "1.0.0" | "1.0.1" => hive.v1_0
-    case "1.1" | "1.1.0" | "1.1.1" => hive.v1_1
-    case "1.2" | "1.2.0" | "1.2.1" | "1.2.2" => hive.v1_2
-    case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0
-    case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1
-    case "2.2" | "2.2.0" => hive.v2_2
-    case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" | 
"2.3.6" | "2.3.7" |
-         "2.3.8" => hive.v2_3
-    case "3.0" | "3.0.0" => hive.v3_0
-    case "3.1" | "3.1.0" | "3.1.1" | "3.1.2" => hive.v3_1
-    case version =>
+  def hiveVersion(version: String): HiveVersion = {
+    VersionUtils.majorMinorPatchVersion(version).flatMap {
+      case (12, _, _) | (0, 12, _) => Some(hive.v12)
+      case (13, _, _) | (0, 13, _) => Some(hive.v13)
+      case (14, _, _) | (0, 14, _) => Some(hive.v14)
+      case (1, 0, _) => Some(hive.v1_0)
+      case (1, 1, _) => Some(hive.v1_1)
+      case (1, 2, _) => Some(hive.v1_2)
+      case (2, 0, _) => Some(hive.v2_0)
+      case (2, 1, _) => Some(hive.v2_1)
+      case (2, 2, _) => Some(hive.v2_2)
+      case (2, 3, _) => Some(hive.v2_3)
+      case (3, 0, _) => Some(hive.v3_0)
+      case (3, 1, _) => Some(hive.v3_1)
+      case _ => None
+    }.getOrElse {
       throw new UnsupportedOperationException(s"Unsupported Hive Metastore 
version ($version). " +
         s"Please set ${HiveUtils.HIVE_METASTORE_VERSION.key} with a valid 
version.")
+    }
+  }
+
+  def supportHadoopShadedClient(hadoopVersion: String): Boolean = {

Review comment:
       Should this be `supportsHadoopShadedClient` instead of 
`supportHadoopShadedClient` ?

##########
File path: core/src/main/scala/org/apache/spark/util/VersionUtils.scala
##########
@@ -63,4 +64,37 @@ private[spark] object VersionUtils {
           s" version string, but it could not find the major and minor version 
numbers.")
     }
   }
+
+  /**
+   * Retrieves the major, minor and patch parts from the input `version`. 
Returns `None` if the
+   * input is not of a valid format.
+   *
+   * Examples of valid version:
+   *  - 1
+   *  - 2.4
+   *  - 3.2.2
+   *  - 3.2.2.4
+   *  - 3.3.1-SNAPSHOT
+   *  - 3.2.2.4SNAPSHOT (we only retrieve the first 3 components)
+   *
+   * Examples of invalid version:
+   *  - ABC
+   *  - 1X
+   *  - 2.4XYZ
+   *  - 2.4-SNAPSHOT
+   *  - 3.4.5ABC
+   *
+   *  @return A non-empty option containing a 3-value tuple (major, minor, 
patch) iff the
+   *          input is a valid version. `None` otherwise.
+   */
+  def majorMinorPatchVersion(version: String): Option[(Int, Int, Int)] = {
+    majorMinorPatchRegex.findFirstMatchIn(version) match {
+      case Some(m) =>
+        val major = m.group(1).toInt
+        val minor = Option(m.group(2)).map(_.toInt).getOrElse(0)
+        val patch = Option(m.group(3)).map(_.toInt).getOrElse(0)
+        Some((major, minor, patch))
+      case None => None

Review comment:
       Maybe a little cleaner to use `map`, the `None => None` is a little 
awkward:
   ```
       majorMinorPatchRegex.findFirstMatchIn(version).map { m =>
         val major = m.group(1).toInt
         val minor = Option(m.group(2)).map(_.toInt).getOrElse(0)
         val patch = Option(m.group(3)).map(_.toInt).getOrElse(0)
         (major, minor, patch)
       }
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to