xkrogen commented on a change in pull request #31203:
URL: https://github.com/apache/spark/pull/31203#discussion_r562903674
##########
File path:
sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
##########
@@ -88,31 +89,71 @@ private[hive] object IsolatedClientLoader extends Logging {
barrierPrefixes = barrierPrefixes)
}
- def hiveVersion(version: String): HiveVersion = version match {
- case "12" | "0.12" | "0.12.0" => hive.v12
- case "13" | "0.13" | "0.13.0" | "0.13.1" => hive.v13
- case "14" | "0.14" | "0.14.0" => hive.v14
- case "1.0" | "1.0.0" | "1.0.1" => hive.v1_0
- case "1.1" | "1.1.0" | "1.1.1" => hive.v1_1
- case "1.2" | "1.2.0" | "1.2.1" | "1.2.2" => hive.v1_2
- case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0
- case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1
- case "2.2" | "2.2.0" => hive.v2_2
- case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" |
"2.3.6" | "2.3.7" |
- "2.3.8" => hive.v2_3
- case "3.0" | "3.0.0" => hive.v3_0
- case "3.1" | "3.1.0" | "3.1.1" | "3.1.2" => hive.v3_1
- case version =>
+ def hiveVersion(version: String): HiveVersion = {
+ getVersionParts(version).flatMap {
+ case (12, _, _) | (0, 12, _) => Some(hive.v12)
+ case (13, _, _) | (0, 13, _) => Some(hive.v13)
+ case (14, _, _) | (0, 14, _) => Some(hive.v14)
+ case (1, 0, _) => Some(hive.v1_0)
+ case (1, 1, _) => Some(hive.v1_1)
+ case (1, 2, _) => Some(hive.v1_2)
+ case (2, 0, _) => Some(hive.v2_0)
+ case (2, 1, _) => Some(hive.v2_1)
+ case (2, 2, _) => Some(hive.v2_2)
+ case (2, 3, _) => Some(hive.v2_3)
+ case (3, 0, _) => Some(hive.v3_0)
+ case (3, 1, _) => Some(hive.v3_1)
+ case _ => None
+ }.getOrElse {
throw new UnsupportedOperationException(s"Unsupported Hive Metastore
version ($version). " +
s"Please set ${HiveUtils.HIVE_METASTORE_VERSION.key} with a valid
version.")
+ }
+ }
+
+ def supportHadoopShadedClient(hadoopVersion: String): Boolean = {
+ getVersionParts(hadoopVersion).exists {
+ case (3, 2, v) if v >= 2 => true
+ case _ => false
Review comment:
Maybe:
```
case (maj, _, _) if maj > 3 => true
case (3, min, _) if min > 2 => true
case (3, 2, patch) if patch >= 2 => true
```
(keeping the existing `case _ => false` fallback). It seems reasonable to
assume that future versions of Hadoop will also support the shaded client?
##########
File path:
sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
##########
@@ -88,31 +89,71 @@ private[hive] object IsolatedClientLoader extends Logging {
barrierPrefixes = barrierPrefixes)
}
- def hiveVersion(version: String): HiveVersion = version match {
- case "12" | "0.12" | "0.12.0" => hive.v12
- case "13" | "0.13" | "0.13.0" | "0.13.1" => hive.v13
- case "14" | "0.14" | "0.14.0" => hive.v14
- case "1.0" | "1.0.0" | "1.0.1" => hive.v1_0
- case "1.1" | "1.1.0" | "1.1.1" => hive.v1_1
- case "1.2" | "1.2.0" | "1.2.1" | "1.2.2" => hive.v1_2
- case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0
- case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1
- case "2.2" | "2.2.0" => hive.v2_2
- case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" |
"2.3.6" | "2.3.7" =>
- hive.v2_3
- case "3.0" | "3.0.0" => hive.v3_0
- case "3.1" | "3.1.0" | "3.1.1" | "3.1.2" => hive.v3_1
- case version =>
+ def hiveVersion(version: String): HiveVersion = {
Review comment:
This seems like a good improvement to me; that table is pretty unsightly as-is.
##########
File path:
sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
##########
@@ -88,31 +89,71 @@ private[hive] object IsolatedClientLoader extends Logging {
barrierPrefixes = barrierPrefixes)
}
- def hiveVersion(version: String): HiveVersion = version match {
- case "12" | "0.12" | "0.12.0" => hive.v12
- case "13" | "0.13" | "0.13.0" | "0.13.1" => hive.v13
- case "14" | "0.14" | "0.14.0" => hive.v14
- case "1.0" | "1.0.0" | "1.0.1" => hive.v1_0
- case "1.1" | "1.1.0" | "1.1.1" => hive.v1_1
- case "1.2" | "1.2.0" | "1.2.1" | "1.2.2" => hive.v1_2
- case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0
- case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1
- case "2.2" | "2.2.0" => hive.v2_2
- case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" |
"2.3.6" | "2.3.7" |
- "2.3.8" => hive.v2_3
- case "3.0" | "3.0.0" => hive.v3_0
- case "3.1" | "3.1.0" | "3.1.1" | "3.1.2" => hive.v3_1
- case version =>
+ def hiveVersion(version: String): HiveVersion = {
+ getVersionParts(version).flatMap {
+ case (12, _, _) | (0, 12, _) => Some(hive.v12)
+ case (13, _, _) | (0, 13, _) => Some(hive.v13)
+ case (14, _, _) | (0, 14, _) => Some(hive.v14)
+ case (1, 0, _) => Some(hive.v1_0)
+ case (1, 1, _) => Some(hive.v1_1)
+ case (1, 2, _) => Some(hive.v1_2)
+ case (2, 0, _) => Some(hive.v2_0)
+ case (2, 1, _) => Some(hive.v2_1)
+ case (2, 2, _) => Some(hive.v2_2)
+ case (2, 3, _) => Some(hive.v2_3)
+ case (3, 0, _) => Some(hive.v3_0)
+ case (3, 1, _) => Some(hive.v3_1)
+ case _ => None
+ }.getOrElse {
throw new UnsupportedOperationException(s"Unsupported Hive Metastore
version ($version). " +
s"Please set ${HiveUtils.HIVE_METASTORE_VERSION.key} with a valid
version.")
+ }
+ }
+
+ def supportHadoopShadedClient(hadoopVersion: String): Boolean = {
+ getVersionParts(hadoopVersion).exists {
+ case (3, 2, v) if v >= 2 => true
+ case _ => false
+ }
+ }
+
+ /**
+ * Retrieves the major, minor and patch parts from the input `version`.
Returns `None` if the
+ * input is not of a valid format.
+ *
+ * Examples of valid version:
+ * - 1
+ * - 2.4
+ * - 3.2.2
+ * - 3.2.2.4
+ * - 3.3.1-SNAPSHOT
+ * - 3.2.2.4SNAPSHOT (we only retrieve the first 3 components)
+ *
+ * Examples of invalid version:
+ * - ABC
+ * - 1X
+ * - 2.4XYZ
+ * - 2.4-SNAPSHOT
+ * - 3.4.5ABC
+ */
+ def getVersionParts(version: String): Option[(Int, Int, Int)] = {
+ val matcher = VERSION_PATTERN.matcher(version)
+ if (matcher.matches() && matcher.groupCount() == 3) {
+ val major = matcher.group(1).toInt
+ val minor = if (matcher.group(2) == null) 0 else matcher.group(2).toInt
Review comment:
Maybe wrap the possibly-null group in an `Option` instead of the explicit null check:
```
val minor = Option(matcher.group(2)).map(_.toInt).getOrElse(0)
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]