xkrogen commented on a change in pull request #31203: URL: https://github.com/apache/spark/pull/31203#discussion_r562903674
########## File path: sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala ########## @@ -88,31 +89,71 @@ private[hive] object IsolatedClientLoader extends Logging { barrierPrefixes = barrierPrefixes) } - def hiveVersion(version: String): HiveVersion = version match { - case "12" | "0.12" | "0.12.0" => hive.v12 - case "13" | "0.13" | "0.13.0" | "0.13.1" => hive.v13 - case "14" | "0.14" | "0.14.0" => hive.v14 - case "1.0" | "1.0.0" | "1.0.1" => hive.v1_0 - case "1.1" | "1.1.0" | "1.1.1" => hive.v1_1 - case "1.2" | "1.2.0" | "1.2.1" | "1.2.2" => hive.v1_2 - case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0 - case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1 - case "2.2" | "2.2.0" => hive.v2_2 - case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" | "2.3.6" | "2.3.7" | - "2.3.8" => hive.v2_3 - case "3.0" | "3.0.0" => hive.v3_0 - case "3.1" | "3.1.0" | "3.1.1" | "3.1.2" => hive.v3_1 - case version => + def hiveVersion(version: String): HiveVersion = { + getVersionParts(version).flatMap { + case (12, _, _) | (0, 12, _) => Some(hive.v12) + case (13, _, _) | (0, 13, _) => Some(hive.v13) + case (14, _, _) | (0, 14, _) => Some(hive.v14) + case (1, 0, _) => Some(hive.v1_0) + case (1, 1, _) => Some(hive.v1_1) + case (1, 2, _) => Some(hive.v1_2) + case (2, 0, _) => Some(hive.v2_0) + case (2, 1, _) => Some(hive.v2_1) + case (2, 2, _) => Some(hive.v2_2) + case (2, 3, _) => Some(hive.v2_3) + case (3, 0, _) => Some(hive.v3_0) + case (3, 1, _) => Some(hive.v3_1) + case _ => None + }.getOrElse { throw new UnsupportedOperationException(s"Unsupported Hive Metastore version ($version). " + s"Please set ${HiveUtils.HIVE_METASTORE_VERSION.key} with a valid version.") + } + } + + def supportHadoopShadedClient(hadoopVersion: String): Boolean = { + getVersionParts(hadoopVersion).exists { + case (3, 2, v) if v >= 2 => true + case _ => false Review comment: maybe ``` case (maj, _, _) if maj > 3 => true case (3, min, _) if min > 2 => true case (3, 2, patch) if patch >=2 => true ``` Seems like we can reasonably assume that future versions of Hadoop will support the shaded client? ########## File path: sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala ########## @@ -88,31 +89,71 @@ private[hive] object IsolatedClientLoader extends Logging { barrierPrefixes = barrierPrefixes) } - def hiveVersion(version: String): HiveVersion = version match { - case "12" | "0.12" | "0.12.0" => hive.v12 - case "13" | "0.13" | "0.13.0" | "0.13.1" => hive.v13 - case "14" | "0.14" | "0.14.0" => hive.v14 - case "1.0" | "1.0.0" | "1.0.1" => hive.v1_0 - case "1.1" | "1.1.0" | "1.1.1" => hive.v1_1 - case "1.2" | "1.2.0" | "1.2.1" | "1.2.2" => hive.v1_2 - case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0 - case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1 - case "2.2" | "2.2.0" => hive.v2_2 - case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" | "2.3.6" | "2.3.7" => - hive.v2_3 - case "3.0" | "3.0.0" => hive.v3_0 - case "3.1" | "3.1.0" | "3.1.1" | "3.1.2" => hive.v3_1 - case version => + def hiveVersion(version: String): HiveVersion = { Review comment: Seems a good improvement to me, that table is pretty unsightly as-is. ########## File path: sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala ########## @@ -88,31 +89,71 @@ private[hive] object IsolatedClientLoader extends Logging { barrierPrefixes = barrierPrefixes) } - def hiveVersion(version: String): HiveVersion = version match { - case "12" | "0.12" | "0.12.0" => hive.v12 - case "13" | "0.13" | "0.13.0" | "0.13.1" => hive.v13 - case "14" | "0.14" | "0.14.0" => hive.v14 - case "1.0" | "1.0.0" | "1.0.1" => hive.v1_0 - case "1.1" | "1.1.0" | "1.1.1" => hive.v1_1 - case "1.2" | "1.2.0" | "1.2.1" | "1.2.2" => hive.v1_2 - case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0 - case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1 - case "2.2" | "2.2.0" => hive.v2_2 - case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" | "2.3.6" | "2.3.7" | - "2.3.8" => hive.v2_3 - case "3.0" | "3.0.0" => hive.v3_0 - case "3.1" | "3.1.0" | "3.1.1" | "3.1.2" => hive.v3_1 - case version => + def hiveVersion(version: String): HiveVersion = { + getVersionParts(version).flatMap { + case (12, _, _) | (0, 12, _) => Some(hive.v12) + case (13, _, _) | (0, 13, _) => Some(hive.v13) + case (14, _, _) | (0, 14, _) => Some(hive.v14) + case (1, 0, _) => Some(hive.v1_0) + case (1, 1, _) => Some(hive.v1_1) + case (1, 2, _) => Some(hive.v1_2) + case (2, 0, _) => Some(hive.v2_0) + case (2, 1, _) => Some(hive.v2_1) + case (2, 2, _) => Some(hive.v2_2) + case (2, 3, _) => Some(hive.v2_3) + case (3, 0, _) => Some(hive.v3_0) + case (3, 1, _) => Some(hive.v3_1) + case _ => None + }.getOrElse { throw new UnsupportedOperationException(s"Unsupported Hive Metastore version ($version). " + s"Please set ${HiveUtils.HIVE_METASTORE_VERSION.key} with a valid version.") + } + } + + def supportHadoopShadedClient(hadoopVersion: String): Boolean = { + getVersionParts(hadoopVersion).exists { + case (3, 2, v) if v >= 2 => true + case _ => false + } + } + + /** + * Retrieves the major, minor and patch parts from the input `version`. Returns `None` if the + * input is not of a valid format. + * + * Examples of valid version: + * - 1 + * - 2.4 + * - 3.2.2 + * - 3.2.2.4 + * - 3.3.1-SNAPSHOT + * - 3.2.2.4SNAPSHOT (we only retrieve the first 3 components) + * + * Examples of invalid version: + * - ABC + * - 1X + * - 2.4XYZ + * - 2.4-SNAPSHOT + * - 3.4.5ABC + */ + def getVersionParts(version: String): Option[(Int, Int, Int)] = { + val matcher = VERSION_PATTERN.matcher(version) + if (matcher.matches() && matcher.groupCount() == 3) { + val major = matcher.group(1).toInt + val minor = if (matcher.group(2) == null) 0 else matcher.group(2).toInt Review comment: Maybe: ``` val minor = Option(matcher.group(2)).map(_.toInt).getOrElse(0) ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org