attilapiros commented on a change in pull request #33550:
URL: https://github.com/apache/spark/pull/33550#discussion_r688908463
##########
File path:
resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala
##########
@@ -234,13 +234,25 @@ private[spark] object KubernetesConf {
def getKubernetesAppId(): String =
s"spark-${UUID.randomUUID().toString.replaceAll("-", "")}"
- def getResourceNamePrefix(appName: String): String = {
- val id = KubernetesUtils.uniqueID()
- s"$appName-$id"
- .trim
- .toLowerCase(Locale.ROOT)
- .replaceAll("[^a-z0-9\\-]", "-")
- .replaceAll("-+", "-")
+ def getResourceNamePrefix(
+ appName: String,
+ maxLength: Int = Int.MaxValue,
+ overLengthFunc: String => Unit = _ => {}): String = {
Review comment:
I think it would be cleaner just to add an `extends Logging` to `object
KubernetesConf` and pass an `Option[String]` instead of a function, calling
`logWarning` here when the option is defined.
##########
File path:
resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala
##########
@@ -104,11 +105,14 @@ private[spark] class BasicExecutorFeatureStep(
// hostname must be no longer than `KUBERNETES_DNSNAME_MAX_LENGTH`(63)
characters,
// so take the last 63 characters of the pod name as the hostname.
// This preserves uniqueness since the end of name contains executorId
- val hostname = name.substring(Math.max(0, name.length -
KUBERNETES_DNSNAME_MAX_LENGTH))
- // Remove non-word characters from the start of the hostname
- .replaceAll("^[^\\w]+", "")
- // Replace dangerous characters in the remaining string with a safe
alternative.
- .replaceAll("[^\\w-]+", "_")
+ val hostname = if (executorPodNamePrefix.length >
KUBERNETES_POD_NAME_PREFIX_MAX_LENGTH) {
+ val mamePrefix = KubernetesConf.getResourceNamePrefix(
Review comment:
Calling `getResourceNamePrefix` again worries me, as this second call
will invoke `uniqueID` again and the two IDs will be different. This makes it
hard to match files produced/used by one run. Could you please check the
resource names too from this point of view, before and after this PR? I mean
where the prefix is used, like
```
private def dtSecretName: String =
s"${kubernetesConf.resourceNamePrefix}-delegation-tokens"
private def ktSecretName: String =
s"${kubernetesConf.resourceNamePrefix}-kerberos-keytab"
private def hasKerberosConf: Boolean = krb5CMap.isDefined |
krb5File.isDefined
private def newConfigMapName: String =
s"${kubernetesConf.resourceNamePrefix}-krb5-file"
```
##########
File path:
resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala
##########
@@ -107,6 +107,24 @@ private[spark] trait BasicTestsSuite { k8sSuite:
KubernetesSuite =>
runSparkRemoteCheckAndVerifyCompletion(appArgs =
Array(REMOTE_PAGE_RANK_FILE_NAME))
}
}
+
+ test("SPARK-36321: Do not fail application in kubernetes if name is too
long", k8sTestTag) {
+ def executorPodCheck(executorPod: Pod): Unit = {
+ assert(executorPod.getMetadata.getName.startsWith("abbbbb"))
Review comment:
please go up to "-" in this `startsWith`, then cut out the part of
`executorPod.getMetadata.getName` where the prefix and the unique ID are,
and in the second assert use that string in the `startsWith`
##########
File path:
resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala
##########
@@ -70,8 +70,16 @@ private[spark] class KubernetesClusterManager extends
ExternalClusterManager wit
// If/when feature steps are executed in client mode, they should instead
take care of this,
// and this code should be removed.
if (!sc.conf.contains(KUBERNETES_EXECUTOR_POD_NAME_PREFIX)) {
- sc.conf.set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX,
- KubernetesConf.getResourceNamePrefix(sc.conf.get("spark.app.name")))
+ def warnFunc(shortPrefix: String): Unit = {
+ logWarning(s"Use $shortPrefix as the executor pod's name prefix due to
" +
+ s"spark.app.name is too long. Please set
'${KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key}' " +
+ s"if you need a custom executor pod's name prefix.")
+ }
+ val appName = sc.conf.get("spark.app.name")
+ val podNamePrefix = KubernetesConf.getResourceNamePrefix(
+ appName, KUBERNETES_POD_NAME_PREFIX_MAX_LENGTH, warnFunc)
+ assert(KubernetesUtils.isValidExecutorPodNamePrefix(podNamePrefix))
Review comment:
I do not think this assert is needed here when `getResourceNamePrefix`
is well tested.
##########
File path:
resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala
##########
@@ -198,4 +198,13 @@ class KubernetesConfSuite extends SparkFunSuite {
assert(driverConf.nodeSelector === CUSTOM_NODE_SELECTOR)
assert(driverConf.driverNodeSelector === CUSTOM_DRIVER_NODE_SELECTOR)
}
+
+ test("resourceName prefix") {
+ val name = "a" + "b" * 100 + "c"
+ assert(KubernetesConf.getResourceNamePrefix(name).startsWith(name + "-"))
+
+ val expected = "a" + "b" * (KUBERNETES_POD_NAME_PREFIX_MAX_LENGTH - 1 - 1
- 16)
+ assert(KubernetesConf.getResourceNamePrefix(name,
KUBERNETES_POD_NAME_PREFIX_MAX_LENGTH)
+ .startsWith(expected + "-"))
Review comment:
What about extending this test with cases that cover the bounds,
i.e. when `getResourceNamePrefix` is called with a string whose length
- is one character below `KUBERNETES_POD_NAME_PREFIX_MAX_LENGTH`
- is the same as `KUBERNETES_POD_NAME_PREFIX_MAX_LENGTH`
- is one character longer
##########
File path:
resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala
##########
@@ -104,11 +105,14 @@ private[spark] class BasicExecutorFeatureStep(
// hostname must be no longer than `KUBERNETES_DNSNAME_MAX_LENGTH`(63)
characters,
// so take the last 63 characters of the pod name as the hostname.
// This preserves uniqueness since the end of name contains executorId
- val hostname = name.substring(Math.max(0, name.length -
KUBERNETES_DNSNAME_MAX_LENGTH))
- // Remove non-word characters from the start of the hostname
- .replaceAll("^[^\\w]+", "")
- // Replace dangerous characters in the remaining string with a safe
alternative.
- .replaceAll("[^\\w-]+", "_")
+ val hostname = if (executorPodNamePrefix.length >
KUBERNETES_POD_NAME_PREFIX_MAX_LENGTH) {
+ val mamePrefix = KubernetesConf.getResourceNamePrefix(
+ kubernetesConf.sparkConf.get("spark.app.name"),
KUBERNETES_POD_NAME_PREFIX_MAX_LENGTH)
+ .replaceAll("[^\\w-]+", "_")
Review comment:
Is this `replaceAll` really needed?
`getResourceNamePrefix` has already replaced the non-word and non-"-" chars
with "-":
https://github.com/apache/spark/blob/5beb51810dfec69964e570d90d0634e5a8e0499d/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala#L246
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]