Github user skonto commented on a diff in the pull request: https://github.com/apache/spark/pull/11157#discussion_r60299848 --- Diff: core/src/main/scala/org/apache/spark/util/Utils.scala --- @@ -1978,57 +1978,134 @@ private[spark] object Utils extends Logging { } /** - * Attempt to start a service on the given port, or fail after a number of attempts. - * Each subsequent attempt uses 1 + the port used in the previous attempt (unless the port is 0). - * - * @param startPort The initial port to start the service on. - * @param startService Function to start service on a given port. - * This is expected to throw java.net.BindException on port collision. - * @param conf A SparkConf used to get the maximum number of retries when binding to a port. - * @param serviceName Name of the service. - * @return (service: T, port: Int) - */ - def startServiceOnPort[T]( - startPort: Int, + * Attempt to start a service on the given port, or fail after a number of attempts. + * Each subsequent attempt uses 1 + the port used in the previous attempt (unless the port is 0). + * It takes into consideration port restrictions through the env var AVAILABLE_PORTS + * + * @param startPort The initial port to start the service on. + * @param startService Function to start service on a given port. + * This is expected to throw java.net.BindException on port collision. + * @param conf A SparkConf used to get the maximum number of retries when binding to a port. + * @param serviceName Name of the service. + * @return (service: T, port: Int) + */ + def startServiceOnPort[T](startPort: Int, startService: Int => (T, Int), conf: SparkConf, serviceName: String = ""): (T, Int) = { + val serviceString = if (serviceName.isEmpty) "" else s" '$serviceName'" + + // define some helpers, they all share common data, maybe a service abstract class + // for all services could be a good fit here. 
+ + def portRangeToList(ranges: String): List[(Long, Long)] = { + if (ranges == "") { + return List() + } + ranges.split(" ").map { r => val ret = r.substring(1, r.length - 1).split(",") + (ret(0).toLong, ret(1).toLong) + }.toList + } + + def startOnce(tryPort: Int): (Option[T], Int) = { + val serviceString = if (serviceName.isEmpty) "" else s" '$serviceName'" + try { + val (service, port) = startService(tryPort) + logInfo(s"Successfully started service$serviceString on port $port.") + (Some(service), port) + } catch { + case e: Exception if isBindCollision(e) => logWarning(s"Service$serviceString " + + s"could not bind on port $tryPort. ") + (None, -1) + } + } + + def retryPort(next: Int => Int, maxRetries: Int): (T, Int) = { + + for (offset <- 0 until maxRetries) { + val tryPort = next(offset) + try { + val (service, port) = startService(tryPort) + logInfo(s"Successfully started service$serviceString on port $port.") + return (service, port) + } catch { + case e: Exception if isBindCollision(e) => + if (offset >= maxRetries) { + val exceptionMessage = + s"${e.getMessage}: Service$serviceString failed after $maxRetries retries!" + val exception = new BindException(exceptionMessage) + // restore original stack trace + exception.setStackTrace(e.getStackTrace) + throw exception + } + logWarning(s"Service$serviceString could not bind on port $tryPort.") + } + } + // Should never happen + throw new SparkException(s"Failed to start service$serviceString on port $startPort") + } + + def startFromAvailable(rand: Boolean = false): (T, Int) = { + val ports = portRangeToList(sys.env.get("AVAILABLE_RAND_PORTS").get) --- End diff -- It is called when AVAILABLE_RAND_PORTS is set. So if spark.executor.port or spark.blockManager.port is set to a specific value, then that value will be used, so there is no collision. Again, if they are not set, startFromAvailable(true) -> retryPort(tryPort, maxRetries) will be called. The former shuffles the ports and passes them down to the latter. 
Also, retryPort uses the old logic, where each port is tried in turn until a free one is found. Another approach is to explicitly preassign spark.executor.port and spark.blockManager.port with some of the offered values and carry those values in AVAILABLE_RAND_PORTS. I think this env variable is a clean way to do it without changing much of the code base.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org