This is an automated email from the ASF dual-hosted git repository.
fchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-celeborn.git
The following commit(s) were added to refs/heads/main by this push:
new 22f523537 [CELEBORN-1002] Add SBT MRClientProject
22f523537 is described below
commit 22f523537ec56f45a5e007938a85c841bb756ed3
Author: sychen <[email protected]>
AuthorDate: Sun Oct 8 10:03:21 2023 +0800
[CELEBORN-1002] Add SBT MRClientProject
### What changes were proposed in this pull request?
### Why are the changes needed?
```bash
./build/make-distribution.sh --sbt-enabled -Pmr
```
```bash
./build/make-distribution.sh --sbt-enabled --release
```
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
Closes #1930 from cxzl25/CELEBORN-1002.
Authored-by: sychen <[email protected]>
Signed-off-by: Fu Chen <[email protected]>
---
build/make-distribution.sh | 1 +
client-mr/mr-shaded/pom.xml | 1 +
project/CelebornBuild.scala | 99 ++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 100 insertions(+), 1 deletion(-)
diff --git a/build/make-distribution.sh b/build/make-distribution.sh
index 269808344..727600912 100755
--- a/build/make-distribution.sh
+++ b/build/make-distribution.sh
@@ -309,6 +309,7 @@ if [ "$SBT_ENABLED" == "true" ]; then
sbt_build_client -Pflink-1.14
sbt_build_client -Pflink-1.15
sbt_build_client -Pflink-1.17
+ sbt_build_client -Pmr
else
echo "build client with $@"
ENGINE_COUNT=0
diff --git a/client-mr/mr-shaded/pom.xml b/client-mr/mr-shaded/pom.xml
index 0ce68acc8..9ecb39b9f 100644
--- a/client-mr/mr-shaded/pom.xml
+++ b/client-mr/mr-shaded/pom.xml
@@ -81,6 +81,7 @@
<include>org.apache.commons:commons-lang3</include>
<include>org.scala-lang:scala-library</include>
<include>org.lz4:lz4-java</include>
+ <include>com.github.luben:zstd-jni</include>
<include>org.roaringbitmap:RoaringBitmap</include>
</includes>
</artifactSet>
diff --git a/project/CelebornBuild.scala b/project/CelebornBuild.scala
index 31d3c836e..e7600c2ed 100644
--- a/project/CelebornBuild.scala
+++ b/project/CelebornBuild.scala
@@ -71,6 +71,8 @@ object Dependencies {
val guava = "com.google.guava" % "guava" % guavaVersion
val hadoopClientApi = "org.apache.hadoop" % "hadoop-client-api" %
hadoopVersion
val hadoopClientRuntime = "org.apache.hadoop" % "hadoop-client-runtime" %
hadoopVersion
+ val hadoopMapreduceClientApp = "org.apache.hadoop" %
"hadoop-mapreduce-client-app" % hadoopVersion excludeAll (
+ ExclusionRule("com.google.guava", "guava"))
val ioDropwizardMetricsCore = "io.dropwizard.metrics" % "metrics-core" %
metricsVersion
val ioDropwizardMetricsGraphite = "io.dropwizard.metrics" %
"metrics-graphite" % metricsVersion
val ioDropwizardMetricsJvm = "io.dropwizard.metrics" % "metrics-jvm" %
metricsVersion
@@ -212,7 +214,7 @@ object CelebornBuild extends sbt.internal.BuildDef {
CelebornClient.client,
CelebornService.service,
CelebornWorker.worker,
- CelebornMaster.master) ++ maybeSparkClientModules ++
maybeFlinkClientModules
+ CelebornMaster.master) ++ maybeSparkClientModules ++
maybeFlinkClientModules ++ maybeMRClientModules
}
// ThisBuild / parallelExecution := false
@@ -267,6 +269,15 @@ object Utils {
lazy val maybeFlinkClientModules: Seq[Project] =
flinkClientProjects.map(_.modules).getOrElse(Seq.empty)
+ val MR_VERSION = profiles.filter(_.startsWith("mr")).headOption
+
+ lazy val mrClientProjects = MR_VERSION match {
+ case Some("mr") => Some(MRClientProjects)
+ case _ => None
+ }
+
+ lazy val maybeMRClientModules: Seq[Project] =
mrClientProjects.map(_.modules).getOrElse(Seq.empty)
+
def defaultScalaVersion(): String = {
// 1. Inherit the scala version of the spark project
// 2. if the spark profile not specified, using the DEFAULT_SCALA_VERSION
@@ -872,3 +883,89 @@ trait FlinkClientProjects {
)
}
}
+
+////////////////////////////////////////////////////////
+// MR Client //
+////////////////////////////////////////////////////////
+object MRClientProjects {
+
+ def mrClient: Project = {
+ Project("celeborn-client-mr", file("client-mr/mr"))
+ .dependsOn(CelebornCommon.common, CelebornClient.client)
+ .settings(
+ commonSettings,
+ libraryDependencies ++= Seq(
+ Dependencies.hadoopClientApi,
+ Dependencies.hadoopClientRuntime,
+ Dependencies.hadoopMapreduceClientApp
+ ) ++ commonUnitTestDependencies
+ )
+ }
+
+ def mrClientShade: Project = {
+ Project("celeborn-client-mr-shaded", file("client-mr/mr-shaded"))
+ .dependsOn(mrClient)
+ .settings(
+ commonSettings,
+
+ // align final shaded jar name with maven.
+ (assembly / assemblyJarName) := {
+ val extension = artifact.value.extension
+
s"${moduleName.value}_${scalaBinaryVersion.value}-${version.value}.$extension"
+ },
+
+ (assembly / test) := {},
+
+ (assembly / logLevel) := Level.Info,
+
+ // include `scala-library` in the assembly.
+ (assembly / assemblyPackageScala / assembleArtifact) := true,
+
+ (assembly / assemblyExcludedJars) := {
+ val cp = (assembly / fullClasspath).value
+ cp filter { v =>
+ val name = v.data.getName
+ !(name.startsWith("celeborn-") ||
+ name.startsWith("protobuf-java-") ||
+ name.startsWith("guava-") ||
+ name.startsWith("netty-") ||
+ name.startsWith("commons-lang3-") ||
+ name.startsWith("RoaringBitmap-") ||
+ name.startsWith("lz4-java-") ||
+ name.startsWith("zstd-jni-") ||
+ name.startsWith("scala-library-"))
+ }
+ },
+
+ (assembly / assemblyShadeRules) := Seq(
+ ShadeRule.rename("com.google.protobuf.**" ->
"org.apache.celeborn.shaded.com.google.protobuf.@1").inAll,
+ ShadeRule.rename("com.google.common.**" ->
"org.apache.celeborn.shaded.com.google.common.@1").inAll,
+ ShadeRule.rename("io.netty.**" ->
"org.apache.celeborn.shaded.io.netty.@1").inAll,
+ ShadeRule.rename("org.apache.commons.**" ->
"org.apache.celeborn.shaded.org.apache.commons.@1").inAll,
+ ShadeRule.rename("org.roaringbitmap.**" ->
"org.apache.celeborn.shaded.org.roaringbitmap.@1").inAll
+ ),
+
+ (assembly / assemblyMergeStrategy) := {
+ case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf") =>
MergeStrategy.discard
+ case m if m.startsWith("META-INF/license/") => MergeStrategy.discard
+ case m if m == "META-INF/LICENSE.txt" => MergeStrategy.discard
+ case m if m == "META-INF/NOTICE.txt" => MergeStrategy.discard
+ case m if m == "LICENSE.txt" => MergeStrategy.discard
+ case m if m == "NOTICE.txt" => MergeStrategy.discard
+ // Drop all proto files that are not needed as artifacts of the build.
+ case m if m.toLowerCase(Locale.ROOT).endsWith(".proto") =>
MergeStrategy.discard
+ case m if
m.toLowerCase(Locale.ROOT).startsWith("meta-inf/native-image") =>
MergeStrategy.discard
+ // Drop netty jnilib
+ case m if m.toLowerCase(Locale.ROOT).endsWith(".jnilib") =>
MergeStrategy.discard
+ // rename netty native lib
+ case "META-INF/native/libnetty_transport_native_epoll_x86_64.so" =>
CustomMergeStrategy.rename(_ =>
"META-INF/native/liborg_apache_celeborn_shaded_netty_transport_native_epoll_x86_64.so")
+ case "META-INF/native/libnetty_transport_native_epoll_aarch_64.so"
=> CustomMergeStrategy.rename(_ =>
"META-INF/native/liborg_apache_celeborn_shaded_netty_transport_native_epoll_aarch_64.so")
+ case _ => MergeStrategy.first
+ }
+ )
+ }
+
+ def modules: Seq[Project] = {
+ Seq(mrClient, mrClientShade)
+ }
+}