svn commit: r29928 - in /dev/spark/3.0.0-SNAPSHOT-2018_10_07_12_02-ebd899b-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s

2018-10-07 Thread pwendell
Author: pwendell
Date: Sun Oct  7 19:17:10 2018
New Revision: 29928

Log:
Apache Spark 3.0.0-SNAPSHOT-2018_10_07_12_02-ebd899b docs


[This commit notification would consist of 1481 parts, 
which exceeds the limit of 50, so it was shortened to this summary.]

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-25321][ML] Revert SPARK-14681 to avoid API breaking change

2018-10-07 Thread dongjoon
Repository: spark
Updated Branches:
  refs/heads/master 669ade3a8 -> ebd899b8a


[SPARK-25321][ML] Revert SPARK-14681 to avoid API breaking change

## What changes were proposed in this pull request?

This is the same as #22492 but for the master branch: revert SPARK-14681 to 
avoid API-breaking changes.

cc: WeichenXu123

## How was this patch tested?

Existing unit tests.

Closes #22618 from mengxr/SPARK-25321.master.

Authored-by: WeichenXu 
Signed-off-by: Dongjoon Hyun 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ebd899b8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ebd899b8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ebd899b8

Branch: refs/heads/master
Commit: ebd899b8a865395e6f1137163cb508086696879b
Parents: 669ade3
Author: WeichenXu 
Authored: Sun Oct 7 10:06:44 2018 -0700
Committer: Dongjoon Hyun 
Committed: Sun Oct 7 10:06:44 2018 -0700

--
 .../classification/DecisionTreeClassifier.scala |  14 +-
 .../spark/ml/classification/GBTClassifier.scala |   6 +-
 .../classification/RandomForestClassifier.scala |   6 +-
 .../ml/regression/DecisionTreeRegressor.scala   |  13 +-
 .../spark/ml/regression/GBTRegressor.scala  |   6 +-
 .../ml/regression/RandomForestRegressor.scala   |   6 +-
 .../scala/org/apache/spark/ml/tree/Node.scala   | 247 ---
 .../spark/ml/tree/impl/RandomForest.scala   |  10 +-
 .../org/apache/spark/ml/tree/treeModels.scala   |  36 +--
 .../DecisionTreeClassifierSuite.scala   |  31 +--
 .../ml/classification/GBTClassifierSuite.scala  |   4 +-
 .../RandomForestClassifierSuite.scala   |   5 +-
 .../regression/DecisionTreeRegressorSuite.scala |  14 --
 .../spark/ml/tree/impl/RandomForestSuite.scala  |  22 +-
 .../apache/spark/ml/tree/impl/TreeTests.scala   |  12 +-
 project/MimaExcludes.scala  |   7 -
 16 files changed, 107 insertions(+), 332 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ebd899b8/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
index 8a57bfc..6648e78 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
@@ -168,7 +168,7 @@ object DecisionTreeClassifier extends DefaultParamsReadable[DecisionTreeClassifi
 @Since("1.4.0")
 class DecisionTreeClassificationModel private[ml] (
     @Since("1.4.0")override val uid: String,
-    @Since("1.4.0")override val rootNode: ClassificationNode,
+    @Since("1.4.0")override val rootNode: Node,
     @Since("1.6.0")override val numFeatures: Int,
     @Since("1.5.0")override val numClasses: Int)
   extends ProbabilisticClassificationModel[Vector, DecisionTreeClassificationModel]
@@ -181,7 +181,7 @@ class DecisionTreeClassificationModel private[ml] (
    * Construct a decision tree classification model.
    * @param rootNode  Root node of tree, with other nodes attached.
    */
-  private[ml] def this(rootNode: ClassificationNode, numFeatures: Int, numClasses: Int) =
+  private[ml] def this(rootNode: Node, numFeatures: Int, numClasses: Int) =
     this(Identifiable.randomUID("dtc"), rootNode, numFeatures, numClasses)
 
   override def predict(features: Vector): Double = {
@@ -279,9 +279,8 @@ object DecisionTreeClassificationModel extends MLReadable[DecisionTreeClassifica
       val metadata = DefaultParamsReader.loadMetadata(path, sc, className)
       val numFeatures = (metadata.metadata \ "numFeatures").extract[Int]
       val numClasses = (metadata.metadata \ "numClasses").extract[Int]
-      val root = loadTreeNodes(path, metadata, sparkSession, isClassification = true)
-      val model = new DecisionTreeClassificationModel(metadata.uid,
-        root.asInstanceOf[ClassificationNode], numFeatures, numClasses)
+      val root = loadTreeNodes(path, metadata, sparkSession)
+      val model = new DecisionTreeClassificationModel(metadata.uid, root, numFeatures, numClasses)
       metadata.getAndSetParams(model)
       model
     }
@@ -296,10 +295,9 @@ object DecisionTreeClassificationModel extends MLReadable[DecisionTreeClassifica
     require(oldModel.algo == OldAlgo.Classification,
       s"Cannot convert non-classification DecisionTreeModel (old API) to" +
         s" DecisionTreeClassificationModel (new API).  Algo is: ${oldModel.algo}")
-    val rootNode = Node.fromOld(oldModel.topNode, categoricalFeatures, isClassification = true)
+    val rootNode = Node.fromOld(oldModel.topNode, 

spark git commit: [SPARK-25657][SQL][TEST] Refactor HashBenchmark to use main method

2018-10-07 Thread dongjoon
Repository: spark
Updated Branches:
  refs/heads/master b1328cc58 -> 669ade3a8


[SPARK-25657][SQL][TEST] Refactor HashBenchmark to use main method

## What changes were proposed in this pull request?

Refactor `HashBenchmark` to use a main method.
1. Use `spark-submit`:
```console
bin/spark-submit --class org.apache.spark.sql.HashBenchmark --jars ./core/target/spark-core_2.11-3.0.0-SNAPSHOT-tests.jar ./sql/catalyst/target/spark-catalyst_2.11-3.0.0-SNAPSHOT-tests.jar
```

2. Generate the benchmark results:
```console
SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "catalyst/test:runMain org.apache.spark.sql.HashBenchmark"
```

## How was this patch tested?
manual tests

Closes #22651 from wangyum/SPARK-25657.

Lead-authored-by: Yuming Wang 
Co-authored-by: Yuming Wang 
Co-authored-by: Dongjoon Hyun 
Signed-off-by: Dongjoon Hyun 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/669ade3a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/669ade3a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/669ade3a

Branch: refs/heads/master
Commit: 669ade3a8eed0016b5ece57d776cea0616417088
Parents: b1328cc
Author: Yuming Wang 
Authored: Sun Oct 7 09:49:37 2018 -0700
Committer: Dongjoon Hyun 
Committed: Sun Oct 7 09:49:37 2018 -0700

--
 .../benchmarks/HashBenchmark-results.txt|  70 +
 .../org/apache/spark/sql/HashBenchmark.scala| 152 +++
 2 files changed, 129 insertions(+), 93 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/669ade3a/sql/catalyst/benchmarks/HashBenchmark-results.txt
--
diff --git a/sql/catalyst/benchmarks/HashBenchmark-results.txt b/sql/catalyst/benchmarks/HashBenchmark-results.txt
new file mode 100644
index 000..2459b35
--- /dev/null
+++ b/sql/catalyst/benchmarks/HashBenchmark-results.txt
@@ -0,0 +1,70 @@
+================================================================================
+single ints
+================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash For single ints:                    Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+interpreted version                           5615 / 5616         95.6          10.5       1.0X
+codegen version                               8400 / 8407         63.9          15.6       0.7X
+codegen version 64-bit                        8139 / 8145         66.0          15.2       0.7X
+codegen HiveHash version                      7213 / 7348         74.4          13.4       0.8X
+
+
+================================================================================
+single longs
+================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash For single longs:                   Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+interpreted version                           6053 / 6054         88.7          11.3       1.0X
+codegen version                               9367 / 9369         57.3          17.4       0.6X
+codegen version 64-bit                        8041 / 8051         66.8          15.0       0.8X
+codegen HiveHash version                      7546 / 7575         71.1          14.1       0.8X
+
+
+================================================================================
+normal
+================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash For normal:                         Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+interpreted version                           3181 / 3182          0.7        1517.0       1.0X
+codegen version                               2403 / 2403          0.9        1145.7       1.3X
+codegen version 64-bit                         915 /  916          2.3         436.2       3.5X
+codegen HiveHash version                      4505 / 4527          0.5        2148.3       0.7X
+
+
+================================================================================
+array

spark git commit: [SPARK-25658][SQL][TEST] Refactor HashByteArrayBenchmark to use main method

2018-10-07 Thread dongjoon
Repository: spark
Updated Branches:
  refs/heads/master 3eb842969 -> b1328cc58


[SPARK-25658][SQL][TEST] Refactor HashByteArrayBenchmark to use main method

## What changes were proposed in this pull request?

Refactor `HashByteArrayBenchmark` to use a main method.
1. Use `spark-submit`:
```console
bin/spark-submit --class org.apache.spark.sql.HashByteArrayBenchmark --jars ./core/target/spark-core_2.11-3.0.0-SNAPSHOT-tests.jar ./sql/catalyst/target/spark-catalyst_2.11-3.0.0-SNAPSHOT-tests.jar
```

2. Generate the benchmark results:
```console
SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "catalyst/test:runMain org.apache.spark.sql.HashByteArrayBenchmark"
```

## How was this patch tested?

manual tests

Closes #22652 from wangyum/SPARK-25658.

Lead-authored-by: Yuming Wang 
Co-authored-by: Yuming Wang 
Co-authored-by: Dongjoon Hyun 
Signed-off-by: Dongjoon Hyun 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b1328cc5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b1328cc5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b1328cc5

Branch: refs/heads/master
Commit: b1328cc58ebb73bc191de5546735cffe0c68255e
Parents: 3eb8429
Author: Yuming Wang 
Authored: Sun Oct 7 09:44:01 2018 -0700
Committer: Dongjoon Hyun 
Committed: Sun Oct 7 09:44:01 2018 -0700

--
 .../HashByteArrayBenchmark-results.txt  |  77 
 .../spark/sql/HashByteArrayBenchmark.scala  | 120 ---
 2 files changed, 102 insertions(+), 95 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b1328cc5/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt
--
diff --git a/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt b/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt
new file mode 100644
index 000..a4304ee
--- /dev/null
+++ b/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt
@@ -0,0 +1,77 @@
+================================================================================
+Benchmark for MurMurHash 3 and xxHash64
+================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash byte arrays with length 8:          Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Murmur3_x86_32                                  16 /   16        127.7           7.8       1.0X
+xxHash 64-bit                                   23 /   23         90.7          11.0       0.7X
+HiveHasher                                      16 /   16        134.8           7.4       1.1X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash byte arrays with length 16:         Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Murmur3_x86_32                                  26 /   26         79.5          12.6       1.0X
+xxHash 64-bit                                   26 /   27         79.3          12.6       1.0X
+HiveHasher                                      30 /   30         70.1          14.3       0.9X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash byte arrays with length 24:         Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Murmur3_x86_32                                  36 /   36         58.1          17.2       1.0X
+xxHash 64-bit                                   30 /   30         70.2          14.2       1.2X
+HiveHasher                                      45 /   45         46.4          21.5       0.8X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash byte arrays with length 31:         Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Murmur3_x86_32                                  50 /   50         41.8          23.9       1.0X
+xxHash 64-bit                                   43 /   43         49.3          20.3       1.2X
+HiveHasher                                      58 /   58         35.9          27.8       0.9X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash byte arrays with length 95: 

spark git commit: [SPARK-25461][PYSPARK][SQL] Add document for mismatch between return type of Pandas.Series and return type of pandas udf

2018-10-07 Thread gurwls223
Repository: spark
Updated Branches:
  refs/heads/master fba722e31 -> 3eb842969


[SPARK-25461][PYSPARK][SQL] Add document for mismatch between return type of Pandas.Series and return type of pandas udf

## What changes were proposed in this pull request?

For Pandas UDFs, we derive the Arrow type from the UDF's declared Catalyst return 
data type and use that Arrow type to serialize the data. If the declared return 
data type doesn't match the actual type of the `pandas.Series` returned by the 
UDF, there is a risk of returning incorrect data from the Python side.

Currently we don't have a reliable way to check whether the data conversion is 
safe, so for now we document this caveat for users. Once a PyArrow upgrade that 
can perform this check is available, we should add an option to check it.
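
A minimal, hypothetical PySpark sketch of the failure mode this documents (the UDF name and values are illustrative, and the exact behavior depends on the installed PyArrow version):

```python
from pyspark.sql import SparkSession
from pyspark.sql.functions import pandas_udf

spark = SparkSession.builder.master("local[2]").getOrCreate()

# The declared return type is long, but the function returns a float
# pandas.Series. Arrow serializes with the declared type, so the
# fractional part may be silently dropped instead of raising an error
# (the exact behavior depends on the PyArrow version).
@pandas_udf("long")
def plus_half(v):
    return v + 0.5  # float64 Series, mismatching the declared type

df = spark.range(3)
df.select(plus_half(df["id"]).alias("v")).show()
# May print 0, 1, 2 instead of the intended 0.5, 1.5, 2.5.
```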

## How was this patch tested?

Documentation change only.

Closes #22610 from viirya/SPARK-25461.

Authored-by: Liang-Chi Hsieh 
Signed-off-by: hyukjinkwon 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3eb84296
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3eb84296
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3eb84296

Branch: refs/heads/master
Commit: 3eb842969906d6e81a137af6dc4339881df0a315
Parents: fba722e
Author: Liang-Chi Hsieh 
Authored: Sun Oct 7 23:18:46 2018 +0800
Committer: hyukjinkwon 
Committed: Sun Oct 7 23:18:46 2018 +0800

--
 python/pyspark/sql/functions.py | 6 ++
 1 file changed, 6 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3eb84296/python/pyspark/sql/functions.py
--
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 7685264..be089ee 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2948,6 +2948,12 @@ def pandas_udf(f=None, returnType=None, functionType=None):
     can fail on special rows, the workaround is to incorporate the condition into the functions.
 
     .. note:: The user-defined functions do not take keyword arguments on the calling side.
+
+    .. note:: The data type of returned `pandas.Series` from the user-defined functions should be
+        matched with defined returnType (see :meth:`types.to_arrow_type` and
+        :meth:`types.from_arrow_type`). When there is mismatch between them, Spark might do
+        conversion on returned data. The conversion is not guaranteed to be correct and results
+        should be checked for accuracy by users.
     """
     # decorator @pandas_udf(returnType, functionType)
     is_decorator = f is None or isinstance(f, (str, DataType))


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



svn commit: r29927 - in /dev/spark/3.0.0-SNAPSHOT-2018_10_07_08_02-fba722e-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s

2018-10-07 Thread pwendell
Author: pwendell
Date: Sun Oct  7 15:17:22 2018
New Revision: 29927

Log:
Apache Spark 3.0.0-SNAPSHOT-2018_10_07_08_02-fba722e docs


[This commit notification would consist of 1485 parts, 
which exceeds the limit of 50, so it was shortened to this summary.]

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-25539][BUILD] Upgrade lz4-java to 1.5.0 to get speed improvement

2018-10-07 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 8bb242902 -> fba722e31


[SPARK-25539][BUILD] Upgrade lz4-java to 1.5.0 to get speed improvement

## What changes were proposed in this pull request?

This PR upgrades `lz4-java` to 1.5.0 to get a speed improvement.

**General speed improvements**

LZ4 decompression speed has always been a strong point. In v1.8.2, this gets 
even better, as it improves decompression speed by about 10%, thanks in large 
part to a suggestion from svpv.

For example, on a Mac OS X laptop with an Intel Core i7-5557U CPU @ 3.10GHz, 
running `lz4 -b silesia.tar` compiled with the default compiler, llvm v9.1.0:

Version | v1.8.1 | v1.8.2 | Improvement
-- | -- | -- | --
Decompression speed | 2490 MB/s | 2770 MB/s | +11%

Compression speeds also receive a welcome boost, though the improvement is not 
evenly distributed, with higher levels benefiting quite a lot more.

Version | v1.8.1 | v1.8.2 | Improvement
-- | -- | -- | --
lz4 -1 | 504 MB/s | 516 MB/s | +2%
lz4 -9 | 23.2 MB/s | 25.6 MB/s | +10%
lz4 -12 | 3.5 MB/s | 9.5 MB/s | +170%

More details:
https://github.com/lz4/lz4/releases/tag/v1.8.3

**Below is my benchmark result**
Set `spark.sql.parquet.compression.codec` to `lz4`, disable the ORC benchmark, 
then run `FilterPushdownBenchmark`, as sketched below.
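
For context, a minimal, hypothetical PySpark sketch of that setup (the config key is the one named above; the path and row count are illustrative, and the write assumes an environment where Parquet's lz4 codec is available):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[2]").getOrCreate()

# Compress Parquet output with lz4; this is the codec path that the
# lz4-java upgrade speeds up.
spark.conf.set("spark.sql.parquet.compression.codec", "lz4")
spark.range(1000 * 1000).write.mode("overwrite").parquet("/tmp/lz4_bench")

# Reading the files back exercises lz4 decompression.
print(spark.read.parquet("/tmp/lz4_bench").count())
```
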
lz4-java 1.5.0:
```
[success] Total time: 5585 s, completed Sep 26, 2018 5:22:16 PM
```
lz4-java 1.4.0:
```
[success] Total time: 5591 s, completed Sep 26, 2018 5:22:24 PM
```
Some benchmark results:
```
lz4-java 1.5.0 Select 1 row with 500 filters:   Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
---------------------------------------------------------------------------------------------------------
Parquet Vectorized                                   1953 / 1980          0.0  1952502908.0       1.0X
Parquet Vectorized (Pushdown)                        2541 / 2585          0.0  2541019869.0       0.8X

lz4-java 1.4.0 Select 1 row with 500 filters:   Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
---------------------------------------------------------------------------------------------------------
Parquet Vectorized                                   1979 / 2103          0.0  1979328144.0       1.0X
Parquet Vectorized (Pushdown)                        2596 / 2909          0.0  2596222118.0       0.8X
```
Complete benchmark results:
https://issues.apache.org/jira/secure/attachment/12941360/FilterPushdownBenchmark-lz4-java-140-results.txt
https://issues.apache.org/jira/secure/attachment/12941361/FilterPushdownBenchmark-lz4-java-150-results.txt

## How was this patch tested?

manual tests

Closes #22551 from wangyum/SPARK-25539.

Authored-by: Yuming Wang 
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fba722e3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fba722e3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fba722e3

Branch: refs/heads/master
Commit: fba722e319e356113a69c54f59e23150017634ae
Parents: 8bb2429
Author: Yuming Wang 
Authored: Sun Oct 7 09:51:33 2018 -0500
Committer: Sean Owen 
Committed: Sun Oct 7 09:51:33 2018 -0500

--
 dev/deps/spark-deps-hadoop-2.6 | 2 +-
 dev/deps/spark-deps-hadoop-2.7 | 2 +-
 dev/deps/spark-deps-hadoop-3.1 | 2 +-
 pom.xml| 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/fba722e3/dev/deps/spark-deps-hadoop-2.6
--
diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index 22e86ef..e0e3e0a 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -138,7 +138,7 @@ libfb303-0.9.3.jar
 libthrift-0.9.3.jar
 log4j-1.2.17.jar
 logging-interceptor-3.8.1.jar
-lz4-java-1.4.0.jar
+lz4-java-1.5.0.jar
 machinist_2.11-0.6.1.jar
 macro-compat_2.11-1.1.1.jar
 mesos-1.4.0-shaded-protobuf.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/fba722e3/dev/deps/spark-deps-hadoop-2.7
--
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index 19dd786..3b17f88 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -139,7 +139,7 @@ libfb303-0.9.3.jar
 libthrift-0.9.3.jar
 log4j-1.2.17.jar
 logging-interceptor-3.8.1.jar
-lz4-java-1.4.0.jar
+lz4-java-1.5.0.jar
 machinist_2.11-0.6.1.jar
 macro-compat_2.11-1.1.1.jar
 mesos-1.4.0-shaded-protobuf.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/fba722e3/dev/deps/spark-deps-hadoop-3.1
--
diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.1
index ea0f487..c818b2c 100644
--- a/dev/deps/spark-deps-hadoop-3.1
+++ b/dev/deps/spark-deps-hadoop-3.1
@@