Github user xuchuanyin commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2824#discussion_r227625617
--- Diff:
integration/spark2/src/main/scala/org/apache/spark/sql/execution/datasources/SparkCarbonTableFormat.scala
---
@@ -172,33 +172,8 @@ with Serializable {
dataSchema: StructType,
context: TaskAttemptContext): OutputWriter = {
val model =
CarbonTableOutputFormat.getLoadModel(context.getConfiguration)
- val isCarbonUseMultiDir =
CarbonProperties.getInstance().isUseMultiTempDir
- var storeLocation: Array[String] = Array[String]()
- val isCarbonUseLocalDir = CarbonProperties.getInstance()
- .getProperty("carbon.use.local.dir",
"false").equalsIgnoreCase("true")
-
-
val taskNumber = generateTaskNumber(path, context,
model.getSegmentId)
- val tmpLocationSuffix =
- File.separator + "carbon" + System.nanoTime() + File.separator +
taskNumber
- if (isCarbonUseLocalDir) {
- val yarnStoreLocations =
Util.getConfiguredLocalDirs(SparkEnv.get.conf)
- if (!isCarbonUseMultiDir && null != yarnStoreLocations &&
yarnStoreLocations.nonEmpty) {
- // use single dir
- storeLocation = storeLocation :+
-
(yarnStoreLocations(Random.nextInt(yarnStoreLocations.length)) +
tmpLocationSuffix)
- if (storeLocation == null || storeLocation.isEmpty) {
- storeLocation = storeLocation :+
- (System.getProperty("java.io.tmpdir") + tmpLocationSuffix)
- }
- } else {
- // use all the yarn dirs
- storeLocation = yarnStoreLocations.map(_ + tmpLocationSuffix)
- }
- } else {
- storeLocation =
- storeLocation :+ (System.getProperty("java.io.tmpdir") +
tmpLocationSuffix)
- }
+ val storeLocation = CommonUtil.getTempStoreLocations(taskNumber)
--- End diff --
@jackylk @QiangCai
I've debugged and reviewed the code again and found it works as expected:
all the temp locations were cleared.
The `TempStoreLocations` generated at the beginning of data loading are just
the same as those used at the close of `CarbonTableOutputFormat`, in which these
locations will be cleared.
---