liyunzhang_intel created PIG-5241:
-------------------------------------
Summary: Specify the hdfs path directly to spark and avoid the
unnecessary download and upload in SparkLauncher.java
Key: PIG-5241
URL: https://issues.apache.org/jira/browse/PIG-5241
Project: Pig
Issue Type: Sub-task
Reporter: liyunzhang_intel
The following TODO in SparkLauncher.java should be resolved: specify the HDFS
path directly to Spark and avoid the unnecessary download and upload that
cacheFiles() currently performs for every cache file.
{code}
private void cacheFiles(String cacheFiles) throws IOException {
if (cacheFiles != null && !cacheFiles.isEmpty()) {
File tmpFolder = Files.createTempDirectory("cache").toFile();
tmpFolder.deleteOnExit();
for (String file : cacheFiles.split(",")) {
String fileName = extractFileName(file.trim());
Path src = new Path(extractFileUrl(file.trim()));
File tmpFile = new File(tmpFolder, fileName);
Path tmpFilePath = new Path(tmpFile.getAbsolutePath());
FileSystem fs = tmpFilePath.getFileSystem(jobConf);
//TODO: Specify the hdfs path directly to spark and avoid the
unnecessary download and upload in SparkLauncher.java
fs.copyToLocalFile(src, tmpFilePath);
tmpFile.deleteOnExit();
LOG.info(String.format("CacheFile:%s", fileName));
addResourceToSparkJobWorkingDirectory(tmpFile, fileName,
ResourceType.FILE);
}
}
}
{code}
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)