Hi,
I am trying to set up a workflow for execution on a cluster where each compute
node has access to the shared data directory for input and output files via
NFS. When running on Condor, I noticed the following files in the .openmole
directory:
total 23M
-rw-r--r-- 1 as12312 vip  511 May 1 01:48 f14f6be2-ea76-41aa-b714-f04766a2781b.condor
-rw-r--r-- 1 as12312 vip  39K May 1 01:49 f14f6be2-ea76-41aa-b714-f04766a2781b.err
-rw-r--r-- 1 as12312 vip    0 May 1 01:48 f14f6be2-ea76-41aa-b714-f04766a2781b.out
-rw-r--r-- 1 as12312 vip 2.5K May 1 01:48 job_2d5f861f-430f-4ee3-9ae1-cd1f435c1c7d.in
-rw-r--r-- 1 as12312 vip 9.9K May 1 01:48 job_6f09ff72-1707-46bf-b54b-eb5a7d79c298.tgz
-rw-r--r-- 1 as12312 vip 1.8K May 1 01:50 output_a6476ae9-fa21-4695-8ba3-81f034388077.txt
-rw-r--r-- 1 as12312 vip  557 May 1 01:50 result_2d220ac0-38ec-4213-9d9b-366fc50a01b0.xml.gz
-rw-r--r-- 1 as12312 vip 1.5K May 1 01:48 run_09ccc83c-3695-4720-b295-b6d55d627ff7.sh
-rw-r--r-- 1 as12312 vip  23M May 1 01:50 uplodedTar_5a736889-01e4-4ea7-bf0a-3225c8ebd659.tgz
As can be seen, the uplodedTar_[…].tgz file is rather large, considering that
all input/output files are accessible via NFS. Looking at the contents of the
archive (files/filesInfo.xml) suggests that it contains the 3D NIfTI volume
image files.
Why are these files archived and uploaded to the remote node at all when I use
the “link = true” option of “inputFiles”?
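To make the question concrete: with a file on the shared NFS mount, I would
have expected a stripped-down task like the following (file name and task body
made up, mirroring the style of the workflow below) to result in a symlink in
the job's work directory rather than an archive/upload:

val check = ScalaTask(
  """
  | val size = FileUtil.join(workDir, "ref.nii.gz").length
  | println(size)
  """.stripMargin) set (
  name       := "CheckSharedInput",
  imports    += "com.andreasschuh.repeat.core.FileUtil",
  inputFiles += (refIm, "ref.nii.gz", link = true) // expectation: symlink, no copy, no upload
)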
Andreas
P.S.: For reference, here is the semi-complete workflow:
val dofPath = join(dofRig, dofPre + refId + s",$${${srcId.name}}" + dofSuf).getAbsolutePath
val logPath = join(logDir, dofRig.getName, refId + s",$${${srcId.name}}" + logSuf).getAbsolutePath
val dofRelPath = relativize(Workspace.rootFS, dofPath)
val logRelPath = relativize(Workspace.rootFS, logPath)
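// relativize strips the Workspace.rootFS prefix, i.e. dofRelPath/logRelPath are
// the paths of the output files relative to the shared workspace root.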
val begin = EmptyTask() set (
  name    := "ComputeRigidTemplateDofsBegin",
  inputs  += (refIm, srcId, srcIm),
  outputs += (refIm, srcId, srcIm, dof)
) source FileSource(dofPath, dof)
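// FileSource attaches any pre-existing file at dofPath to the dof prototype, so
// the Skip condition at the end can compare its timestamp to the input images.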
val regTask = ScalaTask(
  s"""
  | Config.parse(\"\"\"${Config()}\"\"\", "${Config().base}")
  | val ${refIm.name} = FileUtil.join(workDir, "$refId$refSuf")
  | val ${srcIm.name} = FileUtil.join(workDir, "$imgPre" + srcId + "$imgSuf")
  | val ${dof.name} = FileUtil.join(workDir, "rootfs", s"$dofRelPath")
  | val ${log.name} = FileUtil.join(workDir, "rootfs", s"$logRelPath")
  | IRTK.ireg(${refIm.name}, ${srcIm.name}, None, ${dof.name}, Some(${log.name}),
  |   "Transformation model" -> "Rigid",
  |   "Background value" -> $bgVal
  | )
  """.stripMargin) set (
  name        := "ComputeRigidTemplateDofs",
  imports     += ("com.andreasschuh.repeat.core.{Config, FileUtil, IRTK}", "sys.process._"),
  usedClasses += (Config.getClass, FileUtil.getClass, IRTK.getClass),
  inputs      += srcId,
  inputFiles  += (refIm, refId + refSuf, Workspace.shared),
  inputFiles  += (srcIm, imgPre + "${srcId}" + imgSuf, Workspace.shared),
  outputs     += (refIm, srcId, srcIm),
  outputFiles += (join("rootfs", dofRelPath), dof),
  outputFiles += (join("rootfs", logRelPath), log)
)
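// Note that Workspace.shared is passed as the link flag of inputFiles above; on
// this cluster it is true, so I expect the images to be linked, not copied/uploaded.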
// If the workspace is accessible from the compute node, read/write files directly without copying
if (Workspace.shared) {
  Workspace.rootFS.mkdirs()
  regTask.addResource(Workspace.rootFS, "rootfs", link = true, inWorkDir = true)
}
// Otherwise, i.e. if the workspace is not shared, the output files have to be copied back
val reg = regTask hook (
CopyFileHook(dof, dofPath),
CopyFileHook(log, logPath)
)
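// Skip the registration when the existing dof file is newer than both input
// images (a make-like up-to-date check based on modification times).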
val cond1 = s"${dof.name}.lastModified() > ${refIm.name}.lastModified()"
val cond2 = s"${dof.name}.lastModified() > ${srcIm.name}.lastModified()"
begin -- Skip(reg on Env.short by 10, cond1 + " && " + cond2)
_______________________________________________
OpenMOLE-users mailing list
[email protected]
http://fedex.iscpif.fr/mailman/listinfo/openmole-users