updated Hadoop commands, absolutizing aligner conf path
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/21eae7d0 Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/21eae7d0 Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/21eae7d0 Branch: refs/heads/master Commit: 21eae7d08b7e655d7f682b1cc64d5d9505e49743 Parents: dca7dba Author: Matt Post <[email protected]> Authored: Mon Mar 28 10:56:53 2016 -0400 Committer: Matt Post <[email protected]> Committed: Mon Mar 28 10:56:53 2016 -0400 ---------------------------------------------------------------------- scripts/training/pipeline.pl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/21eae7d0/scripts/training/pipeline.pl ---------------------------------------------------------------------- diff --git a/scripts/training/pipeline.pl b/scripts/training/pipeline.pl index b82691c..f43ee8d 100755 --- a/scripts/training/pipeline.pl +++ b/scripts/training/pipeline.pl @@ -323,6 +323,8 @@ if (! -x $NORMALIZER) { exit 1; } +my $ALIGNER_CONF = get_absolute_path($ALIGNER_CONF); + # capitalize these to offset a common error: $FIRST_STEP = uc($FIRST_STEP); $LAST_STEP = uc($LAST_STEP); @@ -1146,7 +1148,7 @@ if (! defined $GRAMMAR_FILE) { system("mv $thrax_file.tmp $thrax_file"); $cachepipe->cmd("thrax-run", - "$HADOOP/bin/hadoop jar $THRAX/bin/thrax.jar -D mapred.child.java.opts='-Xmx$HADOOP_MEM' -D hadoop.tmp.dir=$TMPDIR $thrax_file $THRAXDIR > thrax.log 2>&1; rm -f grammar grammar.gz; $HADOOP/bin/hadoop fs -getmerge $THRAXDIR/final/ grammar.gz", #; $HADOOP/bin/hadoop fs -rm -r $THRAXDIR", + "$HADOOP/bin/hadoop jar $THRAX/bin/thrax.jar -D mapreduce.task.timeout=0 -D mapreduce.map.java.opts='-Xmx$HADOOP_MEM' -D mapreduce.reduce.java.opts='-Xmx$HADOOP_MEM' -D hadoop.tmp.dir=$TMPDIR $thrax_file $THRAXDIR > thrax.log 2>&1; rm -f grammar grammar.gz; $HADOOP/bin/hadoop fs -getmerge $THRAXDIR/final/ grammar.gz", #; $HADOOP/bin/hadoop fs -rm -r $THRAXDIR", "$DATA_DIRS{train}/thrax-input-file", $thrax_file, "grammar.gz");
