Repository: incubator-joshua
Updated Branches:
  refs/heads/master 6da3961be -> d677bcddb


bugfixes related to class-based LMs


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/d677bcdd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/d677bcdd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/d677bcdd

Branch: refs/heads/master
Commit: d677bcddb7388ff78d81848703d953ceef5a58d5
Parents: 6da3961
Author: Matt Post <[email protected]>
Authored: Tue Apr 19 15:36:54 2016 -0400
Committer: Matt Post <[email protected]>
Committed: Tue Apr 19 15:36:54 2016 -0400

----------------------------------------------------------------------
 scripts/training/pipeline.pl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d677bcdd/scripts/training/pipeline.pl
----------------------------------------------------------------------
diff --git a/scripts/training/pipeline.pl b/scripts/training/pipeline.pl
index b7b5f86..8629508 100755
--- a/scripts/training/pipeline.pl
+++ b/scripts/training/pipeline.pl
@@ -174,7 +174,7 @@ my $DO_BUILD_LM_FROM_CORPUS = 1;
 my $DO_BUILD_CLASS_LM = 0;
 my $CLASS_LM_CORPUS = undef;
 my $CLASS_MAP = undef;
-my $CLASS_LM_ORDER = 5;
+my $CLASS_LM_ORDER = 9;
 
 # whether to tokenize and lowercase training, tuning, and test data
 my $DO_PREPARE_CORPORA = 1;
@@ -296,6 +296,7 @@ my $retval = GetOptions(
   "class-lm!"     => \$DO_BUILD_CLASS_LM,
   "class-lm-corpus=s"   => \$CLASS_LM_CORPUS,
   "class-map=s"     => \$CLASS_MAP,
+  "class-lm-order=s"     => \$CLASS_LM_ORDER,
   "optimizer-run=i" => \$OPTIMIZER_RUN,
 );
 
@@ -656,7 +657,7 @@ if (@CORPORA > 0) {
 
   # used for parsing
   if (exists $prefixes->{shortened}) {
-    $TRAIN{mixedcase} = "$DATA_DIRS{train}/$prefixes->{shortened}.$TARGET.gz";
+    $TRAIN{mixedcase} = "$DATA_DIRS{train}/$prefixes->{shortened}.$TARGET";
   }
 
   $TRAIN{prefix} = "$DATA_DIRS{train}/corpus";
@@ -1420,7 +1421,7 @@ for my $i (0..$#LMFILES) {
 }
 
 if ($DO_BUILD_CLASS_LM) {
-  push(@feature_functions, "LanguageModel -lm_type kenlm -lm_order 9 -lm_file $RUNDIR/class_lm.gz -class_map $CLASS_MAP");
+  push(@feature_functions, "LanguageModel -lm_type kenlm -lm_order $CLASS_LM_ORDER -lm_file $RUNDIR/class_lm.gz -class_map $CLASS_MAP");
   $weightstr .= "lm_$lm_index 1 ";
 }
 

Reply via email to