Repository: incubator-joshua
Updated Branches:
  refs/heads/master 6da3961be -> d677bcddb
bugfixes related to class-based LMs

Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/d677bcdd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/d677bcdd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/d677bcdd

Branch: refs/heads/master
Commit: d677bcddb7388ff78d81848703d953ceef5a58d5
Parents: 6da3961
Author: Matt Post <[email protected]>
Authored: Tue Apr 19 15:36:54 2016 -0400
Committer: Matt Post <[email protected]>
Committed: Tue Apr 19 15:36:54 2016 -0400

----------------------------------------------------------------------
 scripts/training/pipeline.pl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d677bcdd/scripts/training/pipeline.pl
----------------------------------------------------------------------
diff --git a/scripts/training/pipeline.pl b/scripts/training/pipeline.pl
index b7b5f86..8629508 100755
--- a/scripts/training/pipeline.pl
+++ b/scripts/training/pipeline.pl
@@ -174,7 +174,7 @@ my $DO_BUILD_LM_FROM_CORPUS = 1;
 my $DO_BUILD_CLASS_LM = 0;
 my $CLASS_LM_CORPUS = undef;
 my $CLASS_MAP = undef;
-my $CLASS_LM_ORDER = 5;
+my $CLASS_LM_ORDER = 9;
 
 # whether to tokenize and lowercase training, tuning, and test data
 my $DO_PREPARE_CORPORA = 1;
@@ -296,6 +296,7 @@ my $retval = GetOptions(
   "class-lm!" => \$DO_BUILD_CLASS_LM,
   "class-lm-corpus=s" => \$CLASS_LM_CORPUS,
   "class-map=s" => \$CLASS_MAP,
+  "class-lm-order=s" => \$CLASS_LM_ORDER,
   "optimizer-run=i" => \$OPTIMIZER_RUN,
 );
 
@@ -656,7 +657,7 @@ if (@CORPORA > 0) {
 
   # used for parsing
   if (exists $prefixes->{shortened}) {
-    $TRAIN{mixedcase} = "$DATA_DIRS{train}/$prefixes->{shortened}.$TARGET.gz";
+    $TRAIN{mixedcase} = "$DATA_DIRS{train}/$prefixes->{shortened}.$TARGET";
   }
 
   $TRAIN{prefix} = "$DATA_DIRS{train}/corpus";
@@ -1420,7 +1421,7 @@ for my $i (0..$#LMFILES) {
 }
 
 if ($DO_BUILD_CLASS_LM) {
-  push(@feature_functions, "LanguageModel -lm_type kenlm -lm_order 9 -lm_file $RUNDIR/class_lm.gz -class_map $CLASS_MAP");
+  push(@feature_functions, "LanguageModel -lm_type kenlm -lm_order $CLASS_LM_ORDER -lm_file $RUNDIR/class_lm.gz -class_map $CLASS_MAP");
   $weightstr .= "lm_$lm_index 1 ";
 }
