http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/packed/test.sh ---------------------------------------------------------------------- diff --git a/joshua-core/src/test/java/org/apache/joshua/packed/test.sh b/joshua-core/src/test/java/org/apache/joshua/packed/test.sh deleted file mode 100644 index be6cf27..0000000 --- a/joshua-core/src/test/java/org/apache/joshua/packed/test.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# test the vocabulary -# javac VocabTest.java -# java -cp .:${JOSHUA}/bin VocabTest small_packed
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java b/joshua-core/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java index 9a72d67..df50309 100644 --- a/joshua-core/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java +++ b/joshua-core/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java @@ -20,32 +20,32 @@ import static org.testng.Assert.assertEquals; +import java.io.File; import java.io.IOException; import org.apache.joshua.decoder.Decoder; -import org.apache.joshua.decoder.JoshuaConfiguration; import org.apache.joshua.decoder.Translation; import org.apache.joshua.decoder.segment_file.Sentence; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; +import com.typesafe.config.Config; +import com.typesafe.config.ConfigValueFactory; + public class LmOovFeatureTest { - private static final String CONFIG = "src/test/resources/lm_oov/joshua.config"; + private static final File CONFIG = new File("src/test/resources/lm_oov/joshua.config"); private static final String INPUT = "a chat-rooms full"; // expecting 2 lm oovs ('a' & 'full') and 2 grammar OOVs ('chat-rooms' & 'full') and score -198.000 private static final String EXPECTED_FEATURES = "pt_0=-2.000000 lm_0_oov=2.000000 lm_0=-206.718124 glue_0=3.000000 OOVPenalty=-200.000000 | -198.000"; - - private JoshuaConfiguration joshuaConfig = null; + + private static final Config FLAGS = Decoder.createDecoderFlagsFromFile(CONFIG).withValue("output_format", ConfigValueFactory.fromAnyRef("%f | %c")); private Decoder decoder = null; @BeforeMethod public void setUp() throws Exception { - joshuaConfig = new JoshuaConfiguration(); - joshuaConfig.readConfigFile(CONFIG); - joshuaConfig.outputFormat = "%f | %c"; - decoder = new Decoder(joshuaConfig); + decoder = new Decoder(FLAGS); } @AfterMethod @@ -62,7 +62,7 @@ public class LmOovFeatureTest { } private Translation decode(String input) { - final Sentence sentence = new Sentence(input, 0, joshuaConfig); + final Sentence sentence = new Sentence(input, 0, decoder.getDecoderConfig().getFlags()); return decoder.decode(sentence); } http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/resources/decoder/left-state/joshua.config ---------------------------------------------------------------------- diff --git a/joshua-core/src/test/resources/decoder/left-state/joshua.config b/joshua-core/src/test/resources/decoder/left-state/joshua.config index f0b7ad2..2578732 100644 --- a/joshua-core/src/test/resources/decoder/left-state/joshua.config +++ b/joshua-core/src/test/resources/decoder/left-state/joshua.config @@ -1,47 +1,51 @@ -feature_function = StateMinimizingLanguageModel -lm_type kenlm -lm_order 5 -lm_file src/test/resources/decoder/left-state/lm.gz +feature_functions = [ + {class=StateMinimizingLanguageModel, lm_type=kenlm, lm_order=5, lm_file=src/test/resources/decoder/left-state/lm.gz}, + {class=WordPenalty}, + {class=OOVPenalty} +] -tm = thrax pt 12 src/test/resources/decoder/left-state/grammar.gz -tm = thrax glue -1 src/test/resources/decoder/left-state/glue-grammar +grammars = [ + {class=TextGrammar, owner=pt, span_limit=12, path=src/test/resources/decoder/left-state/grammar.gz}, + {class=TextGrammar, owner=glue, span_limit=-1, path=src/test/resources/decoder/left-state/glue-grammar} +] mark_oovs=false #tm config default_non_terminal=X -goalSymbol=GOAL +goal_symbol=GOAL #pruning config pop-limit=100 #nbest config use_unique_nbest=true -top-n = 300 +top_n = 300 -feature_function = WordPenalty -feature_function = OOVPenalty - -output-format = "%c %s" +output_format = "\"%c %s\"" # Model Weights #### - -lm_0 1.2373676802179452 -pt_0 2.4497429277910214 -pt_1 -0.7224581556224123 -pt_2 0.31689069155153504 -pt_3 -0.33861043967238036 -pt_4 -0.03553113401320236 -pt_5 -0.19138972284064748 -pt_6 -0.3417994095521415 -pt_7 0.9936312455671283 -pt_8 -0.9070737587091975 -pt_9 -0.8202511858619419 -pt_10 -0.2593091306160006 -pt_11 -0.25597137004462134 -pt_12 -0.3538894647790496 -pt_13 -0.36212061186692646 -pt_14 0.32923261148678096 -pt_15 -0.5524863522177359 -pt_16 -0.23451595442127693 -glue_0 -1 -WordPenalty -3.6942747832593694 -OOVPenalty 1.0 +weights = { + lm_0 = 1.2373676802179452 + pt_0 = 2.4497429277910214 + pt_1 = -0.7224581556224123 + pt_2 = 0.31689069155153504 + pt_3 = -0.33861043967238036 + pt_4 = -0.03553113401320236 + pt_5 = -0.19138972284064748 + pt_6 = -0.3417994095521415 + pt_7 = 0.9936312455671283 + pt_8 = -0.9070737587091975 + pt_9 = -0.8202511858619419 + pt_10 = -0.2593091306160006 + pt_11 = -0.25597137004462134 + pt_12 = -0.3538894647790496 + pt_13 = -0.36212061186692646 + pt_14 = 0.32923261148678096 + pt_15 = -0.5524863522177359 + pt_16 = -0.23451595442127693 + glue_0 = -1 + WordPenalty = -3.6942747832593694 + OOVPenalty = 1.0 +} http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/resources/decoder/left-state/output.gold ---------------------------------------------------------------------- diff --git a/joshua-core/src/test/resources/decoder/left-state/output.gold b/joshua-core/src/test/resources/decoder/left-state/output.gold index 7c62979..7161f17 100644 --- a/joshua-core/src/test/resources/decoder/left-state/output.gold +++ b/joshua-core/src/test/resources/decoder/left-state/output.gold @@ -7,7 +7,7 @@ "-227.952 of rabindranath was born in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-227.982 rabindranath was born in the a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-228.069 rabindranath was born in kolkata is one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-228.132 rabindranath was born in kolkata in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-228.133 rabindranath was born in kolkata in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-228.160 rabindranath was born in kolkata one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-228.266 rabindranath born in kolkata a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-228.386 rabindranath 's birth in kolkata is one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" @@ -20,11 +20,11 @@ "-228.745 rabindranath was born in one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-228.777 of rabindranath born in kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-228.805 rabindranath 's birth of kolkata was a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-228.806 rabindranath born in kolkata was a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-228.807 rabindranath born in kolkata was a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-228.823 rabindranath born in the one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-228.854 rabindranath born in kolkata 's a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-228.862 rabindranath 's birth was the a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-228.863 rabindranath 's was born in kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-228.864 rabindranath 's was born in kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-228.866 of rabindranath was born in kolkata a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-228.892 rabindranath was born in kolkata in পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-228.945 rabindranath 's birth in kolkata a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" @@ -34,7 +34,7 @@ "-229.107 rabindranath 's birth of kolkata was one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.108 rabindranath born in kolkata was one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.157 rabindranath 's birth was kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-229.157 rabindranath tagore was born in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-229.158 rabindranath tagore was born in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.171 rabindranath was born kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.182 rabindranath born in kolkata in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.202 rabindranath 's was born in the a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" @@ -44,7 +44,7 @@ "-229.353 rabindranath 's was born in kolkata in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.378 rabindranath 's birth was in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.380 rabindranath 's was born in kolkata one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-229.383 rabindranath tagore was born in kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-229.384 rabindranath tagore was born in kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.438 rabindranath 's birth in the one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.469 rabindranath 's birth the kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.485 rabindranath 's birth in kolkata was a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" @@ -53,7 +53,7 @@ "-229.503 rabindranath born in kolkata 's one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.511 rabindranath tagore 's birth in kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.533 rabindranath 's birth in kolkata 's a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-229.631 of rabindranath was born in the a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-229.632 of rabindranath was born in the a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.637 rabindranath 's was born in the one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.663 rabindranath 's birth in kolkata is in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.710 rabindranath was born in kolkata , a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" @@ -70,7 +70,7 @@ "-229.831 rabindranath 's birth was in kolkata a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.861 rabindranath 's birth in kolkata in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.873 rabindranath tagore was born in kolkata in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-229.888 rabindranath 's birth in kolkata one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-229.889 rabindranath 's birth in kolkata one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.892 rabindranath born in the in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.900 rabindranath tagore was born in kolkata one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.922 rabindranath born in in kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" @@ -79,7 +79,7 @@ "-229.971 rabindranath born was the a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.975 rabindranath was born in kolkata 's a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-229.976 rabindranath 's birth kolkata was a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-230.043 rabindranath born in kolkata 's in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-230.044 rabindranath born in kolkata 's in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-230.048 rabindranath 's was born in kolkata is one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-230.066 of rabindranath was born in the one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-230.099 rabindranath was born the a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" @@ -149,12 +149,12 @@ "-230.750 rabindranath was born in the at one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-230.760 rabindranath born in kolkata , a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-230.769 rabindranath 's birth in the kolkata a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-230.774 rabindranath 's birth was in kolkata one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-230.775 rabindranath 's birth was in kolkata one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-230.800 rabindranath born in in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-230.803 rabindranath born in kolkata 's in পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-230.813 rabindranath was born in kolkata at a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-230.820 rabindranath born was in the a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-230.820 of rabindranath was born kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-230.821 of rabindranath was born kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-230.852 rabindranath 's birth was kolkata 's a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-230.861 rabindranath 's birth was is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-230.865 rabindranath was born kolkata 's a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" @@ -171,7 +171,7 @@ "-230.956 the birth of rabindranath was in the a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-230.958 rabindranath 's birth was the in পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-230.966 rabindranath 's birth of kolkata was in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-230.967 rabindranath born in kolkata was in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-230.968 rabindranath born in kolkata was in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-230.996 of rabindranath was born in kolkata is in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.004 birth of rabindranath tagore was the a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.010 rabindranath 's birth in calcutta a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" @@ -186,8 +186,8 @@ "-231.089 rabindranath 's birth the in the a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.106 rabindranath born in in kolkata is one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.108 of rabindranath born in kolkata 's a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-231.114 rabindranath tagore 's birth of kolkata was a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-231.120 rabindranath was born in a in পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-231.115 rabindranath tagore 's birth of kolkata was a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-231.121 rabindranath was born in a in পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.135 of rabindranath was born in the in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.156 rabindranath born in the at one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.163 rabindranath 's birth the kolkata 's a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" @@ -203,7 +203,7 @@ "-231.226 rabindranath tagore was born in the in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.227 the born in kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.238 of rabindranath born in kolkata is in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-231.254 rabindranath born was in the one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-231.255 rabindranath born was in the one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.255 rabindranath tagore 's birth in kolkata a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.256 the was born in kolkata a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.267 rabindranath 's birth in the in পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" @@ -245,7 +245,7 @@ "-231.500 rabindranath 's birth was kolkata 's one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.507 rabindranath 's birth was in kolkata in পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.508 the birth of rabindranath was kolkata a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-231.511 rabindranath tagore 's was born in the a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-231.512 rabindranath tagore 's was born in the a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.514 rabindranath was born kolkata 's one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.520 birth of rabindranath tagore was in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.524 rabindranath 's birth the in the one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" @@ -257,7 +257,7 @@ "-231.607 rabindranath tagore 's birth was the one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.622 rabindranath born in kolkata of পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.625 of rabindranath was born in kolkata 's a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-231.631 rabindranath 's birth in in the one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-231.632 rabindranath 's birth in in the one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.641 birth of rabindranath tagore was in kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.646 rabindranath 's birth in kolkata was in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.656 rabindranath 's born was kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" @@ -288,13 +288,13 @@ "-231.795 rabindranath tagore 's birth in kolkata was a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.796 rabindranath 's born was the one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.801 of rabindranath was born in a in kolkata পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-231.804 of rabindranath was born of kolkata a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-231.808 rabindranath tagore 's birth was in kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-231.805 of rabindranath was born of kolkata a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-231.809 rabindranath tagore 's birth was in kolkata is a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.812 of rabindranath was born in kolkata at one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.812 rabindranath 's birth the kolkata 's one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.838 rabindranath 's born in kolkata is in পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.839 rabindranath tagore was born the a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" -"-231.841 rabindranath born in calcutta one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" +"-231.842 rabindranath born in calcutta one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.843 rabindranath tagore 's birth in kolkata 's a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.844 rabindranath 's was born in kolkata 's one পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" "-231.845 rabindranath tagore was born in kolkata is in a পিরালৠবà§à¦°à¦¾à¦¹à§à¦®à¦£ in the family" @@ -557,7 +557,7 @@ "-25.195 in recent times india 's along with united states relation between improved ." "-25.200 recently india with relation of united states improved ." "-25.201 recently indian along with united states relationship between improved ." -"-25.205 recently india 's with the united states relation between improved ." +"-25.204 recently india 's with the united states relation between improved ." "-25.206 recently the with the united states relation improved ." "-25.210 recently with relation between of united states with the improved ." "-25.212 recently has been with united states relationship between with the development ." @@ -597,4 +597,4 @@ "-25.340 in recent times of india along with united states relation improved ." "-25.345 in recent times india with relation between of united states improved ." "-25.348 during the recent time of india with united states relation improved ." -"-25.348 in recent times india with the united relationship between improved ." +"-25.348 in recent times india with the united relationship between improved ." \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/resources/decoder/lowercaser/joshua.config ---------------------------------------------------------------------- diff --git a/joshua-core/src/test/resources/decoder/lowercaser/joshua.config b/joshua-core/src/test/resources/decoder/lowercaser/joshua.config index 6f5a46b..086311c 100644 --- a/joshua-core/src/test/resources/decoder/lowercaser/joshua.config +++ b/joshua-core/src/test/resources/decoder/lowercaser/joshua.config @@ -1,140 +1,17 @@ -# This file is a template for the Joshua pipeline; variables enclosed -# in <angle-brackets> are substituted by the pipeline script as -# appropriate. This file also serves to document Joshua's many -# parameters. - -# These are the grammar file specifications. Joshua supports an -# arbitrary number of grammar files, each specified on its own line -# using the following format: -# -# tm = TYPE OWNER LIMIT FILE -# -# TYPE is "packed", "thrax", or "samt". The latter denotes the format -# used in Zollmann and Venugopal's SAMT decoder -# (http://www.cs.cmu.edu/~zollmann/samt/). -# -# OWNER is the "owner" of the rules in the grammar; this is used to -# determine which set of phrasal features apply to the grammar's -# rules. Having different owners allows different features to be -# applied to different grammars, and for grammars to share features -# across files. -# -# LIMIT is the maximum input span permitted for the application of -# grammar rules found in the grammar file. A value of -1 implies no limit. -# -# FILE is the grammar file (or directory when using packed grammars). -# The file can be compressed with gzip, which is determined by the -# presence or absence of a ".gz" file extension. -# -# By a convention defined by Chiang (2007), the grammars are split -# into two files: the main translation grammar containing all the -# learned translation rules, and a glue grammar which supports -# monotonic concatenation of hierarchical phrases. The glue grammar's -# main distinction from the regular grammar is that the span limit -# does not apply to it. - -tm = hiero -maxspan 20 -path src/test/resources/decoder/lowercaser/grammar.test -owner pt -tm = thrax -path src/test/resources/decoder/lowercaser/grammar.glue -maxspan -1 -owner glue - -# This symbol is used over unknown words in the source language - -default-non-terminal = X - -# This is the goal nonterminal, used to determine when a complete -# parse is found. It should correspond to the root-level rules in the -# glue grammar. - -goal-symbol = GOAL - -# Language model config. -# -# Multiple language models are supported. For each language model, -# create one of the following lines: -# -# feature-function = LanguageModel -lm_type TYPE -lm_order ORDER -lm_file FILE -# feature-function = StateMinimizingLanguageModel -lm_order ORDER -lm_file FILE -# -# - TYPE is one of "kenlm" or "berkeleylm" -# - ORDER is the order of the language model (default 5) -# - FILE is the path to the LM file. This can be binarized if appropriate to the type -# (e.g., KenLM has a compiled format) -# -# A state-minimizing LM collapses left-state. Currently only KenLM supports this. -# -# For each LM, add a weight lm_INDEX below, where indexing starts from 0. - - - -# The suffix _OOV is appended to unknown source-language words if this -# is set to true. - -mark-oovs = false - -# The search algorithm: "cky" for hierarchical / phrase-based decoding, -# "stack" for phrase-based decoding -search = cky - -# The pop-limit for decoding. This determines how many hypotheses are -# considered over each span of the input. - -pop-limit = 100 - -# How many hypotheses to output - -top-n = 1 - -# Whether those hypotheses should be distinct strings - -use-unique-nbest = true - -# This is the default format of the ouput printed to STDOUT. The variables that can be -# substituted are: -# -# %i: the sentence number (0-indexed) -# %s: the translated sentence -# %t: the derivation tree -# %f: the feature string -# %c: the model cost - -output-format = %s - -# When printing the trees (%t in 'output-format'), this controls whether the alignments -# are also printed. - -include-align-index = false - -# And these are the feature functions to activate. -feature-function = OOVPenalty -feature-function = WordPenalty - -## Model weights ##################################################### - -# For each langage model line listed above, create a weight in the -# following format: the keyword "lm", a 0-based index, and the weight. -# lm_INDEX WEIGHT - - -# The phrasal weights correspond to weights stored with each of the -# grammar rules. The format is -# -# tm_OWNER_COLUMN WEIGHT -# -# where COLUMN denotes the 0-based order of the parameter in the -# grammar file and WEIGHT is the corresponding weight. In the future, -# we plan to add a sparse feature representation which will simplify -# this. - -# The wordpenalty feature counts the number of words in each hypothesis. - - -# This feature counts the number of unknown words in the hypothesis. - - -# This feature weights paths through an input lattice. It is only activated -# when decoding lattices. - -WordPenalty -4.72455379476569 -OOVPenalty 0.7897219562429866 -tm_pt_0 0.3137696816891433 -tm_glue_0 -0.04493059277470993 +feature_functions = [ + {class=WordPenalty}, + {class=OOVPenalty} +] + +grammars = [ + {class=TextGrammar, owner=pt, span_limit=20, path=src/test/resources/decoder/lowercaser/grammar.test}, + {class=TextGrammar, owner=glue, span_limit=-1, path=src/test/resources/decoder/lowercaser/grammar.glue} +] + +weights = { + WordPenalty = -4.72455379476569 + OOVPenalty = 0.7897219562429866 + tm_pt_0 = 0.3137696816891433 + tm_glue_0 = -0.04493059277470993 +} http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/resources/lm_oov/joshua.config ---------------------------------------------------------------------- diff --git a/joshua-core/src/test/resources/lm_oov/joshua.config b/joshua-core/src/test/resources/lm_oov/joshua.config index 0a70b95..d62f094 100644 --- a/joshua-core/src/test/resources/lm_oov/joshua.config +++ b/joshua-core/src/test/resources/lm_oov/joshua.config @@ -1,17 +1,23 @@ -feature-function = LanguageModel -lm_type berkeleylm -lm_order 5 -lm_file src/test/resources/berkeley_lm/lm -oov_feature -tm = thrax -owner pt -maxspan 12 -path src/test/resources/kbest_extraction/grammar -tm = thrax -owner glue -maxspan -1 -path src/test/resources/kbest_extraction/glue-grammar +top_n = 0 -top-n = 0 +feature_functions = [ + {class=LanguageModel, lm_type=berkeleylm, lm_order=5, lm_file=src/test/resources/berkeley_lm/lm, oov_feature=true}, + {class=OOVPenalty} +] + +grammars = [ + {class=TextGrammar, owner=pt, span_limit=12, path=src/test/resources/kbest_extraction/grammar}, + {class=TextGrammar, owner=glue, span_limit=-1, path=src/test/resources/kbest_extraction/glue-grammar} +] -#feature_function = WordPenalty -feature_function = OOVPenalty # Model Weights #### -lm_0 0 -lm_0_oov 1 -OOVPenalty 1 -pt_0 0 -glue_0 0 +weights = { + lm_0 = 0 + lm_0_oov = 1 + OOVPenalty = 1 + pt_0 = 0 + glue_0 = 0 +} \ No newline at end of file