http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java index 74770c5..6e9ef23 100644 --- a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java +++ b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java @@ -19,11 +19,11 @@ package org.apache.joshua.decoder.phrase; import java.io.File; -import java.io.IOException; import java.util.List; +import java.util.Optional; import org.apache.joshua.corpus.Vocabulary; -import org.apache.joshua.decoder.JoshuaConfiguration; +import org.apache.joshua.decoder.DecoderConfig; import org.apache.joshua.decoder.ff.FeatureFunction; import org.apache.joshua.decoder.ff.FeatureVector; import org.apache.joshua.decoder.ff.tm.Grammar; @@ -31,52 +31,37 @@ import org.apache.joshua.decoder.ff.tm.OwnerId; import org.apache.joshua.decoder.ff.tm.Rule; import org.apache.joshua.decoder.ff.tm.RuleCollection; import org.apache.joshua.decoder.ff.tm.Trie; -import org.apache.joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar; +import org.apache.joshua.decoder.ff.tm.hash_based.TextGrammar; import org.apache.joshua.decoder.ff.tm.packed.PackedGrammar; +import com.typesafe.config.Config; +import com.typesafe.config.ConfigValueFactory; + /** * Represents a phrase table, and is implemented as a wrapper around either a {@link PackedGrammar} - * or a {@link MemoryBasedBatchGrammar}. + * or a {@link TextGrammar}. * * TODO: this should all be implemented as a two-level trie (source trie and target trie). */ public class PhraseTable implements Grammar { - private final JoshuaConfiguration config; - private Grammar backend; + private final Grammar backend; + private final Optional<String> path; /** * Chain to the super with a number of defaults. For example, we only use a single nonterminal, * and there is no span limit. - * - * @param grammarFile file path parent directory - * @param owner used to set phrase owners - * @param type the grammar specification keyword (e.g., "thrax" or "moses") - * @param config a populated {@link org.apache.joshua.decoder.JoshuaConfiguration} - * @throws IOException if there is an error reading the grammar file */ - public PhraseTable(String grammarFile, String owner, String type, JoshuaConfiguration config) - throws IOException { - this.config = config; - int spanLimit = 0; - - if (grammarFile != null && new File(grammarFile).isDirectory()) { - this.backend = new PackedGrammar(grammarFile, spanLimit, owner, type, config); - if (this.backend.getMaxSourcePhraseLength() == -1) { - String msg = "FATAL: Using a packed grammar for a phrase table backend requires that you " - + "packed the grammar with Joshua 6.0.2 or greater"; - throw new RuntimeException(msg); - } - + public PhraseTable(Config config) { + // override span_limit to 0 + final Config newConfig = config.withValue("span_limit", ConfigValueFactory.fromAnyRef(0)); + this.path = newConfig.hasPath("path") ? Optional.of(newConfig.getString("path")) : Optional.empty(); + if (path.isPresent() && new File(path.get()).isDirectory()) { + this.backend = new PackedGrammar(newConfig); } else { - this.backend = new MemoryBasedBatchGrammar(type, grammarFile, owner, "[X]", spanLimit, config); + this.backend = new TextGrammar(newConfig); } } - - public PhraseTable(String owner, JoshuaConfiguration config) { - this.config = config; - this.backend = new MemoryBasedBatchGrammar(owner, config, 20); - } /** * Returns the longest source phrase read. @@ -119,10 +104,10 @@ public class PhraseTable implements Grammar { } @Override - public void addOOVRules(int sourceWord, List<FeatureFunction> featureFunctions) { + public void addOOVRules(int sourceWord, DecoderConfig config) { // TODO: _OOV shouldn't be outright added, since the word might not be OOV for the LM (but now almost // certainly is) - int targetWord = config.mark_oovs + int targetWord = config.getFlags().getBoolean("mark_oovs") ? Vocabulary.id(Vocabulary.word(sourceWord) + "_OOV") : sourceWord; @@ -135,7 +120,7 @@ public class PhraseTable implements Grammar { new FeatureVector(0), new byte[] {0,0}, backend.getOwner()); addRule(oovRule); - oovRule.estimateRuleCost(featureFunctions); + oovRule.estimateRuleCost(config.getFeatureFunctions()); } @Override
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stack.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stack.java b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stack.java index b867d10..b972834 100644 --- a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stack.java +++ b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stack.java @@ -25,7 +25,6 @@ import java.util.HashSet; import java.util.PriorityQueue; import java.util.Set; -import org.apache.joshua.decoder.JoshuaConfiguration; import org.apache.joshua.decoder.ff.tm.Rule; import org.apache.joshua.decoder.segment_file.Sentence; import org.slf4j.Logger; @@ -43,8 +42,8 @@ public class Stack extends ArrayList<Hypothesis> { private final HashMap<Coverage, ArrayList<Hypothesis>> coverages; - private Sentence sentence; - private JoshuaConfiguration config; + private final Sentence sentence; + private final int popLimit; /* The list of states we've already visited. */ private final HashSet<Candidate> visitedStates; @@ -61,9 +60,9 @@ public class Stack extends ArrayList<Hypothesis> { * @param sentence input for a {@link org.apache.joshua.lattice.Lattice} * @param config populated {@link org.apache.joshua.decoder.JoshuaConfiguration} */ - public Stack(Sentence sentence, JoshuaConfiguration config) { + public Stack(Sentence sentence, int popLimit) { this.sentence = sentence; - this.config = config; + this.popLimit = popLimit; this.candidates = new PriorityQueue<Candidate>(1); this.coverages = new HashMap<Coverage, ArrayList<Hypothesis>>(); @@ -175,7 +174,7 @@ public class Stack extends ArrayList<Hypothesis> { * candidate. */ public void search() { - int to_pop = config.pop_limit; + int to_pop = popLimit; if (LOG.isDebugEnabled()) { LOG.debug("Stack::search(): pop: {} size: {}", to_pop, candidates.size()); http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java index 8fae284..d4f09a0 100644 --- a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java +++ b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java @@ -36,16 +36,14 @@ package org.apache.joshua.decoder.phrase; */ import static org.apache.joshua.decoder.chart_parser.ComputeNodeResult.computeNodeResult; -import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER; import java.util.ArrayList; import java.util.List; -import org.apache.joshua.decoder.JoshuaConfiguration; +import org.apache.joshua.decoder.DecoderConfig; import org.apache.joshua.decoder.chart_parser.ComputeNodeResult; import org.apache.joshua.decoder.chart_parser.NodeResult; import org.apache.joshua.decoder.ff.FeatureFunction; -import org.apache.joshua.decoder.ff.tm.AbstractGrammar; import org.apache.joshua.decoder.ff.tm.Grammar; import org.apache.joshua.decoder.hypergraph.HGNode; import org.apache.joshua.decoder.hypergraph.HyperEdge; @@ -54,6 +52,8 @@ import org.apache.joshua.decoder.segment_file.Sentence; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.ImmutableList; + public class Stacks { private static final Logger LOG = LoggerFactory.getLogger(Stacks.class); @@ -63,16 +63,19 @@ public class Stacks { // The end state private Hypothesis end; - - final List<FeatureFunction> featureFunctions; private final Sentence sentence; - private final JoshuaConfiguration config; + private final DecoderConfig config; + private final ImmutableList<FeatureFunction> featureFunctions; /* Contains all the phrase tables */ private final PhraseChart chart; + private final int reorderingLimit; + private final int numTranslationOptions; + private final int popLimit; + /** * Entry point. Initialize everything. Create pass-through (OOV) phrase table and glue phrase * table (with start-of-sentence and end-of-sentence rules). @@ -82,33 +85,27 @@ public class Stacks { * @param grammars an array of {@link org.apache.joshua.decoder.ff.tm.Grammar}'s * @param config a populated {@link org.apache.joshua.decoder.JoshuaConfiguration} */ - public Stacks(Sentence sentence, List<FeatureFunction> featureFunctions, Grammar[] grammars, - JoshuaConfiguration config) { + public Stacks(Sentence sentence, DecoderConfig config) { this.sentence = sentence; - this.featureFunctions = featureFunctions; this.config = config; + this.featureFunctions = config.getFeatureFunctions(); + this.popLimit = config.getFlags().getInt("pop_limit"); + this.numTranslationOptions = config.getFlags().getInt("num_translation_options"); + this.reorderingLimit = config.getFlags().getInt("reordering_limit"); - int num_phrase_tables = 0; - for (Grammar grammar : grammars) - if (grammar instanceof PhraseTable) - ++num_phrase_tables; - - PhraseTable[] phraseTables = new PhraseTable[num_phrase_tables + 2]; - for (int i = 0, j = 0; i < grammars.length; i++) - if (grammars[i] instanceof PhraseTable) - phraseTables[j++] = (PhraseTable) grammars[i]; - - phraseTables[phraseTables.length - 2] = new PhraseTable(UNKNOWN_OWNER, config); - phraseTables[phraseTables.length - 2].addRule(Hypothesis.END_RULE); + // collect grammars that are phrase tables + final ImmutableList.Builder<PhraseTable> phraseTablesBuilder = new ImmutableList.Builder<>(); + for (Grammar grammar : config.getGrammars()) { + if (grammar instanceof PhraseTable) { + phraseTablesBuilder.add((PhraseTable) grammar); + } + } - phraseTables[phraseTables.length - 1] = new PhraseTable("oov", config); - AbstractGrammar.addOOVRules(phraseTables[phraseTables.length - 1], sentence.getLattice(), featureFunctions, config.true_oovs_only); + this.chart = new PhraseChart(phraseTablesBuilder.build(), config, sentence, numTranslationOptions); - this.chart = new PhraseChart(phraseTables, featureFunctions, sentence, config.num_translation_options); } - /** * The main algorithm. Returns a hypergraph representing the search space. * @@ -125,15 +122,15 @@ public class Stacks { stacks.add(null); // Initialize root hypothesis with <s> context and future cost for everything. - NodeResult result = computeNodeResult(this.featureFunctions, Hypothesis.BEGIN_RULE, + NodeResult result = computeNodeResult(config, Hypothesis.BEGIN_RULE, null, -1, 1, null, this.sentence); - Stack firstStack = new Stack(sentence, config); + Stack firstStack = new Stack(sentence, popLimit); firstStack.add(new Hypothesis(result.getDPStates(), future.Full())); stacks.add(firstStack); // Decode with increasing numbers of source words. for (int source_words = 2; source_words <= sentence.length(); ++source_words) { - Stack targetStack = new Stack(sentence, config); + Stack targetStack = new Stack(sentence, popLimit); stacks.add(targetStack); // Iterate over stacks to continue from. @@ -159,7 +156,7 @@ public class Stacks { int begin = coverage.firstZero(); // the absolute position of the ending spot of the last possible phrase - int last_end = Math.min(coverage.firstZero() + config.reordering_limit, chart.SentenceLength()); + int last_end = Math.min(coverage.firstZero() + reorderingLimit, chart.SentenceLength()); int last_begin = (last_end > phrase_length) ? (last_end - phrase_length) : 0; for (begin = coverage.firstZero(); begin <= last_begin; begin++) { @@ -192,7 +189,7 @@ public class Stacks { * phrases from that span. The hypotheses are wrapped in HypoState objects, which * augment the hypothesis score with a future cost. */ - Candidate cand = new Candidate(featureFunctions, sentence, hypotheses, phrases, future_delta, new int[] {0, 0}); + Candidate cand = new Candidate(config, sentence, hypotheses, phrases, future_delta, new int[] {0, 0}); targetStack.addCandidate(cand); } } @@ -229,7 +226,7 @@ public class Stacks { private boolean permissible(Coverage coverage, int begin, int end) { int firstZero = coverage.firstZero(); - if (config.reordering_limit < 0) + if (reorderingLimit < 0) return true; /* We can always start with the first zero since it doesn't create a reordering gap @@ -240,7 +237,7 @@ public class Stacks { /* If a gap is created by applying this phrase, make sure that you can reach the first * zero later on without violating the distortion constraint. */ - return end - firstZero <= config.reordering_limit; + return end - firstZero <= reorderingLimit; } http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/ParseTreeInput.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/ParseTreeInput.java b/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/ParseTreeInput.java index b9b1896..d5879da 100644 --- a/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/ParseTreeInput.java +++ b/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/ParseTreeInput.java @@ -18,12 +18,12 @@ */ package org.apache.joshua.decoder.segment_file; -import org.apache.joshua.decoder.JoshuaConfiguration; +import com.typesafe.config.Config; public class ParseTreeInput extends Sentence { - public ParseTreeInput(String input, int id, JoshuaConfiguration joshuaConfiguration) { - super(input, id,joshuaConfiguration); + public ParseTreeInput(String input, int id, Config config) { + super(input, id, config); } // looks_like_parse_tree = sentence.sentence().matches("^\\(+[A-Z]+ .*"); http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/ParsedSentence.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/ParsedSentence.java b/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/ParsedSentence.java index a97718e..c2af704 100644 --- a/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/ParsedSentence.java +++ b/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/ParsedSentence.java @@ -21,14 +21,15 @@ package org.apache.joshua.decoder.segment_file; import org.apache.joshua.corpus.Vocabulary; import org.apache.joshua.corpus.syntax.ArraySyntaxTree; import org.apache.joshua.corpus.syntax.SyntaxTree; -import org.apache.joshua.decoder.JoshuaConfiguration; + +import com.typesafe.config.Config; public class ParsedSentence extends Sentence { private SyntaxTree syntaxTree = null; - public ParsedSentence(String input, int id,JoshuaConfiguration joshuaConfiguration) { - super(input, id, joshuaConfiguration); + public ParsedSentence(String input, int id, Config config) { + super(input, id, config); } public int[] getWordIDs() { http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java b/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java index f84c41a..5926433 100644 --- a/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java +++ b/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java @@ -21,21 +21,17 @@ package org.apache.joshua.decoder.segment_file; import static org.apache.joshua.util.FormatUtils.addSentenceMarkers; import java.util.ArrayList; -import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; -import java.util.Map; import java.util.StringTokenizer; -import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.joshua.corpus.Vocabulary; -import org.apache.joshua.decoder.JoshuaConfiguration; -import org.apache.joshua.decoder.KenLMPool; import org.apache.joshua.decoder.LanguageModelStateManager; +import org.apache.joshua.decoder.SearchAlgorithm; import org.apache.joshua.decoder.ff.tm.Grammar; import org.apache.joshua.lattice.Arc; import org.apache.joshua.lattice.Lattice; @@ -45,6 +41,8 @@ import org.apache.joshua.util.Regex; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.typesafe.config.Config; + /** * This class represents lattice input. The lattice is contained on a single line and is represented * in PLF (Python Lattice Format), e.g., @@ -80,7 +78,11 @@ public class Sentence { /* List of constraints */ private final List<ConstraintSpan> constraints; - public JoshuaConfiguration config = null; + private final Config config; + + private final boolean latticeDecoding; + private final int maximumSentenceLength; + private final SearchAlgorithm searchAlgorithm; private LanguageModelStateManager stateManager = new LanguageModelStateManager(); @@ -92,11 +94,14 @@ public class Sentence { * @param id ID to associate with the input string * @param joshuaConfiguration a populated {@link org.apache.joshua.decoder.JoshuaConfiguration} */ - public Sentence(String inputString, int id, JoshuaConfiguration joshuaConfiguration) { + public Sentence(String inputString, int id, Config config) { inputString = Regex.spaces.replaceAll(inputString, " ").trim(); - config = joshuaConfiguration; + this.config = config; + this.latticeDecoding = this.config.getBoolean("lattice_decoding"); + this.maximumSentenceLength = this.config.getInt("maximum_sentence_length"); + this.searchAlgorithm = SearchAlgorithm.valueOf(this.config.getString("search_algorithm")); this.constraints = new LinkedList<>(); @@ -131,8 +136,13 @@ public class Sentence { } // Only trim strings - if (! (joshuaConfiguration.lattice_decoding && source.startsWith("((("))) - adjustForLength(joshuaConfiguration.maxlen); + if (! (latticeDecoding && source.startsWith("((("))) { + adjustForLength(maximumSentenceLength); + } + } + + public Config getConfig() { + return config; } /** @@ -181,7 +191,7 @@ public class Sentence { * * @param grammars a list of grammars to consult to find in- and out-of-vocabulary items */ - public void segmentOOVs(Grammar[] grammars) { + public void segmentOOVs(final List<Grammar> grammars) { Lattice<Token> oldLattice = this.getLattice(); /* Build a list of terminals across all grammars */ @@ -426,8 +436,8 @@ public class Sentence { public Lattice<Token> getLattice() { if (this.sourceLattice == null) { - if (config.lattice_decoding && rawSource().startsWith("(((")) { - if (config.search_algorithm.equals("stack")) { + if (latticeDecoding && rawSource().startsWith("(((")) { + if (searchAlgorithm == SearchAlgorithm.stack) { throw new RuntimeException("* FATAL: lattice decoding currently not supported for stack-based search algorithm."); } this.sourceLattice = Lattice.createTokenLatticeFromPLF(rawSource(), config); http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/Token.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/Token.java b/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/Token.java index 4cbc7fa..ae9d261 100644 --- a/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/Token.java +++ b/joshua-core/src/main/java/org/apache/joshua/decoder/segment_file/Token.java @@ -25,11 +25,12 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.joshua.corpus.Vocabulary; -import org.apache.joshua.decoder.JoshuaConfiguration; import org.apache.joshua.util.FormatUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.typesafe.config.Config; + /** * Stores the identity of a word and its annotations in a sentence. @@ -73,9 +74,7 @@ public class Token { * @param config a populated {@link org.apache.joshua.decoder.JoshuaConfiguration} * */ - public Token(String rawWord, JoshuaConfiguration config) { - - JoshuaConfiguration joshuaConfiguration = config; + public Token(String rawWord, Config config) { annotations = new HashMap<>(); @@ -105,7 +104,7 @@ public class Token { // annotations. token = escapeSpecialSymbols(token); - if (joshuaConfiguration != null && joshuaConfiguration.lowercase) { + if (config != null && config.getBoolean("lowercase")) { if (FormatUtils.ISALLUPPERCASE(token)) annotations.put("lettercase", "all-upper"); else if (Character.isUpperCase(token.charAt(0))) http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/lattice/Lattice.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/lattice/Lattice.java b/joshua-core/src/main/java/org/apache/joshua/lattice/Lattice.java index c557c07..2f722c1 100644 --- a/joshua-core/src/main/java/org/apache/joshua/lattice/Lattice.java +++ b/joshua-core/src/main/java/org/apache/joshua/lattice/Lattice.java @@ -28,12 +28,14 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.joshua.corpus.Vocabulary; -import org.apache.joshua.decoder.JoshuaConfiguration; import org.apache.joshua.decoder.segment_file.Token; import org.apache.joshua.util.ChartSpan; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.typesafe.config.Config; +import com.typesafe.config.ConfigFactory; + /** * A lattice representation of a directed graph. * @@ -61,9 +63,6 @@ public class Lattice<Value> implements Iterable<Node<Value>> { */ private List<Node<Value>> nodes; - - JoshuaConfiguration config = null; - /** * Constructs a new lattice from an existing list of (connected) nodes. * <p> @@ -73,13 +72,13 @@ public class Lattice<Value> implements Iterable<Node<Value>> { * @param nodes A list of nodes which must be in topological order. * @param config a populated {@link org.apache.joshua.decoder.JoshuaConfiguration} */ - public Lattice(List<Node<Value>> nodes, JoshuaConfiguration config) { + public Lattice(List<Node<Value>> nodes, Config config) { this.nodes = nodes; // this.distances = calculateAllPairsShortestPath(); this.latticeHasAmbiguity = true; } - public Lattice(List<Node<Value>> nodes, boolean isAmbiguous, JoshuaConfiguration config) { + public Lattice(List<Node<Value>> nodes, boolean isAmbiguous, Config config) { // Node<Value> sink = new Node<Value>(nodes.size()); // nodes.add(sink); this.nodes = nodes; @@ -93,7 +92,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> { * @param linearChain a sequence of Value objects * @param config a populated {@link org.apache.joshua.decoder.JoshuaConfiguration} */ - public Lattice(Value[] linearChain, JoshuaConfiguration config) { + public Lattice(Value[] linearChain, Config config) { this.latticeHasAmbiguity = false; this.nodes = new ArrayList<Node<Value>>(); @@ -144,7 +143,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> { * @param config a populated {@link org.apache.joshua.decoder.JoshuaConfiguration} * @return Lattice representation of the linear chain. */ - public static Lattice<Token> createTokenLatticeFromString(String source, JoshuaConfiguration config) { + public static Lattice<Token> createTokenLatticeFromString(String source, Config config) { String[] tokens = source.split("\\s+"); Token[] integerSentence = new Token[tokens.length]; for (int i = 0; i < tokens.length; i++) { @@ -154,7 +153,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> { return new Lattice<Token>(integerSentence, config); } - public static Lattice<Token> createTokenLatticeFromPLF(String data, JoshuaConfiguration config) { + public static Lattice<Token> createTokenLatticeFromPLF(String data, Config config) { ArrayList<Node<Token>> nodes = new ArrayList<Node<Token>>(); // This matches a sequence of tuples, which describe arcs leaving this node @@ -248,7 +247,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> { * @param config a populated {@link org.apache.joshua.decoder.JoshuaConfiguration} * @return A lattice that corresponds to the given string. */ - public static Lattice<String> createStringLatticeFromString(String data, JoshuaConfiguration config) { + public static Lattice<String> createStringLatticeFromString(String data, Config config) { Map<Integer, Node<String>> nodes = new HashMap<Integer, Node<String>>(); @@ -536,6 +535,6 @@ public class Lattice<Value> implements Iterable<Node<Value>> { LOG.debug("Nodelist={}", nodeList); - return new Lattice<String>(nodeList, new JoshuaConfiguration()); + return new Lattice<String>(nodeList, ConfigFactory.empty()); } } http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/mira/MIRA.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/mira/MIRA.java b/joshua-core/src/main/java/org/apache/joshua/mira/MIRA.java index fb1f5e2..c7ff949 100755 --- a/joshua-core/src/main/java/org/apache/joshua/mira/MIRA.java +++ b/joshua-core/src/main/java/org/apache/joshua/mira/MIRA.java @@ -18,13 +18,12 @@ */ package org.apache.joshua.mira; -import org.apache.joshua.decoder.JoshuaConfiguration; +import org.apache.joshua.decoder.Decoder; import org.apache.joshua.util.FileUtility; import org.apache.joshua.util.StreamGobbler; public class MIRA { public static void main(String[] args) throws Exception { - JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration(); boolean external = false; // should each MIRA iteration be launched externally? if (args.length == 1) { @@ -42,7 +41,7 @@ public class MIRA { } if (!external) { - MIRACore myMIRA = new MIRACore(args[0], joshuaConfiguration); + MIRACore myMIRA = new MIRACore(args[0], Decoder.getDefaultFlags()); myMIRA.run_MIRA(); // optimize lambda[] myMIRA.finish(); } else { http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/mira/MIRACore.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/mira/MIRACore.java b/joshua-core/src/main/java/org/apache/joshua/mira/MIRACore.java index e0354b9..eab6eb9 100755 --- a/joshua-core/src/main/java/org/apache/joshua/mira/MIRACore.java +++ b/joshua-core/src/main/java/org/apache/joshua/mira/MIRACore.java @@ -46,13 +46,14 @@ import java.util.zip.GZIPOutputStream; import org.apache.joshua.corpus.Vocabulary; import org.apache.joshua.decoder.Decoder; -import org.apache.joshua.decoder.JoshuaConfiguration; import org.apache.joshua.metrics.EvaluationMetric; import org.apache.joshua.util.StreamGobbler; import org.apache.joshua.util.io.ExistingUTF8EncodedTextFile; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.typesafe.config.Config; + /** * This code was originally written by Yuan Cao, who copied the MERT code to produce this file. */ @@ -61,7 +62,7 @@ public class MIRACore { private static final Logger LOG = LoggerFactory.getLogger(MIRACore.class); - private final JoshuaConfiguration joshuaConfiguration; + private final Config config; private TreeSet<Integer>[] indicesOfInterest_all; private final static DecimalFormat f4 = new DecimalFormat("###0.0000"); @@ -256,19 +257,19 @@ public class MIRACore { // private int useDisk; - public MIRACore(JoshuaConfiguration joshuaConfiguration) { - this.joshuaConfiguration = joshuaConfiguration; + public MIRACore(Config config) { + this.config = config; } - public MIRACore(String[] args, JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException { - this.joshuaConfiguration = joshuaConfiguration; + public MIRACore(String[] args, Config config) throws FileNotFoundException, IOException { + this.config = config; EvaluationMetric.set_knownMetrics(); processArgsArray(args); initialize(0); } - public MIRACore(String configFileName, JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException { - this.joshuaConfiguration = joshuaConfiguration; + public MIRACore(String configFileName, Config config) throws FileNotFoundException, IOException { + this.config = config; EvaluationMetric.set_knownMetrics(); processArgsArray(cfgFileToArgsArray(configFileName)); initialize(0); @@ -480,7 +481,7 @@ public class MIRACore { // by default, load joshua decoder if (decoderCommand == null && fakeFileNameTemplate == null) { println("Loading Joshua decoder...", 1); - myDecoder = new Decoder(joshuaConfiguration); + myDecoder = new Decoder(config); println("...finished loading @ " + (new Date()), 1); println(""); } else { http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/oracle/OracleExtractionHG.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/oracle/OracleExtractionHG.java b/joshua-core/src/main/java/org/apache/joshua/oracle/OracleExtractionHG.java index 572c7f1..54c7cce 100644 --- a/joshua-core/src/main/java/org/apache/joshua/oracle/OracleExtractionHG.java +++ b/joshua-core/src/main/java/org/apache/joshua/oracle/OracleExtractionHG.java @@ -18,24 +18,15 @@ */ package org.apache.joshua.oracle; -import static org.apache.joshua.decoder.hypergraph.ViterbiExtractor.getViterbiString; -import static org.apache.joshua.util.FormatUtils.removeSentenceMarkers; - -import java.io.BufferedWriter; -import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import org.apache.joshua.corpus.Vocabulary; -import org.apache.joshua.decoder.JoshuaConfiguration; import org.apache.joshua.decoder.Support; -import org.apache.joshua.decoder.Decoder; import org.apache.joshua.decoder.hypergraph.HGNode; import org.apache.joshua.decoder.hypergraph.HyperEdge; import org.apache.joshua.decoder.hypergraph.HyperGraph; import org.apache.joshua.decoder.hypergraph.KBestExtractor; -import org.apache.joshua.util.FileUtility; -import org.apache.joshua.util.io.LineReader; import org.apache.joshua.util.FormatUtils; /** @@ -101,98 +92,98 @@ public class OracleExtractionHG extends SplitHg { /* * for 919 sent, time_on_reading: 148797 time_on_orc_extract: 580286 */ - @SuppressWarnings({ "unused" }) - public static void main(String[] args) throws IOException { - JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration(); - /* - * String f_hypergraphs="C:\\Users\\zli\\Documents\\mt03.src.txt.ss.nbest.hg.items"; String - * f_rule_tbl="C:\\Users\\zli\\Documents\\mt03.src.txt.ss.nbest.hg.rules"; String - * f_ref_files="C:\\Users\\zli\\Documents\\mt03.ref.txt.1"; String f_orc_out - * ="C:\\Users\\zli\\Documents\\mt03.orc.txt"; - */ - if (6 != args.length) { - System.out - .println("Usage: java Decoder f_hypergraphs f_rule_tbl f_ref_files f_orc_out lm_order orc_extract_nbest"); - System.out.println("num of args is " + args.length); - for (int i = 0; i < args.length; i++) { - System.out.println("arg is: " + args[i]); - } - System.exit(1); - } - // String f_hypergraphs = args[0].trim(); - // String f_rule_tbl = args[1].trim(); - String f_ref_files = args[2].trim(); - String f_orc_out = args[3].trim(); - int lm_order = Integer.parseInt(args[4].trim()); - boolean orc_extract_nbest = Boolean.valueOf(args[5].trim()); // oracle extraction from nbest or hg - - int baseline_lm_feat_id = 0; - - KBestExtractor kbest_extractor = null; - int topN = 300;// TODO - joshuaConfiguration.use_unique_nbest = true; - joshuaConfiguration.include_align_index = false; - boolean do_ngram_clip_nbest = true; // TODO - if (orc_extract_nbest) { - System.out.println("oracle extraction from nbest list"); - - kbest_extractor = new KBestExtractor(null, null, Decoder.weights, false, joshuaConfiguration); - } - - BufferedWriter orc_out = FileUtility.getWriteFileStream(f_orc_out); - - long start_time0 = System.currentTimeMillis(); - long time_on_reading = 0; - long time_on_orc_extract = 0; - // DiskHyperGraph dhg_read = new DiskHyperGraph(baseline_lm_feat_id, true, null); - - // dhg_read.initRead(f_hypergraphs, f_rule_tbl, null); - - OracleExtractionHG orc_extractor = new OracleExtractionHG(baseline_lm_feat_id); - long start_time = System.currentTimeMillis(); - int sent_id = 0; - for (String ref_sent: new LineReader(f_ref_files)) { - System.out.println("############Process sentence " + sent_id); - start_time = System.currentTimeMillis(); - sent_id++; - // if(sent_id>10)break; - - // HyperGraph hg = dhg_read.readHyperGraph(); - HyperGraph hg = null; - if (hg == null) - continue; - - // System.out.println("read disk hyp: " + (System.currentTimeMillis()-start_time)); - time_on_reading += System.currentTimeMillis() - start_time; - start_time = System.currentTimeMillis(); - - String orc_sent = null; - double orc_bleu = 0; - if (orc_extract_nbest) { - Object[] res = orc_extractor.oracle_extract_nbest(kbest_extractor, hg, topN, - do_ngram_clip_nbest, ref_sent); - orc_sent = (String) res[0]; - orc_bleu = (Double) res[1]; - } else { - HyperGraph hg_oracle = orc_extractor.oracle_extract_hg(hg, hg.sentLen(), lm_order, ref_sent); - orc_sent = removeSentenceMarkers(getViterbiString(hg_oracle)); - orc_bleu = orc_extractor.get_best_goal_cost(hg, orc_extractor.g_tbl_split_virtual_items); - - time_on_orc_extract += System.currentTimeMillis() - start_time; - System.out.println("num_virtual_items: " + orc_extractor.g_num_virtual_items - + " num_virtual_dts: " + orc_extractor.g_num_virtual_deductions); - // System.out.println("oracle extract: " + (System.currentTimeMillis()-start_time)); - } - - orc_out.write(orc_sent + "\n"); - System.out.println("orc bleu is " + orc_bleu); - } - orc_out.close(); - - System.out.println("time_on_reading: " + time_on_reading); - System.out.println("time_on_orc_extract: " + time_on_orc_extract); - System.out.println("total running time: " + (System.currentTimeMillis() - start_time0)); - } +// @SuppressWarnings({ "unused" }) +// public static void main(String[] args) throws IOException { +// JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration(); +// /* +// * String f_hypergraphs="C:\\Users\\zli\\Documents\\mt03.src.txt.ss.nbest.hg.items"; String +// * f_rule_tbl="C:\\Users\\zli\\Documents\\mt03.src.txt.ss.nbest.hg.rules"; String +// * f_ref_files="C:\\Users\\zli\\Documents\\mt03.ref.txt.1"; String f_orc_out +// * ="C:\\Users\\zli\\Documents\\mt03.orc.txt"; +// */ +// if (6 != args.length) { +// System.out +// .println("Usage: java Decoder f_hypergraphs f_rule_tbl f_ref_files f_orc_out lm_order orc_extract_nbest"); +// System.out.println("num of args is " + args.length); +// for (int i = 0; i < args.length; i++) { +// System.out.println("arg is: " + args[i]); +// } +// System.exit(1); +// } +// // String f_hypergraphs = args[0].trim(); +// // String f_rule_tbl = args[1].trim(); +// String f_ref_files = args[2].trim(); +// String f_orc_out = args[3].trim(); +// int lm_order = Integer.parseInt(args[4].trim()); +// boolean orc_extract_nbest = Boolean.valueOf(args[5].trim()); // oracle extraction from nbest or hg +// +// int baseline_lm_feat_id = 0; +// +// KBestExtractor kbest_extractor = null; +// int topN = 300;// TODO +// joshuaConfiguration.use_unique_nbest = true; +// joshuaConfiguration.include_align_index = false; +// boolean do_ngram_clip_nbest = true; // TODO +// if (orc_extract_nbest) { +// System.out.println("oracle extraction from nbest list"); +// +// kbest_extractor = new KBestExtractor(null, null, Decoder.weights, false, joshuaConfiguration); +// } +// +// BufferedWriter orc_out = FileUtility.getWriteFileStream(f_orc_out); +// +// long start_time0 = System.currentTimeMillis(); +// long time_on_reading = 0; +// long time_on_orc_extract = 0; +// // DiskHyperGraph dhg_read = new DiskHyperGraph(baseline_lm_feat_id, true, null); +// +// // dhg_read.initRead(f_hypergraphs, f_rule_tbl, null); +// +// OracleExtractionHG orc_extractor = new OracleExtractionHG(baseline_lm_feat_id); +// long start_time = System.currentTimeMillis(); +// int sent_id = 0; +// for (String ref_sent: new LineReader(f_ref_files)) { +// System.out.println("############Process sentence " + sent_id); +// start_time = System.currentTimeMillis(); +// sent_id++; +// // if(sent_id>10)break; +// +// // HyperGraph hg = dhg_read.readHyperGraph(); +// HyperGraph hg = null; +// if (hg == null) +// continue; +// +// // System.out.println("read disk hyp: " + (System.currentTimeMillis()-start_time)); +// time_on_reading += System.currentTimeMillis() - start_time; +// start_time = System.currentTimeMillis(); +// +// String orc_sent = null; +// double orc_bleu = 0; +// if (orc_extract_nbest) { +// Object[] res = orc_extractor.oracle_extract_nbest(kbest_extractor, hg, topN, +// do_ngram_clip_nbest, ref_sent); +// orc_sent = (String) res[0]; +// orc_bleu = (Double) res[1]; +// } else { +// HyperGraph hg_oracle = orc_extractor.oracle_extract_hg(hg, hg.sentLen(), lm_order, ref_sent); +// orc_sent = removeSentenceMarkers(getViterbiString(hg_oracle)); +// orc_bleu = orc_extractor.get_best_goal_cost(hg, orc_extractor.g_tbl_split_virtual_items); +// +// time_on_orc_extract += System.currentTimeMillis() - start_time; +// System.out.println("num_virtual_items: " + orc_extractor.g_num_virtual_items +// + " num_virtual_dts: " + orc_extractor.g_num_virtual_deductions); +// // System.out.println("oracle extract: " + (System.currentTimeMillis()-start_time)); +// } +// +// orc_out.write(orc_sent + "\n"); +// System.out.println("orc bleu is " + orc_bleu); +// } +// orc_out.close(); +// +// System.out.println("time_on_reading: " + time_on_reading); +// System.out.println("time_on_orc_extract: " + time_on_orc_extract); +// System.out.println("total running time: " + (System.currentTimeMillis() - start_time0)); +// } // find the oracle hypothesis in the nbest list public Object[] oracle_extract_nbest(KBestExtractor kbest_extractor, HyperGraph hg, int n, http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/pro/PRO.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/pro/PRO.java b/joshua-core/src/main/java/org/apache/joshua/pro/PRO.java index fd9a7cb..f40093b 100755 --- a/joshua-core/src/main/java/org/apache/joshua/pro/PRO.java +++ b/joshua-core/src/main/java/org/apache/joshua/pro/PRO.java @@ -18,13 +18,12 @@ */ package org.apache.joshua.pro; -import org.apache.joshua.decoder.JoshuaConfiguration; +import org.apache.joshua.decoder.Decoder; import org.apache.joshua.util.FileUtility; import org.apache.joshua.util.StreamGobbler; public class PRO { public static void main(String[] args) throws Exception { - JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration(); boolean external = false; // should each PRO iteration be launched externally? if (args.length == 1) { @@ -42,7 +41,7 @@ public class PRO { } if (!external) { - PROCore myPRO = new PROCore(args[0],joshuaConfiguration); + PROCore myPRO = new PROCore(args[0], Decoder.getDefaultFlags()); myPRO.run_PRO(); // optimize lambda[]!!! myPRO.finish(); } else { http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/pro/PROCore.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/pro/PROCore.java b/joshua-core/src/main/java/org/apache/joshua/pro/PROCore.java index aba9d6b..b92fcd5 100755 --- a/joshua-core/src/main/java/org/apache/joshua/pro/PROCore.java +++ b/joshua-core/src/main/java/org/apache/joshua/pro/PROCore.java @@ -47,13 +47,14 @@ import java.util.zip.GZIPOutputStream; import org.apache.joshua.corpus.Vocabulary; import org.apache.joshua.decoder.Decoder; -import org.apache.joshua.decoder.JoshuaConfiguration; import org.apache.joshua.metrics.EvaluationMetric; import org.apache.joshua.util.StreamGobbler; import org.apache.joshua.util.io.ExistingUTF8EncodedTextFile; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.typesafe.config.Config; + /** * This code was originally written by Yuan Cao, who copied the MERT code to produce this file. */ @@ -62,7 +63,7 @@ public class PROCore { private static final Logger LOG = LoggerFactory.getLogger(PROCore.class); - private final JoshuaConfiguration joshuaConfiguration; + private final Config config; private TreeSet<Integer>[] indicesOfInterest_all; private final static DecimalFormat f4 = new DecimalFormat("###0.0000"); @@ -249,19 +250,19 @@ public class PROCore { // private int useDisk; - public PROCore(JoshuaConfiguration joshuaConfiguration) { - this.joshuaConfiguration = joshuaConfiguration; + public PROCore(Config config) { + this.config = config; } - public PROCore(String[] args, JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException { - this.joshuaConfiguration = joshuaConfiguration; + public PROCore(String[] args, Config config) throws FileNotFoundException, IOException { + this.config = config; EvaluationMetric.set_knownMetrics(); processArgsArray(args); initialize(0); } - public PROCore(String configFileName, JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException { - this.joshuaConfiguration = joshuaConfiguration; + public PROCore(String configFileName, Config config) throws FileNotFoundException, IOException { + this.config = config; EvaluationMetric.set_knownMetrics(); processArgsArray(cfgFileToArgsArray(configFileName)); initialize(0); @@ -473,7 +474,7 @@ public class PROCore { // by default, load joshua decoder if (decoderCommand == null && fakeFileNameTemplate == null) { println("Loading Joshua decoder...", 1); - myDecoder = new Decoder(joshuaConfiguration); + myDecoder = new Decoder(config); println("...finished loading @ " + (new Date()), 1); println(""); } else { http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/server/ServerThread.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/server/ServerThread.java b/joshua-core/src/main/java/org/apache/joshua/server/ServerThread.java index 7b14bdc..81fdeaf 100644 --- a/joshua-core/src/main/java/org/apache/joshua/server/ServerThread.java +++ b/joshua-core/src/main/java/org/apache/joshua/server/ServerThread.java @@ -35,9 +35,10 @@ import java.util.Arrays; import java.util.HashMap; import org.apache.joshua.decoder.Decoder; -import org.apache.joshua.decoder.JoshuaConfiguration; +import org.apache.joshua.decoder.SearchAlgorithm; import org.apache.joshua.decoder.Translation; import org.apache.joshua.decoder.TranslationResponseStream; +import org.apache.joshua.decoder.ff.FeatureVector; import org.apache.joshua.decoder.ff.tm.Rule; import org.apache.joshua.decoder.ff.tm.Trie; import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader; @@ -59,8 +60,7 @@ public class ServerThread extends Thread implements HttpHandler { private static final Logger LOG = LoggerFactory.getLogger(ServerThread.class); private static final Charset FILE_ENCODING = Charset.forName("UTF-8"); - private final JoshuaConfiguration joshuaConfiguration; - private Socket socket = null; + private final Socket socket; private final Decoder decoder; /** @@ -70,8 +70,7 @@ public class ServerThread extends Thread implements HttpHandler { * @param decoder the configured decoder that handles performing translations * @param joshuaConfiguration a populated {@link org.apache.joshua.decoder.JoshuaConfiguration} */ - public ServerThread(Socket socket, Decoder decoder, JoshuaConfiguration joshuaConfiguration) { - this.joshuaConfiguration = joshuaConfiguration; + public ServerThread(Socket socket, Decoder decoder) { this.socket = socket; this.decoder = decoder; } @@ -88,7 +87,7 @@ public class ServerThread extends Thread implements HttpHandler { try { BufferedReader reader = new BufferedReader(new InputStreamReader(socket.getInputStream(), FILE_ENCODING)); - TranslationRequestStream request = new TranslationRequestStream(reader, joshuaConfiguration); + TranslationRequestStream request = new TranslationRequestStream(reader, decoder.getDecoderConfig().getFlags()); try { TranslationResponseStream translationResponseStream = decoder.decodeAll(request); @@ -162,7 +161,7 @@ public class ServerThread extends Thread implements HttpHandler { String meta = params.get("meta"); BufferedReader reader = new BufferedReader(new StringReader(query)); - TranslationRequestStream request = new TranslationRequestStream(reader, joshuaConfiguration); + TranslationRequestStream request = new TranslationRequestStream(reader, decoder.getDecoderConfig().getFlags()); TranslationResponseStream translationResponseStream = decoder.decodeAll(request); JSONMessage message = new JSONMessage(); @@ -194,9 +193,11 @@ public class ServerThread extends Thread implements HttpHandler { String type = tokens[0]; String args = tokens.length > 1 ? tokens[1] : ""; + final FeatureVector weights = decoder.getDecoderConfig().getWeights(); + if (type.equals("get_weight")) { String weight = tokens[1]; - LOG.info("WEIGHT: %s = %.3f", weight, Decoder.weights.getOrDefault(hashFeature(weight))); + LOG.info("WEIGHT: %s = %.3f", weight, weights.getOrDefault(hashFeature(weight))); } else if (type.equals("set_weights")) { // Change a decoder weight @@ -205,15 +206,15 @@ public class ServerThread extends Thread implements HttpHandler { String feature = argTokens[i]; int featureId = hashFeature(feature); String newValue = argTokens[i+1]; - float old_weight = Decoder.weights.getOrDefault(featureId); - Decoder.weights.put(featureId, Float.parseFloat(newValue)); - LOG.info("set_weights: {} {} -> {}", feature, old_weight, Decoder.weights.getOrDefault(featureId)); + float old_weight = weights.getOrDefault(featureId); + weights.put(featureId, Float.parseFloat(newValue)); + LOG.info("set_weights: {} {} -> {}", feature, old_weight, weights.getOrDefault(featureId)); } - message.addMetaData("weights " + Decoder.weights.toString()); + message.addMetaData("weights " + weights.toString()); } else if (type.equals("get_weights")) { - message.addMetaData("weights " + Decoder.weights.toString()); + message.addMetaData("weights " + weights.toString()); } else if (type.equals("add_rule")) { String argTokens[] = args.split(" \\|\\|\\| "); @@ -236,7 +237,7 @@ public class ServerThread extends Thread implements HttpHandler { /* Prepend source and target side nonterminals for phrase-based decoding. Probably better * handled in each grammar type's addRule() function. */ - String ruleString = (joshuaConfiguration.search_algorithm.equals("stack")) + String ruleString = (decoder.getDecoderConfig().getSearchAlgorithm() == SearchAlgorithm.stack) ? String.format("%s ||| [X,1] %s ||| [X,1] %s ||| -1 %s %s", lhs, source, target, featureStr, alignmentStr) : String.format("%s ||| %s ||| %s ||| -1 %s %s", lhs, source, target, featureStr, alignmentStr); http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/server/TcpServer.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/server/TcpServer.java b/joshua-core/src/main/java/org/apache/joshua/server/TcpServer.java index e054186..cefe6d9 100644 --- a/joshua-core/src/main/java/org/apache/joshua/server/TcpServer.java +++ b/joshua-core/src/main/java/org/apache/joshua/server/TcpServer.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.net.ServerSocket; import org.apache.joshua.decoder.Decoder; -import org.apache.joshua.decoder.JoshuaConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,12 +31,10 @@ import org.slf4j.LoggerFactory; */ public class TcpServer { private static final Logger LOG = LoggerFactory.getLogger(TcpServer.class); - private final JoshuaConfiguration joshuaConfiguration; private Decoder decoder; private int port; - public TcpServer(Decoder decoder, int port,JoshuaConfiguration joshuaConfiguration) { - this.joshuaConfiguration = joshuaConfiguration; + public TcpServer(Decoder decoder, int port) { this.decoder = decoder; this.port = port; } @@ -48,18 +45,18 @@ public class TcpServer { public void start() { try { - ServerSocket serverSocket = new ServerSocket(joshuaConfiguration.server_port); + ServerSocket serverSocket = new ServerSocket(port); LOG.info("** TCP Server running and listening on port {}.", port); boolean listening = true; while (listening) - new ServerThread(serverSocket.accept(), decoder, joshuaConfiguration).start(); + new ServerThread(serverSocket.accept(), decoder).start(); serverSocket.close(); } catch (IOException e) { throw new RuntimeException(String.format("Could not listen on port: %d.", - joshuaConfiguration.server_port)); + port)); } } } http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/util/Constants.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/util/Constants.java b/joshua-core/src/main/java/org/apache/joshua/util/Constants.java index 669023b..d6b8368 100644 --- a/joshua-core/src/main/java/org/apache/joshua/util/Constants.java +++ b/joshua-core/src/main/java/org/apache/joshua/util/Constants.java @@ -41,5 +41,11 @@ public final class Constants { public static final String TM_PREFIX = "tm"; public static final String labeledFeatureSeparator = "="; + + public static final String GLUE_OWNER = "glue"; + + public static final String OOV_OWNER = "oov"; + + public static final String CUSTOM_OWNER = "custom"; } http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/util/PackedGrammarServer.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/util/PackedGrammarServer.java b/joshua-core/src/main/java/org/apache/joshua/util/PackedGrammarServer.java index d764340..db4818a 100644 --- a/joshua-core/src/main/java/org/apache/joshua/util/PackedGrammarServer.java +++ b/joshua-core/src/main/java/org/apache/joshua/util/PackedGrammarServer.java @@ -25,18 +25,23 @@ import java.util.List; import java.util.Map; import org.apache.joshua.corpus.Vocabulary; -import org.apache.joshua.decoder.JoshuaConfiguration; import org.apache.joshua.decoder.ff.tm.Rule; import org.apache.joshua.decoder.ff.tm.Trie; import org.apache.joshua.decoder.ff.tm.packed.PackedGrammar; import org.apache.joshua.util.io.LineReader; +import com.google.common.collect.ImmutableMap; +import com.typesafe.config.Config; +import com.typesafe.config.ConfigFactory; + public class PackedGrammarServer { private PackedGrammar grammar; - public PackedGrammarServer(String packed_directory,JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException { - grammar = new PackedGrammar(packed_directory, -1, "owner", "thrax", joshuaConfiguration); + public PackedGrammarServer(String packed_directory) { + final Config grammarConfig = ConfigFactory.parseMap( + ImmutableMap.of("owner", "thrax", "span_limit", "-1"), "packed grammar config"); + grammar = new PackedGrammar(grammarConfig); } public List<Rule> get(String source) { @@ -76,8 +81,7 @@ public class PackedGrammarServer { public static void main(String[] args) throws FileNotFoundException, IOException { - JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration(); - PackedGrammarServer pgs = new PackedGrammarServer(args[0], joshuaConfiguration); + PackedGrammarServer pgs = new PackedGrammarServer(args[0]); for (String line: new LineReader(System.in)) { List<Rule> rules = pgs.get(line); http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/util/io/LineReader.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/util/io/LineReader.java b/joshua-core/src/main/java/org/apache/joshua/util/io/LineReader.java index ea5d8f1..5868de5 100644 --- a/joshua-core/src/main/java/org/apache/joshua/util/io/LineReader.java +++ b/joshua-core/src/main/java/org/apache/joshua/util/io/LineReader.java @@ -71,7 +71,7 @@ public class LineReader implements Reader<String>, AutoCloseable { */ public LineReader(String filename) throws IOException { - display_progress = (Decoder.VERBOSE >= 1); + display_progress = true; progress = 0; @@ -98,7 +98,7 @@ public class LineReader implements Reader<String>, AutoCloseable { public LineReader(String filename, boolean show_progress) throws IOException { this(filename); - display_progress = (Decoder.VERBOSE >= 1 && show_progress); + display_progress = show_progress; } http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/zmert/MertCore.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/zmert/MertCore.java b/joshua-core/src/main/java/org/apache/joshua/zmert/MertCore.java index abed07a..88a36a2 100644 --- a/joshua-core/src/main/java/org/apache/joshua/zmert/MertCore.java +++ b/joshua-core/src/main/java/org/apache/joshua/zmert/MertCore.java @@ -50,13 +50,14 @@ import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; import org.apache.joshua.decoder.Decoder; -import org.apache.joshua.decoder.JoshuaConfiguration; import org.apache.joshua.metrics.EvaluationMetric; import org.apache.joshua.util.StreamGobbler; import org.apache.joshua.util.io.ExistingUTF8EncodedTextFile; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.typesafe.config.Config; + /** * This code was originally written by Omar Zaidan. In September of 2012, it was augmented to support * a sparse feature implementation. @@ -68,7 +69,7 @@ public class MertCore { private static final Logger LOG = LoggerFactory.getLogger(MertCore.class); - private final JoshuaConfiguration joshuaConfiguration; + private final Config config; private TreeSet<Integer>[] indicesOfInterest_all; private final static DecimalFormat f4 = new DecimalFormat("###0.0000"); @@ -255,20 +256,20 @@ public class MertCore { // private int useDisk; - public MertCore(JoshuaConfiguration joshuaConfiguration) + public MertCore(Config config) { - this.joshuaConfiguration = joshuaConfiguration; + this.config = config; } - public MertCore(String[] args, JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException { - this.joshuaConfiguration = joshuaConfiguration; + public MertCore(String[] args, Config config) throws FileNotFoundException, IOException { + this.config = config; EvaluationMetric.set_knownMetrics(); processArgsArray(args); initialize(0); } - public MertCore(String configFileName,JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException { - this.joshuaConfiguration = joshuaConfiguration; + public MertCore(String configFileName, Config config) throws FileNotFoundException, IOException { + this.config = config; EvaluationMetric.set_knownMetrics(); processArgsArray(cfgFileToArgsArray(configFileName)); initialize(0); @@ -487,7 +488,7 @@ public class MertCore { if (decoderCommand == null && fakeFileNameTemplate == null) { println("Loading Joshua decoder...", 1); - myDecoder = new Decoder(joshuaConfiguration); + myDecoder = new Decoder(config); println("...finished loading @ " + (new Date()), 1); println(""); } else { @@ -2794,9 +2795,8 @@ public class MertCore { String configFileName = args[0]; String stateFileName = args[1]; int currIteration = Integer.parseInt(args[2]); - JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration(); - MertCore DMC = new MertCore(joshuaConfiguration); // dummy MertCore object + MertCore DMC = new MertCore(Decoder.getDefaultFlags()); // dummy MertCore object // if bad args[], System.exit(80) http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java ---------------------------------------------------------------------- diff --git a/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java b/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java index 7e4c2cc..ae82d2c 100644 --- a/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java +++ b/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java @@ -21,7 +21,7 @@ package org.apache.joshua.zmert; import java.io.BufferedReader; import java.io.InputStreamReader; -import org.apache.joshua.decoder.JoshuaConfiguration; +import org.apache.joshua.decoder.Decoder; import org.apache.joshua.util.FileUtility; public class ZMERT { @@ -43,8 +43,7 @@ public class ZMERT { } if (!external) { - JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration(); - MertCore myMert = new MertCore(args[0],joshuaConfiguration); + MertCore myMert = new MertCore(args[0], Decoder.getDefaultFlags()); myMert.run_MERT(); // optimize lambda[]!!! myMert.finish(); } else { http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-web/src/main/java/org/apache/joshua/decoder/DecoderServlet.java ---------------------------------------------------------------------- diff --git a/joshua-web/src/main/java/org/apache/joshua/decoder/DecoderServlet.java b/joshua-web/src/main/java/org/apache/joshua/decoder/DecoderServlet.java index a6e75c0..72556a7 100644 --- a/joshua-web/src/main/java/org/apache/joshua/decoder/DecoderServlet.java +++ b/joshua-web/src/main/java/org/apache/joshua/decoder/DecoderServlet.java @@ -59,7 +59,7 @@ public class DecoderServlet extends HttpServlet { private void handleRequest(Decoder decoder, InputStream in, OutputStream out) throws IOException { BufferedReader reader = new BufferedReader(new InputStreamReader(in, Charset.forName("UTF-8"))); - TranslationRequestStream request = new TranslationRequestStream(reader, decoder.getJoshuaConfiguration()); + TranslationRequestStream request = new TranslationRequestStream(reader, decoder.getDecoderConfig().getFlags()); TranslationResponseStream translations = decoder.decodeAll(request); http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-web/src/main/java/org/apache/joshua/decoder/DecoderServletContextListener.java ---------------------------------------------------------------------- diff --git a/joshua-web/src/main/java/org/apache/joshua/decoder/DecoderServletContextListener.java b/joshua-web/src/main/java/org/apache/joshua/decoder/DecoderServletContextListener.java index 933911f..f3b27f6 100644 --- a/joshua-web/src/main/java/org/apache/joshua/decoder/DecoderServletContextListener.java +++ b/joshua-web/src/main/java/org/apache/joshua/decoder/DecoderServletContextListener.java @@ -14,6 +14,7 @@ import javax.servlet.ServletContextListener; import javax.servlet.annotation.WebListener; import com.google.common.base.Throwables; +import com.typesafe.config.ConfigValueFactory; /** * Initializes {@link Decoder} via <code>decoderArgsLine</code> init parameter. @@ -32,13 +33,8 @@ public class DecoderServletContextListener implements ServletContextListener { @Override public void contextInitialized(ServletContextEvent sce) { - String argsLine = sce.getServletContext().getInitParameter("decoderArgsLine"); try { - JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration(); - new ArgsParser(argsLine.split(" "), joshuaConfiguration); - joshuaConfiguration.use_structured_output = true; - joshuaConfiguration.sanityCheck(); - Decoder decoder = new Decoder(joshuaConfiguration); + Decoder decoder = new Decoder(Decoder.getDefaultFlags().withValue("use_structured_output", ConfigValueFactory.fromAnyRef("true"))); sce.getServletContext().setAttribute(DECODER_CONTEXT_ATTRIBUTE_NAME, decoder); } catch (Exception ex) { Throwables.propagate(ex);