Joshua 7 configuration system

Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/f2edda0f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/f2edda0f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/f2edda0f

Branch: refs/heads/7_confsystem
Commit: f2edda0f14967f414f2550989cedf9a281536193
Parents: f7513ab
Author: Felix Hieber <fhie...@amazon.com>
Authored: Wed Sep 14 20:03:51 2016 +0200
Committer: Hieber, Felix <fhie...@amazon.de>
Committed: Thu Sep 15 17:34:00 2016 +0200

----------------------------------------------------------------------
 .../java/org/apache/joshua/adagrad/AdaGrad.java |   5 +-
 .../org/apache/joshua/adagrad/AdaGradCore.java  |  20 +-
 .../java/org/apache/joshua/decoder/Decoder.conf | 126 ++++
 .../java/org/apache/joshua/decoder/Decoder.java | 590 +++++++--------
 .../apache/joshua/decoder/DecoderConfig.java    | 123 ++++
 .../org/apache/joshua/decoder/DecoderTask.java  | 164 ++---
 .../org/apache/joshua/decoder/InputType.java    |   4 +
 .../joshua/decoder/JoshuaConfiguration.java     | 738 -------------------
 .../apache/joshua/decoder/JoshuaDecoder.java    | 160 ++--
 .../java/org/apache/joshua/decoder/OOVItem.java |  50 ++
 .../apache/joshua/decoder/SearchAlgorithm.java  |   7 +
 .../org/apache/joshua/decoder/ServerType.java   |   4 +
 .../joshua/decoder/StructuredTranslation.java   |   3 +-
 .../org/apache/joshua/decoder/Translation.java  |  50 +-
 .../joshua/decoder/TranslationRequest.java      |  24 +
 .../joshua/decoder/chart_parser/Chart.java      | 138 ++--
 .../decoder/chart_parser/ComputeNodeResult.java |  22 +-
 .../apache/joshua/decoder/ff/Accumulator.java   |  29 +
 .../joshua/decoder/ff/ArityPhrasePenalty.java   |  13 +-
 .../joshua/decoder/ff/FeatureAccumulator.java   |  36 +
 .../joshua/decoder/ff/FeatureFunction.java      | 131 +---
 .../joshua/decoder/ff/LabelCombinationFF.java   |   9 +-
 .../joshua/decoder/ff/LabelSubstitutionFF.java  |   7 +-
 .../joshua/decoder/ff/LexicalFeatures.java      |  22 +-
 .../apache/joshua/decoder/ff/OOVPenalty.java    |  35 +-
 .../apache/joshua/decoder/ff/PhraseModel.java   |  10 +-
 .../apache/joshua/decoder/ff/PhrasePenalty.java |  11 +-
 .../org/apache/joshua/decoder/ff/RuleFF.java    |  20 +-
 .../apache/joshua/decoder/ff/RuleLength.java    |   7 +-
 .../org/apache/joshua/decoder/ff/RuleShape.java |   7 +-
 .../joshua/decoder/ff/ScoreAccumulator.java     |  38 +
 .../apache/joshua/decoder/ff/SourcePathFF.java  |   7 +-
 .../apache/joshua/decoder/ff/StatefulFF.java    |   9 +-
 .../apache/joshua/decoder/ff/StatelessFF.java   |   7 +-
 .../apache/joshua/decoder/ff/TargetBigram.java  |  19 +-
 .../apache/joshua/decoder/ff/WordPenalty.java   |  17 +-
 .../decoder/ff/fragmentlm/FragmentLMFF.java     |  18 +-
 .../joshua/decoder/ff/lm/LanguageModelFF.java   |  31 +-
 .../ff/lm/StateMinimizingLanguageModel.java     |  22 +-
 .../joshua/decoder/ff/phrase/Distortion.java    |  14 +-
 .../joshua/decoder/ff/tm/AbstractGrammar.java   |  68 +-
 .../joshua/decoder/ff/tm/CreateGlueGrammar.java |   3 +-
 .../apache/joshua/decoder/ff/tm/Grammar.java    |   6 +-
 .../joshua/decoder/ff/tm/GrammarReader.java     |   3 +-
 .../decoder/ff/tm/SentenceFilteredGrammar.java  | 366 ---------
 .../decoder/ff/tm/format/MosesFormatReader.java |   1 -
 .../tm/hash_based/MemoryBasedBatchGrammar.java  | 248 -------
 .../decoder/ff/tm/hash_based/TextGrammar.java   | 216 ++++++
 .../ff/tm/hash_based/TextGrammarFactory.java    | 148 ++++
 .../decoder/ff/tm/packed/PackedGrammar.java     |  91 +--
 .../GrammarBuilderWalkerFunction.java           |  11 +-
 .../decoder/hypergraph/KBestExtractor.java      |  45 +-
 .../decoder/io/TranslationRequestStream.java    |  20 +-
 .../apache/joshua/decoder/phrase/Candidate.java |  19 +-
 .../joshua/decoder/phrase/PhraseChart.java      |  23 +-
 .../joshua/decoder/phrase/PhraseTable.java      |  53 +-
 .../org/apache/joshua/decoder/phrase/Stack.java |  11 +-
 .../apache/joshua/decoder/phrase/Stacks.java    |  61 +-
 .../decoder/segment_file/ParseTreeInput.java    |   6 +-
 .../decoder/segment_file/ParsedSentence.java    |   7 +-
 .../joshua/decoder/segment_file/Sentence.java   |  36 +-
 .../joshua/decoder/segment_file/Token.java      |   9 +-
 .../java/org/apache/joshua/lattice/Lattice.java |  21 +-
 .../main/java/org/apache/joshua/mira/MIRA.java  |   5 +-
 .../java/org/apache/joshua/mira/MIRACore.java   |  19 +-
 .../joshua/oracle/OracleExtractionHG.java       | 193 +++--
 .../main/java/org/apache/joshua/pro/PRO.java    |   5 +-
 .../java/org/apache/joshua/pro/PROCore.java     |  19 +-
 .../org/apache/joshua/server/ServerThread.java  |  29 +-
 .../org/apache/joshua/server/TcpServer.java     |  11 +-
 .../java/org/apache/joshua/util/Constants.java  |   6 +
 .../apache/joshua/util/PackedGrammarServer.java |  14 +-
 .../org/apache/joshua/util/io/LineReader.java   |   4 +-
 .../java/org/apache/joshua/zmert/MertCore.java  |  22 +-
 .../java/org/apache/joshua/zmert/ZMERT.java     |   5 +-
 .../apache/joshua/decoder/DecoderServlet.java   |   2 +-
 .../decoder/DecoderServletContextListener.java  |   8 +-
 77 files changed, 1885 insertions(+), 2676 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/adagrad/AdaGrad.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/adagrad/AdaGrad.java 
b/joshua-core/src/main/java/org/apache/joshua/adagrad/AdaGrad.java
index 0784318..15b8b88 100755
--- a/joshua-core/src/main/java/org/apache/joshua/adagrad/AdaGrad.java
+++ b/joshua-core/src/main/java/org/apache/joshua/adagrad/AdaGrad.java
@@ -18,13 +18,12 @@
  */
 package org.apache.joshua.adagrad;
 
-import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Decoder;
 import org.apache.joshua.util.FileUtility;
 import org.apache.joshua.util.StreamGobbler;
 
 public class AdaGrad {
   public static void main(String[] args) throws Exception {
-    JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
     boolean external = false; // should each AdaGrad iteration be launched 
externally?
 
     if (args.length == 1) {
@@ -42,7 +41,7 @@ public class AdaGrad {
     }
 
     if (!external) {
-      AdaGradCore myAdaGrad = new AdaGradCore(args[0], joshuaConfiguration);
+      AdaGradCore myAdaGrad = new AdaGradCore(args[0], 
Decoder.getDefaultFlags());
       myAdaGrad.run_AdaGrad(); // optimize lambda[]
       myAdaGrad.finish();
     } else {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/adagrad/AdaGradCore.java
----------------------------------------------------------------------
diff --git 
a/joshua-core/src/main/java/org/apache/joshua/adagrad/AdaGradCore.java 
b/joshua-core/src/main/java/org/apache/joshua/adagrad/AdaGradCore.java
index b21ab71..4fe7e07 100755
--- a/joshua-core/src/main/java/org/apache/joshua/adagrad/AdaGradCore.java
+++ b/joshua-core/src/main/java/org/apache/joshua/adagrad/AdaGradCore.java
@@ -47,13 +47,14 @@ import java.util.zip.GZIPOutputStream;
 
 import org.apache.joshua.corpus.Vocabulary;
 import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
 import org.apache.joshua.metrics.EvaluationMetric;
 import org.apache.joshua.util.StreamGobbler;
 import org.apache.joshua.util.io.ExistingUTF8EncodedTextFile;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.typesafe.config.Config;
+
 /**
  * This code was originally written by Yuan Cao, who copied the MERT code to 
produce this file.
  */
@@ -66,8 +67,6 @@ public class AdaGradCore {
   private final static double epsilon = 1.0 / 1000000;
   private final static DecimalFormat f4 = new DecimalFormat("###0.0000");
 
-  private final JoshuaConfiguration joshuaConfiguration;
-
   private TreeSet<Integer>[] indicesOfInterest_all;
 
   private int verbosity; // anything of priority <= verbosity will be printed
@@ -249,6 +248,7 @@ public class AdaGradCore {
   private String sourceFileName, refFileName, decoderOutFileName;
   private String decoderConfigFileName, decoderCommandFileName;
   private String fakeFileNameTemplate, fakeFileNamePrefix, fakeFileNameSuffix;
+  private Config config;
 
   // e.g. output.it[1-x].someOldRun would be specified as:
   // output.it?.someOldRun
@@ -256,19 +256,19 @@ public class AdaGradCore {
 
   // private int useDisk;
 
-  public AdaGradCore(JoshuaConfiguration joshuaConfiguration) {
-    this.joshuaConfiguration = joshuaConfiguration;
+  public AdaGradCore(Config config) {
+    this.config = config;
   }
 
-  public AdaGradCore(String[] args, JoshuaConfiguration joshuaConfiguration) 
throws FileNotFoundException, IOException {
-    this.joshuaConfiguration = joshuaConfiguration;
+  public AdaGradCore(String[] args, Config config) throws 
FileNotFoundException, IOException {
+    this.config = config;
     EvaluationMetric.set_knownMetrics();
     processArgsArray(args);
     initialize(0);
   }
 
-  public AdaGradCore(String configFileName, JoshuaConfiguration 
joshuaConfiguration) throws FileNotFoundException, IOException {
-    this.joshuaConfiguration = joshuaConfiguration;
+  public AdaGradCore(String configFileName, Config config) throws 
FileNotFoundException, IOException {
+    this.config = config;
     EvaluationMetric.set_knownMetrics();
     processArgsArray(cfgFileToArgsArray(configFileName));
     initialize(0);
@@ -480,7 +480,7 @@ public class AdaGradCore {
     // by default, load joshua decoder
     if (decoderCommand == null && fakeFileNameTemplate == null) {
       println("Loading Joshua decoder...", 1);
-      myDecoder = new Decoder(joshuaConfiguration);
+      myDecoder = new Decoder(config);
       println("...finished loading @ " + (new Date()), 1);
       println("");
     } else {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.conf
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.conf 
b/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.conf
new file mode 100644
index 0000000..3b86fb6
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.conf
@@ -0,0 +1,126 @@
+# This default Joshua config file is written in TypesafeConfig syntax
+# [https://github.com/typesafehub/config] and is parsed at
+# initialization time as the default configuration.
+# It may be overridden by any other given Joshua config file and/or
+# commandline overrides.
+
+# Joshua supported search algorithms: "cky" or "stack"
+search_algorithm = cky
+
+# default grammar symbols. The symbols here should be enclosed in square 
brackets.
+default_non_terminal = "[X]"
+goal_symbol = "[GOAL]"
+
+# whether to construct a StructuredTranslation object for each request instead 
of
+# printing to stdout. Used when the Decoder is used from Java directly.
+use_structured_output = false
+
+# If set to true, Joshua will recapitalize the output by projecting the case 
from aligned source-side words
+project_case = false
+
+output_format = "%i ||| %s ||| %f ||| %c"
+
+# When true, _OOV is appended to all words that are passed through (useful for 
something like
+# transliteration on the target side)
+mark_oovs = false
+
+# n-best configuration
+# Make sure output strings in the n-best list are unique.
+use_unique_nbest = true
+# Include the phrasal alignments in the output.
+include_align_index = false
+# The number of hypotheses to output by default.
+top_n = 1
+# Write n-best output to this file
+n_best_file = ""
+
+# The cube pruning pop limit. Set to 0 for exhaustive pruning.
+pop_limit = 100
+
+# Maximum sentence length. Sentences longer than this are truncated.
+maximum_sentence_length = 200
+
+# The number of target sides considered for each source side (after sorting by 
model weight)
+num_translation_options = 20
+
+# The distortion limit
+reordering_limit = 8
+
+# If set to true, Joshua will lowercase the input, creating an annotation that 
marks the original case
+lowercase = false
+
+# Whether to segment OOVs into a lattice
+segment_oovs = false
+
+# Enable lattice decoding.
+lattice_decoding = false
+
+# If false, sorting of the complete grammar is done at load time. If true, 
grammar tries are not
+# sorted till they are first accessed. Amortized sorting means you get your 
first translation
+# much, much quicker (good for debugging), but that per-sentence decoding is a 
bit slower.
+amortized_sorting = true
+
+# syntax-constrained decoding
+constrain_parse = false
+
+use_pos_labels = false
+
+# oov-specific
+true_oovs_only = false
+
+# Dynamic sentence-level filtering.
+filter_grammar = false
+
+# The number of decoding threads to use (-threads).
+num_parallel_decoders = 1
+
+# Enables synchronous parsing.
+parse = false
+  
+# Whether to do forest rescoring. If set to true, the references are expected 
on STDIN along with
+# the input sentences in the following format: 
+# input sentence ||| ||| reference1 ||| reference2 ... 
+# (The second field is reserved for the output sentence for alignment and 
forced decoding).
+rescore_forest = false
+rescore_forest_weight = 10.0f
+
+
+# If true, decode using a dot chart (standard CKY+); if false, use the much 
more efficient
+# version of Sennrich (SSST 2014)
+use_dot_chart = true
+
+# moses compatibility
+moses = false
+
+# Read input from a file (Moses compatible flag)
+input_file = ""
+
+# Timeout in seconds for threads
+translation_thread_timeout = 30_000
+
+# grammars are a list of sub configs for each grammar
+grammars = []
+
+# the list of feature functions with their configurations
+feature_functions = []
+
+# an external weights file to be specified
+weights_file = ""
+
+# The decoder weights directly specified as key-value pairs. These take 
precedence over weights in weights_file
+weights = {}
+
+serverSettings = {     
+
+       # Input type: plain or json
+       input_type = plain
+
+       # Type of server: none, TCP, HTTP
+       server_type = none
+
+       # If set, Joshua will start a (multi-threaded) TCP/IP server on this 
port.
+       server_port = 0
+}
+
+# the default size of the cache used by packed grammars
+default_packed_grammar_cache_size = 5000
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java 
b/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java
index 3d6f3bc..e448ec8 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java
@@ -19,16 +19,14 @@
 package org.apache.joshua.decoder;
 
 import static org.apache.joshua.decoder.ff.FeatureMap.hashFeature;
-import static org.apache.joshua.decoder.ff.tm.OwnerMap.getOwner;
+import static 
org.apache.joshua.decoder.ff.tm.hash_based.TextGrammarFactory.createCustomGrammar;
+import static 
org.apache.joshua.decoder.ff.tm.hash_based.TextGrammarFactory.createGlueTextGrammar;
 import static org.apache.joshua.util.Constants.spaceSeparator;
 
-import java.io.BufferedWriter;
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.lang.reflect.Constructor;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map.Entry;
@@ -49,21 +47,21 @@ import org.apache.joshua.decoder.ff.tm.Grammar;
 import org.apache.joshua.decoder.ff.tm.OwnerId;
 import org.apache.joshua.decoder.ff.tm.OwnerMap;
 import org.apache.joshua.decoder.ff.tm.Rule;
-import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
-import org.apache.joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
+import org.apache.joshua.decoder.ff.tm.hash_based.TextGrammarFactory;
 import org.apache.joshua.decoder.ff.tm.packed.PackedGrammar;
 import org.apache.joshua.decoder.io.TranslationRequestStream;
-import org.apache.joshua.decoder.phrase.PhraseTable;
 import org.apache.joshua.decoder.segment_file.Sentence;
-import org.apache.joshua.util.FileUtility;
-import org.apache.joshua.util.FormatUtils;
-import org.apache.joshua.util.Regex;
 import org.apache.joshua.util.io.LineReader;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.common.base.Strings;
+import com.google.common.base.Throwables;
+import com.google.common.collect.ImmutableList;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+import com.typesafe.config.ConfigParseOptions;
+import com.typesafe.config.ConfigValue;
 
 /**
  * This class handles decoder initialization and the complication introduced 
by multithreading.
@@ -80,6 +78,7 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder;
  * but also ensures that round-robin parallelization occurs, since 
RequestParallelizer uses the
  * thread pool before translating each request.
  *
+ * TODO(fhieber): this documentation should be updated
  * A decoding thread is handled by DecoderTask and launched from 
DecoderThreadRunner. The purpose
  * of the runner is to record where to place the translated sentence when it 
is done (i.e., which
  * TranslationResponseStream object). TranslationResponseStream itself is an 
iterator whose next() call blocks until the next
@@ -90,40 +89,49 @@ import 
com.google.common.util.concurrent.ThreadFactoryBuilder;
  * @author wren ng thornton w...@users.sourceforge.net
  * @author Lane Schwartz dowob...@users.sourceforge.net
  * @author Kellen Sunderland kellen.sunderl...@gmail.com
+ * @author Felix Hieber felix.hie...@gmail.com
  */
 public class Decoder {
 
   private static final Logger LOG = LoggerFactory.getLogger(Decoder.class);
 
-  private final JoshuaConfiguration joshuaConfiguration;
-
-  public JoshuaConfiguration getJoshuaConfiguration() {
-    return joshuaConfiguration;
-  }
-
   /*
-   * Many of these objects themselves are global objects. We pass them in when 
constructing other
-   * objects, so that they all share pointers to the same object. This is good 
because it reduces
-   * overhead, but it can be problematic because of unseen dependencies (for 
example, in the
-   * Vocabulary shared by language model, translation grammar, etc).
+   * Holds the common (immutable) decoder state (features, grammars etc.) 
after initialization
    */
-  private final List<Grammar> grammars = new ArrayList<Grammar>();
-  private final ArrayList<FeatureFunction> featureFunctions = new 
ArrayList<>();
-  private Grammar customPhraseTable = null;
-
-  /* The feature weights. */
-  public static FeatureVector weights;
-
-  public static int VERBOSE = 1;
-
+  private final DecoderConfig decoderConfig;
+  
+  private static final ImmutableList<String> GRAMMAR_PACKAGES = 
ImmutableList.of(
+      "org.apache.joshua.decoder.ff.tm.hash_based",
+      "org.apache.joshua.decoder.ff.tm.packed",
+      "org.apache.joshua.decoder.phrase");
+  
+  private static final ImmutableList<String> FEATURE_PACKAGES = 
ImmutableList.of(
+      "org.apache.joshua.decoder.ff",
+      "org.apache.joshua.decoder.ff.lm",
+      "org.apache.joshua.decoder.ff.phrase");
+  
   /**
    * Constructor method that creates a new decoder using the specified 
configuration file.
    *
    * @param joshuaConfiguration a populated {@link 
org.apache.joshua.decoder.JoshuaConfiguration}
    */
-  public Decoder(JoshuaConfiguration joshuaConfiguration) {
-    this.joshuaConfiguration = joshuaConfiguration;
-    this.initialize();
+  public Decoder(Config config) {
+    this.decoderConfig = initialize(config); 
+  }
+  
+  /**
+   * Loads the default Decoder flags from the {@code Decoder.conf} resource,
+   * looked up relative to this class.
+   *
+   * @return the parsed and resolved default configuration
+   */
+  public static Config getDefaultFlags() {
+    // the parse options are set to not allow a missing defaults resource
+    final ConfigParseOptions strictOptions = ConfigParseOptions.defaults().setAllowMissing(false);
+    final Config defaults = ConfigFactory.parseResources(Decoder.class, "Decoder.conf", strictOptions);
+    return defaults.resolve();
+  }
+  
+  /**
+   * Returns the DecoderConfig
+   */
+  public DecoderConfig getDecoderConfig() {
+    return decoderConfig;
   }
 
   /**
@@ -149,8 +157,8 @@ public class Decoder {
             .setNameFormat("TranslationWorker-%d")
             .setDaemon(true)
             .build();
-    ExecutorService executor = 
Executors.newFixedThreadPool(this.joshuaConfiguration.num_parallel_decoders,
-            threadFactory);
+    int numParallelDecoders = 
this.decoderConfig.getFlags().getInt("num_parallel_decoders");
+    ExecutorService executor = 
Executors.newFixedThreadPool(numParallelDecoders, threadFactory);
     try {
       for (; ; ) {
         Sentence sentence = request.next();
@@ -176,15 +184,49 @@ public class Decoder {
 
 
   /**
-   * We can also just decode a single sentence in the same thread.
+   * Decode call for a single sentence.
+   * Creates a sentence-specific {@link DecoderConfig} including
+   * sentence-specific OOVGrammar.
    *
    * @param sentence {@link org.apache.joshua.lattice.Lattice} input
-   * @throws RuntimeException if any fatal errors occur during translation
    * @return the sentence {@link org.apache.joshua.decoder.Translation}
    */
   public Translation decode(Sentence sentence) {
-    DecoderTask decoderTask = new DecoderTask(this.grammars, 
this.featureFunctions, joshuaConfiguration);
-    return decoderTask.translate(sentence);
+    final DecoderConfig sentenceConfig = createSentenceDecoderConfig(sentence, 
decoderConfig);
+    final DecoderTask decoderTask = new DecoderTask(sentenceConfig, sentence);
+    return decoderTask.translate();
+  }
+  
+  /**
+   * Creates a sentence-specific {@link DecoderConfig}.
+   * Most importantly, puts the sentence-specific grammars required by the
+   * configured search algorithm (OOV grammar for cky; end-rule and OOV phrase
+   * tables for stack) in front of the global grammars.
+   *
+   * @param sentence the input sentence the extra grammars are created for
+   * @param globalConfig the shared decoder state; all members other than the grammar list are passed on unchanged
+   * @return a new {@link DecoderConfig} whose grammars are the sentence-specific ones followed by the global ones
+   */
+  private static DecoderConfig createSentenceDecoderConfig(
+      final Sentence sentence, final DecoderConfig globalConfig) {
+
+    // create a new list of grammars that includes the OOV grammar;
+    // which grammars get added is specific to the search algorithm
+    final ImmutableList.Builder<Grammar> grammars = new ImmutableList.Builder<>();
+    switch (globalConfig.getSearchAlgorithm()) {
+    case cky:
+      grammars
+        .add(TextGrammarFactory.createOovGrammarForSentence(sentence, globalConfig));
+      // break is required: otherwise cky falls through and also receives the stack phrase tables
+      break;
+    case stack:
+      grammars
+        .add(TextGrammarFactory.createEndRulePhraseTable(sentence, globalConfig))
+        .add(TextGrammarFactory.createOovPhraseTable(sentence, globalConfig));
+      break;
+    }
+
+    return new DecoderConfig(
+        globalConfig.getFlags(),
+        globalConfig.getFeatureFunctions(),
+        grammars.addAll(globalConfig.getGrammars()).build(),
+        globalConfig.getCustomGrammar(),
+        globalConfig.getVocabulary(),
+        globalConfig.getWeights(),
+        globalConfig.getFeatureMap(),
+        globalConfig.getOwnerMap());
+  }
 
   /**
@@ -206,248 +248,105 @@ public class Decoder {
     StatefulFF.resetGlobalStateIndex();
   }
 
-  public static void writeConfigFile(double[] newWeights, String template, 
String outputFile,
-      String newDiscriminativeModel) {
-    try {
-      int columnID = 0;
-
-      try (LineReader reader = new LineReader(template);
-           BufferedWriter writer = FileUtility.getWriteFileStream(outputFile)) 
{
-        for (String line : reader) {
-          line = line.trim();
-          if (Regex.commentOrEmptyLine.matches(line) || line.contains("=")) {
-            // comment, empty line, or parameter lines: just copy
-            writer.write(line);
-            writer.newLine();
-
-          } else { // models: replace the weight
-            String[] fds = Regex.spaces.split(line);
-            StringBuilder newSent = new StringBuilder();
-            if (!Regex.floatingNumber.matches(fds[fds.length - 1])) {
-              throw new IllegalArgumentException("last field is not a number; 
the field is: "
-                      + fds[fds.length - 1]);
-            }
-
-            if (newDiscriminativeModel != null && 
"discriminative".equals(fds[0])) {
-              newSent.append(fds[0]).append(' ');
-              newSent.append(newDiscriminativeModel).append(' ');// change the
-              // file name
-              for (int i = 2; i < fds.length - 1; i++) {
-                newSent.append(fds[i]).append(' ');
-              }
-            } else {// regular
-              for (int i = 0; i < fds.length - 1; i++) {
-                newSent.append(fds[i]).append(' ');
-              }
-            }
-            if (newWeights != null)
-              newSent.append(newWeights[columnID++]);// change the weight
-            else
-              newSent.append(fds[fds.length - 1]);// do not change
-
-            writer.write(newSent.toString());
-            writer.newLine();
-          }
-        }
-      }
-
-      if (newWeights != null && columnID != newWeights.length) {
-        throw new IllegalArgumentException("number of models does not match 
number of weights");
-      }
-
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-  }
-
   /**
-   * Initialize all parts of the JoshuaDecoder.
+   * Initialize all parts of the Decoder.
    */
-  private void initialize() {
-    try {
-
-      long pre_load_time = System.currentTimeMillis();
-      resetGlobalState();
-
-      /* Weights can be listed in a separate file (denoted by parameter 
"weights-file") or directly
-       * in the Joshua config file. Config file values take precedent.
-       */
-      this.readWeights(joshuaConfiguration.weights_file);
-
-
-      /* Add command-line-passed weights to the weights array for processing 
below */
-      if (!Strings.isNullOrEmpty(joshuaConfiguration.weight_overwrite)) {
-        String[] tokens = joshuaConfiguration.weight_overwrite.split("\\s+");
-        for (int i = 0; i < tokens.length; i += 2) {
-          String feature = tokens[i];
-          float value = Float.parseFloat(tokens[i+1]);
-
-          if (joshuaConfiguration.moses)
-            feature = demoses(feature);
-
-          joshuaConfiguration.weights.add(String.format("%s %s", feature, 
tokens[i+1]));
-          LOG.info("COMMAND LINE WEIGHT: {} -> {}", feature, value);
-        }
-      }
-
-      /* Read the weights found in the config file */
-      for (String pairStr: joshuaConfiguration.weights) {
-        String pair[] = pairStr.split("\\s+");
-
-        /* Sanity check for old-style unsupported feature invocations. */
-        if (pair.length != 2) {
-          String errMsg = "FATAL: Invalid feature weight line found in config 
file.\n" +
-              String.format("The line was '%s'\n", pairStr) +
-              "You might be using an old version of the config file that is no 
longer supported\n" +
-              "Check joshua.apache.org or email d...@joshua.apache.org for 
help\n" +
-              "Code = " + 17;
-          throw new RuntimeException(errMsg);
-        }
-
-        weights.add(hashFeature(pair[0]), Float.parseFloat(pair[1]));
-      }
-
-      LOG.info("Read {} weights", weights.size());
-
-      // Do this before loading the grammars and the LM.
-      this.featureFunctions.clear();
-
-      // Initialize and load grammars. This must happen first, since the vocab 
gets defined by
-      // the packed grammar (if any)
-      this.initializeTranslationGrammars();
-      LOG.info("Grammar loading took: {} seconds.",
-          (System.currentTimeMillis() - pre_load_time) / 1000);
-
-      // Initialize the features: requires that LM model has been initialized.
-      this.initializeFeatureFunctions();
-
-      // This is mostly for compatibility with the Moses tuning script
-      if (joshuaConfiguration.show_weights_and_quit) {
-        for (Entry<Integer, Float> entry : weights.entrySet()) {
-          System.out.println(String.format("%s=%.5f", 
FeatureMap.getFeature(entry.getKey()), entry.getValue()));
-        }
-        // TODO (fhieber): this functionality should not be in main Decoder 
class and simply exit.
-        System.exit(0);
-      }
-
-      // Sort the TM grammars (needed to do cube pruning)
-      if (joshuaConfiguration.amortized_sorting) {
-        LOG.info("Grammar sorting happening lazily on-demand.");
-      } else {
-        long pre_sort_time = System.currentTimeMillis();
-        for (Grammar grammar : this.grammars) {
-          grammar.sortGrammar(this.featureFunctions);
-        }
-        LOG.info("Grammar sorting took {} seconds.",
-            (System.currentTimeMillis() - pre_sort_time) / 1000);
+  private DecoderConfig initialize(final Config config) {
+    
+    LOG.info("Initializing decoder ...");
+    long initTime = System.currentTimeMillis();
+    
+    /*
+     * (1) read weights from the file named by parameter "weights_file"
+     * and/or directly from the Joshua config. Config values take precedence.
+     */
+    final FeatureVector weights = readWeights(config);
+    
+    /*
+     * (2) initialize/instantiate translation grammars
+     * Unfortunately this can not be static due to customPhraseTable member.
+     */
+    final List<Grammar> grammars = initializeTranslationGrammars(config);
+    final Grammar customGrammar = 
createCustomGrammar(SearchAlgorithm.valueOf(config.getString("search_algorithm")));
+    grammars.add(customGrammar);
+    
+    /*
+     * (3) initialize/instantiate feature functions 
+     */
+    final ImmutableList<FeatureFunction> featureFunctions = 
initializeFeatureFunctions(config, grammars, weights);
+    
+    /*
+     * (4) Optionally sort the grammars for cube-pruning
+     */
+    if (config.getBoolean("amortized_sorting")) {
+      LOG.info("Grammar sorting happening lazily on-demand.");
+    } else {
+      long preSortTime = System.currentTimeMillis();
+      for (final Grammar grammar : grammars) {
+        grammar.sortGrammar(featureFunctions);
       }
-
-    } catch (IOException e) {
-      LOG.warn(e.getMessage(), e);
+      LOG.info("Grammar sorting took {} seconds.", (System.currentTimeMillis() 
- preSortTime) / 1000);
     }
+    
+    LOG.info("Initialization done ({} seconds)", (System.currentTimeMillis() - 
initTime) / 1000);
+    // TODO(fhieber): right now we still rely on static variables for vocab 
etc.
+    // this should be changed and then we pass the instance of vocab etc. in 
here
+    return new DecoderConfig(
+        config,
+        featureFunctions,
+        ImmutableList.copyOf(grammars),
+        customGrammar,
+        null,
+        weights,
+        null,
+        null);
   }
 
   /**
-   * Initializes translation grammars Retained for backward compatibility
-   *
-   * @throws IOException Several grammar elements read from disk that can
-   * cause IOExceptions.
+   * Returns a list of initialized {@link Grammar}s
    */
-  private void initializeTranslationGrammars() throws IOException {
-
-    if (joshuaConfiguration.tms.size() > 0) {
-
-      // collect packedGrammars to check if they use a shared vocabulary
-      final List<PackedGrammar> packed_grammars = new ArrayList<>();
-
-      // tm = {thrax/hiero,packed,samt,moses} OWNER LIMIT FILE
-      for (String tmLine : joshuaConfiguration.tms) {
-
-        String type = tmLine.substring(0,  tmLine.indexOf(' '));
-        String[] args = tmLine.substring(tmLine.indexOf(' 
')).trim().split("\\s+");
-        HashMap<String, String> parsedArgs = FeatureFunction.parseArgs(args);
-
-        String owner = parsedArgs.get("owner");
-        int span_limit = Integer.parseInt(parsedArgs.get("maxspan"));
-        String path = parsedArgs.get("path");
-
-        Grammar grammar;
-        if (! type.equals("moses") && ! type.equals("phrase")) {
-          if (new File(path).isDirectory()) {
-            try {
-              PackedGrammar packed_grammar = new PackedGrammar(path, 
span_limit, owner, type, joshuaConfiguration);
-              packed_grammars.add(packed_grammar);
-              grammar = packed_grammar;
-            } catch (FileNotFoundException e) {
-              String msg = String.format("Couldn't load packed grammar from 
'%s'", path)
-                  + "Perhaps it doesn't exist, or it may be an old packed file 
format.";
-              throw new RuntimeException(msg);
-            }
-          } else {
-            // thrax, hiero, samt
-            grammar = new MemoryBasedBatchGrammar(type, path, owner,
-                joshuaConfiguration.default_non_terminal, span_limit, 
joshuaConfiguration);
-          }
-
-        } else {
-
-          joshuaConfiguration.search_algorithm = "stack";
-          grammar = new PhraseTable(path, owner, type, joshuaConfiguration);
-        }
-
-        this.grammars.add(grammar);
+  private List<Grammar> initializeTranslationGrammars(final Config config) {
+    
+    final List<Grammar> result = new ArrayList<>();
+    
+    // collect packedGrammars to check if they use a shared vocabulary
+    final List<PackedGrammar> packedGrammars = new ArrayList<>();
+    
+    final long startTime = System.currentTimeMillis();
+    
+    for (final Config grammarConfig : config.getConfigList("grammars")) {
+      final Class<?> clazz = 
getClassFromPackages(grammarConfig.getString("class"), GRAMMAR_PACKAGES);
+      try {
+        final Constructor<?> constructor = clazz.getConstructor(Config.class);
+        final Grammar grammar = (Grammar) 
constructor.newInstance(grammarConfig);
+        result.add(grammar);
+      } catch (Exception e) {
+        LOG.error("Unable to instantiate grammar '{}'", clazz.getName());
+        Throwables.propagate(e);
       }
-
-      checkSharedVocabularyChecksumsForPackedGrammars(packed_grammars);
-
-    } else {
-      LOG.warn("no grammars supplied!  Supplying dummy glue grammar.");
-      MemoryBasedBatchGrammar glueGrammar = new 
MemoryBasedBatchGrammar("glue", joshuaConfiguration, -1);
-      glueGrammar.addGlueRules(featureFunctions);
-      this.grammars.add(glueGrammar);
-    }
-
-    /* Add the grammar for custom entries */
-    if (joshuaConfiguration.search_algorithm.equals("stack"))
-      this.customPhraseTable = new PhraseTable("custom", joshuaConfiguration);
-    else
-      this.customPhraseTable = new MemoryBasedBatchGrammar("custom", 
joshuaConfiguration, 20);
-    this.grammars.add(this.customPhraseTable);
-
-    /* Create an epsilon-deleting grammar */
-    if (joshuaConfiguration.lattice_decoding) {
-      LOG.info("Creating an epsilon-deleting grammar");
-      MemoryBasedBatchGrammar latticeGrammar = new 
MemoryBasedBatchGrammar("lattice", joshuaConfiguration, -1);
-      HieroFormatReader reader = new 
HieroFormatReader(OwnerMap.register("lattice"));
-
-      String goalNT = 
FormatUtils.cleanNonTerminal(joshuaConfiguration.goal_symbol);
-      String defaultNT = 
FormatUtils.cleanNonTerminal(joshuaConfiguration.default_non_terminal);
-
-      //FIXME: arguments changed to match string format on best effort basis.  
Author please review.
-      String ruleString = String.format("[%s] ||| [%s,1] <eps> ||| [%s,1] ||| 
", goalNT, defaultNT, defaultNT);
-
-      Rule rule = reader.parseLine(ruleString);
-      latticeGrammar.addRule(rule);
-      rule.estimateRuleCost(featureFunctions);
-
-      this.grammars.add(latticeGrammar);
     }
-
-    /* Now create a feature function for each owner */
-    final Set<OwnerId> ownersSeen = new HashSet<>();
-
-    for (Grammar grammar: this.grammars) {
-      OwnerId owner = grammar.getOwner();
-      if (! ownersSeen.contains(owner)) {
-        this.featureFunctions.add(
-            new PhraseModel(
-                weights, new String[] { "tm", "-owner", getOwner(owner) }, 
joshuaConfiguration, grammar));
-        ownersSeen.add(owner);
+    
+    if (result.isEmpty()) {
+      
+      LOG.warn("no grammars supplied! Supplying dummy glue grammar.");
+      result.add(createGlueTextGrammar(
+          config.getString("goal_symbol"),
+          config.getString("default_non_terminal")));
+      
+    } else {
+      
+      checkSharedVocabularyChecksumsForPackedGrammars(packedGrammars);
+      
+      if (config.getBoolean("lattice_decoding")) {
+        LOG.info("Creating an epsilon-deleting grammar");
+        result.add(TextGrammarFactory.addEpsilonDeletingGrammar(
+            config.getString("goal_symbol"),
+            config.getString("default_non_terminal")));
       }
     }
-
-    LOG.info("Memory used {} MB",
-        ((Runtime.getRuntime().totalMemory() - 
Runtime.getRuntime().freeMemory()) / 1000000.0));
+    
+    LOG.info("Grammar loading took: {} seconds.", (System.currentTimeMillis() 
- startTime) / 1000);
+    return result;
   }
 
   /**
@@ -470,44 +369,55 @@ public class Decoder {
     }
   }
 
-  /*
-   * This function reads the weights for the model. Feature names and their 
weights are listed one
-   * per line in the following format:
-   *
-   * FEATURE_NAME WEIGHT
-   */
-  private void readWeights(String fileName) {
-    Decoder.weights = new FeatureVector(5);
-
-    if (fileName.equals(""))
-      return;
-
-    try (LineReader lineReader = new LineReader(fileName);) {
-      for (String line : lineReader) {
-        line = line.replaceAll(spaceSeparator, " ");
-
-        if (line.equals("") || line.startsWith("#") || line.startsWith("//")
-            || line.indexOf(' ') == -1)
-          continue;
+  /**
+    * This function reads the weights for the model either
+    * from the weights_file in format
+    * NAME VALUE
+    * one per line;
+    * or from the weights section in the {@link Config} object.
+    * The latter take precedence.
+    */
+  private static FeatureVector readWeights(final Config config) {
+    final FeatureVector weights = new FeatureVector(5);
+    
+    // read from optional weights_file
+    if (config.hasPath("weights_file")
+        && new File(config.getString("weights_file")).exists()) {
+      final String weightsFilename = config.getString("weights_file");
+      try (LineReader lineReader = new LineReader(weightsFilename);) {
+        for (String line : lineReader) {
+          line = line.replaceAll(spaceSeparator, " ");
+          if (line.equals("") || line.startsWith("#") || line.startsWith("//") 
|| line.indexOf(' ') == -1) {
+            continue;
+          }
+          final String tokens[] = line.split(spaceSeparator);
+          String feature = tokens[0];
+          final float value = Float.parseFloat(tokens[1]);
 
-        String tokens[] = line.split(spaceSeparator);
-        String feature = tokens[0];
-        Float value = Float.parseFloat(tokens[1]);
+          // Kludge for compatibility with Moses tuners
+          if (config.getBoolean("moses")) {
+            feature = demoses(feature);
+          }
 
-        // Kludge for compatibility with Moses tuners
-        if (joshuaConfiguration.moses) {
-          feature = demoses(feature);
+          weights.put(hashFeature(feature), value);
         }
-
-        weights.add(hashFeature(feature), value);
+        LOG.info("Read {} weights from file '{}'", weights.size(), 
weightsFilename);
+      } catch (IOException e) {
+        Throwables.propagate(e);
       }
-    } catch (IOException ioe) {
-      throw new RuntimeException(ioe);
     }
-    LOG.info("Read {} weights from file '{}'", weights.size(), fileName);
+    
+    // overwrite with config values
+    for (Entry<String, ConfigValue> entry : 
config.getConfig("weights").entrySet()) {
+      final String name = entry.getKey();
+      float value = ((Number) entry.getValue().unwrapped()).floatValue();
+      weights.put(hashFeature(name), value);
+    }
+    LOG.info("Read {} weights", weights.size());
+    return weights;
   }
 
-  private String demoses(String feature) {
+  private static String demoses(String feature) {
     if (feature.endsWith("="))
       feature = feature.replace("=", "");
     if (feature.equals("OOV_Penalty"))
@@ -518,60 +428,58 @@ public class Decoder {
   }
 
   /**
-   * Feature functions are instantiated with a line of the form
-   *
-   * <pre>
-   *   FEATURE OPTIONS
-   * </pre>
-   *
-   * Weights for features are listed separately.
-   *
+   * Initializes & instantiates feature functions.
+   * Requires a list of previously loaded grammars to instantiate the 
PhraseModel feature function
+   * as well.
    */
-  private void initializeFeatureFunctions() {
-
-    for (String featureLine : joshuaConfiguration.features) {
-      // line starts with NAME, followed by args
-      // 1. create new class named NAME, pass it config, weights, and the args
-
-      String fields[] = featureLine.split("\\s+");
-      String featureName = fields[0];
-
+  private static ImmutableList<FeatureFunction> initializeFeatureFunctions(
+      final Config config, final List<Grammar> grammars, final FeatureVector 
weights) {
+    
+    final ImmutableList.Builder<FeatureFunction> result = new 
ImmutableList.Builder<>();
+    
+    // (1) create PhraseModel feature function for every owner
+    final Set<OwnerId> ownersSeen = new HashSet<>();
+    for (final Grammar grammar: grammars) {
+      final OwnerId owner = grammar.getOwner();
+      if (!ownersSeen.contains(owner)) {
+        result.add(new PhraseModel(owner, ConfigFactory.empty(), weights));
+        ownersSeen.add(owner);
+      }
+    }
+    
+    // (2) instantiate other feature functions by class name
+    for (Config featureConfig : config.getConfigList("feature_functions")) {
+      final Class<?> clazz = 
getClassFromPackages(featureConfig.getString("class"), FEATURE_PACKAGES);
       try {
-
-        Class<?> clas = getFeatureFunctionClass(featureName);
-        Constructor<?> constructor = clas.getConstructor(FeatureVector.class,
-            String[].class, JoshuaConfiguration.class);
-        FeatureFunction feature = (FeatureFunction) 
constructor.newInstance(weights, fields, joshuaConfiguration);
-        this.featureFunctions.add(feature);
-
+        final Constructor<?> constructor = clazz.getConstructor(Config.class, 
FeatureVector.class);
+        final FeatureFunction feature = (FeatureFunction) 
constructor.newInstance(featureConfig, weights);
+        result.add(feature);
       } catch (Exception e) {
-        throw new RuntimeException(String.format("Unable to instantiate 
feature function '%s'!", featureLine), e);
+        LOG.error("Unable to instantiate feature '{}'", clazz.getName());
+        Throwables.propagate(e);
       }
     }
-
-    for (FeatureFunction feature : featureFunctions) {
+    
+    final ImmutableList<FeatureFunction> features = result.build(); 
+    for (final FeatureFunction feature : features) {
       LOG.info("FEATURE: {}", feature.logString());
     }
+    return features;
   }
-
+  
   /**
-   * Searches a list of predefined paths for classes, and returns the first 
one found. Meant for
-   * instantiating feature functions.
-   *
-   * @param featureName Class name of the feature to return.
-   * @return the class, found in one of the search paths
+   * Searches a list of paths for classes and returns the first one found.
+   * Used for instantiating grammars and feature functions.
    */
-  private Class<?> getFeatureFunctionClass(String featureName) {
+  private static Class<?> getClassFromPackages(String className, 
ImmutableList<String> packages) {
     Class<?> clas = null;
-
-    String[] packages = { "org.apache.joshua.decoder.ff", 
"org.apache.joshua.decoder.ff.lm", "org.apache.joshua.decoder.ff.phrase" };
     for (String path : packages) {
       try {
-        clas = Class.forName(String.format("%s.%s", path, featureName));
+        clas = Class.forName(String.format("%s.%s", path, className));
         break;
       } catch (ClassNotFoundException e) {
         try {
-          clas = Class.forName(String.format("%s.%sFF", path, featureName));
+          clas = Class.forName(String.format("%s.%sFF", path, className));
           break;
         } catch (ClassNotFoundException e2) {
           // do nothing
@@ -587,11 +495,11 @@ public class Decoder {
    * @param rule the rule to add
    */
   public void addCustomRule(Rule rule) {
-    customPhraseTable.addRule(rule);
-    rule.estimateRuleCost(featureFunctions);
+    decoderConfig.getCustomGrammar().addRule(rule);
+    rule.estimateRuleCost(decoderConfig.getFeatureFunctions());
   }
 
   public Grammar getCustomPhraseTable() {
-    return customPhraseTable;
+    return decoderConfig.getCustomGrammar();
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/DecoderConfig.java
----------------------------------------------------------------------
diff --git 
a/joshua-core/src/main/java/org/apache/joshua/decoder/DecoderConfig.java 
b/joshua-core/src/main/java/org/apache/joshua/decoder/DecoderConfig.java
new file mode 100644
index 0000000..71ee40c
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/DecoderConfig.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.ff.FeatureFunction;
+import org.apache.joshua.decoder.ff.FeatureMap;
+import org.apache.joshua.decoder.ff.FeatureVector;
+import org.apache.joshua.decoder.ff.tm.Grammar;
+import org.apache.joshua.decoder.ff.tm.OwnerMap;
+
+import com.google.common.collect.ImmutableList;
+import com.typesafe.config.Config;
+
+/**
+ * This is the central config/state object that holds references to relevant 
attributes
+ * of the decoder (features, grammars, etc.).
+ * A sentence-specific instance of this object is created before
+ * translating a single sentence.
+ * 
+ * @author Felix Hieber, felix.hie...@gmail.com
+ */
+public class DecoderConfig {
+  
+  /** Decoder flags */ 
+  private final Config flags;
+  
+  /** Decoder feature functions */
+  private final ImmutableList<FeatureFunction> featureFunctions;
+  
+  /** Decoder grammars/phrase tables */
+  private final ImmutableList<Grammar> grammars;
+  
+  /** Decoder custom grammar where rules can be added */
+  private final Grammar customGrammar;
+  
+  /** Decoder vocabulary */
+  private final Vocabulary vocabulary;
+  
+  /** Decoder weights */
+  private final FeatureVector weights;
+  
+  /** Decoder feature mapping */
+  private final FeatureMap featureMap;
+  
+  /** Decoder grammar owner mapping */
+  private final OwnerMap ownerMap;
+  
+  private final SearchAlgorithm searchAlgorithm;
+  
+  public DecoderConfig(
+      final Config flags,
+      final ImmutableList<FeatureFunction> featureFunctions,
+      final ImmutableList<Grammar> grammars,
+      final Grammar customGrammar,
+      final Vocabulary vocabulary,
+      final FeatureVector weights,
+      final FeatureMap featureMap,
+      final OwnerMap ownerMap) {
+    this.flags = flags;
+    this.featureFunctions = featureFunctions;
+    this.grammars = grammars;
+    this.customGrammar = customGrammar;
+    this.vocabulary = vocabulary;
+    this.weights = weights;
+    this.featureMap = featureMap;
+    this.ownerMap = ownerMap;
+    this.searchAlgorithm = 
SearchAlgorithm.valueOf(flags.getString("search_algorithm"));
+  }
+
+  public Config getFlags() {
+    return flags;
+  }
+
+  public ImmutableList<FeatureFunction> getFeatureFunctions() {
+    return featureFunctions;
+  }
+
+  public ImmutableList<Grammar> getGrammars() {
+    return grammars;
+  }
+  
+  public Grammar getCustomGrammar() {
+    return customGrammar;
+  }
+
+  public Vocabulary getVocabulary() {
+    return vocabulary;
+  }
+
+  public FeatureVector getWeights() {
+    return weights;
+  }
+
+  public FeatureMap getFeatureMap() {
+    return featureMap;
+  }
+
+  public OwnerMap getOwnerMap() {
+    return ownerMap;
+  }
+  
+  public SearchAlgorithm getSearchAlgorithm() {
+    return searchAlgorithm;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/DecoderTask.java
----------------------------------------------------------------------
diff --git 
a/joshua-core/src/main/java/org/apache/joshua/decoder/DecoderTask.java 
b/joshua-core/src/main/java/org/apache/joshua/decoder/DecoderTask.java
index 0c7a76b..d820d7d 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/DecoderTask.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/DecoderTask.java
@@ -18,22 +18,21 @@
  */
 package org.apache.joshua.decoder;
 
-import java.util.ArrayList;
-import java.util.List;
-
+import org.apache.joshua.corpus.Vocabulary;
 import org.apache.joshua.decoder.chart_parser.Chart;
-import org.apache.joshua.decoder.ff.FeatureFunction;
-import org.apache.joshua.decoder.ff.SourceDependentFF;
 import org.apache.joshua.decoder.ff.tm.Grammar;
 import org.apache.joshua.decoder.hypergraph.ForestWalker;
 import org.apache.joshua.decoder.hypergraph.GrammarBuilderWalkerFunction;
 import org.apache.joshua.decoder.hypergraph.HyperGraph;
 import org.apache.joshua.decoder.phrase.Stacks;
 import org.apache.joshua.decoder.segment_file.Sentence;
-import org.apache.joshua.corpus.Vocabulary;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.collect.ImmutableMap;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+
 /**
  * This class handles decoding of individual Sentence objects (which can 
represent plain sentences
  * or lattices). A single sentence can be decoded by a call to translate() 
and, if an InputHandler
@@ -41,122 +40,110 @@ import org.slf4j.LoggerFactory;
  * translateAll(), which continually queries the InputHandler for sentences 
until they have all been
  * consumed and translated.
  * 
- * The DecoderFactory class is responsible for launching the threads.
- * 
  * @author Matt Post p...@cs.jhu.edu
  * @author Zhifei Li, zhifei.w...@gmail.com
+ * @author Felix Hieber, felix.hie...@gmail.com
  */
-
 public class DecoderTask {
+  
   private static final Logger LOG = LoggerFactory.getLogger(DecoderTask.class);
-
-  private final JoshuaConfiguration joshuaConfiguration;
-  /*
-   * these variables may be the same across all threads (e.g., just copy from 
DecoderFactory), or
-   * differ from thread to thread
+  
+  /** sentence-specific DecoderConfig,
+   * mostly shared with the global decoderConfig, but can have adaptations
    */
-  private final List<Grammar> allGrammars;
-  private final List<FeatureFunction> featureFunctions;
-
-  public DecoderTask(List<Grammar> grammars, List<FeatureFunction> 
featureFunctions,
-                       JoshuaConfiguration joshuaConfiguration) {
-
-    this.joshuaConfiguration = joshuaConfiguration;
-    this.allGrammars = grammars;
-
-    this.featureFunctions = new ArrayList<>();
-    for (FeatureFunction ff : featureFunctions) {
-      if (ff instanceof SourceDependentFF) {
-        this.featureFunctions.add(((SourceDependentFF) ff).clone());
-      } else {
-        this.featureFunctions.add(ff);
-      }
-    }
+  private final DecoderConfig sentenceConfig;
+  private final Sentence sentence;
+  private final boolean segmentOovs;
+  private final boolean useDotChart;
+  private final boolean doParsing;
+
+  public DecoderTask(final DecoderConfig sentenceConfig, final Sentence 
sentence) {
+    this.sentenceConfig = sentenceConfig;
+    this.sentence = sentence;
+    this.segmentOovs = sentenceConfig.getFlags().getBoolean("segment_oovs");
+    this.useDotChart = sentenceConfig.getFlags().getBoolean("use_dot_chart");
+    this.doParsing = sentenceConfig.getFlags().getBoolean("parse");
   }
 
   /**
-   * Translate a sentence.
-   * 
-   * @param sentence The sentence to be translated.
-   * @return the sentence {@link org.apache.joshua.decoder.Translation}
+   * Translate the sentence.
+   * @return translation of the sentence {@link 
org.apache.joshua.decoder.Translation}
    */
-  public Translation translate(Sentence sentence) {
+  public Translation translate() {
 
     LOG.info("Input {}: {}", sentence.id(), sentence.fullSource());
 
-    if (sentence.target() != null)
+    if (sentence.target() != null) {
       LOG.info("Input {}: Constraining to target sentence '{}'",
           sentence.id(), sentence.target());
+    }
 
     // skip blank sentences
     if (sentence.isEmpty()) {
       LOG.info("Translation {}: Translation took 0 seconds", sentence.id());
-      return new Translation(sentence, null, featureFunctions, 
joshuaConfiguration);
+      return new Translation(sentence, null, sentenceConfig);
     }
 
     long startTime = System.currentTimeMillis();
 
-    int numGrammars = allGrammars.size();
-    Grammar[] grammars = new Grammar[numGrammars];
-
-    for (int i = 0; i < allGrammars.size(); i++)
-      grammars[i] = allGrammars.get(i);
-
-    if (joshuaConfiguration.segment_oovs)
-      sentence.segmentOOVs(grammars);
+    // TODO(fhieber): this should be done in the constructor maybe?
+    // But it should not modify the sentence object.
+    if (segmentOovs) {
+      sentence.segmentOOVs(sentenceConfig.getGrammars());
+    }
 
     /*
      * Joshua supports (as of September 2014) both phrase-based and 
hierarchical decoding. Here
      * we build the appropriate chart. The output of both systems is a 
hypergraph, which is then
      * used for further processing (e.g., k-best extraction).
      */
-    HyperGraph hypergraph;
-    try {
-
-      if (joshuaConfiguration.search_algorithm.equals("stack")) {
-        Stacks stacks = new Stacks(sentence, this.featureFunctions, grammars, 
joshuaConfiguration);
+    final HyperGraph hypergraph = createHypergraph();
 
-        hypergraph = stacks.search();
-      } else {
-        /* Seeding: the chart only sees the grammars, not the factories */
-        Chart chart = new Chart(sentence, this.featureFunctions, grammars,
-            joshuaConfiguration.goal_symbol, joshuaConfiguration);
+    float decodingTime = (System.currentTimeMillis() - startTime) / 1000.0f;
+    float usedMemory = (Runtime.getRuntime().totalMemory() - 
Runtime.getRuntime().freeMemory()) / 1000000.0f;
+    LOG.info("Input {}: Translation took {} seconds", sentence.id(), 
decodingTime);
+    LOG.info("Input {}: Memory used is {} MB", sentence.id(), usedMemory);
 
-        hypergraph = (joshuaConfiguration.use_dot_chart) 
-            ? chart.expand() 
-                : chart.expandSansDotChart();
+    /* Return the translation unless we're doing synchronous parsing. */
+    if (!doParsing || hypergraph == null) {
+      return new Translation(sentence, hypergraph, sentenceConfig);
+    } else {
+      return parse(hypergraph);
+    }
+  }
+  
+  private HyperGraph createHypergraph() {
+    try {
+      switch (sentenceConfig.getSearchAlgorithm()) {
+      case stack:
+        final Stacks stacks = new Stacks(sentence, sentenceConfig);
+        return stacks.search();
+      case cky:
+        final Chart chart = new Chart(sentence, sentenceConfig);
+        return useDotChart ? chart.expand() : chart.expandSansDotChart();
+      default:
+        return null;
       }
-
     } catch (java.lang.OutOfMemoryError e) {
-      LOG.error("Input {}: out of memory", sentence.id());
-      hypergraph = null;
+      return null;
     }
-
-    float seconds = (System.currentTimeMillis() - startTime) / 1000.0f;
-    LOG.info("Input {}: Translation took {} seconds", sentence.id(), seconds);
-    LOG.info("Input {}: Memory used is {} MB", sentence.id(), (Runtime
-        .getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 
1000000.0);
-
-    /* Return the translation unless we're doing synchronous parsing. */
-    if (!joshuaConfiguration.parse || hypergraph == null) {
-      return new Translation(sentence, hypergraph, featureFunctions, 
joshuaConfiguration);
-    }
-
-    /*
-     * Synchronous parsing.
-     * 
-     * Step 1. Traverse the hypergraph to create a grammar for the second-pass 
parse.
-     */
-    Grammar newGrammar = 
getGrammarFromHyperGraph(joshuaConfiguration.goal_symbol, hypergraph);
-    newGrammar.sortGrammar(this.featureFunctions);
+  }
+  
+  /**
+   * Synchronous parsing.
+   */
+  private Translation parse(final HyperGraph hypergraph) {
+    long startTime = System.currentTimeMillis();
+    // Step 1. Traverse the hypergraph to create a grammar for the second-pass 
parse.
+    final Grammar newGrammar = 
getGrammarFromHyperGraph(sentenceConfig.getFlags().getString("goal_symbol"), 
hypergraph);
+    newGrammar.sortGrammar(sentenceConfig.getFeatureFunctions());
     long sortTime = System.currentTimeMillis();
     LOG.info("Sentence {}: New grammar has {} rules.", sentence.id(),
         newGrammar.getNumRules());
 
     /* Step 2. Create a new chart and parse with the instantiated grammar. */
-    Grammar[] newGrammarArray = new Grammar[] { newGrammar };
-    Sentence targetSentence = new Sentence(sentence.target(), sentence.id(), 
joshuaConfiguration);
-    Chart chart = new Chart(targetSentence, featureFunctions, newGrammarArray, 
"GOAL",joshuaConfiguration);
+    final Sentence targetSentence = new Sentence(sentence.target(), 
sentence.id(), sentenceConfig.getFlags());
+    final Chart chart = new Chart(targetSentence, sentenceConfig);
     int goalSymbol = GrammarBuilderWalkerFunction.goalSymbol(hypergraph);
     String goalSymbolString = Vocabulary.word(goalSymbol);
     LOG.info("Sentence {}: goal symbol is {} ({}).", sentence.id(),
@@ -172,14 +159,17 @@ public class DecoderTask {
         (secondParseTime - startTime) / 1000);
     LOG.info("Memory used after sentence {} is {} MB", sentence.id(), (Runtime
         .getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 
1000000.0);
-    return new Translation(sentence, englishParse, featureFunctions, 
joshuaConfiguration); // or do something else
+    return new Translation(sentence, englishParse, sentenceConfig); // or do 
something else
   }
 
-  private Grammar getGrammarFromHyperGraph(String goal, HyperGraph hg) {
-    GrammarBuilderWalkerFunction f = new 
GrammarBuilderWalkerFunction(goal,joshuaConfiguration,
-            "pt");
+  private static Grammar getGrammarFromHyperGraph(String goal, HyperGraph hg) {
+    final Config grammarConfig = ConfigFactory.parseMap(
+        ImmutableMap.of("owner", "pt", "span_limit", "1000"), "");
+    GrammarBuilderWalkerFunction f = new GrammarBuilderWalkerFunction(
+        goal, grammarConfig);
     ForestWalker walker = new ForestWalker();
     walker.walk(hg.goalNode, f);
     return f.getGrammar();
   }
+  
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/InputType.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/InputType.java 
b/joshua-core/src/main/java/org/apache/joshua/decoder/InputType.java
new file mode 100644
index 0000000..e04da79
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/InputType.java
@@ -0,0 +1,4 @@
+package org.apache.joshua.decoder;
+
+/* Determines whether to expect JSON input or plain lines */
+public enum InputType { plain, json }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
----------------------------------------------------------------------
diff --git 
a/joshua-core/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java 
b/joshua-core/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
deleted file mode 100644
index ddf24ea..0000000
--- 
a/joshua-core/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
+++ /dev/null
@@ -1,738 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder;
-
-import static org.apache.joshua.util.Constants.TM_PREFIX;
-import static org.apache.joshua.util.FormatUtils.cleanNonTerminal;
-import static org.apache.joshua.util.FormatUtils.ensureNonTerminalBrackets;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.Collections;
-
-import org.apache.joshua.decoder.ff.StatefulFF;
-import org.apache.joshua.decoder.ff.fragmentlm.Tree;
-import org.apache.joshua.util.FormatUtils;
-import org.apache.joshua.util.Regex;
-import org.apache.joshua.util.io.LineReader;
-import org.apache.log4j.Level;
-import org.apache.log4j.LogManager;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Configuration file for Joshua decoder.
- *
- * When adding new features to Joshua, any new configurable parameters should 
be added to this
- * class.
- *
- * @author Zhifei Li, zhifei.w...@gmail.com
- * @author Matt Post p...@cs.jhu.edu
- */
-public class JoshuaConfiguration {
-
-  private static final Logger LOG = 
LoggerFactory.getLogger(JoshuaConfiguration.class);
-
-  // whether to construct a StructuredTranslation object for each request 
instead of
-  // printing to stdout. Used when the Decoder is used from Java directly.
-  public Boolean use_structured_output = false;
-
-  // If set to true, Joshua will lowercase the input, creating an annotation 
that marks the
-  // original case
-  public boolean lowercase = false;
-
-  // If set to true, Joshua will recapitalize the output by projecting the 
case from aligned
-  // source-side words
-  public boolean project_case = false;
-
-  // List of grammar files to read
-  public ArrayList<String> tms = new ArrayList<>();
-
-  // A rule cache for commonly used tries to avoid excess object allocations
-  // Testing shows there's up to ~95% hit rate when cache size is 5000 Trie 
nodes.
-  public Integer cachedRuleSize = 5000;
-
-  /*
-   * The file to read the weights from (part of the sparse features 
implementation). Weights can
-   * also just be listed in the main config file.
-   */
-  public String weights_file = "";
-  // Default symbols. The symbol here should be enclosed in square brackets.
-  public String default_non_terminal = 
FormatUtils.ensureNonTerminalBrackets("X");
-  public String goal_symbol = FormatUtils.ensureNonTerminalBrackets("GOAL");
-
-  /*
-   * A list of OOV symbols in the form
-   *
-   * [X1] weight [X2] weight [X3] weight ...
-   *
-   * where the [X] symbols are nonterminals and the weights are weights. For 
each OOV word w in the
-   * input sentence, Joshua will create rules of the form
-   *
-   * X1 -> w (weight)
-   *
-   * If this is empty, an unweighted default_non_terminal is used.
-   */
-  public class OOVItem implements Comparable<OOVItem> {
-    public final String label;
-
-    public final float weight;
-
-    OOVItem(String l, float w) {
-      label = l;
-      weight = w;
-    }
-    @Override
-    public int compareTo(OOVItem other) {
-      if (weight > other.weight)
-        return -1;
-      else if (weight < other.weight)
-        return 1;
-      return 0;
-    }
-  }
-
-  public ArrayList<OOVItem> oovList = null;
-
-  /*
-   * Whether to segment OOVs into a lattice
-   */
-  public boolean segment_oovs = false;
-
-  /*
-   * Enable lattice decoding.
-   */
-  public boolean lattice_decoding = false;
-
-  /*
-   * If false, sorting of the complete grammar is done at load time. If true, 
grammar tries are not
-   * sorted till they are first accessed. Amortized sorting means you get your 
first translation
-   * much, much quicker (good for debugging), but that per-sentence decoding 
is a bit slower.
-   */
-  public boolean amortized_sorting = true;
-  // syntax-constrained decoding
-  public boolean constrain_parse = false;
-
-  public boolean use_pos_labels = false;
-
-  // oov-specific
-  public boolean true_oovs_only = false;
-
-  /* Dynamic sentence-level filtering. */
-  public boolean filter_grammar = false;
-
-  /* The cube pruning pop limit. Set to 0 for exhaustive pruning. */
-  public int pop_limit = 100;
-
-  /* Maximum sentence length. Sentences longer than this are truncated. */
-  public int maxlen = 200;
-
-  /*
-   * N-best configuration.
-   */
-  // Make sure output strings in the n-best list are unique.
-  public boolean use_unique_nbest = true;
-
-  /* Include the phrasal alignments in the output (not word-level alignments 
at the moment). */
-  public boolean include_align_index = false;
-
-  /* The number of hypotheses to output by default. */
-  public int topN = 1;
-
-  /**
-   * This string describes the format of each line of output from the decoder 
(i.e., the
-   * translations). The string can include arbitrary text and also variables. 
The following
-   * variables are available:
-   *
-   * <pre>
-   * - %i the 0-indexed sentence number
-   * - %e the source string %s the translated sentence
-   * - %S the translated sentence with some basic capitalization and 
denormalization
-   * - %t the synchronous derivation
-   * - %f the list of feature values (as name=value pairs)
-   * - %c the model cost
-   * - %w the weight vector
-   * - %a the alignments between source and target words (currently 
unimplemented)
-   * - %d a verbose, many-line version of the derivation
-   * </pre>
-   */
-  public String outputFormat = "%i ||| %s ||| %f ||| %c";
-
-  /* The number of decoding threads to use (-threads). */
-  public int num_parallel_decoders = 1;
-
-  /*
-   * When true, _OOV is appended to all words that are passed through (useful 
for something like
-   * transliteration on the target side
-   */
-  public boolean mark_oovs = false;
-
-  /* Enables synchronous parsing. */
-  public boolean parse = false; // perform synchronous parsing
-
-
-  /* A list of the feature functions. */
-  public ArrayList<String> features = new ArrayList<>();
-
-  /* A list of weights found in the main config file (instead of in a separate 
weights file) */
-  public ArrayList<String> weights = new ArrayList<>();
-
-  /* Determines whether to expect JSON input or plain lines */
-  public enum INPUT_TYPE { plain, json }
-
-  public INPUT_TYPE input_type = INPUT_TYPE.plain;
-
-  /* Type of server. Not sure we need to keep the regular TCP one around. */
-  public enum SERVER_TYPE { none, TCP, HTTP }
-
-  public SERVER_TYPE server_type = SERVER_TYPE.TCP;
-
-  /* If set, Joshua will start a (multi-threaded, per "threads") TCP/IP server 
on this port. */
-  public int server_port = 0;
-
-  /*
-   * Whether to do forest rescoring. If set to true, the references are 
expected on STDIN along with
-   * the input sentences in the following format:
-   * 
-   * input sentence ||| ||| reference1 ||| reference2 ...
-   * 
-   * (The second field is reserved for the output sentence for alignment and 
forced decoding).
-   */
-
-  public boolean rescoreForest = false;
-  public float rescoreForestWeight = 10.0f;
-
-  /*
-   * Location of fragment mapping file, which maps flattened SCFG rules to 
their internal
-   * representation.
-   */
-  public String fragmentMapFile = null;
-
-  /*
-   * Whether to use soft syntactic constraint decoding /fuzzy matching, which 
allows that any
-   * nonterminal may be substituted for any other nonterminal (except for OOV 
and GOAL)
-   */
-  public boolean fuzzy_matching = false;
-
-  public static final String SOFT_SYNTACTIC_CONSTRAINT_DECODING_PROPERTY_NAME 
= "fuzzy_matching";
-
-  /***
-   * Phrase-based decoding parameters.
-   */
-  
-  /* The search algorithm: currently either "cky" or "stack" */
-  public String search_algorithm = "cky";
-
-  /* The distortion limit */
-  public int reordering_limit = 8;
-
-  /* The number of target sides considered for each source side (after sorting 
by model weight) */
-  public int num_translation_options = 20;
-
-  /* If true, decode using a dot chart (standard CKY+); if false, use the much 
more efficient
-   * version of Sennrich (SSST 2014)
-   */
-  public boolean use_dot_chart = true;
-
-  /* Moses compatibility */
-  public boolean moses = false;
-
-  /* If true, just print out the weights found in the config file, and exit. */
-  public boolean show_weights_and_quit = false;
-
-  /* Read input from a file (Moses compatible flag) */
-  public String input_file = null;
-
-  /* Write n-best output to this file */
-  public String n_best_file = null;
-
-  /* Whether to look at source side for special annotations */
-  public boolean source_annotations = false;
-
-  /* Weights overridden from the command line */
-  public String weight_overwrite = "";
-
-  /* Timeout in seconds for threads */
-  public long translation_thread_timeout = 30_000;
-
-  /**
-   * This method resets the state of JoshuaConfiguration back to the state 
after initialization.
-   * This is useful when for example making different calls to the decoder 
within the same java
-   * program, which otherwise leads to potential errors due to inconsistent 
state as a result of
-   * loading the configuration multiple times without resetting etc.
-   *
-   * This leads to the insight that in fact it may be an even better idea to 
refactor the code and
-   * make JoshuaConfiguration an object that is created and passed as an 
argument, rather than a
-   * shared static object. This is just a suggestion for the next step.
-   *
-   */
-  public void reset() {
-    LOG.info("Resetting the JoshuaConfiguration to its defaults ...");
-    LOG.info("\n\tResetting the StatefullFF global state index ...");
-    LOG.info("\n\t...done");
-    StatefulFF.resetGlobalStateIndex();
-    tms = new ArrayList<>();
-    weights_file = "";
-    default_non_terminal = "[X]";
-    oovList = new ArrayList<>();
-    oovList.add(new OOVItem(default_non_terminal, 1.0f));
-    goal_symbol = "[GOAL]";
-    amortized_sorting = true;
-    constrain_parse = false;
-    use_pos_labels = false;
-    true_oovs_only = false;
-    filter_grammar = false;
-    pop_limit = 100;
-    maxlen = 200;
-    use_unique_nbest = false;
-    include_align_index = false;
-    topN = 1;
-    outputFormat = "%i ||| %s ||| %f ||| %c";
-    num_parallel_decoders = 1;
-    mark_oovs = false;
-    // oracleFile = null;
-    parse = false; // perform synchronous parsing
-    features = new ArrayList<>();
-    weights = new ArrayList<>();
-    server_port = 0;
-
-    reordering_limit = 8;
-    num_translation_options = 20;
-    LOG.info("...done");
-  }
-
-  // ===============================================================
-  // Methods
-  // ===============================================================
-
-  /**
-   * To process command-line options, we write them to a file that looks like 
the config file, and
-   * then call readConfigFile() on it. It would be more general to define a 
class that sits on a
-   * stream and knows how to chop it up, but this was quicker to implement.
-   * 
-   * @param options string array of command line options
-   */
-  public void processCommandLineOptions(String[] options) {
-    try {
-      File tmpFile = File.createTempFile("options", null, null);
-      PrintWriter out = new PrintWriter(new FileWriter(tmpFile));
-
-      for (int i = 0; i < options.length; i++) {
-        String key = options[i].substring(1);
-        if (i + 1 == options.length || options[i + 1].startsWith("-")) {
-          // if this is the last item, or if the next item
-          // is another flag, then this is a boolean flag
-          out.println(key + " = true");
-
-        } else {
-          out.print(key + " =");
-          while (i + 1 < options.length && ! options[i + 1].startsWith("-")) {
-            out.print(String.format(" %s", options[i + 1]));
-            i++;
-          }
-          out.println();
-        }
-      }
-      out.close();
-      
-//      LOG.info("Parameters overridden from the command line:");
-      this.readConfigFile(tmpFile.getCanonicalPath());
-
-      tmpFile.delete();
-
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  public void readConfigFile(String configFile) throws IOException {
-
-    LineReader configReader = new LineReader(configFile, false);
-    try {
-      for (String line : configReader) {
-        line = line.trim(); // .toLowerCase();
-
-        if (Regex.commentOrEmptyLine.matches(line))
-          continue;
-
-        /*
-         * There are two kinds of substantive (non-comment, non-blank) lines: 
parameters and feature
-         * values. Parameters match the pattern "key = value"; all other 
substantive lines are
-         * interpreted as features.
-         */
-
-        if (line.contains("=")) { // parameters; (not feature function)
-          String[] fds = Regex.equalsWithSpaces.split(line, 2);
-          if (fds.length < 2) {
-            LOG.warn("skipping config file line '{}'", line);
-            continue;
-          }
-
-          String parameter = normalize_key(fds[0]);
-
-          if (parameter.equals(normalize_key("lm"))) {
-            /* This is deprecated. This supports old LM lines of the form
-             * 
-             *   lm = berkeleylm 5 false false 100 lm.gz
-             * 
-             * LMs are now loaded as general feature functions, so we 
transform that to either
-             * 
-             *   LanguageModel -lm_order 5 -lm_type berkeleylm -lm_file lm.gz
-             * 
-             * If the line were state minimizing:
-             * 
-             *   lm = kenlm 5 true false 100 lm.gz
-             *              
-             * StateMinimizingLanguageModel -lm_order 5 -lm_file lm.gz
-             */
-
-            String[] tokens = fds[1].split("\\s+");
-            if (tokens[2].equals("true"))
-              features.add(String.format("StateMinimizingLanguageModel 
-lm_type kenlm -lm_order %s -lm_file %s",
-                  tokens[1], tokens[5]));
-            else
-              features.add(String.format("LanguageModel -lm_type %s -lm_order 
%s -lm_file %s",
-                  tokens[0], tokens[1], tokens[5]));
-
-          } else if (parameter.equals(normalize_key(TM_PREFIX))) {
-            /* If found, convert old format:
-             *   tm = TYPE OWNER MAXSPAN PATH
-             * to new format
-             *   tm = TYPE -owner OWNER -maxspan MAXSPAN -path PATH    
-             */
-            String tmLine = fds[1];
-
-            String[] tokens = fds[1].split("\\s+");
-            if (! tokens[1].startsWith("-")) { // old format
-              tmLine = String.format("%s -owner %s -maxspan %s -path %s", 
tokens[0], tokens[1], tokens[2], tokens[3]);
-              LOG.warn("Converting deprecated TM line from '{}' -> '{}'", 
fds[1], tmLine);
-            }
-            tms.add(tmLine);
-
-          } else if (parameter.equals("v")) {
-
-            // This is already handled in ArgsParser, skip it here, easier 
than removing it there
-
-          } else if (parameter.equals(normalize_key("parse"))) {
-            parse = Boolean.parseBoolean(fds[1]);
-            LOG.debug("parse: {}", parse);
-
-          } else if (parameter.equals(normalize_key("oov-list"))) {
-            if (new File(fds[1]).exists()) {
-              oovList = new ArrayList<>();
-              try {
-                File file = new File(fds[1]);
-                BufferedReader br = new BufferedReader(new FileReader(file));
-                try {
-                  String str = br.readLine();
-                  while (str != null) {
-                    String[] tokens = str.trim().split("\\s+");
-
-                    oovList.add(new 
OOVItem(FormatUtils.ensureNonTerminalBrackets(tokens[0]),
-                            (float) Math.log(Float.parseFloat(tokens[1]))));
-
-                    str = br.readLine();
-                  }
-                  br.close();
-                } catch(IOException e){
-                  System.out.println(e);
-                }
-              } catch(IOException e){
-                System.out.println(e);
-              }
-              Collections.sort(oovList);
-
-            } else {
-              String[] tokens = fds[1].trim().split("\\s+");
-              if (tokens.length % 2 != 0) {
-                throw new RuntimeException(String.format("* FATAL: invalid 
format for '%s'", fds[0]));
-              }
-              oovList = new ArrayList<>();
-
-              for (int i = 0; i < tokens.length; i += 2)
-                oovList.add(new 
OOVItem(FormatUtils.ensureNonTerminalBrackets(tokens[i]),
-                    (float) Math.log(Float.parseFloat(tokens[i + 1]))));
-
-              Collections.sort(oovList);
-            }
-
-          } else if (parameter.equals(normalize_key("lattice-decoding"))) {
-            lattice_decoding = true;
-
-          } else if (parameter.equals(normalize_key("segment-oovs"))) {
-            segment_oovs = true;
-            lattice_decoding = true;
-
-          } else if (parameter.equals(normalize_key("default-non-terminal"))) {
-            default_non_terminal = 
ensureNonTerminalBrackets(cleanNonTerminal(fds[1].trim()));
-            LOG.debug("default_non_terminal: {}", default_non_terminal);
-
-          } else if (parameter.equals(normalize_key("goal-symbol"))) {
-            goal_symbol = 
ensureNonTerminalBrackets(cleanNonTerminal(fds[1].trim()));
-            LOG.debug("goalSymbol: {}", goal_symbol);
-
-          } else if (parameter.equals(normalize_key("weights-file"))) {
-            weights_file = fds[1];
-
-          } else if (parameter.equals(normalize_key("constrain_parse"))) {
-            constrain_parse = Boolean.parseBoolean(fds[1]);
-
-          } else if (parameter.equals(normalize_key("true_oovs_only"))) {
-            true_oovs_only = Boolean.parseBoolean(fds[1]);
-
-          } else if (parameter.equals(normalize_key("filter-grammar"))) {
-            filter_grammar = Boolean.parseBoolean(fds[1]);
-
-          } else if (parameter.equals(normalize_key("amortize"))) {
-            amortized_sorting = Boolean.parseBoolean(fds[1]);
-
-          } else if (parameter.equals(normalize_key("use_pos_labels"))) {
-            use_pos_labels = Boolean.parseBoolean(fds[1]);
-
-          } else if (parameter.equals(normalize_key("use_unique_nbest"))) {
-            use_unique_nbest = Boolean.valueOf(fds[1]);
-            LOG.debug("use_unique_nbest: {}", use_unique_nbest);
-
-          } else if (parameter.equals(normalize_key("output-format"))) {
-            outputFormat = fds[1];
-            LOG.debug("output-format: {}", outputFormat);
-
-          } else if (parameter.equals(normalize_key("include_align_index"))) {
-            include_align_index = Boolean.valueOf(fds[1]);
-            LOG.debug("include_align_index: {}", include_align_index);
-
-          } else if (parameter.equals(normalize_key("top_n"))) {
-            topN = Integer.parseInt(fds[1]);
-            LOG.debug("topN: {}", topN);
-
-          } else if (parameter.equals(normalize_key("num_parallel_decoders"))
-              || parameter.equals(normalize_key("threads"))) {
-            num_parallel_decoders = Integer.parseInt(fds[1]);
-            if (num_parallel_decoders <= 0) {
-              throw new IllegalArgumentException(
-                  "Must specify a positive number for num_parallel_decoders");
-            }
-            LOG.debug("num_parallel_decoders: {}", num_parallel_decoders);
-
-          } else if (parameter.equals(normalize_key("mark_oovs"))) {
-            mark_oovs = Boolean.valueOf(fds[1]);
-            LOG.debug("mark_oovs: {}", mark_oovs);
-
-          } else if (parameter.equals(normalize_key("pop-limit"))) {
-            pop_limit = Integer.parseInt(fds[1]);
-            LOG.info("pop-limit: {}", pop_limit);
-
-          } else if (parameter.equals(normalize_key("input-type"))) {
-            switch (fds[1]) {
-            case "json":
-              input_type = INPUT_TYPE.json;
-              break;
-            case "plain":
-              input_type = INPUT_TYPE.plain;
-              break;
-            default:
-              throw new RuntimeException(
-                  String.format("* FATAL: invalid server type '%s'", fds[1]));
-            }
-            LOG.info("    input-type: {}", input_type);
-
-          } else if (parameter.equals(normalize_key("server-type"))) {
-            if (fds[1].toLowerCase().equals("tcp"))
-              server_type = SERVER_TYPE.TCP;
-            else if (fds[1].toLowerCase().equals("http"))
-              server_type = SERVER_TYPE.HTTP;
-
-            LOG.info("    server-type: {}", server_type);
-
-          } else if (parameter.equals(normalize_key("server-port"))) {
-            server_port = Integer.parseInt(fds[1]);
-            LOG.info("    server-port: {}", server_port);
-
-          } else if (parameter.equals(normalize_key("rescore-forest"))) {
-            rescoreForest = true;
-            LOG.info("    rescore-forest: {}", rescoreForest);
-
-          } else if (parameter.equals(normalize_key("rescore-forest-weight"))) 
{
-            rescoreForestWeight = Float.parseFloat(fds[1]);
-            LOG.info("    rescore-forest-weight: {}", rescoreForestWeight);
-
-          } else if (parameter.equals(normalize_key("maxlen"))) {
-            // reset the maximum length
-            maxlen = Integer.parseInt(fds[1]);
-
-          } else if (parameter.equals("c") || parameter.equals("config")) {
-            // this was used to send in the config file, just ignore it
-
-          } else if (parameter.equals(normalize_key("feature-function"))) {
-            // add the feature to the list of features for later processing
-            features.add(fds[1]);
-
-          } else if (parameter.equals(normalize_key("maxlen"))) {
-            // add the feature to the list of features for later processing
-            maxlen = Integer.parseInt(fds[1]);
-
-          } else if (parameter
-              
.equals(normalize_key(SOFT_SYNTACTIC_CONSTRAINT_DECODING_PROPERTY_NAME))) {
-            fuzzy_matching = Boolean.parseBoolean(fds[1]);
-            LOG.debug("fuzzy_matching: {}", fuzzy_matching);
-
-          } else if (parameter.equals(normalize_key("fragment-map"))) {
-            fragmentMapFile = fds[1];
-            Tree.readMapping(fragmentMapFile);
-
-            /** PHRASE-BASED PARAMETERS **/
-          } else if (parameter.equals(normalize_key("search"))) {
-            search_algorithm = fds[1];
-
-            if (!search_algorithm.equals("cky") && 
!search_algorithm.equals("stack")) {
-              throw new RuntimeException(
-                  "-search must be one of 'stack' (for phrase-based decoding) 
" +
-                      "or 'cky' (for hierarchical / syntactic decoding)");
-            }
-
-            if (search_algorithm.equals("cky") && include_align_index) {
-              throw new RuntimeException(
-                  "include_align_index is currently not supported with cky 
search");
-            }
-
-          } else if (parameter.equals(normalize_key("reordering-limit"))) {
-            reordering_limit = Integer.parseInt(fds[1]);
-
-          } else if 
(parameter.equals(normalize_key("num-translation-options"))) {
-            num_translation_options = Integer.parseInt(fds[1]);
-
-          } else if (parameter.equals(normalize_key("no-dot-chart"))) {
-            use_dot_chart = false;
-
-          } else if (parameter.equals(normalize_key("moses"))) {
-            moses = true; // triggers some Moses-specific compatibility options
-
-          } else if (parameter.equals(normalize_key("show-weights"))) {
-            show_weights_and_quit = true;
-
-          } else if (parameter.equals(normalize_key("n-best-list"))) {
-            // for Moses compatibility
-            String[] tokens = fds[1].split("\\s+");
-            n_best_file = tokens[0];
-            if (tokens.length > 1)
-              topN = Integer.parseInt(tokens[1]);
-
-          } else if (parameter.equals(normalize_key("input-file"))) {
-            // for Moses compatibility
-            input_file = fds[1];
-
-          } else if (parameter.equals(normalize_key("weight-file"))) {
-            // for Moses, ignore
-
-          } else if (parameter.equals(normalize_key("weight-overwrite"))) {
-            weight_overwrite = fds[1];
-
-          } else if (parameter.equals(normalize_key("source-annotations"))) {
-            // Check source sentence
-            source_annotations = true;
-
-          } else if (parameter.equals(normalize_key("cached-rules-size"))) {
-            // Check source sentence
-            cachedRuleSize = Integer.parseInt(fds[1]);
-          } else if (parameter.equals(normalize_key("lowercase"))) {
-            lowercase = true;
-
-          } else if (parameter.equals(normalize_key("project-case"))) {
-            project_case = true;
-
-          } else {
-
-            if (parameter.equals(normalize_key("use-sent-specific-tm"))
-                || parameter.equals(normalize_key("add-combined-cost"))
-                || parameter.equals(normalize_key("use-tree-nbest"))
-                || parameter.equals(normalize_key("use-kenlm"))
-                || parameter.equals(normalize_key("useCubePrune"))
-                || parameter.equals(normalize_key("useBeamAndThresholdPrune"))
-                || parameter.equals(normalize_key("regexp-grammar"))) {
-              LOG.warn("ignoring deprecated parameter '{}'", fds[0]);
-
-            } else {
-              throw new RuntimeException("FATAL: unknown configuration 
parameter '" + fds[0] + "'");
-            }
-          }
-
-          LOG.info("    {} = '{}'", normalize_key(fds[0]), fds[1]);
-
-        } else {
-          /*
-           * Lines that don't have an equals sign and are not blank lines, 
empty lines, or comments,
-           * are feature values, which can be present in this file
-           */
-
-          weights.add(line);
-        }
-      }
-    } finally {
-      configReader.close();
-    }
-  }
-
-  /**
-   * Checks for invalid variable configurations
-   */
-  public void sanityCheck() {
-  }
-  
-  /**
-   * Sets the verbosity level to v (0: OFF; 1: INFO; 2: DEBUG).
-   * 
-   * @param v the verbosity level (0, 1, or 2)
-   */
-  public void setVerbosity(int v) {
-    Decoder.VERBOSE = v;
-    switch (Decoder.VERBOSE) {
-    case 0:
-      LogManager.getRootLogger().setLevel(Level.OFF);
-      break;
-    case 1:
-      LogManager.getRootLogger().setLevel(Level.INFO);
-      break;
-    case 2:
-      LogManager.getRootLogger().setLevel(Level.DEBUG);
-      break;
-    }
-  }
-
-  /**
-   * Normalizes parameter names by removing underscores and hyphens and 
lowercasing. This defines
-   * equivalence classes on external use of parameter names, permitting 
arbitrary_under_scores and
-   * camelCasing in parameter names without forcing the user to memorize them 
all. Here are some
-   * examples of equivalent ways to refer to parameter names:
-   * <pre>
-   * {pop-limit, poplimit, PopLimit, popLimit, pop_lim_it} {lmfile, lm-file, 
LM-FILE, lm_file}
-   * </pre>
-   * 
-   * @param text the string to be normalized
-   * @return normalized key
-   * 
-   */
-  public static String normalize_key(String text) {
-    return text.replaceAll("[-_]", "").toLowerCase();
-  }
-}


Reply via email to