Repository: incubator-joshua
Updated Branches:
  refs/heads/7_confsystem [created] dc6cf9967


Add TypesafeConfig dependency


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/f7513abb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/f7513abb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/f7513abb

Branch: refs/heads/7_confsystem
Commit: f7513abb8f152872c1607f8b77faed1989cf5b37
Parents: 0afbe53
Author: Felix Hieber <fhie...@amazon.com>
Authored: Wed Sep 14 09:02:32 2016 +0200
Committer: Hieber, Felix <fhie...@amazon.de>
Committed: Thu Sep 15 17:29:11 2016 +0200

----------------------------------------------------------------------
 joshua-core/pom.xml                             |   5 +
 .../org/apache/joshua/decoder/ArgsParser.java   | 116 --------
 .../joshua/decoder/ff/SourceDependentFF.java    |  29 --
 .../ff/similarity/EdgePhraseSimilarityFF.java   | 278 -------------------
 4 files changed, 5 insertions(+), 423 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f7513abb/joshua-core/pom.xml
----------------------------------------------------------------------
diff --git a/joshua-core/pom.xml b/joshua-core/pom.xml
index 2dd046b..b2646ca 100644
--- a/joshua-core/pom.xml
+++ b/joshua-core/pom.xml
@@ -186,6 +186,11 @@
       <artifactId>concurrent</artifactId>
       <version>1.3.4</version>
     </dependency>
+    <dependency>
+    <groupId>com.typesafe</groupId>
+      <artifactId>config</artifactId>
+      <version>1.2.1</version>
+    </dependency>
 
     <!-- Test Dependencies -->
     <dependency>

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f7513abb/joshua-core/src/main/java/org/apache/joshua/decoder/ArgsParser.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ArgsParser.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ArgsParser.java
deleted file mode 100644
index 97baa27..0000000
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ArgsParser.java
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder;
-
-import java.io.IOException;
-import java.nio.charset.Charset;
-import java.nio.file.Files;
-import java.nio.file.Paths;
-
-import org.apache.joshua.util.io.LineReader;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * @author orluke
- *
- */
-public class ArgsParser {
-
-  private static final Logger LOG = LoggerFactory.getLogger(ArgsParser.class);
-
-  private String configFile = null;
-
-  /**
-   * Parse the arguments passed from the command line when the JoshuaDecoder application was
-   * executed from the command line.
-   *
-   * @param args string array of input arguments
-   * @param config the {@link org.apache.joshua.decoder.JoshuaConfiguration}
-   * @throws IOException if there is an error wit the input arguments
-   */
-  public ArgsParser(String[] args, JoshuaConfiguration config) throws IOException {
-
-    /*
-     * Look for a verbose flag, -v.
-     *
-     * Look for an argument to the "-config" flag to find the config file, if any.
-     */
-    if (args.length >= 1) {
-      // Search for a verbose flag
-      for (int i = 0; i < args.length; i++) {
-        if (args[i].equals("-v")) {
-          Decoder.VERBOSE = Integer.parseInt(args[i + 1].trim());
-          config.setVerbosity(Decoder.VERBOSE);
-        }
-
-        if (args[i].equals("-version")) {
-          try (LineReader reader = new LineReader(String.format("%s/VERSION", System.getenv("JOSHUA")));) {
-            reader.readLine();
-            String version = reader.readLine().split("\\s+")[2];
-            System.out.println(String.format("The Apache Joshua machine translator, version %s", version));
-            System.out.println("joshua.incubator.apache.org");
-            System.exit(0);
-          }
-        } else if (args[i].equals("-license")) {
-          try {
-            Files.readAllLines(Paths.get(String.format("%s/../LICENSE",
-                JoshuaConfiguration.class.getProtectionDomain().getCodeSource().getLocation()
-                    .getPath())), Charset.defaultCharset()).forEach(System.out::println);
-          } catch (IOException e) {
-            throw new RuntimeException("FATAL: missing license file!", e);
-          }
-          System.exit(0);
-        }
-      }
-
-      // Search for the configuration file from the end (so as to take the last one)
-      for (int i = args.length-1; i >= 0; i--) {
-        if (args[i].equals("-c") || args[i].equals("-config")) {
-
-          setConfigFile(args[i + 1].trim());
-          try {
-            LOG.info("Parameters read from configuration file: {}", getConfigFile());
-            config.readConfigFile(getConfigFile());
-          } catch (IOException e) {
-            throw new RuntimeException(e);
-          }
-          break;
-        }
-      }
-
-      // Now process all the command-line args
-      config.processCommandLineOptions(args);
-    }
-  }
-
-  /**
-   * @return the configFile
-   */
-  public String getConfigFile() {
-    return configFile;
-  }
-
-  /**
-   * @param configFile the configFile to set
-   */
-  public void setConfigFile(String configFile) {
-    this.configFile = configFile;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f7513abb/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java
deleted file mode 100644
index dec509f..0000000
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.ff;
-
-import org.apache.joshua.decoder.segment_file.Sentence;
-
-public interface SourceDependentFF extends Cloneable {
-
-  void setSource(Sentence sentence);
-
-  FeatureFunction clone();
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f7513abb/joshua-core/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
deleted file mode 100644
index 38bd373..0000000
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.ff.similarity;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.PrintWriter;
-import java.net.Socket;
-import java.net.UnknownHostException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import com.google.common.base.Throwables;
-
-import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.chart_parser.SourcePath;
-import org.apache.joshua.decoder.ff.FeatureVector;
-import org.apache.joshua.decoder.ff.StatefulFF;
-import org.apache.joshua.decoder.ff.SourceDependentFF;
-import org.apache.joshua.decoder.ff.state_maintenance.DPState;
-import org.apache.joshua.decoder.ff.state_maintenance.NgramDPState;
-import org.apache.joshua.decoder.ff.tm.Rule;
-import org.apache.joshua.decoder.hypergraph.HGNode;
-import org.apache.joshua.decoder.segment_file.Sentence;
-import org.apache.joshua.util.Cache;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class EdgePhraseSimilarityFF extends StatefulFF implements SourceDependentFF {
-
-  private static final Logger LOG = LoggerFactory.getLogger(EdgePhraseSimilarityFF.class);
-
-  private static final Cache<String, Float> cache = new Cache<>(100000000);
-
-  private final String host;
-  private final int port;
-
-  private PrintWriter serverAsk;
-  private BufferedReader serverReply;
-
-  private int[] source;
-
-  private final int MAX_PHRASE_LENGTH = 4;
-
-  public EdgePhraseSimilarityFF(FeatureVector weights, String[] args, JoshuaConfiguration config) throws NumberFormatException, UnknownHostException, IOException {
-    super(weights, "EdgePhraseSimilarity", args, config);
-
-    this.host = parsedArgs.get("host");
-    this.port = Integer.parseInt(parsedArgs.get("port"));
-
-    initializeConnection();
-  }
-
-  private void initializeConnection() throws NumberFormatException, IOException {
-    LOG.info("Opening connection.");
-    Socket socket = new Socket(host, port);
-    serverAsk = new PrintWriter(socket.getOutputStream(), true);
-    serverReply = new BufferedReader(new InputStreamReader(socket.getInputStream()));
-  }
-
-  @Override
-  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
-      Sentence sentence, Accumulator acc) {
-
-    float value = computeScore(rule, tailNodes);
-    acc.add(featureId, value);
-
-    // TODO 07/2013: EdgePhraseSimilarity needs to know its order rather than inferring it from tail
-    // nodes.
-    return new NgramDPState(new int[1], new int[1]);
-  }
-  
-  @Override
-  public DPState computeFinal(HGNode tailNode, int i, int j, SourcePath path, Sentence sentence, Accumulator acc) {
-    return null;
-  }
-
-  public float computeScore(Rule rule, List<HGNode> tailNodes) {
-    if (tailNodes == null || tailNodes.isEmpty())
-      return 0;
-
-    // System.err.println("RULE [" + spanStart + ", " + spanEnd + "]: " + rule.toString());
-
-    int[] target = rule.getTarget();
-    int lm_state_size = 0;
-    for (HGNode node : tailNodes) {
-      NgramDPState state = (NgramDPState) node.getDPState(stateIndex);
-      lm_state_size += state.getLeftLMStateWords().length + state.getRightLMStateWords().length;
-    }
-
-    ArrayList<int[]> batch = new ArrayList<>();
-
-    // Build joined target string.
-    int[] join = new int[target.length + lm_state_size];
-
-    int idx = 0, num_gaps = 1, num_anchors = 0;
-    int[] anchors = new int[rule.getArity() * 2];
-    int[] indices = new int[rule.getArity() * 2];
-    int[] gaps = new int[rule.getArity() + 2];
-    gaps[0] = 0;
-    for (int t = 0; t < target.length; t++) {
-      if (target[t] < 0) {
-        HGNode node = tailNodes.get(-(target[t] + 1));
-        if (t != 0) {
-          indices[num_anchors] = node.i;
-          anchors[num_anchors++] = idx;
-        }
-        NgramDPState state = (NgramDPState) node.getDPState(stateIndex);
-        // System.err.print("LEFT:  ");
-        // for (int w : state.getLeftLMStateWords()) System.err.print(Vocabulary.word(w) + " ");
-        // System.err.println();
-        for (int w : state.getLeftLMStateWords())
-          join[idx++] = w;
-        int GAP = 0;
-        join[idx++] = GAP;
-        gaps[num_gaps++] = idx;
-        // System.err.print("RIGHT:  ");
-        // for (int w : state.getRightLMStateWords()) System.err.print(Vocabulary.word(w) + " ");
-        // System.err.println();
-        for (int w : state.getRightLMStateWords())
-          join[idx++] = w;
-        if (t != target.length - 1) {
-          indices[num_anchors] = node.j;
-          anchors[num_anchors++] = idx;
-        }
-      } else {
-        join[idx++] = target[t];
-      }
-    }
-    gaps[gaps.length - 1] = join.length + 1;
-
-    // int c = 0;
-    // System.err.print("> ");
-    // for (int k = 0; k < join.length; k++) {
-    // if (c < num_anchors && anchors[c] == k) {
-    // c++;
-    // System.err.print("| ");
-    // }
-    // System.err.print(Vocabulary.word(join[k]) + " ");
-    // }
-    // System.err.println("<");
-
-    int g = 0;
-    for (int a = 0; a < num_anchors; a++) {
-      if (a > 0 && anchors[a - 1] == anchors[a])
-        continue;
-      if (anchors[a] > gaps[g + 1])
-        g++;
-      int left = Math.max(gaps[g], anchors[a] - MAX_PHRASE_LENGTH + 1);
-      int right = Math.min(gaps[g + 1] - 1, anchors[a] + MAX_PHRASE_LENGTH - 1);
-
-      int[] target_phrase = new int[right - left];
-      System.arraycopy(join, left, target_phrase, 0, target_phrase.length);
-      int[] source_phrase = getSourcePhrase(indices[a]);
-
-      if (source_phrase != null && target_phrase.length != 0) {
-        // System.err.println("ANCHOR: " + indices[a]);
-        batch.add(source_phrase);
-        batch.add(target_phrase);
-      }
-    }
-    return getSimilarity(batch);
-  }
-
-  @Override
-  public float estimateFutureCost(Rule rule, DPState currentState, Sentence sentence) {
-    return 0.0f;
-  }
-
-  /**
-   * From SourceDependentFF interface.
-   */
-  @Override
-  public void setSource(Sentence sentence) {
-    if (! sentence.isLinearChain())
-      throw new RuntimeException("EdgePhraseSimilarity not defined for lattices");
-    this.source = sentence.getWordIDs();
-  }
-
-  public EdgePhraseSimilarityFF clone() {
-    try {
-      return new EdgePhraseSimilarityFF(this.weights, args, config);
-    } catch (Exception e) {
-      throw Throwables.propagate(e);
-    }
-  }
-
-  @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
-    return 0.0f;
-  }
-
-  private int[] getSourcePhrase(int anchor) {
-    int idx;
-    int length = Math.min(anchor, MAX_PHRASE_LENGTH - 1)
-        + Math.min(source.length - anchor, MAX_PHRASE_LENGTH - 1);
-    if (length <= 0)
-      return null;
-    int[] phrase = new int[length];
-    idx = 0;
-    for (int p = Math.max(0, anchor - MAX_PHRASE_LENGTH + 1); p < Math.min(source.length, anchor
-        + MAX_PHRASE_LENGTH - 1); p++)
-      phrase[idx++] = source[p];
-    return phrase;
-  }
-
-  private float getSimilarity(List<int[]> batch) {
-    float similarity = 0.0f;
-    int count = 0;
-    StringBuilder query = new StringBuilder();
-    List<String> to_cache = new ArrayList<>();
-    query.append("xb");
-    for (int i = 0; i < batch.size(); i += 2) {
-      int[] source = batch.get(i);
-      int[] target = batch.get(i + 1);
-
-      if (Arrays.equals(source, target)) {
-        similarity += 1;
-        count++;
-      } else {
-        String source_string = Vocabulary.getWords(source);
-        String target_string = Vocabulary.getWords(target);
-
-        String both;
-        if (source_string.compareTo(target_string) > 0)
-          both = source_string + " ||| " + target_string;
-        else
-          both = target_string + " ||| " + source_string;
-
-        Float cached = cache.get(both);
-        if (cached != null) {
-          // System.err.println("SIM: " + source_string + " X " + target_string + " = " + cached);
-          similarity += cached;
-          count++;
-        } else {
-          query.append("\t").append(source_string);
-          query.append("\t").append(target_string);
-          to_cache.add(both);
-        }
-      }
-    }
-    if (!to_cache.isEmpty()) {
-      try {
-        serverAsk.println(query.toString());
-        String response = serverReply.readLine();
-        String[] scores = response.split("\\s+");
-        for (int i = 0; i < scores.length; i++) {
-          Float score = Float.parseFloat(scores[i]);
-          cache.put(to_cache.get(i), score);
-          similarity += score;
-          count++;
-        }
-      } catch (Exception e) {
-        return 0;
-      }
-    }
-    return (count == 0 ? 0 : similarity / count);
-  }
-}

Reply via email to