[01/27] incubator-joshua git commit: large commit converting phrase-based decoding to new rule format

mjpost Mon, 22 Aug 2016 14:47:48 -0700

Repository: incubator-joshua
Updated Branches:
  refs/heads/master 2b570d2b6 -> 2041a3f97



large commit converting phrase-based decoding to new rule format

Not working yet, but much of the code is redone and future estimates are being 
computed correctly.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/dcc7e7ee
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/dcc7e7ee
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/dcc7e7ee

Branch: refs/heads/master
Commit: dcc7e7ee72228de08b70003a49344c2614eaedbe
Parents: fcaf0bf
Author: Matt Post <[email protected]>
Authored: Tue Aug 16 18:13:06 2016 -0400
Committer: Matt Post <[email protected]>
Committed: Tue Aug 16 18:13:06 2016 -0400

----------------------------------------------------------------------
 .gitignore                                      |   1 +
 .../decoder/ff/tm/format/MosesFormatReader.java |  13 +--
 .../apache/joshua/decoder/phrase/Candidate.java | 103 +++++++++++++------
 .../apache/joshua/decoder/phrase/Future.java    |   9 +-
 .../apache/joshua/decoder/phrase/Header.java    |  87 ----------------
 .../joshua/decoder/phrase/Hypothesis.java       |  48 ++++++---
 .../joshua/decoder/phrase/PhraseTable.java      |   8 +-
 .../org/apache/joshua/decoder/phrase/Stack.java |  15 ++-
 .../apache/joshua/decoder/phrase/Stacks.java    |  18 +++-
 .../joshua/decoder/phrase/TargetPhrases.java    |   1 +
 .../org/apache/joshua/tools/GrammarPacker.java  |   8 +-
 11 files changed, 136 insertions(+), 175 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index d3d311e..0d42974 100644
--- a/.gitignore
+++ b/.gitignore
@@ -57,3 +57,4 @@ build
 .classpath
 /target/
 .project
+/doc/

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java 
b/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
index 7811b3b..cdf2170 100644
--- 
a/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
+++ 
b/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
@@ -63,22 +63,15 @@ public class MosesFormatReader extends HieroFormatReader {
    *    
    * becomes
    * 
-   *    [X] ||| [X,1] mots francaises ||| [X,1] French words ||| 1 2 3  ||| 
0-1 1-0
+   *    [X] ||| mots francaises ||| French words ||| 1 2 3  ||| 0-1 1-0
    *    
-   * For thrax-extracted phrasal grammars, it transforms
-   * 
-   *    [X] ||| mots francaises ||| French words ||| 1 2 3 ||| 0-1 1-0
-   *
-   * into
-   * 
-   *    [X] ||| [X,1] mots francaises ||| [X,1] French words ||| 1 2 3 ||| 0-1 
1-0
+   * For thrax-extracted phrasal grammars, no transformation is needed.
    */
   @Override
   public Rule parseLine(String line) {
     String[] fields = line.split(Constants.fieldDelimiter);
     
-    String nt = FormatUtils.cleanNonTerminal(Constants.defaultNT);
-    StringBuffer hieroLine = new StringBuffer(Constants.defaultNT + " ||| [" + 
nt + ",1] " + fields[0] + " ||| [" + nt + ",1] " + fields[1] + " |||");
+    StringBuffer hieroLine = new StringBuffer(Constants.defaultNT + " ||| " + 
fields[0] + " ||| " + fields[1] + " |||");
 
     String mosesFeatureString = fields[2];
     for (String value: mosesFeatureString.split(" ")) {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java 
b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index ee8a2a9..2abe560 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@ -19,9 +19,17 @@
 package org.apache.joshua.decoder.phrase;
 
 /*** 
- * A candidate is basically a cube prune state. It contains a list of 
hypotheses and target
- * phrases, and an instantiated candidate is a pair of indices that index 
these two lists. This
- * is the "cube prune" position.
+ * A candidate represents a translation hypothesis that may possibly be added 
to the translation
+ * hypergraph. It groups together (a) a set of translation hypotheses all 
having the same coverage
+ * vector and (b) a set of compatible phrase extensions that all cover the 
same source span. A 
+ * Candidate object therefore denotes a particular precise coverage vector. 
When a Candidate is
+ * instantiated, it has values in ranks[] that are indices into these two 
lists representing
+ * the current cube prune state.
+ * 
+ * For any particular (previous hypothesis) x (translation option) combination 
(a selection from
+ * both lists), there is no guarantee about whether this is a (m)onotonic, 
(s)wap, or (d)iscontinuous
+ * rule application. This must be inferred from the span (recording the 
portion of the input being
+ * translated) and the last index of the previous hypothesis under 
consideration.
  */
 
 import java.util.ArrayList;
@@ -30,27 +38,41 @@ import java.util.List;
 
 import org.apache.joshua.corpus.Span;
 import org.apache.joshua.decoder.chart_parser.ComputeNodeResult;
+import org.apache.joshua.decoder.ff.FeatureFunction;
 import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 import org.apache.joshua.decoder.ff.tm.Rule;
 import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.segment_file.Sentence;
 
 public class Candidate {
-
+  
+  private List<FeatureFunction> featureFunctions;
+  private Sentence sentence;
+  
+  // source span of new phrase
+  public Span span;
+  
   // the set of hypotheses that can be paired with phrases from this span 
   private List<Hypothesis> hypotheses;
 
   // the list of target phrases gathered from a span of the input
   private TargetPhrases phrases;
-
-  // source span of new phrase
-  public Span span;
   
   // future cost of applying phrases to hypotheses
-  float future_delta;
+  private float future_delta;
   
   // indices into the hypotheses and phrases arrays (used for cube pruning)
   private int[] ranks;
   
+  // the reordering rule used by an instantiated Candidate
+  private Rule rule;
+  
+  // the HGNode built over the current target side phrase
+  private HGNode phraseNode;
+  
+  // the cost of the current configuration
+  private ComputeNodeResult computedResult;
+  
   // scoring and state information 
   private ComputeNodeResult result;
   
@@ -96,22 +118,27 @@ public class Candidate {
         ranks[0], hypotheses.size(), ranks[1], phrases.size(),
         getHypothesis(), getRule().getEnglishWords().replaceAll("\\[.*?\\] 
",""), getSpan());
   }
-  
-  public Candidate(List<Hypothesis> hypotheses, TargetPhrases phrases, Span 
span, float delta) {
-    this.hypotheses = hypotheses;
-    this.phrases = phrases;
-    this.span = span;
-    this.future_delta = delta;
-    this.ranks = new int[] { 0, 0 };
-  }
 
-  public Candidate(List<Hypothesis> hypotheses, TargetPhrases phrases, Span 
span, float delta, int[] ranks) {
+  public Candidate(List<FeatureFunction> featureFunctions, Sentence sentence, 
+      List<Hypothesis> hypotheses, TargetPhrases phrases, Span span, float 
delta, int[] ranks) {
     this.hypotheses = hypotheses;
     this.phrases = phrases;
     this.span = span;
     this.future_delta = delta;
     this.ranks = ranks;
+    this.rule = isMonotonic() ? Hypothesis.MONO_RULE : Hypothesis.END_RULE;
 //    this.score = hypotheses.get(ranks[0]).score + 
phrases.get(ranks[1]).getEstimatedCost();
+    this.phraseNode = null;
+  }
+  
+  /**
+   * Determines whether the current previous hypothesis extended with the 
currently selected
+   * phrase represents a straight or inverted rule application.
+   * 
+   * @return
+   */
+  private boolean isMonotonic() {
+    return getHypothesis().getLastSourceIndex() < span.start;
   }
   
   /**
@@ -131,7 +158,7 @@ public class Candidate {
    */
   public Candidate extendHypothesis() {
     if (ranks[0] < hypotheses.size() - 1) {
-      return new Candidate(hypotheses, phrases, span, future_delta, new int[] 
{ ranks[0] + 1, ranks[1] });
+      return new Candidate(featureFunctions, sentence, hypotheses, phrases, 
span, future_delta, new int[] { ranks[0] + 1, ranks[1] });
     }
     return null;
   }
@@ -143,7 +170,7 @@ public class Candidate {
    */
   public Candidate extendPhrase() {
     if (ranks[1] < phrases.size() - 1) {
-      return new Candidate(hypotheses, phrases, span, future_delta, new int[] 
{ ranks[0], ranks[1] + 1 });
+      return new Candidate(featureFunctions, sentence, hypotheses, phrases, 
span, future_delta, new int[] { ranks[0], ranks[1] + 1 });
     }
     
     return null;
@@ -170,13 +197,24 @@ public class Candidate {
   }
   
   /**
-   * This returns the target side {@link org.apache.joshua.corpus.Phrase}, 
which is a {@link org.apache.joshua.decoder.ff.tm.Rule} object. This is just a
-   * convenience function that works by returning the phrase indexed in 
ranks[1].
+   * This returns a new Hypothesis (HGNode) representing the phrase being 
added, i.e., a terminal
+   * production in the hypergraph. The score and DP state are computed only 
here on demand.
+   * 
+   * @return a new hypergraph node representing the phrase translation
+   */
+  public HGNode getPhraseNode() {
+    ComputeNodeResult result = new ComputeNodeResult(featureFunctions, 
getRule(), null, span.start, span.end, null, sentence);
+    phraseNode = new HGNode(-1, span.end, rule.getLHS(), result.getDPStates(), 
null, result.getPruningEstimate());
+    return phraseNode;
+  }
+    
+  /**
+   * This returns the rule being applied (straight or inverted)
    * 
    * @return the phrase at position ranks[1]
    */
   public Rule getRule() {
-    return phrases.get(ranks[1]);
+    return this.rule;
   }
   
   /**
@@ -187,7 +225,13 @@ public class Candidate {
    */
   public List<HGNode> getTailNodes() {
     List<HGNode> tailNodes = new ArrayList<HGNode>();
-    tailNodes.add(getHypothesis());
+    if (isMonotonic()) {
+      tailNodes.add(getHypothesis());
+      tailNodes.add(getPhraseNode());
+    } else {
+      tailNodes.add(getPhraseNode());
+      tailNodes.add(getHypothesis());
+    }
     return tailNodes;
   }
   
@@ -202,13 +246,8 @@ public class Candidate {
     return cov;
   }
 
-  /**
-   * Sets the result of a candidate (TODO should just be moved to the 
constructor).
-   * 
-   * @param result todo
-   */
-  public void setResult(ComputeNodeResult result) {
-    this.result = result;
+  public ComputeNodeResult getResult() {
+    return computedResult;
   }
 
   /**
@@ -234,8 +273,4 @@ public class Candidate {
   public List<DPState> getStates() {
     return result.getDPStates();
   }
-
-  public ComputeNodeResult getResult() {
-    return result;
-  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Future.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Future.java 
b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
index 0ece4a3..572aa64 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Future.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
@@ -43,7 +43,7 @@ public class Future {
 
     sentlen = chart.SentenceLength();
     entries = new ChartSpan<Float>(sentlen + 1, Float.NEGATIVE_INFINITY);
-
+    
     /*
      * The sentence is represented as a sequence of words, with the first and 
last words set
      * to <s> and </s>. We start indexing at 1 because the first word (<s>) is 
always covered.
@@ -68,7 +68,7 @@ public class Future {
 
     // All the phrases are in, now do minimum dynamic programming.  Lengths 0 
and 1 were already handled above.
     for (int length = 2; length <= chart.SentenceLength(); length++) {
-      for (int begin = 1; begin <= chart.SentenceLength() - length; begin++) {
+      for (int begin = 1; begin < chart.SentenceLength() - length; begin++) {
         for (int division = begin + 1; division < begin + length; division++) {
           setEntry(begin, begin + length, Math.max(getEntry(begin, begin + 
length), getEntry(begin, division) + getEntry(division, begin + length)));
         }
@@ -106,14 +106,13 @@ public class Future {
 
   private float getEntry(int begin, int end) {
     assert end >= begin;
-    assert end < this.sentlen;
+    assert end <= this.sentlen;
     return entries.get(begin, end);
   }
 
   private void setEntry(int begin, int end, float value) {
     assert end >= begin;
-    assert end < this.sentlen;
-    //    System.err.println(String.format("future cost from %d to %d is 
%.5f", begin, end, value));
+    assert end <= this.sentlen;
     entries.set(begin, end, value);
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Header.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Header.java 
b/src/main/java/org/apache/joshua/decoder/phrase/Header.java
deleted file mode 100644
index 30d771c..0000000
--- a/src/main/java/org/apache/joshua/decoder/phrase/Header.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.phrase;
-
-// PORT: done
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.Comparator;
-
-public class Header implements Comparable<Header>, Comparator<Header> {
-
-  private static final Logger LOG = LoggerFactory.getLogger(Header.class);
-
-  private float score;
-  private int arity;
-  private Note note;
-    
-  protected Header() {
-    score = 0.0f;
-    arity = 0;
-    note = null;
-  }
-  
-  protected Header(Header other) {
-    this.score = other.GetScore();
-    this.arity = other.GetArity();
-    this.note = other.GetNote();
-  }
-  
-  protected Header(int arity) {
-    this.score = 0.0f;
-    this.arity = arity;
-    this.note = new Note();
-  }
-  
-  public boolean Valid() {
-    // C++: return base_;
-    LOG.debug("Header::Valid(): {}", (note != null));
-    return note != null;
-  }
-  
-  public float GetScore() {
-    return score;
-  }
-  
-  public void SetScore(float score) {
-    this.score = score;
-  }
-
-  public int GetArity() { return arity; }
-  
-  public Note GetNote() { return note; }
-  
-  public void SetNote(Note note) { this.note = note; }
-
-  @Override
-  public int compareTo(Header other) {
-    if (this.GetScore() < other.GetScore())
-      return -1;
-    else if (this.GetScore() > other.GetScore())
-      return 1;
-    return 0;
-  }
-  
-  @Override
-  public int compare(Header arg0, Header arg1) {
-    return arg0.compareTo(arg1);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java 
b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
index 71d3df9..f87b728 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
@@ -18,9 +18,8 @@
  */
 package org.apache.joshua.decoder.phrase;
 
-import java.util.List; 
+import java.util.List;
 
-import org.apache.joshua.corpus.Vocabulary;
 import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 import org.apache.joshua.decoder.ff.tm.Rule;
 import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
@@ -28,10 +27,12 @@ import org.apache.joshua.decoder.hypergraph.HGNode;
 import org.apache.joshua.decoder.hypergraph.HyperEdge;
 
 /**
- * Represents a hypothesis, a translation of some coverage of the input. 
Extends {@link org.apache.joshua.decoder.hypergraph.HGNode}, 
- * through a bit of a hack. Whereas (i,j) represents the span of an {@link 
org.apache.joshua.decoder.hypergraph.HGNode}, i here is not used,
- * and j is overloaded to denote the span of the phrase being applied. The 
complete coverage vector 
- * can be obtained by looking at the tail pointer and casting it.
+ * Represents a hypothesis, a translation of some subset of the input 
sentence. Extends 
+ * {@link org.apache.joshua.decoder.hypergraph.HGNode}, through a bit of a 
hack. Whereas (i,j) 
+ * represents the span of an {@link 
org.apache.joshua.decoder.hypergraph.HGNode}, i here is not used,
+ * and j is overloaded to denote the index into the source string of the end 
of the last phrase that 
+ * was applied. The complete coverage vector can be obtained by looking at the 
tail pointer and 
+ * casting it.
  * 
  * @author Kenneth Heafield
  * @author Matt Post [email protected]
@@ -41,9 +42,11 @@ public class Hypothesis extends HGNode implements 
Comparable<Hypothesis> {
   // The hypothesis' coverage vector
   private Coverage coverage;
 
-  public static Rule BEGIN_RULE = new HieroFormatReader().parseLine("[X] ||| 
<s> ||| <s> |||   ||| 0-0");
-  public static Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| 
[X,1] </s> ||| [X,1] </s> |||   ||| 0-0 1-1");
-
+  public static Rule BEGIN_RULE = new HieroFormatReader().parseLine("[GOAL] 
||| <s> ||| <s> |||   ||| 0-0");
+  public static Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| 
[GOAL,1] </s> ||| [GOAL,1] </s> |||   ||| 0-0 1-1");
+  public static Rule MONO_RULE = new HieroFormatReader().parseLine("[GOAL] ||| 
[GOAL,1] [X,2] ||| [GOAL,1] [X,2] |||   ||| 0-0 1-1");
+  public static Rule SWAP_RULE = new HieroFormatReader().parseLine("[GOAL] ||| 
[X,1] [GOAL,2] ||| [GOAL,2] [X,1] |||   ||| 0-1 1-0");
+  
   public String toString() {
     StringBuffer sb = new StringBuffer();
     for (DPState state: getDPStates())
@@ -55,18 +58,25 @@ public class Hypothesis extends HGNode implements 
Comparable<Hypothesis> {
 
   // Initialize root hypothesis. Provide the LM's BeginSentence.
   public Hypothesis(List<DPState> states, float futureCost) {
-    super(0, 1, Vocabulary.id("[X]"), states,
+    super(0, 1, BEGIN_RULE.getLHS(), states,
         new HyperEdge(BEGIN_RULE, 0.0f, 0.0f, null, null), futureCost);
     this.coverage = new Coverage(1);
   }
 
+  /**
+   * This creates a hypothesis from a Candidate object
+   * 
+   * @param cand the candidate
+   */
   public Hypothesis(Candidate cand) {
     // TODO: sourcepath
-    super(-1, cand.span.end, Vocabulary.id("[X]"), cand.getStates(), new 
HyperEdge(
-        cand.getRule(), cand.getResult().getViterbiCost(), 
cand.getResult().getTransitionCost(),
-        cand.getTailNodes(), null), cand.score());
+    super(-1, cand.span.end, cand.getRule().getLHS(), cand.getStates(), 
+        new HyperEdge(cand.getRule(), cand.getResult().getViterbiCost(), 
+            cand.getResult().getTransitionCost(),
+            cand.getTailNodes(), null), cand.score());
     this.coverage = cand.getCoverage();
   }
+
   
   // Extend a previous hypothesis.
   public Hypothesis(List<DPState> states, float score, Hypothesis previous, 
int source_end, Rule target) {
@@ -74,6 +84,10 @@ public class Hypothesis extends HGNode implements 
Comparable<Hypothesis> {
     this.coverage = previous.coverage;
   }
 
+  public Hypothesis(int lastSourceIndex, int lhs, List<DPState> states) {
+    super(-1, lastSourceIndex, lhs, states, null, 0.0f);
+  }
+
   public Coverage getCoverage() {
     return coverage;
   }
@@ -86,16 +100,16 @@ public class Hypothesis extends HGNode implements 
Comparable<Hypothesis> {
    * HGNodes (designed for chart parsing) maintain a span (i,j). We overload j
    * here to record the index of the last translated source word.
    * 
-   * @return the int 'j' which is overloaded to denote the span of the phrase 
being applied
+   * @return the index of the last translated source word
    */
-  public int LastSourceIndex() {
+  public int getLastSourceIndex() {
     return j;
   }
 
   @Override
   public int hashCode() {
     int hash = 0;
-    hash = 31 * LastSourceIndex() + 19 * getCoverage().hashCode();
+    hash = 31 * getLastSourceIndex() + 19 * getCoverage().hashCode();
     if (null != dpStates && dpStates.size() > 0)
       for (DPState dps: dpStates)
         hash *= 57 + dps.hashCode();
@@ -112,7 +126,7 @@ public class Hypothesis extends HGNode implements 
Comparable<Hypothesis> {
     if (obj instanceof Hypothesis) {
       Hypothesis other = (Hypothesis) obj;
 
-      if (LastSourceIndex() != other.LastSourceIndex() || ! 
getCoverage().equals(other.getCoverage()))
+      if (getLastSourceIndex() != other.getLastSourceIndex() || ! 
getCoverage().equals(other.getCoverage()))
         return false;
       
       if (dpStates == null)

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java 
b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
index 312781f..6b237a9 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
@@ -18,8 +18,6 @@
  */
 package org.apache.joshua.decoder.phrase;
 
-import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER;
-
 import java.io.File;
 import java.io.IOException;
 import java.util.List;
@@ -80,14 +78,13 @@ public class PhraseTable implements Grammar {
   }
       
   /**
-   * Returns the longest source phrase read. Because phrases have a dummy 
nonterminal prepended to
-   * them, we need to subtract 1.
+   * Returns the longest source phrase read.
    * 
    * @return the longest source phrase read.
    */
   @Override
   public int getMaxSourcePhraseLength() {
-    return this.backend.getMaxSourcePhraseLength() - 1;
+    return this.backend.getMaxSourcePhraseLength();
   }
 
   /**
@@ -99,7 +96,6 @@ public class PhraseTable implements Grammar {
   public RuleCollection getPhrases(int[] sourceWords) {
     if (sourceWords.length != 0) {
       Trie pointer = getTrieRoot();
-      pointer = pointer.match(Vocabulary.id("[X]"));
       int i = 0;
       while (pointer != null && i < sourceWords.length)
         pointer = pointer.match(sourceWords[i++]);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java 
b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
index d0ae2da..ad24a51 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
@@ -22,13 +22,10 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.List;
 import java.util.PriorityQueue;
 import java.util.Set;
 
 import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.chart_parser.ComputeNodeResult;
-import org.apache.joshua.decoder.ff.FeatureFunction;
 import org.apache.joshua.decoder.segment_file.Sentence;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -46,7 +43,6 @@ public class Stack extends ArrayList<Hypothesis> {
   private HashMap<Coverage, ArrayList<Hypothesis>> coverages;
   
   private Sentence sentence;
-  private List<FeatureFunction> featureFunctions;
   private JoshuaConfiguration config;
 
   /* The list of states we've already visited. */
@@ -65,8 +61,7 @@ public class Stack extends ArrayList<Hypothesis> {
    * @param sentence input for a {@link org.apache.joshua.lattice.Lattice}
    * @param config populated {@link 
org.apache.joshua.decoder.JoshuaConfiguration}
    */
-  public Stack(List<FeatureFunction> featureFunctions, Sentence sentence, 
JoshuaConfiguration config) {
-    this.featureFunctions = featureFunctions;
+  public Stack(Sentence sentence, JoshuaConfiguration config) {
     this.sentence = sentence;
     this.config = config;
     
@@ -149,6 +144,9 @@ public class Stack extends ArrayList<Hypothesis> {
 
     // Constrained decoding
     if (sentence.target() != null) {
+      throw new RuntimeException("* FATAL! Constrained decoding no longer 
works for the new phrase format");
+      // TODO: fix constrained decoding
+      /*
       String oldWords = 
cand.getHypothesis().bestHyperedge.getRule().getEnglishWords().replace("[X,1] 
",  "");
       String newWords = cand.getRule().getEnglishWords().replace("[X,1] ",  
"");
           
@@ -159,12 +157,10 @@ public class Stack extends ArrayList<Hypothesis> {
           addCandidate(next); 
         return;
       }
+      */
     }
 
     // TODO: sourcepath
-    ComputeNodeResult result = new ComputeNodeResult(this.featureFunctions, 
cand.getRule(),
-        cand.getTailNodes(), -1, cand.getSpan().end, null, this.sentence);
-    cand.setResult(result);
     
     candidates.add(cand);
   }
@@ -199,6 +195,7 @@ public class Stack extends ArrayList<Hypothesis> {
   /**
    * Adds a popped candidate to the chart / main stack. This is a candidate we 
have decided to
    * keep around.
+   * 
    * @param complete a completely-initialized translation {@link 
org.apache.joshua.decoder.phrase.Candidate}
    * 
    */

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java 
b/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
index 8c092ec..dc1a692 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
@@ -126,13 +126,13 @@ public class Stacks {
     // Initialize root hypothesis with <s> context and future cost for 
everything.
     ComputeNodeResult result = new ComputeNodeResult(this.featureFunctions, 
Hypothesis.BEGIN_RULE,
         null, -1, 1, null, this.sentence);
-    Stack firstStack = new Stack(featureFunctions, sentence, config);
+    Stack firstStack = new Stack(sentence, config);
     firstStack.add(new Hypothesis(result.getDPStates(), future.Full()));
     stacks.add(firstStack);
     
     // Decode with increasing numbers of source words. 
     for (int source_words = 2; source_words <= sentence.length(); 
++source_words) {
-      Stack targetStack = new Stack(featureFunctions, sentence, config);
+      Stack targetStack = new Stack(sentence, config);
       stacks.add(targetStack);
 
       // Iterate over stacks to continue from.
@@ -144,7 +144,13 @@ public class Stacks {
         LOG.debug("WORDS {} MAX {} (STACK {} phrase_length {})", source_words,
             chart.MaxSourcePhraseLength(), from_stack, phrase_length);
         
-        // Iterate over antecedents in this stack.
+        /* Each from stack groups together lots of different coverage vectors 
that all cover the
+         * same number of words. We have the number of covered words from 
from_stack, and the length
+         * of the phrases we are going to add from (source_words - 
from_stack). We now iterate over
+         * all coverage vectors, finding the set of phrases that can extend 
each of them, given
+         * the two constraints: the phrase length, and the current coverage 
vector. These will all
+         * be grouped under the same target stack.
+         */
         for (Coverage coverage: tailStack.getCoverages()) {
           ArrayList<Hypothesis> hypotheses = tailStack.get(coverage); 
           
@@ -161,6 +167,9 @@ public class Stacks {
               continue;
             }
 
+            /* We have found a permissible phrase start point and length, that 
fits with the current
+             * coverage vector. Record that in a Span.
+             */
             Span span = new Span(begin, begin + phrase_length);
 
             // Don't append </s> until the end
@@ -171,7 +180,6 @@ public class Stacks {
             if (phrases == null)
               continue;
 
-
             LOG.debug("Applying {} target phrases over [{}, {}]",
                 phrases.size(), begin, begin + phrase_length);
             
@@ -185,7 +193,7 @@ public class Stacks {
              * phrases from that span. The hypotheses are wrapped in HypoState 
objects, which
              * augment the hypothesis score with a future cost.
              */
-            Candidate cand = new Candidate(hypotheses, phrases, span, 
future_delta);
+            Candidate cand = new Candidate(featureFunctions, sentence, 
hypotheses, phrases, span, future_delta, new int[] {0, 0});
             targetStack.addCandidate(cand);
           }
         }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java 
b/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
index 05a4b0a..ed1d577 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
@@ -59,6 +59,7 @@ public class TargetPhrases extends ArrayList<Rule> {
    * Score the rules and sort them. Scoring is necessary because rules are 
only scored if they
    * are used, in an effort to make reading in rules more efficient. This is 
starting to create
    * some trouble and should probably be reworked.
+   * 
    * @param features a {@link java.util.List} of {@link 
org.apache.joshua.decoder.ff.FeatureFunction}'s
    * @param weights a populated {@link 
org.apache.joshua.decoder.ff.FeatureVector}
    * @param num_options the number of options

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/tools/GrammarPacker.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/tools/GrammarPacker.java 
b/src/main/java/org/apache/joshua/tools/GrammarPacker.java
index b9208d2..b39b775 100644
--- a/src/main/java/org/apache/joshua/tools/GrammarPacker.java
+++ b/src/main/java/org/apache/joshua/tools/GrammarPacker.java
@@ -61,9 +61,13 @@ public class GrammarPacker {
    * table packing that packed phrases without the [X,1] on the source and 
target sides, which
    * then required special handling in the decoder to use for phrase-based 
decoding.
    * 
-   * 
+   * - 4 (August 2016). Phrase-based decoding rewritten to represent phrases 
without a builtin
+   * nonterminal. Instead, cost-less glue rules are used in phrase-based 
decoding. This eliminates
+   * the need for special handling of phrase grammars (except for having to 
add a LHS), and lets
+   * phrase grammars be used in both hierarchical and phrase-based decoding 
without conversion.
+   *
    */
-  public static final int VERSION = 3;
+  public static final int VERSION = 4;
   
   // Size limit for slice in bytes.
   private static int DATA_SIZE_LIMIT = (int) (Integer.MAX_VALUE * 0.8);

[01/27] incubator-joshua git commit: large commit converting phrase-based decoding to new rule format

Reply via email to