[05/10] incubator-joshua git commit: kellens: Use Guava's memoize for expensive calls, removed unneeded members fhieber: Important bugfix for obtaining word alignments from packedRules in multi-threading environment

mjpost Fri, 01 Apr 2016 15:57:08 -0700

kellens: Use Guava's memoize for expensive calls, removed unneeded members
fhieber: Important bugfix for obtaining word alignments from packedRules in 
multi-threading environment



Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/cabb52ca
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/cabb52ca
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/cabb52ca

Branch: refs/heads/master
Commit: cabb52cabd5a81088b21b9e01a4668ebb2a85ffa
Parents: 244e693
Author: Kellen Sunderland <[email protected]>
Authored: Wed Oct 14 17:13:18 2015 +0200
Committer: Kellen Sunderland <[email protected]>
Committed: Thu Mar 31 10:44:43 2016 +0200

----------------------------------------------------------------------
 resources/wa_grammar.packed/config              |   1 +
 resources/wa_grammar.packed/encoding            | Bin 0 -> 154 bytes
 .../wa_grammar.packed/slice_00000.alignments    | Bin 0 -> 45 bytes
 .../wa_grammar.packed/slice_00000.features      | Bin 0 -> 47 bytes
 resources/wa_grammar.packed/slice_00000.source  | Bin 0 -> 204 bytes
 resources/wa_grammar.packed/slice_00000.target  | Bin 0 -> 128 bytes
 .../wa_grammar.packed/slice_00000.target.lookup | Bin 0 -> 32 bytes
 resources/wa_grammar.packed/vocabulary          | Bin 0 -> 238 bytes
 src/joshua/decoder/ff/tm/PhraseRule.java        |  21 ++--
 src/joshua/decoder/ff/tm/Rule.java              |  42 ++++---
 .../decoder/ff/tm/packed/PackedGrammar.java     | 121 +++++++++++++------
 .../system/MultithreadedTranslationTests.java   | 113 +++++++++++++++++
 12 files changed, 240 insertions(+), 58 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/resources/wa_grammar.packed/config
----------------------------------------------------------------------
diff --git a/resources/wa_grammar.packed/config 
b/resources/wa_grammar.packed/config
new file mode 100644
index 0000000..ebd1bf1
--- /dev/null
+++ b/resources/wa_grammar.packed/config
@@ -0,0 +1 @@
+max-source-len = 6

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/resources/wa_grammar.packed/encoding
----------------------------------------------------------------------
diff --git a/resources/wa_grammar.packed/encoding 
b/resources/wa_grammar.packed/encoding
new file mode 100644
index 0000000..630f69f
Binary files /dev/null and b/resources/wa_grammar.packed/encoding differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/resources/wa_grammar.packed/slice_00000.alignments
----------------------------------------------------------------------
diff --git a/resources/wa_grammar.packed/slice_00000.alignments 
b/resources/wa_grammar.packed/slice_00000.alignments
new file mode 100644
index 0000000..f1425eb
Binary files /dev/null and b/resources/wa_grammar.packed/slice_00000.alignments 
differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/resources/wa_grammar.packed/slice_00000.features
----------------------------------------------------------------------
diff --git a/resources/wa_grammar.packed/slice_00000.features 
b/resources/wa_grammar.packed/slice_00000.features
new file mode 100644
index 0000000..5a4c774
Binary files /dev/null and b/resources/wa_grammar.packed/slice_00000.features 
differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/resources/wa_grammar.packed/slice_00000.source
----------------------------------------------------------------------
diff --git a/resources/wa_grammar.packed/slice_00000.source 
b/resources/wa_grammar.packed/slice_00000.source
new file mode 100644
index 0000000..4607b89
Binary files /dev/null and b/resources/wa_grammar.packed/slice_00000.source 
differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/resources/wa_grammar.packed/slice_00000.target
----------------------------------------------------------------------
diff --git a/resources/wa_grammar.packed/slice_00000.target 
b/resources/wa_grammar.packed/slice_00000.target
new file mode 100644
index 0000000..fe11a38
Binary files /dev/null and b/resources/wa_grammar.packed/slice_00000.target 
differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/resources/wa_grammar.packed/slice_00000.target.lookup
----------------------------------------------------------------------
diff --git a/resources/wa_grammar.packed/slice_00000.target.lookup 
b/resources/wa_grammar.packed/slice_00000.target.lookup
new file mode 100644
index 0000000..7d82179
Binary files /dev/null and 
b/resources/wa_grammar.packed/slice_00000.target.lookup differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/resources/wa_grammar.packed/vocabulary
----------------------------------------------------------------------
diff --git a/resources/wa_grammar.packed/vocabulary 
b/resources/wa_grammar.packed/vocabulary
new file mode 100644
index 0000000..637651e
Binary files /dev/null and b/resources/wa_grammar.packed/vocabulary differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/src/joshua/decoder/ff/tm/PhraseRule.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/PhraseRule.java 
b/src/joshua/decoder/ff/tm/PhraseRule.java
index e42a7ee..c178b31 100644
--- a/src/joshua/decoder/ff/tm/PhraseRule.java
+++ b/src/joshua/decoder/ff/tm/PhraseRule.java
@@ -1,5 +1,8 @@
 package joshua.decoder.ff.tm;
 
+import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
+
 /***
  * A class for reading in rules from a Moses phrase table. Most of the 
conversion work is done
  * in {@link joshua.decoder.ff.tm.format.PhraseFormatReader}. This includes 
prepending every
@@ -19,11 +22,20 @@ package joshua.decoder.ff.tm;
 public class PhraseRule extends Rule {
 
   private String mosesFeatureString = null;
+  private Supplier<byte[]> alignmentSupplier;
   
   public PhraseRule(int lhs, int[] french, int[] english, String 
sparse_features, int arity,
       String alignment) {
     super(lhs, french, english, null, arity, alignment);
     mosesFeatureString = sparse_features;
+    this.alignmentSupplier = Suppliers.memoize(() ->{
+        String[] tokens = getAlignmentString().split("[-\\s]+");
+        byte[] alignmentArray = new byte[tokens.length + 2];
+        alignmentArray[0] = alignmentArray[1] = 0;
+        for (int i = 0; i < tokens.length; i++)
+            alignmentArray[i + 2] = (byte) (Short.parseShort(tokens[i]) + 1);
+        return alignmentArray;
+    });
   }
 
   /** 
@@ -49,13 +61,6 @@ public class PhraseRule extends Rule {
    */
   @Override
   public byte[] getAlignment() {
-    if (alignment == null) {
-      String[] tokens = getAlignmentString().split("[-\\s]+");
-      alignment = new byte[tokens.length + 2];
-      alignment[0] = alignment[1] = 0;
-      for (int i = 0; i < tokens.length; i++)
-        alignment[i + 2] = (byte) (Short.parseShort(tokens[i]) + 1);
-    }
-    return alignment;
+    return this.alignmentSupplier.get();
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/src/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/Rule.java 
b/src/joshua/decoder/ff/tm/Rule.java
index 90a44d5..3d715ea 100644
--- a/src/joshua/decoder/ff/tm/Rule.java
+++ b/src/joshua/decoder/ff/tm/Rule.java
@@ -8,6 +8,9 @@ import java.util.List;
 import java.util.Map;
 import java.util.regex.Pattern;
 
+import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
+
 import joshua.corpus.Vocabulary;
 import joshua.decoder.Decoder;
 import joshua.decoder.ff.FeatureFunction;
@@ -43,6 +46,8 @@ public class Rule implements Comparator<Rule>, 
Comparable<Rule> {
   protected FeatureVector features = null;
   protected String sparseFeatureString;
 
+  private final Supplier<byte[]> alignmentSupplier;
+
   /*
    * a feature function will be fired for this rule only if the owner of the 
rule matches the owner
    * of the feature function
@@ -63,7 +68,6 @@ public class Rule implements Comparator<Rule>, 
Comparable<Rule> {
 
   // The alignment string, e.g., 0-0 0-1 1-1 2-1
   private String alignmentString;
-  protected byte[] alignment = null;
 
   /**
    * Constructs a new rule using the provided parameters. The owner and rule 
id for this rule are
@@ -86,16 +90,12 @@ public class Rule implements Comparator<Rule>, 
Comparable<Rule> {
     this.arity = arity;
     this.owner = owner;
     this.english = targetRhs;
+    alignmentSupplier = initializeAlignmentSupplier();
   }
 
   // Sparse feature version
   public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String 
sparseFeatures, int arity) {
-    this.lhs = lhs;
-    this.pFrench = sourceRhs;
-    this.sparseFeatureString = sparseFeatures;
-    this.arity = arity;
-    this.owner = -1;
-    this.english = targetRhs;
+    this(lhs, sourceRhs, targetRhs, sparseFeatures, arity, -1);
   }
 
   public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String 
sparseFeatures, int arity, String alignment) {
@@ -105,6 +105,22 @@ public class Rule implements Comparator<Rule>, 
Comparable<Rule> {
   
   public Rule() {
     this.lhs = -1;
+    alignmentSupplier = initializeAlignmentSupplier();
+  }
+
+  private Supplier<byte[]> initializeAlignmentSupplier(){
+    Supplier<byte[]> result = Suppliers.memoize(() ->{
+      byte[] alignment = null;
+      String alignmentString = getAlignmentString();
+      if (alignmentString != null) {
+        String[] tokens = alignmentString.split("[-\\s]+");
+        alignment = new byte[tokens.length];
+        for (int i = 0; i < tokens.length; i++)
+          alignment[i] = (byte) Short.parseShort(tokens[i]);
+      }
+      return alignment;
+    });
+    return result;
   }
 
   // ===============================================================
@@ -351,23 +367,17 @@ public class Rule implements Comparator<Rule>, 
Comparable<Rule> {
   public String getFeatureString() {
     return sparseFeatureString;
   }
-  
+
   /**
    * Returns an alignment as a sequence of integers. The integers at positions 
i and i+1 are paired,
    * with position i indexing the source and i+1 the target.
    */
   public byte[] getAlignment() {
-    if (alignment == null && getAlignmentString() != null) {
-      String[] tokens = getAlignmentString().split("[-\\s]+");
-      alignment = new byte[tokens.length];
-      for (int i = 0; i < tokens.length; i++)
-        alignment[i] = (byte) Short.parseShort(tokens[i]);
-    }
-    return alignment;
+    return this.alignmentSupplier.get();
   }
   
   public String getAlignmentString() {
-    return alignmentString;
+    return this.alignmentString;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java 
b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
index dc72a4b..2251f5a 100644
--- a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -79,6 +79,8 @@ import joshua.util.encoding.EncoderConfiguration;
 import joshua.util.encoding.FloatEncoder;
 import joshua.util.io.LineReader;
 
+import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
 import com.google.common.cache.Cache;
 import com.google.common.cache.CacheBuilder;
 
@@ -790,32 +792,63 @@ public class PackedGrammar extends AbstractGrammar {
        *
        */
       public final class PackedPhrasePair extends PackedRule {
+
+        private final Supplier<int[]> englishSupplier;
+        private final Supplier<byte[]> alignmentSupplier;
+
         public PackedPhrasePair(int address) {
           super(address);
+          englishSupplier = initializeEnglishSupplier();
+          alignmentSupplier = initializeAlignmentSupplier();
         }
 
         @Override
         public int getArity() {
           return PackedTrie.this.getArity() + 1;
         }
-        
+
         /**
-         * Take the English phrase of the underlying rule and prepend an [X].
-         * 
-         * @return
+         * Initialize a number of suppliers which get evaluated when their 
respective getters
+         * are called.
+         * Inner lambda functions are guaranteed to only be called once, 
because of this underlying
+         * structures are accessed in a threadsafe way.
+         * Guava's implementation makes sure only one read of a volatile 
variable occurs per get.
+         * This means this implementation should be as thread-safe and 
performant as possible.
          */
-        @Override
-        public int[] getEnglish() {
-          if (tgt == null) {
+
+        private Supplier<int[]> initializeEnglishSupplier(){
+          Supplier<int[]> result = Suppliers.memoize(() ->{
             int[] phrase = getTarget(source[address + 1]);
-            tgt = new int[phrase.length + 1];
+            int[] tgt = new int[phrase.length + 1];
             tgt[0] = -1;
             for (int i = 0; i < phrase.length; i++)
               tgt[i+1] = phrase[i];
-          }
-          return tgt;
+            return tgt;
+          });
+          return result;
         }
 
+        private Supplier<byte[]> initializeAlignmentSupplier(){
+          Supplier<byte[]> result = Suppliers.memoize(() ->{
+            byte[] raw_alignment = getAlignmentArray(source[address + 2]);
+            byte[] points = new byte[raw_alignment.length + 2];
+            points[0] = points[1] = 0;
+            for (int i = 0; i < raw_alignment.length; i++)
+              points[i + 2] = (byte) (raw_alignment[i] + 1);
+            return points;
+          });
+          return result;
+        }
+
+        /**
+         * Take the English phrase of the underlying rule and prepend an [X].
+         * 
+         * @return
+         */
+        @Override
+        public int[] getEnglish() {
+          return this.englishSupplier.get();
+        }
         
         /**
          * Take the French phrase of the underlying rule and prepend an [X].
@@ -838,27 +871,51 @@ public class PackedGrammar extends AbstractGrammar {
          */
         @Override
         public byte[] getAlignment() {
-          // alignments is the underlying raw alignment data
-          if (alignments != null) {
-            byte[] a = getAlignmentArray(source[address + 2]);
-            byte[] points = new byte[a.length + 2];
-            points[0] = points[1] = 0;
-            for (int i = 0; i < a.length; i++)
-              points[i + 2] = (byte) (a[i] + 1);
-            return points;
+          // if no alignments in grammar do not fail
+          if (alignments == null) {
+            return null;
           }
-          return null;
+
+          return this.alignmentSupplier.get();
         }
       }
 
       public class PackedRule extends Rule {
         protected final int address;
-
-        protected int[] tgt = null;
-        private FeatureVector features = null;
+        private final Supplier<int[]> englishSupplier;
+        private final Supplier<FeatureVector> featureVectorSupplier;
+        private final Supplier<byte[]> alignmentsSupplier;
 
         public PackedRule(int address) {
           this.address = address;
+          this.englishSupplier = intializeEnglishSupplier();
+          this.featureVectorSupplier = initializeFeatureVectorSupplier();
+          this.alignmentsSupplier = initializeAlignmentsSupplier();
+        }
+
+        private Supplier<int[]> intializeEnglishSupplier(){
+          Supplier<int[]> result = Suppliers.memoize(() ->{
+            return getTarget(source[address + 1]);
+          });
+          return result;
+        }
+
+        private Supplier<FeatureVector> initializeFeatureVectorSupplier(){
+          Supplier<FeatureVector> result = Suppliers.memoize(() ->{
+            return new FeatureVector(getFeatures(source[address + 2]), "tm_" + 
Vocabulary.word(owner) + "_");
+          });
+          return result;
+        }
+
+        private Supplier<byte[]> initializeAlignmentsSupplier(){
+          Supplier<byte[]> result = Suppliers.memoize(()->{
+            // if no alignments in grammar do not fail
+            if (alignments == null){
+              return null;
+            }
+            return getAlignmentArray(source[address + 2]);
+          });
+          return result;
         }
 
         @Override
@@ -894,10 +951,7 @@ public class PackedGrammar extends AbstractGrammar {
 
         @Override
         public int[] getEnglish() {
-          if (tgt == null) {
-            tgt = getTarget(source[address + 1]);
-          }
-          return tgt;
+          return this.englishSupplier.get();
         }
 
         @Override
@@ -911,18 +965,17 @@ public class PackedGrammar extends AbstractGrammar {
 
         @Override
         public FeatureVector getFeatureVector() {
-          if (features == null) {
-            features = new FeatureVector(getFeatures(source[address + 2]), 
"tm_" + Vocabulary.word(owner) + "_");
-          }
-
-          return features;
+          return this.featureVectorSupplier.get();
         }
         
         @Override
         public byte[] getAlignment() {
-          if (alignments != null)
-            return getAlignmentArray(source[address + 2]);
-          return null;
+          return this.alignmentsSupplier.get();
+        }
+        
+        @Override
+        public String getAlignmentString() {
+            throw new RuntimeException("AlignmentString not implemented for 
PackedRule!");
         }
 
         @Override

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/tst/joshua/system/MultithreadedTranslationTests.java
----------------------------------------------------------------------
diff --git a/tst/joshua/system/MultithreadedTranslationTests.java 
b/tst/joshua/system/MultithreadedTranslationTests.java
new file mode 100644
index 0000000..4ff549c
--- /dev/null
+++ b/tst/joshua/system/MultithreadedTranslationTests.java
@@ -0,0 +1,113 @@
+package joshua.system;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+
+import joshua.corpus.Vocabulary;
+import joshua.decoder.Decoder;
+import joshua.decoder.JoshuaConfiguration;
+import joshua.decoder.Translation;
+import joshua.decoder.Translations;
+import joshua.decoder.io.TranslationRequest;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Integration test for multithreaded Joshua decoder tests. Grammar used is a
+ * toy packed grammar.
+ *
+ * @author kellens
+ */
+public class MultithreadedTranslationTests {
+
+  private JoshuaConfiguration joshuaConfig = null;
+  private Decoder decoder = null;
+  private static final String INPUT = "A K B1 U Z1 Z2 B2 C";
+
+  @Before
+  public void setUp() throws Exception {
+    Vocabulary.clear();
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.search_algorithm = "cky";
+    joshuaConfig.mark_oovs = false;
+    joshuaConfig.pop_limit = 100;
+    joshuaConfig.use_unique_nbest = false;
+    joshuaConfig.include_align_index = false;
+    joshuaConfig.topN = 0;
+    joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path 
resources/wa_grammar.packed");
+    joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path 
resources/grammar.glue");
+    joshuaConfig.goal_symbol = "[GOAL]";
+    joshuaConfig.default_non_terminal = "[X]";
+    joshuaConfig.features.add("feature_function = OOVPenalty");
+    joshuaConfig.weights.add("tm_pt_0 1");
+    joshuaConfig.weights.add("tm_pt_1 1");
+    joshuaConfig.weights.add("tm_pt_2 1");
+    joshuaConfig.weights.add("tm_pt_3 1");
+    joshuaConfig.weights.add("tm_pt_4 1");
+    joshuaConfig.weights.add("tm_pt_5 1");
+    joshuaConfig.weights.add("tm_glue_0 1");
+    joshuaConfig.weights.add("OOVPenalty 2");
+    joshuaConfig.num_parallel_decoders = 500; // This will enable 500 parallel
+                                              // decoders to run at once.
+                                              // Useful to help flush out
+                                              // concurrency errors in
+                                              // underlying
+                                              // data-structures.
+    this.decoder = new Decoder(joshuaConfig, ""); // Second argument
+                                                  // (configFile)
+                                                  // is not even used by the
+                                                  // constructor/initialize.
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    Vocabulary.clear();
+    this.decoder.cleanUp();
+    this.decoder = null;
+  }
+
+
+
+  // This test was created specifically to reproduce a multithreaded issue
+  // related to mapped byte array access in the PackedGrammer getAlignmentArray
+  // function.
+
+  // We'll test the decoding engine using N = 10,000 identical inputs. This
+  // should be sufficient to induce concurrent data access for many shared
+  // data structures.
+
+  @Test
+  public void 
givenPackedGrammar_whenNTranslationsCalledConcurrently_thenReturnNResults() {
+    // GIVEN
+
+    int inputLines = 10000;
+    joshuaConfig.construct_structured_output = true; // Enabled alignments.
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < inputLines; i++) {
+      sb.append(INPUT + "\n");
+    }
+
+    // Append a large string together to simulate N requests to the decoding
+    // engine.
+    TranslationRequest req = new TranslationRequest(new 
ByteArrayInputStream(sb.toString()
+        .getBytes(Charset.forName("UTF-8"))), joshuaConfig);
+
+    // WHEN
+    // Translate all spans in parallel.
+    Translations translations = this.decoder.decodeAll(req);
+    ArrayList<Translation> translationResults = new ArrayList<Translation>();
+
+    Translation t;
+    while ((t = translations.next()) != null) {
+      translationResults.add(t);
+    }
+
+    // THEN
+    assertTrue(translationResults.size() == inputLines);
+  }
+}

[05/10] incubator-joshua git commit: kellens: Use Guava's memoize for expensive calls, removed unneeded members fhieber: Important bugfix for obtaining word alignments from packedRules in multi-threading environment

Reply via email to