Removed slow and redundant feature string parsing when constructing rules from packed grammar (at sort time and at actual construction of feature vector).
Gets rid of parsing feature strings over and over again, which turned out to be slow in profiling. The solution is not perfect, but we get a nice speedup of roughly a factor of 5: If JoshuaConfiguration.amortize is set to false, grammars are forced to be sorted at decoder startup. Here are the stats: New code: Took 561.64 seconds to load pipeline. Old code: Took 2688.60 seconds to load pipeline. Basically, we are significantly reducing the time for sorting the rules by getting rid of an intermediate string representation of the features in a rule. Since String parsing of floats is now removed, there was some float precision change in the regression test, for which I changed the gold output. This is fine. Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/5665f02f Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/5665f02f Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/5665f02f Branch: refs/heads/master Commit: 5665f02ff0385db4f77bf4493db2d96bc63355d8 Parents: 9448ba5 Author: Felix Hieber <[email protected]> Authored: Tue Dec 1 13:34:47 2015 +0100 Committer: Kellen Sunderland <[email protected]> Committed: Thu Mar 31 10:44:43 2016 +0200 ---------------------------------------------------------------------- src/joshua/decoder/Decoder.java | 8 +- src/joshua/decoder/JoshuaConfiguration.java | 2 +- src/joshua/decoder/ff/FeatureVector.java | 5 +- src/joshua/decoder/ff/tm/PhraseRule.java | 48 +++++---- src/joshua/decoder/ff/tm/Rule.java | 100 +++++++++++++------ .../decoder/ff/tm/packed/PackedGrammar.java | 63 ++++++------ test/bn-en/packed/output.scores.gold | 24 ++--- .../system/MultithreadedTranslationTests.java | 12 +++ 8 files changed, 156 insertions(+), 106 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5665f02f/src/joshua/decoder/Decoder.java 
---------------------------------------------------------------------- diff --git a/src/joshua/decoder/Decoder.java b/src/joshua/decoder/Decoder.java index 1a353ca..8e74d42 100644 --- a/src/joshua/decoder/Decoder.java +++ b/src/joshua/decoder/Decoder.java @@ -1,5 +1,7 @@ package joshua.decoder; +import static joshua.decoder.ff.FeatureVector.DENSE_FEATURE_NAMES; + import java.io.BufferedWriter; import java.io.File; import java.io.IOException; @@ -627,7 +629,7 @@ public class Decoder { } Decoder.LOG(1, String.format("Read %d weights (%d of them dense)", weights.size(), - FeatureVector.DENSE_FEATURE_NAMES.size())); + DENSE_FEATURE_NAMES.size())); // Do this before loading the grammars and the LM. this.featureFunctions = new ArrayList<FeatureFunction>(); @@ -644,8 +646,8 @@ public class Decoder { // This is mostly for compatibility with the Moses tuning script if (joshuaConfiguration.show_weights_and_quit) { - for (int i = 0; i < FeatureVector.DENSE_FEATURE_NAMES.size(); i++) { - String name = FeatureVector.DENSE_FEATURE_NAMES.get(i); + for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) { + String name = DENSE_FEATURE_NAMES.get(i); if (joshuaConfiguration.moses) System.out.println(String.format("%s= %.5f", mosesize(name), weights.getDense(i))); else http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5665f02f/src/joshua/decoder/JoshuaConfiguration.java ---------------------------------------------------------------------- diff --git a/src/joshua/decoder/JoshuaConfiguration.java b/src/joshua/decoder/JoshuaConfiguration.java index 2eb24c4..b7be145 100644 --- a/src/joshua/decoder/JoshuaConfiguration.java +++ b/src/joshua/decoder/JoshuaConfiguration.java @@ -32,7 +32,7 @@ public class JoshuaConfiguration { // whether to construct a StructuredTranslation object for each request instead of // printing to stdout. Used when the Decoder is used from Java directly. 
- public Boolean construct_structured_output = false; + public Boolean use_structured_output = false; // List of grammar files to read public ArrayList<String> tms = new ArrayList<String>(); http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5665f02f/src/joshua/decoder/ff/FeatureVector.java ---------------------------------------------------------------------- diff --git a/src/joshua/decoder/ff/FeatureVector.java b/src/joshua/decoder/ff/FeatureVector.java index a5526e4..50b2a3c 100644 --- a/src/joshua/decoder/ff/FeatureVector.java +++ b/src/joshua/decoder/ff/FeatureVector.java @@ -78,10 +78,7 @@ public class FeatureVector { * IMPORTANT: Note that, for historical reasons, the sign is reversed on all *dense* scores. * This is the source of *no end* of confusion and should be done away with. */ - sparseFeatures = new HashMap<String, Float>(); - denseFeatures = new ArrayList<Float>(DENSE_FEATURE_NAMES.size()); - for (int i = 0; i < denseFeatures.size(); i++) - denseFeatures.set(i, 0.0f); + this(); int denseFeatureIndex = 0; http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5665f02f/src/joshua/decoder/ff/tm/PhraseRule.java ---------------------------------------------------------------------- diff --git a/src/joshua/decoder/ff/tm/PhraseRule.java b/src/joshua/decoder/ff/tm/PhraseRule.java index c178b31..8530aa0 100644 --- a/src/joshua/decoder/ff/tm/PhraseRule.java +++ b/src/joshua/decoder/ff/tm/PhraseRule.java @@ -21,44 +21,54 @@ import com.google.common.base.Suppliers; */ public class PhraseRule extends Rule { - private String mosesFeatureString = null; - private Supplier<byte[]> alignmentSupplier; + + private final String mosesFeatureString; + private final Supplier<byte[]> alignmentSupplier; + private final Supplier<String> sparseFeaturesStringSupplier; public PhraseRule(int lhs, int[] french, int[] english, String sparse_features, int arity, String alignment) { super(lhs, french, english, null, arity, alignment); - mosesFeatureString = 
sparse_features; - this.alignmentSupplier = Suppliers.memoize(() ->{ - String[] tokens = getAlignmentString().split("[-\\s]+"); - byte[] alignmentArray = new byte[tokens.length + 2]; - alignmentArray[0] = alignmentArray[1] = 0; - for (int i = 0; i < tokens.length; i++) - alignmentArray[i + 2] = (byte) (Short.parseShort(tokens[i]) + 1); - return alignmentArray; - }); + this.mosesFeatureString = sparse_features; + this.alignmentSupplier = initializeAlignmentSupplier(); + this.sparseFeaturesStringSupplier = initializeSparseFeaturesStringSupplier(); } - + /** * Moses features are probabilities; we need to convert them here by taking the negative log prob. * We do this only when the rule is used to amortize. */ - @Override - public String getFeatureString() { - if (sparseFeatureString == null) { + private Supplier<String> initializeSparseFeaturesStringSupplier() { + return Suppliers.memoize(() ->{ StringBuffer values = new StringBuffer(); for (String value: mosesFeatureString.split(" ")) { float f = Float.parseFloat(value); values.append(String.format("%f ", f <= 0.0 ? -100 : -Math.log(f))); } - sparseFeatureString = values.toString().trim(); - } - return sparseFeatureString; + return values.toString().trim(); + }); } - + /** * This is the exact same as the parent implementation, but we need to add 1 to each alignment * point to account for the nonterminal [X] that was prepended to each rule. 
*/ + private Supplier<byte[]> initializeAlignmentSupplier(){ + return Suppliers.memoize(() ->{ + String[] tokens = getAlignmentString().split("[-\\s]+"); + byte[] alignmentArray = new byte[tokens.length + 2]; + alignmentArray[0] = alignmentArray[1] = 0; + for (int i = 0; i < tokens.length; i++) + alignmentArray[i + 2] = (byte) (Short.parseShort(tokens[i]) + 1); + return alignmentArray; + }); + } + + @Override + public String getFeatureString() { + return this.sparseFeaturesStringSupplier.get(); + } + @Override public byte[] getAlignment() { return this.alignmentSupplier.get(); http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5665f02f/src/joshua/decoder/ff/tm/Rule.java ---------------------------------------------------------------------- diff --git a/src/joshua/decoder/ff/tm/Rule.java b/src/joshua/decoder/ff/tm/Rule.java index 3d715ea..abef4b7 100644 --- a/src/joshua/decoder/ff/tm/Rule.java +++ b/src/joshua/decoder/ff/tm/Rule.java @@ -43,10 +43,9 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> { protected int arity; // And a string containing the sparse ones - protected FeatureVector features = null; - protected String sparseFeatureString; - - private final Supplier<byte[]> alignmentSupplier; + //protected final String sparseFeatureString; + protected final Supplier<String> sparseFeatureStringSupplier; + private final Supplier<FeatureVector> featuresSupplier; /* * a feature function will be fired for this rule only if the owner of the rule matches the owner @@ -68,13 +67,16 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> { // The alignment string, e.g., 0-0 0-1 1-1 2-1 private String alignmentString; + private final Supplier<byte[]> alignmentSupplier; /** - * Constructs a new rule using the provided parameters. The owner and rule id for this rule are + * Constructs a new rule using the provided parameters. Rule id for this rule is * undefined. 
Note that some of the sparse features may be unlabeled, but they cannot be mapped to * their default names ("tm_OWNER_INDEX") until later, when we know the owner of the rule. This is * not known until the rule is actually added to a grammar in Grammar::addRule(). * + * Constructor used by other constructors below; + * * @param lhs Left-hand side of the rule. * @param sourceRhs Source language right-hand side of the rule. * @param targetRhs Target language right-hand side of the rule. @@ -82,34 +84,63 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> { * @param arity Number of nonterminals in the source language right-hand side. * @param owner */ - public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity, - int owner) { + public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity, int owner) { + this.lhs = lhs; + this.pFrench = sourceRhs; + this.arity = arity; + this.owner = owner; + this.english = targetRhs; + this.sparseFeatureStringSupplier = Suppliers.memoize(() -> { return sparseFeatures; }); + this.featuresSupplier = initializeFeatureSupplierFromString(); + this.alignmentSupplier = initializeAlignmentSupplier(); + } + + /** + * Constructor used by PackedGrammar's sortRules(). 
+ */ + public Rule(int lhs, int[] sourceRhs, int[] targetRhs, FeatureVector features, int arity, int owner) { this.lhs = lhs; this.pFrench = sourceRhs; - this.sparseFeatureString = sparseFeatures; this.arity = arity; this.owner = owner; this.english = targetRhs; - alignmentSupplier = initializeAlignmentSupplier(); + this.featuresSupplier = Suppliers.memoize(() -> { return features; }); + this.sparseFeatureStringSupplier = initializeSparseFeaturesStringSupplier(); + this.alignmentSupplier = initializeAlignmentSupplier(); } - // Sparse feature version + /** + * Constructor used for SamtFormatReader and GrammarBuilderWalkerFunction's getRuleWithSpans() + * Owner set to -1 + */ public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity) { this(lhs, sourceRhs, targetRhs, sparseFeatures, arity, -1); } + /** + * Constructor used for addOOVRules(), HieroFormatReader and PhraseRule. + */ public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity, String alignment) { this(lhs, sourceRhs, targetRhs, sparseFeatures, arity); this.alignmentString = alignment; } + /** + * Constructor (implicitly) used by PackedRule + */ public Rule() { this.lhs = -1; - alignmentSupplier = initializeAlignmentSupplier(); + this.sparseFeatureStringSupplier = initializeSparseFeaturesStringSupplier(); + this.featuresSupplier = initializeFeatureSupplierFromString(); + this.alignmentSupplier = initializeAlignmentSupplier(); } + // ========================================================================== + // Lazy loading Suppliers for alignments, feature vector, and feature strings + // ========================================================================== + private Supplier<byte[]> initializeAlignmentSupplier(){ - Supplier<byte[]> result = Suppliers.memoize(() ->{ + return Suppliers.memoize(() ->{ byte[] alignment = null; String alignmentString = getAlignmentString(); if (alignmentString != null) { @@ -120,7 +151,29 @@ public class Rule 
implements Comparator<Rule>, Comparable<Rule> { } return alignment; }); - return result; + } + + /** + * If Rule was constructed with sparseFeatures String, we lazily populate the + * FeatureSupplier. + */ + private Supplier<FeatureVector> initializeFeatureSupplierFromString(){ + return Suppliers.memoize(() ->{ + if (owner != -1) { + return new FeatureVector(getFeatureString(), "tm_" + Vocabulary.word(owner) + "_"); + } else { + return new FeatureVector(); + } + }); + } + + /** + * If Rule was constructed with a FeatureVector, we lazily populate the sparseFeaturesStringSupplier. + */ + private Supplier<String> initializeSparseFeaturesStringSupplier() { + return Suppliers.memoize(() -> { + return getFeatureVector().toString(); + }); } // =============================================================== @@ -214,18 +267,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> { * specified as labeled features of the form "tm_OWNER_INDEX", but the former format is preferred. */ public FeatureVector getFeatureVector() { - /* - * Now read the feature scores, which can be any number of dense features and sparse features. - * Any unlabeled feature becomes a dense feature. By convention, dense features should precede - * sparse (labeled) ones, but it's not required. - */ - - if (features == null) - features = (owner != -1) - ? 
new FeatureVector(getFeatureString(), "tm_" + Vocabulary.word(owner) + "_") - : new FeatureVector(); - - return features; + return featuresSupplier.get(); } /** @@ -263,21 +305,15 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> { public void setPrecomputableCost(float[] phrase_weights, FeatureVector weights) { float cost = 0.0f; - -// System.err.println(String.format("// Setting precomputable cost for for %s/%s", getEnglishWords(), getFrenchWords())); FeatureVector features = getFeatureVector(); for (int i = 0; i < features.getDenseFeatures().size() && i < phrase_weights.length; i++) { -// System.err.println(String.format(" %d -> %.5f", i, features.get(i))); cost += phrase_weights[i] * features.getDense(i); } for (String key: features.getSparseFeatures().keySet()) { -// System.err.println(String.format(" %s -> %.5f", key, features.get(key))); cost += weights.getSparse(key) * features.getSparse(key); } -// System.err.println(String.format("-> %f", cost)); - this.precomputableCost = cost; } @@ -365,7 +401,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> { } public String getFeatureString() { - return sparseFeatureString; + return sparseFeatureStringSupplier.get(); } /** http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5665f02f/src/joshua/decoder/ff/tm/packed/PackedGrammar.java ---------------------------------------------------------------------- diff --git a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java index 792a7ad..a4c47d2 100644 --- a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java +++ b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java @@ -468,46 +468,39 @@ public class PackedGrammar extends AbstractGrammar { } /** - * NEW VERSION - * - * Returns a string version of the features associated with a rule (represented as a block ID). + * Returns the FeatureVector associated with a rule (represented as a block ID). 
* These features are in the form "feature1=value feature2=value...". By default, unlabeled - * features are named using the pattern - * - * tm_OWNER_INDEX - * - * where OWNER is the grammar's owner (Vocabulary.word(this.owner)) and INDEX is a 0-based index - * of the feature found in the grammar. - * + * features are named using the pattern. * @param block_id - * @return + * @return feature vector */ - private final String getFeatures(int block_id) { - int feature_position = featureLookup[block_id]; - - // The number of non-zero features stored with the rule. - int num_features = encoding.readId(features, feature_position); - - feature_position += EncoderConfiguration.ID_SIZE; - StringBuilder sb = new StringBuilder(); - - for (int i = 0; i < num_features; i++) { - int feature_id = encoding.readId(features, feature_position); - FloatEncoder encoder = encoding.encoder(feature_id); - - String feature_name = Vocabulary.word(encoding.outerId(feature_id)); + private final FeatureVector loadFeatureVector(int block_id) { + int featurePosition = featureLookup[block_id]; + final int numFeatures = encoding.readId(features, featurePosition); + + featurePosition += EncoderConfiguration.ID_SIZE; + final FeatureVector featureVector = new FeatureVector(); + FloatEncoder encoder; + String featureName; + + for (int i = 0; i < numFeatures; i++) { + final int innerId = encoding.readId(features, featurePosition); + final int outerId = encoding.outerId(innerId); + encoder = encoding.encoder(innerId); + // TODO (fhieber): why on earth are dense feature ids (ints) encoded in the vocabulary? 
+ featureName = Vocabulary.word(outerId); + final float value = encoder.read(features, featurePosition); try { - int index = Integer.parseInt(feature_name); - sb.append(String.format(" tm_%s_%d=%.5f", Vocabulary.word(owner), index, - -encoder.read(features, feature_position))); + int index = Integer.parseInt(featureName); + featureVector.increment(index, -value); } catch (NumberFormatException e) { - sb.append(String.format(" %s=%.5f", feature_name, encoder.read(features, feature_position))); + featureVector.increment(featureName, value); } - - feature_position += EncoderConfiguration.ID_SIZE + encoder.size(); + featurePosition += EncoderConfiguration.ID_SIZE + encoder.size(); } - return sb.toString().trim(); + + return featureVector; } /** @@ -697,7 +690,7 @@ public class PackedGrammar extends AbstractGrammar { block_id = source[rules[i]]; Rule rule = new Rule(source[rule_position + 3 * i], src, - getTarget(target_address), getFeatures(block_id), arity, owner); + getTarget(target_address), loadFeatureVector(block_id), arity, owner); estimated[block_id] = rule.estimateRuleCost(models); precomputable[block_id] = rule.getPrecomputableCost(); } @@ -923,8 +916,8 @@ public class PackedGrammar extends AbstractGrammar { private Supplier<FeatureVector> initializeFeatureVectorSupplier(){ Supplier<FeatureVector> result = Suppliers.memoize(() ->{ - return new FeatureVector(getFeatures(source[address + 2]), "tm_" + Vocabulary.word(owner) + "_"); - }); + return loadFeatureVector(source[address + 2]); + }); return result; } http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5665f02f/test/bn-en/packed/output.scores.gold ---------------------------------------------------------------------- diff --git a/test/bn-en/packed/output.scores.gold b/test/bn-en/packed/output.scores.gold index 6ef2c31..fd63d12 100644 --- a/test/bn-en/packed/output.scores.gold +++ b/test/bn-en/packed/output.scores.gold @@ -91,7 +91,7 @@ in 2004 মà§à¦¯à¦¾à¦¸à¦¾à¦à§à¦¸à§à¦à¦¸ 
ঠà¦à§à¦à¦°à¦¾à¦à§à¦¯à§à¦° বসà§à¦à¦¨ in the city in ডà§à¦®à§à¦à§à¦°à§à¦¯à¦¾à¦ as the national conference was he main speech -lrb- keynote speech -rrb- on the . ||| -675.013 in 2004 মà§à¦¯à¦¾à¦¸à¦¾à¦à§à¦¸à§à¦à¦¸ ঠà¦à§à¦à¦°à¦¾à¦à§à¦¯à§à¦° বসà§à¦à¦¨ in the city in ডà§à¦®à§à¦à§à¦°à§à¦¯à¦¾à¦ as the national conference was he main speech -lrb- keynote speech -rrb- to the . ||| -675.262 in 2004 মà§à¦¯à¦¾à¦¸à¦¾à¦à§à¦¸à§à¦à¦¸ ঠà¦à§à¦à¦°à¦¾à¦à§à¦¯à§à¦° বসà§à¦à¦¨ in the city in ডà§à¦®à§à¦à§à¦°à§à¦¯à¦¾à¦ the national conference was he main speech -lrb- keynote speech -rrb- on the . ||| -675.282 - in 2004 মà§à¦¯à¦¾à¦¸à¦¾à¦à§à¦¸à§à¦à¦¸ ঠà¦à§à¦à¦°à¦¾à¦à§à¦¯à§à¦° বসà§à¦à¦¨ in the city in ডà§à¦®à§à¦à§à¦°à§à¦¯à¦¾à¦ the national conference was he main speech -lrb- keynote speech -rrb- to the . ||| -675.531 + in 2004 মà§à¦¯à¦¾à¦¸à¦¾à¦à§à¦¸à§à¦à¦¸ ঠà¦à§à¦à¦°à¦¾à¦à§à¦¯à§à¦° বসà§à¦à¦¨ in the city in ডà§à¦®à§à¦à§à¦°à§à¦¯à¦¾à¦ the national conference was he main speech -lrb- keynote speech -rrb- to the . ||| -675.530 in 2004 মà§à¦¯à¦¾à¦¸à¦¾à¦à§à¦¸à§à¦à¦¸ ঠà¦à§à¦à¦°à¦¾à¦à§à¦¯à§à¦° বসà§à¦à¦¨ in the city in ডà§à¦®à§à¦à§à¦°à§à¦¯à¦¾à¦ as the national conference was he the speech -lrb- keynote speech -rrb- on the . ||| -675.766 in 2004 মà§à¦¯à¦¾à¦¸à¦¾à¦à§à¦¸à§à¦à¦¸ ঠà¦à§à¦à¦°à¦¾à¦à§à¦¯à§à¦° বসà§à¦à¦¨ in the city in ডà§à¦®à§à¦à§à¦°à§à¦¯à¦¾à¦ as the national conference was he main speech -lrb- keynote speech -rrb- , the . ||| -675.800 in 2004 মà§à¦¯à¦¾à¦¸à¦¾à¦à§à¦¸à§à¦à¦¸ ঠà¦à§à¦à¦°à¦¾à¦à§à¦¯à§à¦° বসà§à¦à¦¨ in the city in ডà§à¦®à§à¦à§à¦°à§à¦¯à¦¾à¦ party national conference was he main speech -lrb- keynote speech -rrb- on the . ||| -675.864 @@ -166,14 +166,14 @@ britain writers of written drama , novels , stories and recently scripts in à¦à¦¦à§à¦¤ . ||| -145.651 britain writers written drama , novels , stories and in the recent script in the à¦à¦¦à§à¦¤ . ||| -145.717 1919 , on may month it saogat magazine was published in the . 
||| -29.082 - 1919 on may month it saogat magazine was published in the . ||| -29.174 + 1919 on may month it saogat magazine was published in the . ||| -29.173 1919 , on may month it saogat magazine was published . ||| -29.196 1919 on may month it saogat magazine was published . ||| -29.287 1919 , on may month it is saogat magazine was published in the . ||| -29.459 1919 on may month it is saogat magazine was published in the . ||| -29.550 1919 , on may month it is saogat magazine was published . ||| -29.572 1919 on may month it is saogat magazine was published . ||| -29.663 - 1919 , on may month it saogat magazine was published in . ||| -29.880 + 1919 , on may month it saogat magazine was published in . ||| -29.879 1919 on may month it saogat magazine was published in . ||| -29.971 in 2005 , à¦à¦à¦®à§à¦¨à¦¸ tennis association tour à¦à¦¾à¦¯à¦¼à¦¾à¦°-থà§à¦°à¦¿ à¦à§à¦°à§à¦¨à¦¾à¦®à§à¦¨à§à¦ সানফিসà§à¦ open netaji indoor stadium was arranged . ||| -460.093 2005 , à¦à¦à¦®à§à¦¨à¦¸ tennis association tour à¦à¦¾à¦¯à¦¼à¦¾à¦°-থà§à¦°à¦¿ à¦à§à¦°à§à¦¨à¦¾à¦®à§à¦¨à§à¦ সানফিসà§à¦ open netaji indoor stadium was arranged . ||| -460.244 @@ -385,15 +385,15 @@ মিনিà¦à§à¦¸à§à¦° of were smells à¦à¦¾à¦¨à§à¦¨à¦¬à¦® , a famous operating system design পà§à¦°à¦¶à¦¿à¦à§à¦·à¦ . ||| -345.940 the à¦à¦¾à¦à¦®à§ of 's of à¦à¦¨à§à¦¡à¦¿à¦¯à¦¼à¦¾-তৠwritten in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| -1735.945 the à¦à¦¾à¦à¦®à§ of 's of à¦à¦¨à§à¦¡à¦¿à¦¯à¦¼à¦¾-তৠwritten in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it in other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| -1736.149 - the à¦à¦¾à¦à¦®à§ of 's of à¦à¦¨à§à¦¡à¦¿à¦¯à¦¼à¦¾-তৠwritten in that " it is absurd to compare it with any other indian cinema ... 
pather panchali is pure cinema " -lrb- " it other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| -1736.152 + the à¦à¦¾à¦à¦®à§ of 's of à¦à¦¨à§à¦¡à¦¿à¦¯à¦¼à¦¾-তৠwritten in that " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| -1736.151 the à¦à¦¾à¦à¦®à§ of 's of à¦à¦¨à§à¦¡à¦¿à¦¯à¦¼à¦¾-তৠwritten in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it is the by the indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| -1736.297 the à¦à¦¾à¦à¦®à§ of 's of à¦à¦¨à§à¦¡à¦¿à¦¯à¦¼à¦¾-তৠwritten in that " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it in other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| -1736.355 the à¦à¦¾à¦à¦®à§ of 's of à¦à¦¨à§à¦¡à¦¿à¦¯à¦¼à¦¾-তৠwritten in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema , " -lrb- " it other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| -1736.363 the à¦à¦¾à¦à¦®à§ of 's of à¦à¦¨à§à¦¡à¦¿à¦¯à¦¼à¦¾-তৠwritten in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by the indian films with compared to unreal ... pather panchali is pure film " -rrb- . ||| -1736.461 the à¦à¦¾à¦à¦®à§ of 's of à¦à¦¨à§à¦¡à¦¿à¦¯à¦¼à¦¾-তৠwritten in that " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it is the by the indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| -1736.503 - the à¦à¦¾à¦à¦®à§ of 's of à¦à¦¨à§à¦¡à¦¿à¦¯à¦¼à¦¾-তৠwritten in that , " it is absurd to compare it with any other indian cinema ... 
pather panchali is pure cinema " -lrb- " it other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| -1736.518 - the à¦à¦¾à¦à¦®à§ of 's of à¦à¦¨à§à¦¡à¦¿à¦¯à¦¼à¦¾-তৠwritten in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema ' -lrb- " it other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| -1736.519 - after this 1953 , in the month of may nazrul and পà§à¦°à¦®à§à¦²à¦¾ দà§à¦¬à§à¦à§ à¦à¦¿à¦à¦¿à§à¦¸à¦¾à¦° for london sent to . ||| -345.818 + the à¦à¦¾à¦à¦®à§ of 's of à¦à¦¨à§à¦¡à¦¿à¦¯à¦¼à¦¾-তৠwritten in that , " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| -1736.517 + the à¦à¦¾à¦à¦®à§ of 's of à¦à¦¨à§à¦¡à¦¿à¦¯à¦¼à¦¾-তৠwritten in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema ' -lrb- " it other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| -1736.518 + after this 1953 , in the month of may nazrul and পà§à¦°à¦®à§à¦²à¦¾ দà§à¦¬à§à¦à§ à¦à¦¿à¦à¦¿à§à¦¸à¦¾à¦° for london sent to . ||| -345.817 after this 1953 on may month nazrul and পà§à¦°à¦®à§à¦²à¦¾ দà§à¦¬à§à¦à§ à¦à¦¿à¦à¦¿à§à¦¸à¦¾à¦° for london sent to . ||| -345.874 after that , 1953 may month nazrul and পà§à¦°à¦®à§à¦²à¦¾ দà§à¦¬à§à¦à§ à¦à¦¿à¦à¦¿à§à¦¸à¦¾à¦° for london sent to . ||| -345.956 after that 1953 on may month nazrul and পà§à¦°à¦®à§à¦²à¦¾ দà§à¦¬à§à¦à§ à¦à¦¿à¦à¦¿à§à¦¸à¦¾à¦° for london sent to . ||| -346.040 @@ -409,7 +409,7 @@ the southern and the east there is বিসà§à¦¤à§à¦°à§à¦£ land , west and on the north there is রà§à¦à§à¦· mountain and mountain . ||| -251.017 the southern and the east there is বিসà§à¦¤à§à¦°à§à¦£ plain , west and in the north there are রà§à¦à§à¦· mountain and mountain . 
||| -251.127 the southern and the east there is বিসà§à¦¤à§à¦°à§à¦£ land , west and in the north there are রà§à¦à§à¦· mountain and mountain . ||| -251.145 - the south and the east there is বিসà§à¦¤à§à¦°à§à¦£ land , west and on the north there are রà§à¦à§à¦· mountain and mountain . ||| -251.256 + the south and the east there is বিসà§à¦¤à§à¦°à§à¦£ land , west and on the north there are রà§à¦à§à¦· mountain and mountain . ||| -251.255 the southern and the east there is বিসà§à¦¤à§à¦°à§à¦£ plain , west and in the north there is রà§à¦à§à¦· mountain and mountain . ||| -251.309 the southern and the east there is বিসà§à¦¤à§à¦°à§à¦£ plain , west and on the north are রà§à¦à§à¦· mountain and mountain . ||| -251.317 the southern and the east there is বিসà§à¦¤à§à¦°à§à¦£ land , west and in the north there is রà§à¦à§à¦· mountain and mountain . ||| -251.327 @@ -598,7 +598,7 @@ open source or open source -lrb- open source -rrb- the money is computer software the source code or main সাà¦à¦à§à¦¤à¦¿à¦ language free way বিতরণ to . ||| -471.716 open source or open source -lrb- open source -rrb- , the money is computer software of the source code or main সাà¦à¦à§à¦¤à¦¿à¦ language open way বিতরণ to . ||| -471.717 open source or open source -lrb- open source -rrb- , the money is computer software the source code or main সাà¦à¦à§à¦¤à¦¿à¦ language free way বিতরণ to . ||| -471.789 - open source or open source -lrb- open source -rrb- of the money is computer software of the source code or main সাà¦à¦à§à¦¤à¦¿à¦ language open way বিতরণ to . ||| -471.789 + open source or open source -lrb- open source -rrb- of the money is computer software of the source code or main সাà¦à¦à§à¦¤à¦¿à¦ language open way বিতরণ to . ||| -471.790 open source or open source -lrb- open source -rrb- the money is computer software of the source code or main সাà¦à¦à§à¦¤à¦¿à¦ language free way বিতরণ to . 
||| -471.794 open source or open source -lrb- open source -rrb- in the money is computer software the source code or main সাà¦à¦à§à¦¤à¦¿à¦ language open way বিতরণ to . ||| -471.850 bangladesh ঠনলাà¦à¦¨à§ dhaka ||| -109.639 @@ -635,11 +635,11 @@ he was the military forces to à¦à¦¨à¦«à¦¿à¦ declared was . ||| -123.198 he was military forces for à¦à¦¨à¦«à¦¿à¦ declared was . ||| -123.198 he was armed forces for à¦à¦¨à¦«à¦¿à¦ was declared . ||| -123.208 - he was the military forces for à¦à¦¨à¦«à¦¿à¦ declared in . ||| -123.628 + he was the military forces for à¦à¦¨à¦«à¦¿à¦ declared in . ||| -123.629 bhutto à¦à§à¦¯à¦¾à¦¸à§à¦®à§à¦¬à¦²à¦¿ বয়à¦à¦ to 2.5 with declared in the , yahya khan mujib was to form the government to à¦à¦¾à¦¨à¦¾à¦²à§ he that government by নà§à¦¬à§à¦¨ not . ||| -492.585 bhutto à¦à§à¦¯à¦¾à¦¸à§à¦®à§à¦¬à¦²à¦¿ বয়à¦à¦ to 2.5 with announced that the , yahya khan mujib was to form the government to à¦à¦¾à¦¨à¦¾à¦²à§ he that government by নà§à¦¬à§à¦¨ not . ||| -492.686 bhutto à¦à§à¦¯à¦¾à¦¸à§à¦®à§à¦¬à¦²à¦¿ বয়à¦à¦ to 2.5 to declared in the , yahya khan mujib was to form the government to à¦à¦¾à¦¨à¦¾à¦²à§ he that government by নà§à¦¬à§à¦¨ not . ||| -492.687 - bhutto à¦à§à¦¯à¦¾à¦¸à§à¦®à§à¦¬à¦²à¦¿ বয়à¦à¦ to 2.5 with declared in the , yahya khan mujib was to form the government on to à¦à¦¾à¦¨à¦¾à¦²à§ he that government by নà§à¦¬à§à¦¨ not . ||| -492.742 + bhutto à¦à§à¦¯à¦¾à¦¸à§à¦®à§à¦¬à¦²à¦¿ বয়à¦à¦ to 2.5 with declared in the , yahya khan mujib was to form the government on to à¦à¦¾à¦¨à¦¾à¦²à§ he that government by নà§à¦¬à§à¦¨ not . ||| -492.743 bhutto à¦à§à¦¯à¦¾à¦¸à§à¦®à§à¦¬à¦²à¦¿ বয়à¦à¦ to 2.5 with declared in the , yahya khan mujib was to form the government for à¦à¦¾à¦¨à¦¾à¦²à§ he that government by নà§à¦¬à§à¦¨ not . ||| -492.760 bhutto à¦à§à¦¯à¦¾à¦¸à§à¦®à§à¦¬à¦²à¦¿ বয়à¦à¦ to 2.5 with announced that that , yahya khan mujib was to form the government to à¦à¦¾à¦¨à¦¾à¦²à§ he that government by নà§à¦¬à§à¦¨ not . 
||| -492.771 bhutto à¦à§à¦¯à¦¾à¦¸à§à¦®à§à¦¬à¦²à¦¿ বয়à¦à¦ to 2.5 with announced that the , yahya khan mujib was to form the government on to à¦à¦¾à¦¨à¦¾à¦²à§ he that government by নà§à¦¬à§à¦¨ not . ||| -492.843 @@ -806,7 +806,7 @@ it is mainly শà§à¦à¦°à§à¦° in the middle of was which can শà§à¦à¦°à¦à§ à¦à¦¨à¦«à§à¦²à§à¦¯à¦¼à§à¦à§à¦à¦¾à¦¤à§ infected by british . ||| -349.610 it is mainly শà§à¦à¦°à§à¦° in between in was which can শà§à¦à¦°à¦à§ à¦à¦¨à¦«à§à¦²à§à¦¯à¦¼à§à¦à§à¦à¦¾à¦¤à§ affected by british . ||| -349.627 it is basically শà§à¦à¦°à§à¦° in between in which was which can শà§à¦à¦°à¦à§ à¦à¦¨à¦«à§à¦²à§à¦¯à¦¼à§à¦à§à¦à¦¾à¦¤à§ affected by british . ||| -349.663 - it is basically শà§à¦à¦°à§à¦° in between in was which can শà§à¦à¦°à¦à§ à¦à¦¨à¦«à§à¦²à§à¦¯à¦¼à§à¦à§à¦à¦¾à¦¤à§ affected by british . ||| -349.705 + it is basically শà§à¦à¦°à§à¦° in between in was which can শà§à¦à¦°à¦à§ à¦à¦¨à¦«à§à¦²à§à¦¯à¦¼à§à¦à§à¦à¦¾à¦¤à§ affected by british . ||| -349.704 these are à¦à¦à¦ the mycelium structure . ||| -221.617 these à¦à¦à¦ the mycelium structure . ||| -221.656 these are à¦à¦à¦ to mycelium structure . 
||| -221.769 http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5665f02f/tst/joshua/system/MultithreadedTranslationTests.java ---------------------------------------------------------------------- diff --git a/tst/joshua/system/MultithreadedTranslationTests.java b/tst/joshua/system/MultithreadedTranslationTests.java index 4ff549c..b8d8af0 100644 --- a/tst/joshua/system/MultithreadedTranslationTests.java +++ b/tst/joshua/system/MultithreadedTranslationTests.java @@ -28,6 +28,8 @@ public class MultithreadedTranslationTests { private JoshuaConfiguration joshuaConfig = null; private Decoder decoder = null; private static final String INPUT = "A K B1 U Z1 Z2 B2 C"; + private int previousLogLevel; + private final static long NANO_SECONDS_PER_SECOND = 1_000_000_000; @Before public void setUp() throws Exception { @@ -62,6 +64,9 @@ public class MultithreadedTranslationTests { // (configFile) // is not even used by the // constructor/initialize. + + previousLogLevel = Decoder.VERBOSE; + Decoder.VERBOSE = 0; } @After @@ -69,6 +74,7 @@ public class MultithreadedTranslationTests { Vocabulary.clear(); this.decoder.cleanUp(); this.decoder = null; + Decoder.VERBOSE = previousLogLevel; } @@ -102,11 +108,17 @@ public class MultithreadedTranslationTests { Translations translations = this.decoder.decodeAll(req); ArrayList<Translation> translationResults = new ArrayList<Translation>(); + + final long translationStartTime = System.nanoTime(); Translation t; while ((t = translations.next()) != null) { translationResults.add(t); } + final long translationEndTime = System.nanoTime(); + final double pipelineLoadDurationInSeconds = (translationEndTime - translationStartTime) / ((double)NANO_SECONDS_PER_SECOND); + System.err.println(String.format("%.2f seconds", pipelineLoadDurationInSeconds)); + // THEN assertTrue(translationResults.size() == inputLines); }
