Add an LRU cache from Google Guava to decrease allocations in the PackedGrammar getRules() call. Results in a 1.5 times speedup in decoding and a large decrease in required garbage collection.
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/e70677d2 Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/e70677d2 Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/e70677d2 Branch: refs/heads/master Commit: e70677d2eab23daa7082173e6fe337d68aa12230 Parents: 0990ebc Author: Kellen Sunderland <[email protected]> Authored: Tue Sep 22 13:37:54 2015 +0200 Committer: Kellen Sunderland <[email protected]> Committed: Thu Mar 31 10:44:42 2016 +0200 ---------------------------------------------------------------------- bin/joshua-decoder | 2 +- build.xml | 1 + lib/ivy.xml | 1 + pom.xml | 5 +++++ src/joshua/decoder/JoshuaConfiguration.java | 7 +++++++ .../decoder/ff/tm/packed/PackedGrammar.java | 20 +++++++++++++++++++- 6 files changed, 34 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/bin/joshua-decoder ---------------------------------------------------------------------- diff --git a/bin/joshua-decoder b/bin/joshua-decoder index 57c09f1..cdb2cf4 100755 --- a/bin/joshua-decoder +++ b/bin/joshua-decoder @@ -27,7 +27,7 @@ set -u JOSHUA=$(dirname $0)/.. 
exec java -Xmx${mem} \ - -cp $JOSHUA/class:$JOSHUA/ext/berkeleylm/jar/berkeleylm.jar:$JOSHUA/lib/gson-2.5.jar \ + -cp $JOSHUA/class:$JOSHUA/ext/berkeleylm/jar/berkeleylm.jar:$JOSHUA/lib/gson-2.5.jar:$JOSHUA/lib/guava-19.0.jar \ -Dfile.encoding=utf8 \ -Djava.util.logging.config.file=${JOSHUA}/logging.properties \ -Djava.library.path=$JOSHUA/lib \ http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/build.xml ---------------------------------------------------------------------- diff --git a/build.xml b/build.xml index 6456721..7095ca2 100644 --- a/build.xml +++ b/build.xml @@ -28,6 +28,7 @@ <include name="collections-generic-4.01.jar" /> <include name="args4j-2.0.29.jar" /> <include name="gson-2.5.jar" /> + <include name="guava-19.0.jar" /> </fileset> <fileset dir="${thraxlib}"> <include name="thrax.jar" /> http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/lib/ivy.xml ---------------------------------------------------------------------- diff --git a/lib/ivy.xml b/lib/ivy.xml index 02f3ff7..d41595d 100644 --- a/lib/ivy.xml +++ b/lib/ivy.xml @@ -12,5 +12,6 @@ <dependency org="net.sourceforge.collections" name="collections-generic" rev="4.01"/> <dependency org="args4j" name="args4j" rev="2.0.29" /> <dependency org="com.google.code.gson" name="gson" rev="2.5"/> + <dependency org="com.google.guava" name="guava" rev="19.0"/> </dependencies> </ivy-module> http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 3b4aac1..de75e80 100644 --- a/pom.xml +++ b/pom.xml @@ -122,5 +122,10 @@ <version>4.10</version> <optional>true</optional> </dependency> + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + <version>19.0</version> + </dependency> </dependencies> </project> http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/src/joshua/decoder/JoshuaConfiguration.java 
---------------------------------------------------------------------- diff --git a/src/joshua/decoder/JoshuaConfiguration.java b/src/joshua/decoder/JoshuaConfiguration.java index ece18d2..49ab87d 100644 --- a/src/joshua/decoder/JoshuaConfiguration.java +++ b/src/joshua/decoder/JoshuaConfiguration.java @@ -33,6 +33,10 @@ public class JoshuaConfiguration { // List of grammar files to read public ArrayList<String> tms = new ArrayList<String>(); + // A rule cache for commonly used tries to avoid excess object allocations + // Testing shows there's up to ~95% hit rate when cache size is 5000 Trie nodes. + public Integer cachedRuleSize = new Integer(5000); + /* * The file to read the weights from (part of the sparse features implementation). Weights can * also just be listed in the main config file. @@ -609,6 +613,9 @@ public class JoshuaConfiguration { // Check source sentence source_annotations = true; + } else if (parameter.equals(normalize_key("cached-rules-size"))) { + // Check source sentence + cachedRuleSize = Integer.parseInt(fds[1]); } else { if (parameter.equals(normalize_key("use-sent-specific-tm")) http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/src/joshua/decoder/ff/tm/packed/PackedGrammar.java ---------------------------------------------------------------------- diff --git a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java index df5538a..dc72a4b 100644 --- a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java +++ b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java @@ -79,6 +79,9 @@ import joshua.util.encoding.EncoderConfiguration; import joshua.util.encoding.FloatEncoder; import joshua.util.io.LineReader; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; + public class PackedGrammar extends AbstractGrammar { private EncoderConfiguration encoding; @@ -92,6 +95,10 @@ public class PackedGrammar extends AbstractGrammar { // The grammar specification 
keyword (e.g., "thrax" or "moses") private String type; + // A rule cache for commonly used tries to avoid excess object allocations + // Testing shows there's up to ~95% hit rate when cache size is 5000 Trie nodes. + private final Cache<Trie, List<Rule>> cached_rules; + public PackedGrammar(String grammar_dir, int span_limit, String owner, String type, JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException { super(joshuaConfiguration); @@ -132,6 +139,7 @@ public class PackedGrammar extends AbstractGrammar { for (PackedSlice s : slices) count += s.estimated.length; root = new PackedRoot(slices); + cached_rules = CacheBuilder.newBuilder().maximumSize(joshuaConfiguration.cachedRuleSize).build(); Decoder.LOG(1, String.format("Loaded %d rules", count)); } @@ -618,17 +626,24 @@ public class PackedGrammar extends AbstractGrammar { @Override public List<Rule> getRules() { + List<Rule> rules = cached_rules.getIfPresent(this); + if (rules != null) { + return rules; + } + int num_children = source[position]; int rule_position = position + 2 * (num_children + 1); int num_rules = source[rule_position - 1]; - ArrayList<Rule> rules = new ArrayList<Rule>(num_rules); + rules = new ArrayList<Rule>(num_rules); for (int i = 0; i < num_rules; i++) { if (type.equals("moses") || type.equals("phrase")) rules.add(new PackedPhrasePair(rule_position + 3 * i)); else rules.add(new PackedRule(rule_position + 3 * i)); } + + cached_rules.put(this, rules); return rules; } @@ -684,6 +699,9 @@ public class PackedGrammar extends AbstractGrammar { } for (int i = 0; i < sorted.length; i++) source[rule_position + i] = sorted[i]; + + // Replace rules in cache with their sorted values on next getRules() + cached_rules.invalidate(this); this.sorted = true; }
