Repository: crunch
Updated Branches:
  refs/heads/master 597eadd65 -> 0b19717d1


CRUNCH-604: Avoid expensive Writables.reloadWritableComparableCodes


Project: http://git-wip-us.apache.org/repos/asf/crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/0b19717d
Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/0b19717d
Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/0b19717d

Branch: refs/heads/master
Commit: 0b19717d105b58e58c1947eda6b673a387e330d0
Parents: 597eadd
Author: Micah Whitacre <[email protected]>
Authored: Tue Aug 2 16:29:55 2016 -0500
Committer: Micah Whitacre <[email protected]>
Committed: Tue Aug 2 16:29:55 2016 -0500

----------------------------------------------------------------------
 .../crunch/types/writable/TupleWritable.java      | 18 ++++++++++++++----
 .../apache/crunch/types/writable/Writables.java   | 17 +++++++++++------
 2 files changed, 25 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/crunch/blob/0b19717d/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java
----------------------------------------------------------------------
diff --git 
a/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java 
b/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java
index 068b0af..c29ffa8 100644
--- 
a/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java
+++ 
b/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java
@@ -54,6 +54,8 @@ public class TupleWritable extends Configured implements 
WritableComparable<Tupl
   private int[] written;
   private Writable[] values;
 
+  private boolean comparablesLoaded = false;
+
   /**
    * Create an empty tuple with no allocated storage for writables.
    */
@@ -65,10 +67,18 @@ public class TupleWritable extends Configured implements 
WritableComparable<Tupl
     super.setConf(conf);
     if (conf == null) return;
 
-    try {
-      Writables.reloadWritableComparableCodes(conf);
-    } catch (Exception e) {
-      throw new CrunchRuntimeException("Error reloading writable comparable 
codes", e);
+    // only reload comparables if this particular writable hasn't already done 
so;
+    // While there's also some caching within the static 
`reloadWritableComparableCodes`,
+    // it has to hit the configuration, which runs some embarrassing regexes 
and stuff.
+    // As for why SequenceFile$Reader calls `setConf` every single time it 
reads a new value
+    // (thus trigering this expensive path), I don't know.
+    if (!comparablesLoaded) {
+      try {
+        Writables.reloadWritableComparableCodes(conf);
+      } catch (Exception e) {
+        throw new CrunchRuntimeException("Error reloading writable comparable 
codes", e);
+      }
+      comparablesLoaded = true;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/crunch/blob/0b19717d/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java
----------------------------------------------------------------------
diff --git 
a/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java 
b/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java
index 89d08ee..ca65972 100644
--- a/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java
+++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java
@@ -129,6 +129,8 @@ public class Writables {
 
   private static final String WRITABLE_COMPARABLE_CODES = 
"crunch.writable.comparable.codes";
 
+  private static int WRITABLE_CODES_LOADED = 0;
+
   static void serializeWritableComparableCodes(Configuration conf) throws 
IOException {
     Map<Integer, String> codeToClassNameMap = 
Maps.transformValues(WRITABLE_CODES,
         new Function<Class<? extends Writable>, String>() {
@@ -142,12 +144,15 @@ public class Writables {
 
   static void reloadWritableComparableCodes(Configuration conf) throws 
Exception {
     if (conf.get(WRITABLE_COMPARABLE_CODES) != null) {
-      Map<String, String> codeToClassName = Splitter.on(';')
-          
.withKeyValueSeparator(":").split(conf.get(WRITABLE_COMPARABLE_CODES));
-      for (Map.Entry<String, String> codeToClassNameEntry : 
codeToClassName.entrySet()) {
-        WRITABLE_CODES.put(
-            Integer.parseInt(codeToClassNameEntry.getKey()),
-            (Class<? extends Writable>) 
Class.forName(codeToClassNameEntry.getValue()));
+      String writableCodes = conf.get(WRITABLE_COMPARABLE_CODES);
+      if (writableCodes != null && writableCodes.hashCode() != 
WRITABLE_CODES_LOADED) {
+        Map<String, String> codeToClassName = 
Splitter.on(';').withKeyValueSeparator(":").split(writableCodes);
+        for (Map.Entry<String, String> codeToClassNameEntry : 
codeToClassName.entrySet()) {
+          WRITABLE_CODES.put(
+                  Integer.parseInt(codeToClassNameEntry.getKey()),
+                  (Class<? extends Writable>) 
Class.forName(codeToClassNameEntry.getValue()));
+        }
+        WRITABLE_CODES_LOADED = writableCodes.hashCode();
       }
     }
   }

Reply via email to