Repository: crunch Updated Branches: refs/heads/master 597eadd65 -> 0b19717d1
CRUNCH-604: Avoid expensive Writables.reloadWritableComparableCodes Project: http://git-wip-us.apache.org/repos/asf/crunch/repo Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/0b19717d Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/0b19717d Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/0b19717d Branch: refs/heads/master Commit: 0b19717d105b58e58c1947eda6b673a387e330d0 Parents: 597eadd Author: Micah Whitacre <[email protected]> Authored: Tue Aug 2 16:29:55 2016 -0500 Committer: Micah Whitacre <[email protected]> Committed: Tue Aug 2 16:29:55 2016 -0500 ---------------------------------------------------------------------- .../crunch/types/writable/TupleWritable.java | 18 ++++++++++++++---- .../apache/crunch/types/writable/Writables.java | 17 +++++++++++------ 2 files changed, 25 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/crunch/blob/0b19717d/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java ---------------------------------------------------------------------- diff --git a/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java b/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java index 068b0af..c29ffa8 100644 --- a/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java +++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/TupleWritable.java @@ -54,6 +54,8 @@ public class TupleWritable extends Configured implements WritableComparable<Tupl private int[] written; private Writable[] values; + private boolean comparablesLoaded = false; + /** * Create an empty tuple with no allocated storage for writables. */ @@ -65,10 +67,18 @@ public class TupleWritable extends Configured implements WritableComparable<Tupl super.setConf(conf); if (conf == null) return; - try { - Writables.reloadWritableComparableCodes(conf); - } catch (Exception e) { - throw new CrunchRuntimeException("Error reloading writable comparable codes", e); + // only reload comparables if this particular writable hasn't already done so; + // While there's also some caching within the static `reloadWritableComparableCodes`, + // it has to hit the configuration, which runs some embarrassing regexes and stuff. + // As for why SequenceFile$Reader calls `setConf` every single time it reads a new value + // (thus trigering this expensive path), I don't know. + if (!comparablesLoaded) { + try { + Writables.reloadWritableComparableCodes(conf); + } catch (Exception e) { + throw new CrunchRuntimeException("Error reloading writable comparable codes", e); + } + comparablesLoaded = true; } } http://git-wip-us.apache.org/repos/asf/crunch/blob/0b19717d/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java ---------------------------------------------------------------------- diff --git a/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java b/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java index 89d08ee..ca65972 100644 --- a/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java +++ b/crunch-core/src/main/java/org/apache/crunch/types/writable/Writables.java @@ -129,6 +129,8 @@ public class Writables { private static final String WRITABLE_COMPARABLE_CODES = "crunch.writable.comparable.codes"; + private static int WRITABLE_CODES_LOADED = 0; + static void serializeWritableComparableCodes(Configuration conf) throws IOException { Map<Integer, String> codeToClassNameMap = Maps.transformValues(WRITABLE_CODES, new Function<Class<? extends Writable>, String>() { @@ -142,12 +144,15 @@ public class Writables { static void reloadWritableComparableCodes(Configuration conf) throws Exception { if (conf.get(WRITABLE_COMPARABLE_CODES) != null) { - Map<String, String> codeToClassName = Splitter.on(';') - .withKeyValueSeparator(":").split(conf.get(WRITABLE_COMPARABLE_CODES)); - for (Map.Entry<String, String> codeToClassNameEntry : codeToClassName.entrySet()) { - WRITABLE_CODES.put( - Integer.parseInt(codeToClassNameEntry.getKey()), - (Class<? extends Writable>) Class.forName(codeToClassNameEntry.getValue())); + String writableCodes = conf.get(WRITABLE_COMPARABLE_CODES); + if (writableCodes != null && writableCodes.hashCode() != WRITABLE_CODES_LOADED) { + Map<String, String> codeToClassName = Splitter.on(';').withKeyValueSeparator(":").split(writableCodes); + for (Map.Entry<String, String> codeToClassNameEntry : codeToClassName.entrySet()) { + WRITABLE_CODES.put( + Integer.parseInt(codeToClassNameEntry.getKey()), + (Class<? extends Writable>) Class.forName(codeToClassNameEntry.getValue())); + } + WRITABLE_CODES_LOADED = writableCodes.hashCode(); } } }
