Merged the Spark 2.0.0 work. This was a massive undertaking that gave us performance improvements. Thanks @dalaro and @yucx.
Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/3fc700fd Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/3fc700fd Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/3fc700fd Branch: refs/heads/TINKERPOP-1564 Commit: 3fc700fdc19a6cb44d57aecf457a00b8eba0346a Parents: 77bbb42 b8a2452 Author: Marko A. Rodriguez <[email protected]> Authored: Tue Jan 3 07:43:41 2017 -0700 Committer: Marko A. Rodriguez <[email protected]> Committed: Tue Jan 3 07:43:41 2017 -0700 ---------------------------------------------------------------------- CHANGELOG.asciidoc | 4 + docs/src/upgrade/release-3.3.x.asciidoc | 22 +++ giraph-gremlin/pom.xml | 4 +- .../process/computer/GiraphGraphComputer.java | 2 + .../giraph/GiraphGremlinIntegrateTest.java | 33 +++++ .../gremlin/giraph/GiraphGremlinSuite.java | 35 +++++ .../structure/io/GiraphIoRegistryCheck.java | 59 ++++++++ .../gremlin/structure/io/IoRegistry.java | 3 + .../tinkerpop/gremlin/structure/io/Mapper.java | 19 +++ .../gremlin/structure/io/gryo/GryoMapper.java | 115 ++++++++++++++- .../gremlin/structure/io/gryo/GryoPool.java | 56 +------- .../io/gryo/kryoshim/KryoShimService.java | 12 +- .../io/gryo/kryoshim/KryoShimServiceLoader.java | 139 +++++++++---------- .../gryo/kryoshim/shaded/ShadedKryoAdapter.java | 18 +-- .../shaded/ShadedSerializerAdapter.java | 6 +- .../structure/io/util/IoRegistryHelper.java | 88 ++++++++++++ .../tinkerpop/gremlin/util/SystemUtil.java | 55 ++++++++ .../gremlin/structure/io/gryo/GryoPoolTest.java | 25 ++-- .../tinkerpop/gremlin/util/SystemUtilTest.java | 89 ++++++++++++ .../gremlin/process/ProcessComputerSuite.java | 2 +- hadoop-gremlin/conf/hadoop-graphson.properties | 2 + .../conf/hadoop-grateful-gryo.properties | 25 ++-- hadoop-gremlin/conf/hadoop-gryo.properties | 6 +- hadoop-gremlin/conf/hadoop-script.properties | 5 +- hadoop-gremlin/pom.xml | 24 ++-- .../tinkerpop/gremlin/hadoop/Constants.java | 2 + 
.../structure/io/HadoopPoolShimService.java | 52 +++---- .../hadoop/structure/io/HadoopPools.java | 23 ++- .../structure/io/HadoopPoolsConfigurable.java | 2 +- .../io/graphson/GraphSONRecordReader.java | 13 +- .../io/graphson/GraphSONRecordWriter.java | 12 +- .../structure/io/gryo/GryoRecordReader.java | 13 +- .../structure/io/gryo/GryoRecordWriter.java | 14 +- .../gremlin/hadoop/HadoopGraphProvider.java | 21 ++- .../structure/io/AbstractIoRegistryCheck.java | 122 ++++++++++++++++ .../GraphSONRecordReaderWriterTest.java | 2 +- .../hadoop/structure/io/gryo/ToyIoRegistry.java | 70 ++++++++++ .../hadoop/structure/io/gryo/ToyPoint.java | 113 +++++++++++++++ .../hadoop/structure/io/gryo/ToyTriangle.java | 120 ++++++++++++++++ spark-gremlin/pom.xml | 56 +++++++- .../io/gryo/CompactBufferSerializer.groovy | 2 - .../spark/process/computer/SparkExecutor.java | 18 +-- .../process/computer/SparkGraphComputer.java | 48 ++++++- .../SparkStarBarrierInterceptor.java | 10 +- .../structure/io/gryo/GryoRegistrator.java | 30 +++- .../spark/structure/io/gryo/GryoSerializer.java | 102 ++++++++------ .../io/gryo/IoRegistryAwareKryoSerializer.java | 110 --------------- .../io/gryo/ObjectWritableSerializer.java | 1 - .../structure/io/gryo/Tuple2Serializer.java | 2 - .../structure/io/gryo/Tuple3Serializer.java | 3 - .../io/gryo/VertexWritableSerializer.java | 1 - .../io/gryo/WrappedArraySerializer.java | 1 - .../unshaded/UnshadedKryoShimService.java | 109 +++++---------- .../unshaded/UnshadedSerializerAdapter.java | 9 +- .../gremlin/spark/AbstractSparkTest.java | 11 +- .../spark/SparkGremlinGryoSerializerTest.java | 33 +++++ .../gremlin/spark/SparkGremlinSuite.java | 3 +- ...tratorGraphComputerProcessIntegrateTest.java | 33 ----- ...alizerGraphComputerProcessIntegrateTest.java | 33 +++++ ...SparkHadoopGraphGryoRegistratorProvider.java | 52 ------- .../SparkHadoopGraphGryoSerializerProvider.java | 46 ++++++ .../computer/SparkHadoopGraphProvider.java | 28 +++- 
.../structure/io/SparkContextStorageCheck.java | 11 +- .../structure/io/SparkIoRegistryCheck.java | 64 +++++++++ 64 files changed, 1621 insertions(+), 622 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/3fc700fd/CHANGELOG.asciidoc ---------------------------------------------------------------------- diff --cc CHANGELOG.asciidoc index c233f0d,c08fecd..31e4a92 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@@ -47,6 -46,10 +47,10 @@@ TinkerPop 3.3.0 (Release Date: NOT OFFI * Removed all performance tests that were not part of `gremlin-benchmark`. * Removed dependency on `junit-benchmarks` and it's related reference to `h2`. * Moved the source for the "home page" into the repository under `/site` so that it easier to accept contributions. -* Bumped to support Spark 2.0.0. ++* Bumped to support Spark 2.0.0 with Scala 2.11. + * Added `UnshadedKryoShimService` as the new default serializer model for `SparkGraphComputer`. + * `GryoRegistrator` is more efficient than the previous `GryoSerializer` model in `SparkGraphComputer`. + * Added support for `IoRegistry` custom serialization in Spark/Giraph and provided a general `hadoop-gremlin` test suite. * Replaced term `REST` with `HTTP` to remove any confusion as to the design of the API. * Moved `gremlin-benchmark` under `gremlin-tools` module. * Added `gremlin-tools` and its submodule `gremlin-coverage`. http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/3fc700fd/docs/src/upgrade/release-3.3.x.asciidoc ---------------------------------------------------------------------- diff --cc docs/src/upgrade/release-3.3.x.asciidoc index 542ff89,335696d..30a404f --- a/docs/src/upgrade/release-3.3.x.asciidoc +++ b/docs/src/upgrade/release-3.3.x.asciidoc @@@ -147,3 -140,23 +147,25 @@@ Gremlin-server.ba ^^^^^^^^^^^^^^^^^^ The switch name has changed for installing dependencies. `-i` has been deprecated and replaced by `install`. 
+ + SparkGraphComputer GryoRegistrator + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + Historically, `SparkGraphComputer` has used `GryoSerializer` to handle the serialization of objects in Spark. The reason + this exists is because TinkerPop uses a shaded version of Kryo and thus, couldn't use the standard `KryoSerializer`-model + provided by Spark. However, a "shim model" was created which allows for the shaded and unshaded versions of Kryo to + interact with one another. To this end, `KryoSerializer` can now be used with a `GryoRegistrator`. The properties file + for a `SparkGraphComputer` now looks as follows: + + ``` + spark.serializer=org.apache.spark.serializer.KryoSerializer + spark.kryo.registrator=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoRegistrator + ``` + + If the old `GryoSerializer` model is desired, then the properties file should simply look as before: + + ``` + spark.serializer=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer -``` ++``` ++ ++See: link:https://issues.apache.org/jira/browse/TINKERPOP-1389 http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/3fc700fd/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/io/gryo/GryoMapper.java ---------------------------------------------------------------------- diff --cc gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/io/gryo/GryoMapper.java index c42a4fc,7b3a6b4..48b670c --- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/io/gryo/GryoMapper.java +++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/io/gryo/GryoMapper.java @@@ -18,13 -18,100 +18,15 @@@ */ package org.apache.tinkerpop.gremlin.structure.io.gryo; + import org.apache.commons.lang.builder.ToStringBuilder; -import org.apache.tinkerpop.gremlin.process.computer.GraphFilter; -import org.apache.tinkerpop.gremlin.process.computer.MapReduce; -import org.apache.tinkerpop.gremlin.process.computer.traversal.strategy.decoration.VertexProgramStrategy; 
-import org.apache.tinkerpop.gremlin.process.computer.traversal.strategy.optimization.GraphFilterStrategy; -import org.apache.tinkerpop.gremlin.process.computer.util.MapMemory; -import org.apache.tinkerpop.gremlin.process.remote.traversal.DefaultRemoteTraverser; -import org.apache.tinkerpop.gremlin.process.traversal.Bytecode; -import org.apache.tinkerpop.gremlin.process.traversal.Contains; -import org.apache.tinkerpop.gremlin.process.traversal.Operator; -import org.apache.tinkerpop.gremlin.process.traversal.Order; -import org.apache.tinkerpop.gremlin.process.traversal.P; -import org.apache.tinkerpop.gremlin.process.traversal.Path; -import org.apache.tinkerpop.gremlin.process.traversal.Pop; -import org.apache.tinkerpop.gremlin.process.traversal.SackFunctions; -import org.apache.tinkerpop.gremlin.process.traversal.Scope; -import org.apache.tinkerpop.gremlin.process.traversal.step.TraversalOptionParent; -import org.apache.tinkerpop.gremlin.process.traversal.step.filter.RangeGlobalStep; -import org.apache.tinkerpop.gremlin.process.traversal.step.map.FoldStep; -import org.apache.tinkerpop.gremlin.process.traversal.step.map.GroupCountStep; -import org.apache.tinkerpop.gremlin.process.traversal.step.map.GroupStep; -import org.apache.tinkerpop.gremlin.process.traversal.step.map.GroupStepV3d0; -import org.apache.tinkerpop.gremlin.process.traversal.step.map.MatchStep; -import org.apache.tinkerpop.gremlin.process.traversal.step.map.MeanGlobalStep; -import org.apache.tinkerpop.gremlin.process.traversal.step.map.OrderGlobalStep; -import org.apache.tinkerpop.gremlin.process.traversal.step.map.TreeStep; -import org.apache.tinkerpop.gremlin.process.traversal.step.util.BulkSet; -import org.apache.tinkerpop.gremlin.process.traversal.step.util.ProfileStep; -import org.apache.tinkerpop.gremlin.process.traversal.step.util.Tree; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.ConnectiveStrategy; -import 
org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.HaltedTraverserStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.PartitionStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.SubgraphStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.finalization.MatchAlgorithmStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.AdjacentToIncidentStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.FilterRankingStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.IdentityRemovalStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.IncidentToAdjacentStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.InlineFilterStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.LazyBarrierStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.MatchPredicateStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.OrderLimitStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.PathProcessorStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.PathRetractionStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.RangeByIsCountStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.RepeatUnrollStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.verification.LambdaRestrictionStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.verification.ReadOnlyStrategy; -import org.apache.tinkerpop.gremlin.process.traversal.traverser.B_LP_O_P_S_SE_SL_Traverser; -import org.apache.tinkerpop.gremlin.process.traversal.traverser.B_LP_O_S_SE_SL_Traverser; -import 
org.apache.tinkerpop.gremlin.process.traversal.traverser.B_O_S_SE_SL_Traverser; -import org.apache.tinkerpop.gremlin.process.traversal.traverser.B_O_Traverser; -import org.apache.tinkerpop.gremlin.process.traversal.traverser.LP_O_OB_P_S_SE_SL_Traverser; -import org.apache.tinkerpop.gremlin.process.traversal.traverser.LP_O_OB_S_SE_SL_Traverser; -import org.apache.tinkerpop.gremlin.process.traversal.traverser.O_OB_S_SE_SL_Traverser; -import org.apache.tinkerpop.gremlin.process.traversal.traverser.O_Traverser; -import org.apache.tinkerpop.gremlin.process.traversal.traverser.util.TraverserSet; -import org.apache.tinkerpop.gremlin.process.traversal.util.AndP; -import org.apache.tinkerpop.gremlin.process.traversal.util.DefaultTraversalMetrics; -import org.apache.tinkerpop.gremlin.process.traversal.util.ImmutableMetrics; -import org.apache.tinkerpop.gremlin.process.traversal.util.MutableMetrics; -import org.apache.tinkerpop.gremlin.process.traversal.util.OrP; -import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalExplanation; -import org.apache.tinkerpop.gremlin.structure.Column; -import org.apache.tinkerpop.gremlin.structure.Direction; -import org.apache.tinkerpop.gremlin.structure.Edge; import org.apache.tinkerpop.gremlin.structure.Graph; -import org.apache.tinkerpop.gremlin.structure.Property; -import org.apache.tinkerpop.gremlin.structure.T; -import org.apache.tinkerpop.gremlin.structure.Vertex; -import org.apache.tinkerpop.gremlin.structure.VertexProperty; import org.apache.tinkerpop.gremlin.structure.io.IoRegistry; import org.apache.tinkerpop.gremlin.structure.io.Mapper; import org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.SerializerShim; + import org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.shaded.ShadedSerializerAdapter; -import org.apache.tinkerpop.gremlin.structure.util.detached.DetachedEdge; -import org.apache.tinkerpop.gremlin.structure.util.detached.DetachedPath; -import 
org.apache.tinkerpop.gremlin.structure.util.detached.DetachedProperty; -import org.apache.tinkerpop.gremlin.structure.util.detached.DetachedVertex; -import org.apache.tinkerpop.gremlin.structure.util.detached.DetachedVertexProperty; -import org.apache.tinkerpop.gremlin.structure.util.reference.ReferenceEdge; -import org.apache.tinkerpop.gremlin.structure.util.reference.ReferencePath; -import org.apache.tinkerpop.gremlin.structure.util.reference.ReferenceProperty; -import org.apache.tinkerpop.gremlin.structure.util.reference.ReferenceVertex; -import org.apache.tinkerpop.gremlin.structure.util.reference.ReferenceVertexProperty; -import org.apache.tinkerpop.gremlin.structure.util.star.StarGraph; -import org.apache.tinkerpop.gremlin.structure.util.star.StarGraphSerializer; -import org.apache.tinkerpop.gremlin.util.function.HashSetSupplier; -import org.apache.tinkerpop.gremlin.util.function.Lambda; import org.apache.tinkerpop.shaded.kryo.ClassResolver; import org.apache.tinkerpop.shaded.kryo.Kryo; -import org.apache.tinkerpop.shaded.kryo.KryoSerializable; import org.apache.tinkerpop.shaded.kryo.Serializer; -import org.apache.tinkerpop.shaded.kryo.serializers.JavaSerializer; import org.apache.tinkerpop.shaded.kryo.util.DefaultStreamFactory; import org.apache.tinkerpop.shaded.kryo.util.MapReferenceResolver; import org.javatuples.Pair; @@@ -155,7 -451,22 +157,8 @@@ public final class GryoMapper implement private boolean referenceTracking = true; private Supplier<ClassResolver> classResolver = GryoClassResolver::new; - private Builder() { } + private Builder() { - // Validate the default registrations - // For justification of these default registration rules, see TinkerPopKryoRegistrator - for (TypeRegistration<?> tr : typeRegistrations) { - if (tr.hasSerializer() /* no serializer is acceptable */ && - null == tr.getSerializerShim() /* a shim serializer is acceptable */ && - !(tr.getShadedSerializer() instanceof JavaSerializer) /* shaded JavaSerializer is acceptable */) { 
- // everything else is invalid - final String msg = String.format("The default GryoMapper type registration %s is invalid. " + - "It must supply either an implementation of %s or %s, but supplies neither. " + - "This is probably a bug in GryoMapper's default serialization class registrations.", tr, - SerializerShim.class.getCanonicalName(), JavaSerializer.class.getCanonicalName()); - throw new IllegalStateException(msg); - } - } + } /** * {@inheritDoc} @@@ -304,4 -607,112 +309,112 @@@ typeRegistrations.add(newRegistrationBuilder.apply(registrationId)); } } + + private static class GryoTypeReg<T> implements TypeRegistration<T> { + + private final Class<T> clazz; + private final Serializer<T> shadedSerializer; + private final SerializerShim<T> serializerShim; + private final Function<Kryo, Serializer> functionOfShadedKryo; + private final int id; + + private GryoTypeReg(final Class<T> clazz, + final Serializer<T> shadedSerializer, + final SerializerShim<T> serializerShim, + final Function<Kryo, Serializer> functionOfShadedKryo, + final int id) { + this.clazz = clazz; + this.shadedSerializer = shadedSerializer; + this.serializerShim = serializerShim; + this.functionOfShadedKryo = functionOfShadedKryo; + this.id = id; + + int serializerCount = 0; + if (null != this.shadedSerializer) + serializerCount++; + if (null != this.serializerShim) + serializerCount++; + if (null != this.functionOfShadedKryo) + serializerCount++; + + if (1 < serializerCount) { + final String msg = String.format( + "GryoTypeReg accepts at most one kind of serializer, but multiple " + + "serializers were supplied for class %s (id %s). " + + "Shaded serializer: %s. Shim serializer: %s. 
Shaded serializer function: %s.", + this.clazz.getCanonicalName(), id, + this.shadedSerializer, this.serializerShim, this.functionOfShadedKryo); + throw new IllegalArgumentException(msg); + } + } + + private static <T> GryoTypeReg<T> of(final Class<T> clazz, final int id) { + return new GryoTypeReg<>(clazz, null, null, null, id); + } + + private static <T> GryoTypeReg<T> of(final Class<T> clazz, final int id, final Serializer<T> shadedSerializer) { + return new GryoTypeReg<>(clazz, shadedSerializer, null, null, id); + } + + private static <T> GryoTypeReg<T> of(final Class<T> clazz, final int id, final SerializerShim<T> serializerShim) { + return new GryoTypeReg<>(clazz, null, serializerShim, null, id); + } + + private static <T> GryoTypeReg<T> of(final Class clazz, final int id, final Function<Kryo, Serializer> fct) { + return new GryoTypeReg<>(clazz, null, null, fct, id); + } + + @Override + public Serializer<T> getShadedSerializer() { + return shadedSerializer; + } + + @Override + public SerializerShim<T> getSerializerShim() { + return serializerShim; + } + + @Override + public Function<Kryo, Serializer> getFunctionOfShadedKryo() { + return functionOfShadedKryo; + } + + @Override + public Class<T> getTargetClass() { + return clazz; + } + + @Override + public int getId() { + return id; + } + + @Override + public Kryo registerWith(final Kryo kryo) { + if (null != functionOfShadedKryo) + kryo.register(clazz, functionOfShadedKryo.apply(kryo), id); + else if (null != shadedSerializer) + kryo.register(clazz, shadedSerializer, id); + else if (null != serializerShim) + kryo.register(clazz, new ShadedSerializerAdapter<>(serializerShim), id); + else { + kryo.register(clazz, kryo.getDefaultSerializer(clazz), id); + // Surprisingly, the preceding call is not equivalent to + // kryo.register(clazz, id); + } + + return kryo; + } + + @Override + public String toString() { + return new ToStringBuilder(this) + .append("targetClass", clazz) + .append("id", id) + 
.append("shadedSerializer", shadedSerializer) + .append("serializerShim", serializerShim) + .append("functionOfShadedKryo", functionOfShadedKryo) + .toString(); + } + } -} +} http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/3fc700fd/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/ProcessComputerSuite.java ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/3fc700fd/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/kryoshim/unshaded/UnshadedKryoShimService.java ----------------------------------------------------------------------
