Matthew Stahl created TINKERPOP-1655:
----------------------------------------
Summary: SparkGraphComputer returns vertices without properties
Key: TINKERPOP-1655
URL: https://issues.apache.org/jira/browse/TINKERPOP-1655
Project: TinkerPop
Issue Type: Bug
Affects Versions: 3.3.0
Environment: /usr/lib/spark/jars/spark-core_2.11-2.0.2.jar
Reporter: Matthew Stahl
Spark 2.0 + tinkerpop-3.3.0
A simple program that pulls out the first vertex in the grateful-dead.kryo dataset
and prints its property keys works with the standard computer, but when the same
traversal is processed using the SparkGraphComputer, the set of keys is empty.
{code}
// Reproduction script for TINKERPOP-1655 (run in a Scala REPL / Zeppelin paragraph).
//
// Pre-requisite: the dataset must already be in HDFS, e.g.
//   sudo -u zeppelin hadoop fs -copyFromLocal /tmp/grateful-dead.kryo grateful-dead.kryo
val inputHdfsLocation = "grateful-dead.kryo"

// HadoopGraph / Spark settings used for the run.
val props = Map[String, String](
  "gremlin.graph" -> "org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph",
  "gremlin.hadoop.graphReader" -> "org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat",
  "gremlin.hadoop.inputLocation" -> inputHdfsLocation,
  "gremlin.hadoop.outputLocation" -> "output",
  "gremlin.hadoop.jarsInDistributedCache" -> "true",
  "spark.master" -> "local[1]",
  "spark.executor.memory" -> "1g",
  "spark.serializer" -> "org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer"
  // Disabled in this run (the leading comma lets the entry be re-enabled by uncommenting):
  // , "spark.kryo.registrator" -> "org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoRegistrator"
)

import org.apache.commons.configuration._

// Copy every key/value pair into a commons-configuration object for GraphFactory.
val conf = new BaseConfiguration()
props.foreach( kv => conf.addProperty(kv._1, kv._2))

import org.apache.tinkerpop.gremlin.process.computer._
import org.apache.tinkerpop.gremlin.spark.process.computer._
import org.apache.tinkerpop.gremlin.structure.util._

val graph = GraphFactory.open(conf)

// Case 1: traversal without an explicit computer.
// Reported output: keys = [name, songType, performances]
val v = graph.traversal().V().next(1).get(0)
printf("vertex id = %s, keys = %s\n", v.id, v.keys())

// Case 2: the same traversal executed through SparkGraphComputer.
// Reported output: keys = [] -- this is the bug being reported.
val computer = Computer.compute(classOf[SparkGraphComputer])
val v2 = graph.traversal().withComputer(computer).V().next(1).get(0)
printf("vertex id = %s, keys = %s\n", v2.id, v2.keys())
{code}
Above produces:
{code}
inputHdfsLocation: String = grateful-dead.kryo
props: scala.collection.immutable.Map[String,String] = Map(spark.serializer ->
org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer,
gremlin.hadoop.inputLocation -> grateful-dead.kryo,
gremlin.hadoop.jarsInDistributedCache -> true, gremlin.hadoop.graphReader ->
org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat,
gremlin.graph -> org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph,
gremlin.hadoop.outputLocation -> output, spark.master -> local[1],
spark.executor.memory -> 1g)
import org.apache.commons.configuration._
conf: org.apache.commons.configuration.BaseConfiguration =
org.apache.commons.configuration.BaseConfiguration@1849d0b7
import org.apache.tinkerpop.gremlin.process.computer._
import org.apache.tinkerpop.gremlin.spark.process.computer._
import org.apache.tinkerpop.gremlin.structure.util._
graph: org.apache.tinkerpop.gremlin.structure.Graph =
hadoopgraph[gryoinputformat->no-writer]
v: org.apache.tinkerpop.gremlin.structure.Vertex = v[1]
vertex id = 1, keys = [name, songType, performances]
computer: org.apache.tinkerpop.gremlin.process.computer.Computer =
sparkgraphcomputer
v2: org.apache.tinkerpop.gremlin.structure.Vertex = v[1]
vertex id = 1, keys = []
{code}
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)