[ https://issues.apache.org/jira/browse/TINKERPOP-1298?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15307950#comment-15307950 ]
ASF GitHub Bot commented on TINKERPOP-1298: ------------------------------------------- Github user dkuppitz commented on a diff in the pull request: https://github.com/apache/incubator-tinkerpop/pull/323#discussion_r65209483 --- Diff: gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/bulkdumping/BulkExportVertexProgram.java --- @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.tinkerpop.gremlin.process.computer.bulkdumping; + +import org.apache.commons.configuration.BaseConfiguration; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.configuration.ConfigurationUtils; +import org.apache.tinkerpop.gremlin.process.computer.GraphComputer; +import org.apache.tinkerpop.gremlin.process.computer.Memory; +import org.apache.tinkerpop.gremlin.process.computer.MessageScope; +import org.apache.tinkerpop.gremlin.process.computer.Messenger; +import org.apache.tinkerpop.gremlin.process.computer.VertexComputeKey; +import org.apache.tinkerpop.gremlin.process.computer.VertexProgram; +import org.apache.tinkerpop.gremlin.process.computer.traversal.TraversalVertexProgram; +import org.apache.tinkerpop.gremlin.process.computer.util.AbstractVertexProgramBuilder; +import org.apache.tinkerpop.gremlin.process.traversal.Path; +import org.apache.tinkerpop.gremlin.process.traversal.Traverser; +import org.apache.tinkerpop.gremlin.process.traversal.traverser.util.TraverserSet; +import org.apache.tinkerpop.gremlin.structure.Graph; +import org.apache.tinkerpop.gremlin.structure.Vertex; +import org.apache.tinkerpop.gremlin.structure.VertexProperty; +import org.apache.tinkerpop.gremlin.structure.util.StringFactory; +import org.javatuples.Tuple; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * @author Daniel Kuppitz (http://gremlin.guru) + */ +public class BulkExportVertexProgram implements VertexProgram<Tuple> { + + public static final String BULK_EXPORT_VERTEX_PROGRAM_CFG_PREFIX = "gremlin.bulkExportVertexProgram"; + public static final String BULK_EXPORT_PROPERTIES = String.join(".", BULK_EXPORT_VERTEX_PROGRAM_CFG_PREFIX, "properties"); + + private Configuration configuration; + private Map<String, String> properties; + private 
List<String> sortedProperties; + private Set<VertexComputeKey> vertexComputeKeys; + + private BulkExportVertexProgram() { + } + + @Override + public void loadState(final Graph graph, final Configuration config) { + configuration = new BaseConfiguration(); + if (config != null) { + ConfigurationUtils.copy(config, configuration); + } + properties = new HashMap<>(); + sortedProperties = new ArrayList<>(); + for (final String tuple : configuration.getString(BULK_EXPORT_PROPERTIES, "").split("\1")) { + final String[] parts = tuple.split("\2", -1); + properties.put(parts[0], parts[1]); + sortedProperties.add(parts[0]); + } + vertexComputeKeys = Collections.singleton(VertexComputeKey.of(BULK_EXPORT_PROPERTIES, false)); + } + + @Override + public void storeState(final Configuration config) { + VertexProgram.super.storeState(config); + if (configuration != null) { + ConfigurationUtils.copy(configuration, config); + } + } + + @Override + public void setup(final Memory memory) { + } + + @Override + public void execute(final Vertex sourceVertex, final Messenger<Tuple> messenger, final Memory memory) { --- End diff -- Copy & paste. I've already changed it to `Object`. > Save OLAP results to file > ------------------------- > > Key: TINKERPOP-1298 > URL: https://issues.apache.org/jira/browse/TINKERPOP-1298 > Project: TinkerPop > Issue Type: Improvement > Components: io, process > Reporter: Daniel Kuppitz > Assignee: Daniel Kuppitz > > Provide a way to save (tabular) results to text files, just like Spark's > {{saveAsTextFile}}. > I'm not sure about the best way to do it. 3 options come to my mind: > # a new step. > # a {{VertexProgram}} > # a configuration option > Things to consider / open questions: > * Is it sufficient to simply {{toString()}} all values or should we allow > formatters / format strings? > * [~jlewandowski] pointed out that it would be nice to have support for the > [parquet file format|https://parquet.apache.org/]. 
I guess now we're already > talking about support for different {{FileOutputFormats}} and not just > formatters. > * Is that only relevant for OLAP? > * Can we support arbitrary file systems? -- This message was sent by Atlassian JIRA (v6.3.4#6332)