I'm trying to find communities in Facebook graphs with Giraph on Hadoop. I'm
using NetBeans and have imported all related libraries into the project
(and installed Hadoop and Giraph on Ubuntu 14.04 desktop). I have
converted all the graph data
(http://snap.stanford.edu/data/egonets-Facebook.html) into a txt file as
follows:
"328#306,275,4,218,78,195,181,273#127,76,78,55,4,132"
which means user_id: 328,
user_friends_in_fb (edges): 306,275,4,218,78,195,181,273,
properties_of_user: anonymized feature numbers {127,76,78,55,4,132}.
I want to import all the data into vertices and edges and then perform
MapReduce jobs on them, but I am not familiar with the process...
I have created 3 classes: FbGraphInputFormat, FbGraphOutputFormat,
FbGraphState
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import org.apache.giraph.edge.Edge;
import org.apache.giraph.edge.EdgeFactory;
import org.apache.giraph.graph.Vertex;
import org.apache.giraph.io.formats.TextVertexInputFormat;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
public class FbGraphInputFormat extends TextVertexInputFormat<Text,
FbGraphState, DoubleWritable> {
@Override
public TextVertexReader createVertexReader(InputSplit is,
TaskAttemptContext tac) throws IOException {
return new FbGraphReader();
}
protected class FbGraphReader extends TextVertexReader {
@Override
public boolean nextVertex() throws IOException,
InterruptedException {
return getRecordReader().nextKeyValue();
}
@Override
public Vertex<Text, FbGraphState, DoubleWritable>
getCurrentVertex() throws IOException, InterruptedException {
String line = getRecordReader().getCurrentValue().toString();
String[] tokens = line.trim().split("#");
if (tokens.length < 2) {
throw new IllegalArgumentException("Invalid line: (" +
line + ")");
}
FbGraphState state = new FbGraphState();
Text id = new Text(tokens[0]);
state.setValue(id.toString());
state.setNodeWeight(1.0);
Map<Text, DoubleWritable> edgeMap = new HashMap<>();
ArrayList<Edge<Text, DoubleWritable>> edgesList = new
ArrayList<>();
String[] edges = (tokens.length > 2) ? tokens[1].split(",")
: new String[0];
for (int i = 0; i < edges.length; i++) {
double weight = 1.0;
Text edgeKey = new Text(edges[i]);
edgeMap.put(edgeKey, new DoubleWritable(weight));
// edgesList.add(EdgeFactory.create(new
// LongWritable(edgeKey),new LongWritable(weight)));
}
for (Map.Entry<Text, DoubleWritable> entry
: edgeMap.entrySet()) {
edgesList.add(EdgeFactory.create(entry.getKey(),
entry.getValue()));
}
Vertex<Text, FbGraphState, DoubleWritable> vertex =
this.getConf().createVertex();
vertex.initialize(id, state, edgesList);
return vertex;
}
}
--------------------------------------------------
import java.io.IOException;
import org.apache.giraph.edge.Edge;
import org.apache.giraph.graph.Vertex;
import org.apache.giraph.io.formats.TextVertexOutputFormat;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
/**
 * Giraph vertex output format: writes each vertex as
 * {@code id <TAB> value <TAB> weight <TAB> target:weight,target:weight,...}.
 */
public class FbGraphOutputFormat extends
        TextVertexOutputFormat<Text, FbGraphState, DoubleWritable> {

    @Override
    public TextVertexWriter createVertexWriter(TaskAttemptContext tac)
            throws IOException, InterruptedException {
        return new FbGraphWriter();
    }

    // BUG FIX: was "private static class" — TextVertexWriter is a non-static
    // inner class of TextVertexOutputFormat, so a static nested subclass
    // cannot compile; the writer must itself be an inner class.
    protected class FbGraphWriter extends TextVertexWriter {

        /**
         * Serializes one vertex to a tab-separated line; the edge section
         * lists comma-separated {@code target:weight} pairs.
         */
        @Override
        public void writeVertex(Vertex<Text, FbGraphState,
                DoubleWritable> vertex) throws IOException, InterruptedException {
            StringBuilder b = new StringBuilder();
            b.append(vertex.getValue().getValue());
            b.append("\t");
            b.append(vertex.getValue().getNodeWeight());
            b.append("\t");
            boolean hasEdges = false;
            for (Edge<Text, DoubleWritable> e : vertex.getEdges()) {
                b.append(e.getTargetVertexId());
                b.append(":");
                b.append(e.getValue());
                b.append(",");
                hasEdges = true;
            }
            // BUG FIX: only strip the trailing comma when edges were written;
            // the old unconditional setLength() chopped off the tab separator
            // for vertices that have no edges at all.
            if (hasEdges) {
                b.setLength(b.length() - 1);
            }
            getRecordWriter().write(vertex.getId(), new Text(b.toString()));
        }
    }
}
-------------------------------------------------
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
/**
 * Per-vertex state for the Facebook graph: the vertex id stored as a
 * string plus a double node weight. Implements Hadoop's {@link Writable}
 * so Giraph can serialize it between supersteps.
 */
public class FbGraphState implements Writable {

    private String value;
    private double nodeWeight;

    /** Writable implementations require a public no-arg constructor. */
    public FbGraphState() {
    }

    @Override
    public void write(DataOutput d) throws IOException {
        WritableUtils.writeString(d, value);
        d.writeDouble(nodeWeight);
    }

    @Override
    public void readFields(DataInput di) throws IOException {
        value = WritableUtils.readString(di);
        // BUG FIX: write() emits a double, so readFields() must read a
        // double back. The old readLong() reinterpreted the 8 raw bytes as
        // a long, producing garbage weights after deserialization.
        nodeWeight = di.readDouble();
    }

    public void setValue(String value) {
        this.value = value;
    }

    public void setNodeWeight(double nodeWeight) {
        this.nodeWeight = nodeWeight;
    }

    public String getValue() {
        return value;
    }

    public double getNodeWeight() {
        return nodeWeight;
    }
}
Am I correct so far? How do I run this? I mean, from NetBeans, what other
class should I create in order to input the txt file with the data and
see the results? And if I accomplish that, how do I perform a MapReduce
job on the data? Are they saved somewhere in this form so that I can
retrieve them later? Or do I read them from the txt file every time,
create vertices on the fly, and perform jobs in the same class?
Is there any example of graph data with properties as input in Giraph?
The shortest-paths example is not a fit for me; I don't have the same
data input format.
Thanks, Anu