[igraph] Inconsistency problems with large 19 million edges graph

Stefano Scerra Sun, 05 Apr 2015 10:16:42 -0700

Hello,
I'm having a really strange problem with the library and I hope to get some
advice.
After loading a large graph with 19 million edges, the library returns an
inconsistent edge list.
More precisely, nonexistent edges appear in the graph's edge sequence
attribute.


Link to dataset:
https://drive.google.com/open?id=0B_wZDWWn4C1RdDhIbXlDRWFLd0E&authuser=0

def create_graph(file_graph):
    """Read a directed graph from the file at *file_graph*.

    The file is opened in text mode and handed to
    ``igraph.Graph.Read_Ncol`` with ``weights=False`` and ``directed=True``;
    the resulting graph object is returned.
    """
    with open(file_graph) as ncol_file:
        return igraph.Graph.Read_Ncol(ncol_file, weights=False, directed=True)

def create_graph2(file_graph):
    """Build a directed graph from a space-separated edge-list file.

    Each non-empty row contributes one edge made of its first two fields,
    converted to ``int``.  Progress (the number of edges read so far) is
    printed every 100000 edges.

    NOTE(review): per the python-igraph documentation, ``Graph.TupleList``
    treats the values in the edge list as vertex *names* and assigns its own
    vertex indices; the values read here should therefore not be assumed to
    equal the indices of the returned graph -- confirm against the igraph
    version in use.
    """
    edge_list = []
    with open(file_graph, "r") as handle:
        for fields in csv.reader(handle, delimiter=" "):
            if not fields:
                # Blank line in the input: nothing to record.
                continue
            edge_list.append([int(fields[0]), int(fields[1])])
            # Only non-empty rows are appended, so the list length is the
            # running edge count.
            read_so_far = len(edge_list)
            if read_so_far % 100000 == 0:
                print(read_so_far)
    return igraph.Graph.TupleList(edges=edge_list, directed=True)

def generate_weighted_graph(input, output):
    """Write every edge of the graph in *input* to *output* with a weight.

    The graph is loaded with ``create_graph2``.  For each edge ``i -> j`` the
    weight is ``n / (|out(i)| - 1 + |in(j)| - n)``, where ``out(i)`` is the
    set of out-neighbours of ``i``, ``in(j)`` the set of in-neighbours of
    ``j`` and ``n`` the size of their intersection.  Each output row is
    ``source target weight`` separated by single spaces.

    Args:
        input: Path of the space-separated edge-list file to read.
        output: Path of the weighted edge-list file to write.

    Notes:
        * ``Graph.TupleList`` (used by ``create_graph2``) renumbers vertices
          and keeps the values from the file in the ``"name"`` vertex
          attribute.  ``e.tuple`` yields the internal indices, so they are
          translated back to the original ids before being written;
          otherwise the output appears to contain edges that do not exist
          in the input.
        * An edge whose denominator is zero (a self-loop on a vertex with no
          other relevant neighbours) gets weight ``0.0`` instead of raising
          ``ZeroDivisionError``.
    """
    g = create_graph2(input)
    print("ecount:", g.ecount(), "vcount:", g.vcount())

    # Map internal vertex indices back to the ids found in the input file.
    # Fall back to the indices themselves if the graph carries no names.
    if "name" in g.vertex_attributes():
        names = g.vs["name"]
    else:
        names = range(g.vcount())

    # newline="" lets the csv module control line endings; without it every
    # row is followed by an empty line on Windows.
    with open(output, "w", newline="") as output_file:
        writer = csv.writer(output_file, delimiter=" ")
        for k, e in enumerate(g.es):
            i, j = e.tuple
            j_in = set(g.neighbors(j, mode="IN"))
            i_out = set(g.neighbors(i, mode="OUT"))
            n = len(j_in & i_out)
            denominator = len(i_out) - 1 + len(j_in) - n
            weight = n / denominator if denominator else 0.0
            source, target = names[i], names[j]
            writer.writerow([source, target, weight])
            # Progress report on every 1000th edge.
            if k % 1000 == 0:
                print(k, source, target, weight)

# Script entry point: read the edge list and write its weighted counterpart.
if __name__ == "__main__":
    generate_weighted_graph(
        input="C:/datasets/ff",
        output="C:/datasets/ff_weighted",
    )

_______________________________________________
igraph-help mailing list
[email protected]
https://lists.nongnu.org/mailman/listinfo/igraph-help

[igraph] Inconsistency problems with large 19 million edges graph

Reply via email to