Hello, I'm having a really strange problem with the library and I'm hoping to get some advice. After loading a large graph with 19 million edges, the library returns an inconsistent edge list. More precisely, nonexistent edges appear in the graph's edge sequence attribute.
# Link to dataset: https://drive.google.com/open?id=0B_wZDWWn4C1RdDhIbXlDRWFLd0E&authuser=0


def create_graph(file_graph):
    """Load a directed, unweighted graph from an NCOL edge-list file.

    Args:
        file_graph: Path of the edge-list file to read.

    Returns:
        The graph built by ``igraph.Graph.Read_Ncol``.
    """
    with open(file_graph) as in_file:
        return igraph.Graph.Read_Ncol(in_file, weights=False, directed=True)


def create_graph2(file_graph):
    """Load a directed graph from a space-separated edge list of integer ids.

    Each non-empty row contributes one edge ``(int(row[0]), int(row[1]))``;
    empty rows are skipped. The number of rows read so far is printed every
    100000 rows as a progress indicator.

    Args:
        file_graph: Path of the edge-list file to read.

    Returns:
        The graph built by ``igraph.Graph.TupleList``. igraph assigns its own
        consecutive vertex indices; the ids found in the file are kept in the
        ``name`` vertex attribute (the ``TupleList`` default).
    """
    edges = []
    with open(file_graph, "r") as in_file:
        reader = csv.reader(in_file, delimiter=" ")
        for rows_read, row in enumerate(reader, start=1):
            if row:
                edges.append((int(row[0]), int(row[1])))
            if rows_read % 100000 == 0:
                print(rows_read)
    return igraph.Graph.TupleList(edges=edges, directed=True)


def _edge_weight(i_out, j_in):
    """Return the weight of edge i -> j from i's successors and j's predecessors.

    The weight is ``n / (len(i_out) - 1 + len(j_in) - n)`` where ``n`` is the
    number of vertices that are both a successor of i and a predecessor of j.
    """
    n = len(j_in & i_out)
    denominator = len(i_out) - 1 + len(j_in) - n
    if denominator == 0:
        # Since j is in i_out and i is in j_in, this only happens for a
        # self-loop on a vertex with no other successors or predecessors.
        # The ratio is undefined there; report 0.0 rather than crash.
        return 0.0
    return n / denominator


def generate_weighted_graph(input, output):
    """Compute a weight for every edge and write ``source target weight`` rows.

    Args:
        input: Path of the space-separated edge list to load.
        output: Path of the space-separated file to write.

    The endpoints written are the vertex ids from the input file, not
    igraph's internal vertex indices. ``Edge.tuple`` yields internal indices,
    which generally differ from the ids in the file; writing them directly
    makes the output look like it contains edges that do not exist.
    """
    g = create_graph2(input)
    print("ecount:", g.ecount(), "vcount:", g.vcount())

    # Fetch the index -> original-id mapping once instead of per edge.
    names = g.vs["name"]

    with open(output, "w") as output_file:
        writer = csv.writer(output_file, delimiter=" ")
        for k, e in enumerate(g.es):
            # i and j are internal indices: valid for g.neighbors(), but they
            # must be translated through `names` before being reported.
            i, j = e.tuple
            i_out = set(g.neighbors(i, mode="OUT"))
            j_in = set(g.neighbors(j, mode="IN"))
            weight = _edge_weight(i_out, j_in)

            source, target = names[i], names[j]
            writer.writerow([source, target, weight])
            if k % 1000 == 0:
                print(k, source, target, weight)


if __name__ == "__main__":
    generate_weighted_graph("C:/datasets/ff", "C:/datasets/ff_weighted")
_______________________________________________ igraph-help mailing list [email protected] https://lists.nongnu.org/mailman/listinfo/igraph-help
