Commit 885080f1 authored by dirk.wintergruen
Browse files

The union method had a bug if more than one edge existed between two nodes.

parent 209a6b0c
...@@ -148,23 +148,29 @@ def union(g_1, g_2, v_attr_name,e_attr_name = None, typ_field = None): ...@@ -148,23 +148,29 @@ def union(g_1, g_2, v_attr_name,e_attr_name = None, typ_field = None):
v_attr_2 = g_2.vs[v_attr_name] v_attr_2 = g_2.vs[v_attr_name]
#we are now constructing the union graph.
#first we handle the edges.
# collect all attributes on the two graphs
attrs = [x for x in g_1.es.attributes() if x != "weight"] #TODO have to deal with weights attrs = [x for x in g_1.es.attributes() if x != "weight"] #TODO have to deal with weights
attrs += [x for x in g_2.es.attributes() if x != "weight"] attrs += [x for x in g_2.es.attributes() if x != "weight"]
attrs = list(set(attrs)) #make unique attrs = list(set(attrs)) #make unique
#first we create a list of all edges, identified by the attribute used as unique identifier
edge_list_by_attribute_1 = get_edgelist(g_1,attrs,v_attr_1) edge_list_by_attribute_1 = get_edgelist(g_1,attrs,v_attr_1)
edge_list_by_attribute_2 = get_edgelist(g_2,attrs,v_attr_2) edge_list_by_attribute_2 = get_edgelist(g_2,attrs,v_attr_2)
edge_list_by_attribute_merged = edge_list_by_attribute_1.union(edge_list_by_attribute_2) edge_list_by_attribute_merged = edge_list_by_attribute_1.union(edge_list_by_attribute_2)
#generate a list of all vertices - identified by the attribute
v_attr_merged = sorted(list(set(g_2.vs[v_attr_name]).union(set(g_1.vs[v_attr_name])))) v_attr_merged = sorted(list(set(g_2.vs[v_attr_name]).union(set(g_1.vs[v_attr_name]))))
#now we enumerate the attributes
attribute_to_ind = {v_attr_merged:i for i, v_attr_merged in enumerate(v_attr_merged)} attribute_to_ind = {v_attr_merged:i for i, v_attr_merged in enumerate(v_attr_merged)}
#now we create a list of all unique edges; here we have at most one edge between vertices.
mergedEdges = get_merged_edge_list(edge_list_by_attribute_merged) mergedEdges = get_merged_edge_list(edge_list_by_attribute_merged)
mergedEdgesList = [(i,j) for i, j in mergedEdges.keys()] mergedEdgesList = [(i,j) for i, j in mergedEdges.keys()]
edge_list_merged = [ (attribute_to_ind[i], attribute_to_ind[j]) for i, j in mergedEdgesList] edge_list_merged = [ (attribute_to_ind[i], attribute_to_ind[j]) for i, j in mergedEdgesList]
#logger.debug("dict g1 %s:" % g_1.__dict__)
graph_merged = g_1.__class__(edge_list_merged,directed=True) graph_merged = g_1.__class__(edge_list_merged,directed=True)
## add additiona attributes ## add additiona attributes
for k,v in g_1.__dict__.items(): for k,v in g_1.__dict__.items():
...@@ -176,6 +182,8 @@ def union(g_1, g_2, v_attr_name,e_attr_name = None, typ_field = None): ...@@ -176,6 +182,8 @@ def union(g_1, g_2, v_attr_name,e_attr_name = None, typ_field = None):
#logger.debug("dict graph_merged %s:" % graph_merged.__dict__) #logger.debug("dict graph_merged %s:" % graph_merged.__dict__)
add_edges = defaultdict(defaultdict) add_edges = defaultdict(defaultdict)
logger.debug("create add_edges dict") logger.debug("create add_edges dict")
# now we add attributes to each edge; if there is more than one set of attributes, we add an edge for each new set.
for a in tqdm(attrs): for a in tqdm(attrs):
for (e,vs),edge in zip(mergedEdges.items(),graph_merged.es): for (e,vs),edge in zip(mergedEdges.items(),graph_merged.es):
cnt = 0 cnt = 0
...@@ -211,10 +219,14 @@ def union(g_1, g_2, v_attr_name,e_attr_name = None, typ_field = None): ...@@ -211,10 +219,14 @@ def union(g_1, g_2, v_attr_name,e_attr_name = None, typ_field = None):
#now the attributes #now the attributes
for d,attr_list in attributes.items(): for d,attr_list in attributes.items():
graph_merged.es[d].extend(attr_list) graph_merged_tmp = graph_merged.es[d][0:-len(attr_list)] #we had set before the first lot of edges now we have to add the new values for the new edges
graph_merged_tmp.extend(attr_list)
graph_merged.es[d]= graph_merged_tmp
if len(graph_merged.vs) == 0: if len(graph_merged.vs) == 0:
graph_merged.add_vertices(v_attr_merged) graph_merged.add_vertices(v_attr_merged)
graph_merged.vs[v_attr_name] = v_attr_merged graph_merged.vs[v_attr_name] = v_attr_merged
# Include attributes that are in both g_1 and/or g_2. If different attribute values are present in a vertex, # Include attributes that are in both g_1 and/or g_2. If different attribute values are present in a vertex,
...@@ -243,8 +255,9 @@ def union(g_1, g_2, v_attr_name,e_attr_name = None, typ_field = None): ...@@ -243,8 +255,9 @@ def union(g_1, g_2, v_attr_name,e_attr_name = None, typ_field = None):
attrs_set = defaultdict(set) attrs_set = defaultdict(set)
dels = [] dels = []
logger.debug("now we attributes to edges")
if e_attr_name is not None: if e_attr_name is not None:
logger.debug("now we attributes to edges")
for e in graph_merged.es: for e in graph_merged.es:
attr_test =e[e_attr_name] attr_test =e[e_attr_name]
...@@ -618,7 +631,7 @@ def projectBipartiteProcess(data): ...@@ -618,7 +631,7 @@ def projectBipartiteProcess(data):
#for s_n,e_n, attrs in tqdm(new_edges): #for s_n,e_n, attrs in tqdm(new_edges):
# ng.add_edge(s_n, e_n, **attrs) # ng.add_edge(s_n, e_n, **attrs)
#print(new_nodes)
return new_edges,new_nodes, time() - start_time return new_edges,new_nodes, time() - start_time
def projectBipartiteProcess2(data): def projectBipartiteProcess2(data):
...@@ -771,24 +784,7 @@ def projectBipartiteParallel(og,types,inverse=False,safe=True, ...@@ -771,24 +784,7 @@ def projectBipartiteParallel(og,types,inverse=False,safe=True,
with Pool(len(data)) as p: with Pool(len(data)) as p:
graphs = p.map(projectBipartiteProcess,data) graphs = p.map(projectBipartiteProcess,data)
ret_gr = graphs[0]
#if save_intermediate:
# path = "/tmp/intermediate/%s/"%uuid.uuid4().urn
# os.makedirs(path)
# for i in range(0,len(graphs)):
# graphs[i].write(path+"%s.graphml"%i)
#for gr in graphs[1:]:
#
# try:
# ret_gr = union(ret_gr, gr, vattr_name,eattr_name)
# except AssertionError:
# print(ret_gr.vs[vattr_name])
# print(gr.vs[vattr_name])
#return ret_gr
all_edges = [] all_edges = []
all_nodes = {} all_nodes = {}
for edges,nodes,time in graphs: for edges,nodes,time in graphs:
...@@ -797,8 +793,6 @@ def projectBipartiteParallel(og,types,inverse=False,safe=True, ...@@ -797,8 +793,6 @@ def projectBipartiteParallel(og,types,inverse=False,safe=True,
print("Time:%s" % time) print("Time:%s" % time)
#create nodes #create nodes
if only_edges_nodes or extended_graph: if only_edges_nodes or extended_graph:
if extended_graph is not None: if extended_graph is not None:
...@@ -810,10 +804,10 @@ def projectBipartiteParallel(og,types,inverse=False,safe=True, ...@@ -810,10 +804,10 @@ def projectBipartiteParallel(og,types,inverse=False,safe=True,
graph = igraph.Graph() graph = igraph.Graph()
print("nodes") logging.info("nodes")
for n,attr in tqdm(all_nodes.items()): for n,attr in tqdm(all_nodes.items()):
graph.add_vertex(**attr) graph.add_vertex(**attr)
print("edges") logging.info("edges")
for s,e,edge_attr in tqdm(all_edges): for s,e,edge_attr in tqdm(all_edges):
graph.add_edge(s,e,**edge_attr) graph.add_edge(s,e,**edge_attr)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment