Commit 22e017f4 authored by dirk.wintergruen
Browse files

merge

parent 4fe6fbb2
......@@ -50,7 +50,9 @@ class GraphCache:
return getattr(self.instance, name)
def add_edge_safe(gr,ns_vertex,nm_vertex,node_edge_properties):
""" safe adding of edge, makes sure taht only edges are added which are not existing. An edge is understood the same if it has the same properties.
This can be very slow.
"""
edges = gr.es.select(_source=ns_vertex.index,_target=nm_vertex.index)
for edg in edges:
shared_items = set(edg.attributes().items()).symmetric_difference(set(node_edge_properties.items()))
......@@ -536,7 +538,7 @@ def projectBipartiteProcess(data):
new_nodes = {}
new_edges = []
if safe:
print("safe mode can be very slow!")
logger.info("safe mode can be very slow!")
for i in tqdm(inc):
......@@ -715,24 +717,19 @@ def projectBipartiteProcess2(data):
def projectBipartiteParallel(og,types,inverse=False,safe=True,
worker = 5,
vattr_name="name",
save_intermediate=True,
eattr_name=None,
only_edges_nodes = False,
extended_graph=None):
"""
Creates a projection onto one of the types of projections. The name attribute of the vertices is used to identify nodes as unique.
:param og: graph
:param types: name of a boolean vertex attribute; if this is true then the node is part of the projection
:param inverse: instead of choosing the set of nodes where types is true, choose the inverse
:param safe:
:param worker:
:param vattr_name:
:param save_intermediate:
:param eattr_name:
:param only_edges_nodes:
:param extended_graph:
:param safe: default = True; if true, edges are added safely, making sure that only edges which do not already exist are added. Two edges are considered the same if they have the same properties.
This can be very slow.
:param worker: number of workers, defaults to 5
:param only_edges_nodes: defaults to False, all nodes without edges are deleted
:param extended_graph: (experimental) add an extended graph object; this is then used for the projection.
:return:
"""
......@@ -759,15 +756,19 @@ def projectBipartiteParallel(og,types,inverse=False,safe=True,
data = []
for i in tqdm(chunks(range(0,len(inc)),int(len(inc)/worker)+1)):
#print("ch",i)
data.append([i,nodes_list,p_l,safe]) #repeat i Zeilen, d.h. alle articles mit links to persons, nodes_list alle nodes with links p_l liste der article
if len(data) == 0:
raise NoDataError
if worker == 1:
graphs = [projectBipartiteProcess(data)]
else:
for i in tqdm(chunks(range(0,len(inc)),int(len(inc)/worker)+1)):
#print("ch",i)
data.append([i,nodes_list,p_l,safe]) #repeat i Zeilen, d.h. alle articles mit links to persons, nodes_list alle nodes with links p_l liste der article
with Pool(len(data)) as p:
graphs = p.map(projectBipartiteProcess,data)
if len(data) == 0:
raise NoDataError
with Pool(len(data)) as p:
graphs = p.map(projectBipartiteProcess,data)
ret_gr = graphs[0]
#if save_intermediate:
......@@ -818,22 +819,43 @@ def projectBipartiteParallel(og,types,inverse=False,safe=True,
return graph
def projectBipartiteParallel2(og,types,inverse=False,safe=True,worker = 5,vattr_name="name",save_intermediate=True,eattr_name=None):
print("start - bipartite")
def projectBipartiteParallel2(og,types,inverse=False,
safe=True,
worker = 5,
vattr_name="name",
save_intermediate=True,
eattr_name=None):
"""
Creates a projection onto one of the types of projections (new implementation, should be faster than projectBipartiteParallel).
:param og: graph
:param types: name of a boolean vertex attribute; if this is true then the node is part of the projection
:param inverse:
:param safe: default = True; if true, edges are added safely, making sure that only edges which do not already exist are added. Two edges are considered the same if they have the same properties.
This can be very slow.
:
:param worker: number of workers, defaults to 5
:param vattr_name: defaults to name, attribute which uniquely defines an edge
:param save_intermediate:
:param eattr_name:
:return:
"""
logger.info("start - bipartite")
assert og.is_bipartite() == True, "Graph is not pipartite!"
if inverse:
og.vs["NOT_%s" % types] = [not x for x in og.vs[types]]
types = "NOT_%s" % types
print("create_inc")
logger.info("create_inc")
inc, p_l, r_l = og.get_incidence(types)
# first find all nodes for the projection which have non zero entries in incidence matrix
nodes_list = []
igraph.write(og,"/tmp/ogr.picklez")
print("start creating batches")
igraph.write(og,"/tmp/ogr.picklez") #todo this has to become safer
logger.info("start creating batches")
#Zähle wie viele links in jeder Zeile
cnt_links = {}
for rs_cnt in tqdm(range(0,len(inc))): #gehe duch slle spalten (not types - e.g. if person - article and selected type is person than go through articles
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment