Skip to content

Commit 25e1ac7

Browse files
parallel implementation of clustering and average_clustering (#130)
* parallel implementation of clustering * benchmarks added * add average_clustering * Add heatmaps * improved average_clustering implementation * minor edit in nodes_to_chunk * style fix * move heatmaps to old_heatmaps folder * style fix * new timing script heatmaps
1 parent e6b1503 commit 25e1ac7

File tree

8 files changed

+148
-3
lines changed

8 files changed

+148
-3
lines changed

_nx_parallel/__init__.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,13 @@ def get_info():
9090
'get_chunks : str, function (default = "chunks")': "A function that takes in `list(iter_func(nbunch, 2))` as input and returns an iterable `pairs_chunks`, here `iter_func` is `permutations` in case of directed graphs and `combinations` in case of undirected graphs. The default is to create chunks by slicing the list into `n_jobs` chunks, such that size of each chunk is at most 10, and at least 1."
9191
},
9292
},
93+
"average_clustering": {
94+
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/cluster.py#L213",
95+
"additional_docs": "The nodes are chunked into `node_chunks` and then the average clustering coefficient for all `node_chunks` is computed in parallel over `n_jobs` number of CPU cores.",
96+
"additional_parameters": {
97+
'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes (or nbunch) as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n_jobs` number of chunks."
98+
},
99+
},
93100
"average_neighbor_degree": {
94101
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/assortativity/neighbor_degree.py#L10",
95102
"additional_docs": "The nodes are chunked into `node_chunks` and then the average degree of the neighborhood of each node for all `node_chunks` is computed in parallel over `n_jobs` number of CPU cores.",
@@ -111,6 +118,13 @@ def get_info():
111118
'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n_jobs` number of chunks."
112119
},
113120
},
121+
"clustering": {
122+
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/cluster.py#L146",
123+
"additional_docs": "The nodes are chunked into `node_chunks` and then the clustering coefficient for all `node_chunks` is computed in parallel over `n_jobs` number of CPU cores.",
124+
"additional_parameters": {
125+
'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes (or nbunch) as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n_jobs` number of chunks."
126+
},
127+
},
114128
"cn_soundarajan_hopcroft": {
115129
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/link_prediction.py#L200",
116130
"additional_docs": "The edge pairs are chunked into `pairs_chunks` and then the number of common neighbors for all `pairs_chunks` is computed in parallel, using community information, over `n_jobs` number of CPU cores.",
@@ -210,7 +224,7 @@ def get_info():
210224
},
211225
},
212226
"square_clustering": {
213-
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/cluster.py#L14",
227+
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/cluster.py#L22",
214228
"additional_docs": "The nodes are chunked into `node_chunks` and then the square clustering coefficient for all `node_chunks` are computed in parallel over `n_jobs` number of CPU cores.",
215229
"additional_parameters": {
216230
'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes (or nbunch) as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n_jobs` number of chunks."
@@ -224,7 +238,7 @@ def get_info():
224238
},
225239
},
226240
"triangles": {
227-
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/cluster.py#L76",
241+
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/cluster.py#L84",
228242
"additional_docs": "The nodes are chunked into `node_chunks` and for all `node_chunks` the number of triangles that include a node as one vertex is computed in parallel over `n_jobs` number of CPU cores.",
229243
"additional_parameters": {
230244
'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes (or nbunch) as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n_jobs` number of chunks."

benchmarks/benchmarks/bench_cluster.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,9 @@ def time_square_clustering(self, backend, num_nodes, edge_prob):
2020

2121
def time_triangles(self, backend, num_nodes, edge_prob):
2222
_ = nx.triangles(self.G, backend=backend)
23+
24+
def time_clustering(self, backend, num_nodes, edge_prob):
    """ASV timing benchmark for nx.clustering on the prepared graph.

    `self.G` is the random graph built by the benchmark setup for the
    given (num_nodes, edge_prob) parameters; only wall time is measured,
    so the result is discarded.
    """
    nx.clustering(self.G, backend=backend)
26+
27+
def time_average_clustering(self, backend, num_nodes, edge_prob):
28+
_ = nx.average_clustering(self.G, backend=backend)

nx_parallel/algorithms/cluster.py

Lines changed: 124 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,20 @@
11
from itertools import combinations, chain
22
from collections import Counter
33
from joblib import Parallel, delayed
4-
from networkx.algorithms.cluster import _triangles_and_degree_iter
54
import nx_parallel as nxp
5+
import networkx as nx
6+
from networkx.algorithms.cluster import (
7+
_directed_weighted_triangles_and_degree_iter,
8+
_directed_triangles_and_degree_iter,
9+
_weighted_triangles_and_degree_iter,
10+
_triangles_and_degree_iter,
11+
)
612

713
__all__ = [
814
"square_clustering",
915
"triangles",
16+
"clustering",
17+
"average_clustering",
1018
]
1119

1220

@@ -132,3 +140,118 @@ def _compute_triangles_chunk(node_iter_chunk, later_nbrs):
132140
for result in results:
133141
triangle_counts.update(result)
134142
return triangle_counts
143+
144+
145+
@nxp._configure_if_nx_active()
def clustering(G, nodes=None, weight=None, get_chunks="chunks"):
    """The nodes are chunked into `node_chunks` and then the clustering
    coefficient for all `node_chunks` is computed in parallel over `n_jobs`
    number of CPU cores.

    networkx.clustering: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.cluster.clustering.html

    Parameters
    ----------
    get_chunks : str, function (default = "chunks")
        A function that takes in a list of all the nodes (or nbunch) as input and
        returns an iterable `node_chunks`. The default chunking is done by slicing the
        `nodes` into `n_jobs` number of chunks.
    """
    if hasattr(G, "graph_object"):
        # Unwrap the nx-parallel graph wrapper to the underlying networkx graph.
        G = G.graph_object

    directed = G.is_directed()
    weighted = weight is not None

    def _chunk_clustering(chunk):
        # Pick the networkx triangle/degree iterator matching the graph kind
        # and apply the corresponding clustering-coefficient formula.
        if directed:
            td_iter = (
                _directed_weighted_triangles_and_degree_iter(G, chunk, weight)
                if weighted
                else _directed_triangles_and_degree_iter(G, chunk)
            )
            return {
                v: t / ((dt * (dt - 1) - 2 * db) * 2) if t != 0 else 0
                for v, dt, db, t in td_iter
            }
        # Undirected: the documented formula 2*T/(d*(d-1)) becomes t/(d*(d-1))
        # here because the iterators report t == 2*T.
        if weighted:
            td_iter = _weighted_triangles_and_degree_iter(G, chunk, weight)
            return {v: t / (d * (d - 1)) if t != 0 else 0 for v, d, t in td_iter}
        td_iter = _triangles_and_degree_iter(G, chunk)
        return {v: t / (d * (d - 1)) if t != 0 else 0 for v, d, t, _ in td_iter}

    n_jobs = nxp.get_n_jobs()
    node_list = list(G.nbunch_iter(nodes))

    node_chunks = (
        nxp.chunks(node_list, n_jobs)
        if get_chunks == "chunks"
        else get_chunks(node_list)
    )

    clusterc = {}
    for partial in Parallel()(
        delayed(_chunk_clustering)(chunk) for chunk in node_chunks
    ):
        clusterc.update(partial)

    # Mirror networkx: a single node passed as `nodes` yields a scalar.
    if nodes in G:
        return clusterc[nodes]
    return clusterc
210+
211+
212+
@nxp._configure_if_nx_active()
def average_clustering(
    G, nodes=None, weight=None, count_zeros=True, get_chunks="chunks"
):
    """The nodes are chunked into `node_chunks` and then the average clustering
    coefficient for all `node_chunks` is computed in parallel over `n_jobs`
    number of CPU cores.

    networkx.average_clustering: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.cluster.average_clustering.html

    Parameters
    ----------
    get_chunks : str, function (default = "chunks")
        A function that takes in a list of all the nodes (or nbunch) as input and
        returns an iterable `node_chunks`. The default chunking is done by slicing the
        `nodes` into `n_jobs` number of chunks.
    """
    if hasattr(G, "graph_object"):
        # Unwrap the nx-parallel graph wrapper to the underlying networkx graph.
        G = G.graph_object

    def _chunk_coefficients(chunk):
        # Per-chunk coefficients; dispatching through networkx lets the
        # configured backend handle each chunk.
        return nx.clustering(G, chunk, weight=weight)

    n_jobs = nxp.get_n_jobs()
    node_list = list(G) if nodes is None else nodes

    node_chunks = (
        nxp.chunks(node_list, n_jobs)
        if get_chunks == "chunks"
        else get_chunks(node_list)
    )

    coefficients = {}
    for partial in Parallel()(
        delayed(_chunk_coefficients)(chunk) for chunk in node_chunks
    ):
        coefficients.update(partial)

    values = coefficients.values()
    if not count_zeros:
        values = [v for v in values if abs(v) > 0]
    # Raises ZeroDivisionError when no coefficients remain — same as networkx.
    return sum(values) / len(values)

nx_parallel/interface.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444
# Clustering
4545
"square_clustering",
4646
"triangles",
47+
"clustering",
48+
"average_clustering",
4749
# Shortest Paths : unweighted graphs
4850
"all_pairs_shortest_path",
4951
"all_pairs_shortest_path_length",
76.3 KB
Loading
75.4 KB
Loading
34.9 KB
Loading
35 KB
Loading

0 commit comments

Comments
 (0)