|
1 | 1 | from itertools import combinations, chain |
2 | 2 | from collections import Counter |
3 | 3 | from joblib import Parallel, delayed |
4 | | -from networkx.algorithms.cluster import _triangles_and_degree_iter |
5 | 4 | import nx_parallel as nxp |
| 5 | +import networkx as nx |
| 6 | +from networkx.algorithms.cluster import ( |
| 7 | + _directed_weighted_triangles_and_degree_iter, |
| 8 | + _directed_triangles_and_degree_iter, |
| 9 | + _weighted_triangles_and_degree_iter, |
| 10 | + _triangles_and_degree_iter, |
| 11 | +) |
6 | 12 |
|
# Public API of this module: the networkx cluster algorithms parallelized here.
__all__ = [
    "square_clustering",
    "triangles",
    "clustering",
    "average_clustering",
]
11 | 19 |
|
12 | 20 |
|
@@ -132,3 +140,118 @@ def _compute_triangles_chunk(node_iter_chunk, later_nbrs): |
132 | 140 | for result in results: |
133 | 141 | triangle_counts.update(result) |
134 | 142 | return triangle_counts |
| 143 | + |
| 144 | + |
@nxp._configure_if_nx_active()
def clustering(G, nodes=None, weight=None, get_chunks="chunks"):
    """The nodes are chunked into `node_chunks` and then the clustering
    coefficient for all `node_chunks` is computed in parallel over `n_jobs`
    number of CPU cores.

    networkx.clustering: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.cluster.clustering.html

    Parameters
    ----------
    get_chunks : str, function (default = "chunks")
        A function that takes in a list of all the nodes (or nbunch) as input and
        returns an iterable `node_chunks`. The default chunking is done by slicing the
        `nodes` into `n_jobs` number of chunks.
    """

    def _chunk_clustering(node_chunk):
        # Pick the triangle/degree iterator matching the graph kind and
        # weighting, then turn each node's counts into its coefficient.
        if G.is_directed():
            if weight is None:
                it = _directed_triangles_and_degree_iter(G, node_chunk)
            else:
                it = _directed_weighted_triangles_and_degree_iter(
                    G, node_chunk, weight
                )
            return {
                v: t / ((dt * (dt - 1) - 2 * db) * 2) if t != 0 else 0
                for v, dt, db, t in it
            }
        # The formula 2*T/(d*(d-1)) from docs is t/(d*(d-1)) here b/c t==2*T
        if weight is None:
            it = _triangles_and_degree_iter(G, node_chunk)
            return {v: t / (d * (d - 1)) if t != 0 else 0 for v, d, t, _ in it}
        it = _weighted_triangles_and_degree_iter(G, node_chunk, weight)
        return {v: t / (d * (d - 1)) if t != 0 else 0 for v, d, t in it}

    if hasattr(G, "graph_object"):
        G = G.graph_object

    n_jobs = nxp.get_n_jobs()

    node_list = list(G.nbunch_iter(nodes))

    node_chunks = (
        nxp.chunks(node_list, n_jobs)
        if get_chunks == "chunks"
        else get_chunks(node_list)
    )

    clusterc = {}
    for partial in Parallel()(
        delayed(_chunk_clustering)(chunk) for chunk in node_chunks
    ):
        clusterc.update(partial)

    # Mirror networkx: requesting a single existing node yields a scalar.
    if nodes in G:
        return clusterc[nodes]
    return clusterc
| 210 | + |
| 211 | + |
@nxp._configure_if_nx_active()
def average_clustering(
    G, nodes=None, weight=None, count_zeros=True, get_chunks="chunks"
):
    """The nodes are chunked into `node_chunks` and then the average clustering
    coefficient for all `node_chunks` is computed in parallel over `n_jobs`
    number of CPU cores.

    networkx.average_clustering: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.cluster.average_clustering.html

    Parameters
    ----------
    get_chunks : str, function (default = "chunks")
        A function that takes in a list of all the nodes (or nbunch) as input and
        returns an iterable `node_chunks`. The default chunking is done by slicing the
        `nodes` into `n_jobs` number of chunks.
    """

    def _chunk_coefficients(node_chunk):
        # Per-chunk clustering coefficients; nx dispatches this call, which
        # may itself resolve to the parallel clustering implementation.
        return nx.clustering(G, node_chunk, weight=weight)

    if hasattr(G, "graph_object"):
        G = G.graph_object

    n_jobs = nxp.get_n_jobs()

    node_list = list(G) if nodes is None else nodes

    if get_chunks == "chunks":
        node_chunks = nxp.chunks(node_list, n_jobs)
    else:
        node_chunks = get_chunks(node_list)

    coefficients = {}
    for partial in Parallel()(
        delayed(_chunk_coefficients)(chunk) for chunk in node_chunks
    ):
        coefficients.update(partial)

    values = coefficients.values()
    if not count_zeros:
        # Drop exact-zero coefficients before averaging, as networkx does.
        values = [v for v in values if abs(v) > 0]

    return sum(values) / len(values)
0 commit comments