Skip to content

Commit 25e1ac7

Browse files
parallel implementation of clustering and average_clustering (#130)
* parallel implementation of clustering * benchmarks added * add average_clustering * Add heatmaps * improved average_clustering implementation * minor edit in nodes_to_chunk * style fix * move heatmaps to old_heatmaps folder * style fix * new timing script heatmaps
1 parent e6b1503 commit 25e1ac7

File tree

8 files changed

+148
-3
lines changed

8 files changed

+148
-3
lines changed

_nx_parallel/__init__.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,13 @@ def get_info():
9090
'get_chunks : str, function (default = "chunks")': "A function that takes in `list(iter_func(nbunch, 2))` as input and returns an iterable `pairs_chunks`, here `iter_func` is `permutations` in case of directed graphs and `combinations` in case of undirected graphs. The default is to create chunks by slicing the list into `n_jobs` chunks, such that size of each chunk is at most 10, and at least 1."
9191
},
9292
},
93+
"average_clustering": {
94+
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/cluster.py#L213",
95+
"additional_docs": "The nodes are chunked into `node_chunks` and then the average clustering coefficient for all `node_chunks` is computed in parallel over `n_jobs` number of CPU cores.",
96+
"additional_parameters": {
97+
'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes (or nbunch) as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n_jobs` number of chunks."
98+
},
99+
},
93100
"average_neighbor_degree": {
94101
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/assortativity/neighbor_degree.py#L10",
95102
"additional_docs": "The nodes are chunked into `node_chunks` and then the average degree of the neighborhood of each node for all `node_chunks` is computed in parallel over `n_jobs` number of CPU cores.",
@@ -111,6 +118,13 @@ def get_info():
111118
'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n_jobs` number of chunks."
112119
},
113120
},
121+
"clustering": {
122+
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/cluster.py#L146",
123+
"additional_docs": "The nodes are chunked into `node_chunks` and then the clustering coefficient for all `node_chunks` is computed in parallel over `n_jobs` number of CPU cores.",
124+
"additional_parameters": {
125+
'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes (or nbunch) as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n_jobs` number of chunks."
126+
},
127+
},
114128
"cn_soundarajan_hopcroft": {
115129
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/link_prediction.py#L200",
116130
"additional_docs": "The edge pairs are chunked into `pairs_chunks` and then the number of common neighbors for all `pairs_chunks` is computed in parallel, using community information, over `n_jobs` number of CPU cores.",
@@ -210,7 +224,7 @@ def get_info():
210224
},
211225
},
212226
"square_clustering": {
213-
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/cluster.py#L14",
227+
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/cluster.py#L22",
214228
"additional_docs": "The nodes are chunked into `node_chunks` and then the square clustering coefficient for all `node_chunks` are computed in parallel over `n_jobs` number of CPU cores.",
215229
"additional_parameters": {
216230
'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes (or nbunch) as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n_jobs` number of chunks."
@@ -224,7 +238,7 @@ def get_info():
224238
},
225239
},
226240
"triangles": {
227-
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/cluster.py#L76",
241+
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/cluster.py#L84",
228242
"additional_docs": "The nodes are chunked into `node_chunks` and for all `node_chunks` the number of triangles that include a node as one vertex is computed in parallel over `n_jobs` number of CPU cores.",
229243
"additional_parameters": {
230244
'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes (or nbunch) as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n_jobs` number of chunks."

benchmarks/benchmarks/bench_cluster.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,9 @@ def time_square_clustering(self, backend, num_nodes, edge_prob):
2020

2121
def time_triangles(self, backend, num_nodes, edge_prob):
2222
_ = nx.triangles(self.G, backend=backend)
23+
24+
def time_clustering(self, backend, num_nodes, edge_prob):
    """ASV timing benchmark for nx.clustering on the prepared graph.

    `self.G` is the random graph built by the benchmark setup for the
    given (num_nodes, edge_prob) parameters; only wall time is measured,
    so the result is discarded.
    """
    nx.clustering(self.G, backend=backend)
26+
27+
def time_average_clustering(self, backend, num_nodes, edge_prob):
28+
_ = nx.average_clustering(self.G, backend=backend)

nx_parallel/algorithms/cluster.py

Lines changed: 124 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,20 @@
11
from itertools import combinations, chain
22
from collections import Counter
33
from joblib import Parallel, delayed
4-
from networkx.algorithms.cluster import _triangles_and_degree_iter
54
import nx_parallel as nxp
5+
import networkx as nx
6+
from networkx.algorithms.cluster import (
7+
_directed_weighted_triangles_and_degree_iter,
8+
_directed_triangles_and_degree_iter,
9+
_weighted_triangles_and_degree_iter,
10+
_triangles_and_degree_iter,
11+
)
612

713
__all__ = [
814
"square_clustering",
915
"triangles",
16+
"clustering",
17+
"average_clustering",
1018
]
1119

1220

@@ -132,3 +140,118 @@ def _compute_triangles_chunk(node_iter_chunk, later_nbrs):
132140
for result in results:
133141
triangle_counts.update(result)
134142
return triangle_counts
143+
144+
145+
@nxp._configure_if_nx_active()
def clustering(G, nodes=None, weight=None, get_chunks="chunks"):
    """The nodes are chunked into `node_chunks` and then the clustering
    coefficient for all `node_chunks` is computed in parallel over `n_jobs`
    number of CPU cores.

    networkx.clustering: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.cluster.clustering.html

    Parameters
    ----------
    get_chunks : str, function (default = "chunks")
        A function that takes in a list of all the nodes (or nbunch) as input and
        returns an iterable `node_chunks`. The default chunking is done by slicing the
        `nodes` into `n_jobs` number of chunks.
    """
    if hasattr(G, "graph_object"):
        # Unwrap the nx-parallel graph wrapper to the underlying networkx graph.
        G = G.graph_object

    directed = G.is_directed()
    weighted = weight is not None

    def _chunk_clustering(chunk):
        # Pick the networkx triangle/degree iterator matching the graph kind
        # and apply the corresponding clustering-coefficient formula.
        if directed:
            td_iter = (
                _directed_weighted_triangles_and_degree_iter(G, chunk, weight)
                if weighted
                else _directed_triangles_and_degree_iter(G, chunk)
            )
            return {
                v: t / ((dt * (dt - 1) - 2 * db) * 2) if t != 0 else 0
                for v, dt, db, t in td_iter
            }
        # Undirected: the documented formula 2*T/(d*(d-1)) becomes t/(d*(d-1))
        # here because the iterators report t == 2*T.
        if weighted:
            td_iter = _weighted_triangles_and_degree_iter(G, chunk, weight)
            return {v: t / (d * (d - 1)) if t != 0 else 0 for v, d, t in td_iter}
        td_iter = _triangles_and_degree_iter(G, chunk)
        return {v: t / (d * (d - 1)) if t != 0 else 0 for v, d, t, _ in td_iter}

    n_jobs = nxp.get_n_jobs()
    node_list = list(G.nbunch_iter(nodes))

    node_chunks = (
        nxp.chunks(node_list, n_jobs)
        if get_chunks == "chunks"
        else get_chunks(node_list)
    )

    clusterc = {}
    for partial in Parallel()(
        delayed(_chunk_clustering)(chunk) for chunk in node_chunks
    ):
        clusterc.update(partial)

    # Mirror networkx: a single node passed as `nodes` yields a scalar.
    if nodes in G:
        return clusterc[nodes]
    return clusterc
210+
211+
212+
@nxp._configure_if_nx_active()
def average_clustering(
    G, nodes=None, weight=None, count_zeros=True, get_chunks="chunks"
):
    """The nodes are chunked into `node_chunks` and then the average clustering
    coefficient for all `node_chunks` is computed in parallel over `n_jobs`
    number of CPU cores.

    networkx.average_clustering: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.cluster.average_clustering.html

    Parameters
    ----------
    get_chunks : str, function (default = "chunks")
        A function that takes in a list of all the nodes (or nbunch) as input and
        returns an iterable `node_chunks`. The default chunking is done by slicing the
        `nodes` into `n_jobs` number of chunks.
    """
    if hasattr(G, "graph_object"):
        # Unwrap the nx-parallel graph wrapper to the underlying networkx graph.
        G = G.graph_object

    def _chunk_coefficients(chunk):
        # Per-chunk coefficients; dispatching through networkx lets the
        # configured backend handle each chunk.
        return nx.clustering(G, chunk, weight=weight)

    n_jobs = nxp.get_n_jobs()
    node_list = list(G) if nodes is None else nodes

    node_chunks = (
        nxp.chunks(node_list, n_jobs)
        if get_chunks == "chunks"
        else get_chunks(node_list)
    )

    coefficients = {}
    for partial in Parallel()(
        delayed(_chunk_coefficients)(chunk) for chunk in node_chunks
    ):
        coefficients.update(partial)

    values = coefficients.values()
    if not count_zeros:
        values = [v for v in values if abs(v) > 0]
    # Raises ZeroDivisionError when no coefficients remain — same as networkx.
    return sum(values) / len(values)

nx_parallel/interface.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444
# Clustering
4545
"square_clustering",
4646
"triangles",
47+
"clustering",
48+
"average_clustering",
4749
# Shortest Paths : unweighted graphs
4850
"all_pairs_shortest_path",
4951
"all_pairs_shortest_path_length",
76.3 KB
Loading
75.4 KB
Loading
34.9 KB
Loading
35 KB
Loading

0 commit comments

Comments
 (0)