nx-cugraph: faster shortest_path #4739

Open · wants to merge 6 commits into base: branch-24.12
4 changes: 3 additions & 1 deletion benchmarks/nx-cugraph/pytest-based/bench_algos.py
@@ -16,6 +16,8 @@
import networkx as nx
import pandas as pd
import pytest
from collections.abc import Mapping

from cugraph import datasets
import nx_cugraph as nxcg

@@ -520,7 +522,7 @@ def bench_shortest_path(benchmark, graph_obj, backend_wrapper):
iterations=iterations,
warmup_rounds=warmup_rounds,
)
assert type(result) is dict
assert isinstance(result, Mapping) # dict in nx, but we duck-type


def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapper):
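
For context on the relaxed assertion above: plain NetworkX returns a `dict` of paths here, but a dispatched backend is only required to return something dict-like, and with this PR nx-cugraph returns a lazy mapping. A minimal sketch of the distinction, using only stock NetworkX (the graph is illustrative):

from collections.abc import Mapping

import networkx as nx

# Stock NetworkX returns a plain dict of target -> path...
paths = nx.shortest_path(nx.path_graph(4), source=0)
assert type(paths) is dict and isinstance(paths, Mapping)

# ...but a backend may return any Mapping (nx-cugraph now returns a lazily
# evaluated one), so the benchmark asserts the Mapping protocol, not the type.
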
21 changes: 12 additions & 9 deletions python/nx-cugraph/lint.yaml
@@ -11,7 +11,7 @@ default_language_version:
python: python3
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: check-added-large-files
- id: check-case-conflict
@@ -20,16 +20,18 @@ repos:
- id: check-ast
- id: check-toml
- id: check-yaml
- id: check-executables-have-shebangs
- id: debug-statements
- id: end-of-file-fixer
exclude_types: [svg]
- id: mixed-line-ending
- id: trailing-whitespace
- repo: https://github.com/abravalheri/validate-pyproject
rev: v0.19
rev: v0.22
hooks:
- id: validate-pyproject
name: Validate pyproject.toml
files: python/nx-cugraph/pyproject.toml
- repo: https://github.com/PyCQA/autoflake
rev: v2.3.1
hooks:
@@ -40,17 +42,17 @@
hooks:
- id: isort
- repo: https://github.com/asottile/pyupgrade
rev: v3.17.0
rev: v3.19.0
hooks:
- id: pyupgrade
args: [--py310-plus]
- repo: https://github.com/psf/black
rev: 24.8.0
rev: 24.10.0
hooks:
- id: black
# - id: black-jupyter
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.7
rev: v0.7.2
hooks:
- id: ruff
args: [--fix-only, --show-fixes] # --unsafe-fixes]
@@ -62,7 +64,7 @@ repos:
additional_dependencies: &flake8_dependencies
# These versions need updated manually
- flake8==7.1.1
- flake8-bugbear==24.8.19
- flake8-bugbear==24.10.31
- flake8-simplify==0.21.0
- repo: https://github.com/asottile/yesqa
rev: v1.5.0
@@ -75,13 +77,14 @@
- id: codespell
types_or: [python, rst, markdown]
additional_dependencies: [tomli]
files: ^(nx_cugraph|docs)/
files: python/nx-cugraph/
args: [-L, "coo,numer"]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.7
rev: v0.7.2
hooks:
- id: ruff
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: no-commit-to-branch
args: [-p, "^branch-2....$"]
(next file: Katz centrality module)
@@ -47,7 +47,7 @@ def katz_centrality(
"""`nstart` isn't used (but is checked), and `normalized=False` is not supported."""
if not normalized:
# Redundant with the `_can_run` check below when being dispatched by NetworkX,
# but we raise here in case this funcion is called directly.
# but we raise here in case this function is called directly.
raise NotImplementedError("normalized=False is not supported.")
G = _to_graph(G, weight, 1, np.float32)
if (N := len(G)) == 0:
(next file: shortest-paths unweighted module)
@@ -10,6 +10,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import itertools

import cupy as cp
@@ -19,7 +20,7 @@

from nx_cugraph import _nxver
from nx_cugraph.convert import _to_graph
from nx_cugraph.utils import _groupby, index_dtype, networkx_algorithm
from nx_cugraph.utils import index_dtype, networkx_algorithm

__all__ = [
"bidirectional_shortest_path",
@@ -179,35 +180,82 @@ def _bfs(
elif not reverse_path:
paths.reverse()
else:
if return_type == "path":
distances = distances[mask]
groups = _groupby(distances, [predecessors[mask], node_ids])

# `pred_node_iter` does the equivalent as these nested for loops:
# for length in range(1, len(groups)):
# preds, nodes = groups[length]
# for pred, node in zip(preds.tolist(), nodes.tolist()):
if G.key_to_id is None:
pred_node_iter = concat(
zip(*(x.tolist() for x in groups[length]))
for length in range(1, len(groups))
)
else:
pred_node_iter = concat(
zip(*(G._nodeiter_to_iter(x.tolist()) for x in groups[length]))
for length in range(1, len(groups))
)
# Consider making utility functions for creating paths
paths = {source: [source]}
# Computing paths to all nodes can be expensive, so let's delay
# computation until needed using `PathMapping`.
key_iter = node_ids.tolist()
pred_iter = predecessors[mask].tolist()
if G.key_to_id is not None:
key_iter = G._nodeiter_to_iter(key_iter)
pred_iter = G._nodeiter_to_iter(pred_iter)
key_to_pred = dict(zip(key_iter, pred_iter))
key_to_pred[source] = None
if reverse_path:
for pred, node in pred_node_iter:
paths[node] = [node, *paths[pred]]
paths = ReversePathMapping({source: [source]}, key_to_pred)
else:
for pred, node in pred_node_iter:
paths[node] = [*paths[pred], node]
paths = PathMapping({source: [source]}, key_to_pred)
if return_type == "path":
return paths
if return_type == "length":
return lengths
# return_type == "length-path"
return lengths, paths


class PathMapping(collections.abc.Mapping):
"""Compute path for nodes as needed using predecessors.

The path for each node contains itself at the end of the path.
"""

def __init__(self, data, key_to_pred):
self._data = data
self._key_to_pred = key_to_pred

def __getitem__(self, key):
if key in self._data:
return self._data[key]
stack = [key]
key = self._key_to_pred[key]
while key not in self._data:
stack.append(key)
key = self._key_to_pred[key]
val = self._data[key]
for key in reversed(stack):
val = self._data[key] = [*val, key] # Switched in ReversePathMapping
return val

def __iter__(self):
return iter(self._key_to_pred)

def __len__(self):
return len(self._key_to_pred)


class ReversePathMapping(collections.abc.Mapping):
"""Compute path for nodes as needed using predecessors.

The path for each node contains itself at the beginning of the path.
"""

def __init__(self, data, key_to_pred):
self._data = data
self._key_to_pred = key_to_pred

def __getitem__(self, key):
if key in self._data:
return self._data[key]
stack = [key]
key = self._key_to_pred[key]
while key not in self._data:
stack.append(key)
key = self._key_to_pred[key]
val = self._data[key]
for key in reversed(stack):
val = self._data[key] = [key, *val] # Switched in PathMapping
return val

def __iter__(self):
return iter(self._key_to_pred)

def __len__(self):
return len(self._key_to_pred)
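
A small illustration of the laziness these classes add: only the path that is actually requested (plus the prefixes it shares with the source) gets built, and every intermediate result is cached in `_data` for reuse. The predecessor map below is a made-up example for the path graph 0-1-2-3 with source 0, and it assumes `PathMapping` and `ReversePathMapping` as defined above are in scope:

# Hypothetical predecessor map: node -> its predecessor on the path from source 0.
key_to_pred = {0: None, 1: 0, 2: 1, 3: 2}

paths = PathMapping({0: [0]}, key_to_pred)
assert paths[3] == [0, 1, 2, 3]  # walks predecessors back to a cached entry, then unwinds
assert paths[2] == [0, 1, 2]     # served from the cache populated while computing paths[3]
assert len(paths) == 4 and dict(paths) == {
    0: [0], 1: [0, 1], 2: [0, 1, 2], 3: [0, 1, 2, 3]
}

rpaths = ReversePathMapping({0: [0]}, key_to_pred)
assert rpaths[3] == [3, 2, 1, 0]  # same walk, but each node is prepended instead
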
(next file: shortest-paths weighted module)
@@ -15,14 +15,9 @@
import pylibcugraph as plc

from nx_cugraph.convert import _to_graph
from nx_cugraph.utils import (
_dtype_param,
_get_float_dtype,
_groupby,
networkx_algorithm,
)
from nx_cugraph.utils import _dtype_param, _get_float_dtype, networkx_algorithm

from .unweighted import _bfs
from .unweighted import PathMapping, ReversePathMapping, _bfs

__all__ = [
"dijkstra_path",
@@ -378,22 +373,19 @@ def _sssp(
elif not reverse_path:
paths.reverse()
else:
groups = _groupby(predecessors[mask], node_ids)
if (id_to_key := G.id_to_key) is not None:
groups = {id_to_key[k]: v for k, v in groups.items() if k >= 0}
paths = {source: [source]}
preds = [source]
while preds:
pred = preds.pop()
pred_path = paths[pred]
nodes = G._nodearray_to_list(groups[pred])
if reverse_path:
for node in nodes:
paths[node] = [node, *pred_path]
else:
for node in nodes:
paths[node] = [*pred_path, node]
preds.extend(nodes & groups.keys())
# Computing paths to all nodes can be expensive, so let's delay
# computation until needed using `PathMapping`.
key_iter = node_ids.tolist()
pred_iter = predecessors[mask].tolist()
if G.key_to_id is not None:
key_iter = G._nodeiter_to_iter(key_iter)
pred_iter = G._nodeiter_to_iter(pred_iter)
key_to_pred = dict(zip(key_iter, pred_iter))
key_to_pred[source] = None
if reverse_path:
paths = ReversePathMapping({source: [source]}, key_to_pred)
else:
paths = PathMapping({source: [source]}, key_to_pred)
if return_type == "path":
return paths
if return_type == "length":
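
The weighted `_sssp` now builds paths the same way as the unweighted `_bfs` above: one flat node-to-predecessor dict wrapped in a lazy mapping, replacing the old `_groupby`-based loop that materialized every path up front. A rough sketch of the user-visible effect, assuming nx-cugraph is installed and dispatches `shortest_path` as the `cugraph` backend (graph, weights, and node choice are illustrative):

import networkx as nx

G = nx.gnm_random_graph(10_000, 50_000, seed=7)
nx.set_edge_attributes(G, 1.0, "weight")

# `paths` maps each reachable target to its path; with this PR it is a lazy
# Mapping, so a path is only constructed when its target is looked up.
paths = nx.shortest_path(G, source=0, weight="weight", backend="cugraph")
target = next(iter(paths))      # pick some key without forcing path construction
print(paths[target])            # builds just this path (and caches its prefixes)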