nx-cugraph: faster shortest_path #4739

Open · wants to merge 6 commits into base: branch-24.12
4 changes: 3 additions & 1 deletion benchmarks/nx-cugraph/pytest-based/bench_algos.py
@@ -16,6 +16,8 @@
import networkx as nx
import pandas as pd
import pytest
from collections.abc import Mapping

from cugraph import datasets
import nx_cugraph as nxcg

@@ -520,7 +522,7 @@ def bench_shortest_path(benchmark, graph_obj, backend_wrapper):
iterations=iterations,
warmup_rounds=warmup_rounds,
)
assert type(result) is dict
assert isinstance(result, Mapping) # dict in nx, but we duck-type


def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapper):
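
For context on the relaxed assertion above: plain NetworkX returns a `dict` of paths here, but a dispatched backend is only required to return something dict-like, and with this PR nx-cugraph returns a lazy mapping. A minimal sketch of the distinction, using only stock NetworkX (the graph is illustrative):

from collections.abc import Mapping

import networkx as nx

# Stock NetworkX returns a plain dict of target -> path...
paths = nx.shortest_path(nx.path_graph(4), source=0)
assert type(paths) is dict and isinstance(paths, Mapping)

# ...but a backend may return any Mapping (nx-cugraph now returns a lazily
# evaluated one), so the benchmark asserts the Mapping protocol, not the type.
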
21 changes: 12 additions & 9 deletions python/nx-cugraph/lint.yaml
@@ -11,7 +11,7 @@ default_language_version:
python: python3
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: check-added-large-files
- id: check-case-conflict
@@ -20,16 +20,18 @@ repos:
- id: check-ast
- id: check-toml
- id: check-yaml
- id: check-executables-have-shebangs
- id: debug-statements
- id: end-of-file-fixer
exclude_types: [svg]
- id: mixed-line-ending
- id: trailing-whitespace
- repo: https://github.com/abravalheri/validate-pyproject
rev: v0.19
rev: v0.22
hooks:
- id: validate-pyproject
name: Validate pyproject.toml
files: python/nx-cugraph/pyproject.toml
- repo: https://github.com/PyCQA/autoflake
rev: v2.3.1
hooks:
@@ -40,17 +42,17 @@
hooks:
- id: isort
- repo: https://github.com/asottile/pyupgrade
rev: v3.17.0
rev: v3.19.0
hooks:
- id: pyupgrade
args: [--py310-plus]
- repo: https://github.com/psf/black
rev: 24.8.0
rev: 24.10.0
hooks:
- id: black
# - id: black-jupyter
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.7
rev: v0.7.2
hooks:
- id: ruff
args: [--fix-only, --show-fixes] # --unsafe-fixes]
@@ -62,7 +64,7 @@ repos:
additional_dependencies: &flake8_dependencies
# These versions need updated manually
- flake8==7.1.1
- flake8-bugbear==24.8.19
- flake8-bugbear==24.10.31
- flake8-simplify==0.21.0
- repo: https://github.com/asottile/yesqa
rev: v1.5.0
@@ -75,13 +77,14 @@
- id: codespell
types_or: [python, rst, markdown]
additional_dependencies: [tomli]
files: ^(nx_cugraph|docs)/
files: python/nx-cugraph/
args: [-L, "coo,numer"]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.7
rev: v0.7.2
hooks:
- id: ruff
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: no-commit-to-branch
args: [-p, "^branch-2....$"]
(next file: Katz centrality module)
@@ -47,7 +47,7 @@ def katz_centrality(
"""`nstart` isn't used (but is checked), and `normalized=False` is not supported."""
if not normalized:
# Redundant with the `_can_run` check below when being dispatched by NetworkX,
# but we raise here in case this funcion is called directly.
# but we raise here in case this function is called directly.
raise NotImplementedError("normalized=False is not supported.")
G = _to_graph(G, weight, 1, np.float32)
if (N := len(G)) == 0:
(next file: shortest-paths unweighted module)
@@ -10,6 +10,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import itertools

import cupy as cp
@@ -19,7 +20,7 @@

from nx_cugraph import _nxver
from nx_cugraph.convert import _to_graph
from nx_cugraph.utils import _groupby, index_dtype, networkx_algorithm
from nx_cugraph.utils import index_dtype, networkx_algorithm

__all__ = [
"bidirectional_shortest_path",
@@ -179,35 +180,82 @@ def _bfs(
elif not reverse_path:
paths.reverse()
else:
if return_type == "path":
distances = distances[mask]
groups = _groupby(distances, [predecessors[mask], node_ids])

# `pred_node_iter` does the equivalent as these nested for loops:
# for length in range(1, len(groups)):
# preds, nodes = groups[length]
# for pred, node in zip(preds.tolist(), nodes.tolist()):
if G.key_to_id is None:
pred_node_iter = concat(
zip(*(x.tolist() for x in groups[length]))
for length in range(1, len(groups))
)
else:
pred_node_iter = concat(
zip(*(G._nodeiter_to_iter(x.tolist()) for x in groups[length]))
for length in range(1, len(groups))
)
# Consider making utility functions for creating paths
paths = {source: [source]}
# Computing paths to all nodes can be expensive, so let's delay
# computation until needed using `PathMapping`.
key_iter = node_ids.tolist()
pred_iter = predecessors[mask].tolist()
if G.key_to_id is not None:
key_iter = G._nodeiter_to_iter(key_iter)
pred_iter = G._nodeiter_to_iter(pred_iter)
key_to_pred = dict(zip(key_iter, pred_iter))
key_to_pred[source] = None
if reverse_path:
for pred, node in pred_node_iter:
paths[node] = [node, *paths[pred]]
paths = ReversePathMapping({source: [source]}, key_to_pred)
else:
for pred, node in pred_node_iter:
paths[node] = [*paths[pred], node]
paths = PathMapping({source: [source]}, key_to_pred)
if return_type == "path":
return paths
if return_type == "length":
return lengths
# return_type == "length-path"
return lengths, paths


class PathMapping(collections.abc.Mapping):
"""Compute path for nodes as needed using predecessors.

The path for each node contains itself at the end of the path.
"""

def __init__(self, data, key_to_pred):
self._data = data
self._key_to_pred = key_to_pred

def __getitem__(self, key):
if key in self._data:
return self._data[key]
stack = [key]
key = self._key_to_pred[key]
while key not in self._data:
stack.append(key)
key = self._key_to_pred[key]
val = self._data[key]
for key in reversed(stack):
val = self._data[key] = [*val, key] # Switched in ReversePathMapping
return val

def __iter__(self):
return iter(self._key_to_pred)

def __len__(self):
return len(self._key_to_pred)


class ReversePathMapping(collections.abc.Mapping):
"""Compute path for nodes as needed using predecessors.

The path for each node contains itself at the beginning of the path.
"""

def __init__(self, data, key_to_pred):
self._data = data
self._key_to_pred = key_to_pred

def __getitem__(self, key):
if key in self._data:
return self._data[key]
stack = [key]
key = self._key_to_pred[key]
while key not in self._data:
stack.append(key)
key = self._key_to_pred[key]
val = self._data[key]
for key in reversed(stack):
val = self._data[key] = [key, *val] # Switched in PathMapping
return val

def __iter__(self):
return iter(self._key_to_pred)

def __len__(self):
return len(self._key_to_pred)
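
A small illustration of the laziness these classes add: only the path that is actually requested (plus the prefixes it shares with the source) gets built, and every intermediate result is cached in `_data` for reuse. The predecessor map below is a made-up example for the path graph 0-1-2-3 with source 0, and it assumes `PathMapping` and `ReversePathMapping` as defined above are in scope:

# Hypothetical predecessor map: node -> its predecessor on the path from source 0.
key_to_pred = {0: None, 1: 0, 2: 1, 3: 2}

paths = PathMapping({0: [0]}, key_to_pred)
assert paths[3] == [0, 1, 2, 3]  # walks predecessors back to a cached entry, then unwinds
assert paths[2] == [0, 1, 2]     # served from the cache populated while computing paths[3]
assert len(paths) == 4 and dict(paths) == {
    0: [0], 1: [0, 1], 2: [0, 1, 2], 3: [0, 1, 2, 3]
}

rpaths = ReversePathMapping({0: [0]}, key_to_pred)
assert rpaths[3] == [3, 2, 1, 0]  # same walk, but each node is prepended instead
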
(next file: shortest-paths weighted module)
@@ -15,14 +15,9 @@
import pylibcugraph as plc

from nx_cugraph.convert import _to_graph
from nx_cugraph.utils import (
_dtype_param,
_get_float_dtype,
_groupby,
networkx_algorithm,
)
from nx_cugraph.utils import _dtype_param, _get_float_dtype, networkx_algorithm

from .unweighted import _bfs
from .unweighted import PathMapping, ReversePathMapping, _bfs

__all__ = [
"dijkstra_path",
@@ -378,22 +373,19 @@ def _sssp(
elif not reverse_path:
paths.reverse()
else:
groups = _groupby(predecessors[mask], node_ids)
if (id_to_key := G.id_to_key) is not None:
groups = {id_to_key[k]: v for k, v in groups.items() if k >= 0}
paths = {source: [source]}
preds = [source]
while preds:
pred = preds.pop()
pred_path = paths[pred]
nodes = G._nodearray_to_list(groups[pred])
if reverse_path:
for node in nodes:
paths[node] = [node, *pred_path]
else:
for node in nodes:
paths[node] = [*pred_path, node]
preds.extend(nodes & groups.keys())
# Computing paths to all nodes can be expensive, so let's delay
# computation until needed using `PathMapping`.
key_iter = node_ids.tolist()
pred_iter = predecessors[mask].tolist()
if G.key_to_id is not None:
key_iter = G._nodeiter_to_iter(key_iter)
pred_iter = G._nodeiter_to_iter(pred_iter)
key_to_pred = dict(zip(key_iter, pred_iter))
key_to_pred[source] = None
if reverse_path:
paths = ReversePathMapping({source: [source]}, key_to_pred)
else:
paths = PathMapping({source: [source]}, key_to_pred)
if return_type == "path":
return paths
if return_type == "length":
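
The weighted `_sssp` now builds paths the same way as the unweighted `_bfs` above: one flat node-to-predecessor dict wrapped in a lazy mapping, replacing the old `_groupby`-based loop that materialized every path up front. A rough sketch of the user-visible effect, assuming nx-cugraph is installed and dispatches `shortest_path` as the `cugraph` backend (graph, weights, and node choice are illustrative):

import networkx as nx

G = nx.gnm_random_graph(10_000, 50_000, seed=7)
nx.set_edge_attributes(G, 1.0, "weight")

# `paths` maps each reachable target to its path; with this PR it is a lazy
# Mapping, so a path is only constructed when its target is looked up.
paths = nx.shortest_path(G, source=0, weight="weight", backend="cugraph")
target = next(iter(paths))      # pick some key without forcing path construction
print(paths[target])            # builds just this path (and caches its prefixes)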