From da0cde2912ffc32072cf85a1979ac09d8ab9225a Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Mon, 25 Mar 2024 14:24:54 +0100 Subject: [PATCH 01/76] Add dtype for numpy.uintp which is compatible with C uintptr_t (#1544) Need this to pass C pointers to DaCe sdfg and reinterpret cast them inside a tasklet --------- Co-authored-by: Tal Ben-Nun --- dace/dtypes.py | 1 + tests/uintptr_t_test.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 tests/uintptr_t_test.py diff --git a/dace/dtypes.py b/dace/dtypes.py index 76e6db8397..f04200e63b 100644 --- a/dace/dtypes.py +++ b/dace/dtypes.py @@ -1216,6 +1216,7 @@ def isconstant(var): int16 = typeclass(numpy.int16) int32 = typeclass(numpy.int32) int64 = typeclass(numpy.int64) +uintp = typeclass(numpy.uintp) uint8 = typeclass(numpy.uint8) uint16 = typeclass(numpy.uint16) uint32 = typeclass(numpy.uint32) diff --git a/tests/uintptr_t_test.py b/tests/uintptr_t_test.py new file mode 100644 index 0000000000..2b1941340d --- /dev/null +++ b/tests/uintptr_t_test.py @@ -0,0 +1,37 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. +import dace +import ctypes +import numpy as np + + +def test_uintp_size(): + # c_void_p: C type -> void* + size = ctypes.sizeof(ctypes.c_void_p) + # numpy.uintp: Unsigned integer large enough to fit pointer, compatible with C uintptr_t + size_of_np_uintp = np.uintp().itemsize + # Dace uintptr_t representation + size_of_dace_uintp = dace.uintp.bytes + + assert size == size_of_np_uintp == size_of_dace_uintp + + +def test_uintp_use(): + + @dace.program + def tester(arr: dace.float64[20], pointer: dace.uintp[1]): + with dace.tasklet(dace.Language.CPP): + a << arr(-1) + """ + out = decltype(out)(a); + """ + out >> pointer[0] + + ptr = np.empty([1], dtype=np.uintp) + arr = np.random.rand(20) + tester(arr, ptr) + assert arr.__array_interface__['data'][0] == ptr[0] + + +if __name__ == '__main__': + test_uintp_size() + test_uintp_use() From d0db188db5f9d544c3c857ad0a5b32ad290c01ff Mon Sep 17 00:00:00 2001 From: edopao Date: Tue, 26 Mar 2024 12:22:55 +0100 Subject: [PATCH 02/76] Fix bug in map_fusion transformation (#1553) Four-lines bugfix and associated test case for map_fusion transformation. Without this change, the test would fail in SDFG validation with error: `dace.sdfg.validation.InvalidSDFGEdgeError: Memlet data does not match source or destination data nodes) (at state state, edge __s0_n1None_n3IN_T[0] (V:None -> numeric:_inp))` --------- Co-authored-by: alexnick83 <31545860+alexnick83@users.noreply.github.com> --- dace/transformation/dataflow/map_fusion.py | 6 ++++ tests/transformations/mapfusion_test.py | 38 ++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/dace/transformation/dataflow/map_fusion.py b/dace/transformation/dataflow/map_fusion.py index 9a0dd0e313..186ea32acc 100644 --- a/dace/transformation/dataflow/map_fusion.py +++ b/dace/transformation/dataflow/map_fusion.py @@ -481,6 +481,12 @@ def fuse_nodes(self, sdfg, graph, edge, new_dst, new_dst_conn, other_edges=None) local_node = edge.src src_connector = edge.src_conn + # update edge data in case source or destination is a scalar access node + test_data = [node.data for node in (edge.src, edge.dst) if isinstance(node, nodes.AccessNode)] + for new_data in test_data: + if isinstance(sdfg.arrays[new_data], data.Scalar): + edge.data.data = new_data + # If destination of edge leads to multiple destinations, redirect all through an access node. 
if other_edges: # NOTE: If a new local node was already created, reuse it. diff --git a/tests/transformations/mapfusion_test.py b/tests/transformations/mapfusion_test.py index 653fb9d120..724c8c97ee 100644 --- a/tests/transformations/mapfusion_test.py +++ b/tests/transformations/mapfusion_test.py @@ -163,6 +163,43 @@ def test_fusion_with_transient(): assert np.allclose(A, expected) +def test_fusion_with_transient_scalar(): + N = 10 + K = 4 + + def build_sdfg(): + sdfg = dace.SDFG("map_fusion_with_transient_scalar") + state = sdfg.add_state() + sdfg.add_array("A", (N,K), dace.float64) + sdfg.add_array("B", (N,), dace.float64) + sdfg.add_array("T", (N,), dace.float64, transient=True) + t_node = state.add_access("T") + sdfg.add_scalar("V", dace.float64, transient=True) + v_node = state.add_access("V") + + me1, mx1 = state.add_map("map1", dict(i=f"0:{N}")) + tlet1 = state.add_tasklet("select", {"_v"}, {"_out"}, f"_out = _v[i, {K-1}]") + state.add_memlet_path(state.add_access("A"), me1, tlet1, dst_conn="_v", memlet=dace.Memlet.from_array("A", sdfg.arrays["A"])) + state.add_edge(tlet1, "_out", v_node, None, dace.Memlet("V[0]")) + state.add_memlet_path(v_node, mx1, t_node, memlet=dace.Memlet("T[i]")) + + me2, mx2 = state.add_map("map2", dict(j=f"0:{N}")) + tlet2 = state.add_tasklet("numeric", {"_inp"}, {"_out"}, f"_out = _inp + 1") + state.add_memlet_path(t_node, me2, tlet2, dst_conn="_inp", memlet=dace.Memlet("T[j]")) + state.add_memlet_path(tlet2, mx2, state.add_access("B"), src_conn="_out", memlet=dace.Memlet("B[j]")) + + return sdfg + + sdfg = build_sdfg() + sdfg.apply_transformations(MapFusion) + + A = np.random.rand(N, K) + B = np.repeat(np.nan, N) + sdfg(A=A, B=B) + + assert np.allclose(B, (A[:, K-1] + 1)) + + def test_fusion_with_inverted_indices(): @dace.program @@ -278,6 +315,7 @@ def fusion_with_nested_sdfg_1(A: dace.int32[10], B: dace.int32[10], C: dace.int3 test_multiple_fusions() test_fusion_chain() test_fusion_with_transient() + test_fusion_with_transient_scalar() test_fusion_with_inverted_indices() test_fusion_with_empty_memlet() test_fusion_with_nested_sdfg_0() From 888fd2de1da370d5dff6346179af172bfa3d34bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20M=C3=BCller?= <147368808+philip-paul-mueller@users.noreply.github.com> Date: Wed, 10 Apr 2024 09:30:28 +0200 Subject: [PATCH 03/76] Updated the `add_state_{after, before}()` function. (#1556) It is now possible to add conditions and assignments directly to them. Furthermore they also support now `is_start_block` flag. --- dace/sdfg/state.py | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index a9f7071b0f..0a93d54c2c 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -2477,38 +2477,56 @@ def add_state(self, label=None, is_start_block=False, *, is_start_state: bool=No self.add_node(state, is_start_block=start_block) return state - def add_state_before(self, state: SDFGState, label=None, is_start_state=False) -> SDFGState: + def add_state_before(self, + state: SDFGState, + label=None, + is_start_block=False, + condition: CodeBlock = None, + assignments=None, + *, + is_start_state: bool=None) -> SDFGState: """ Adds a new SDFG state before an existing state, reconnecting predecessors to it instead. :param state: The state to prepend the new state before. :param label: State label. - :param is_start_state: If True, resets scope block starting state to this state. 
+ :param is_start_block: If True, resets scope block starting state to this state. + :param condition: Transition condition of the newly created edge between state and the new state. + :param assignments: Assignments to perform upon transition. :return: A new SDFGState object. """ - new_state = self.add_state(label, is_start_state) + new_state = self.add_state(label, is_start_block=is_start_block, is_start_state=is_start_state) # Reconnect for e in self.in_edges(state): self.remove_edge(e) self.add_edge(e.src, new_state, e.data) - # Add unconditional connection between the new state and the current - self.add_edge(new_state, state, dace.sdfg.InterstateEdge()) + # Add the new edge + self.add_edge(new_state, state, dace.sdfg.InterstateEdge(condition=condition, assignments=assignments)) return new_state - def add_state_after(self, state: SDFGState, label=None, is_start_state=False) -> SDFGState: + def add_state_after(self, + state: SDFGState, + label=None, + is_start_block=False, + condition: CodeBlock = None, + assignments=None, + *, + is_start_state: bool=None) -> SDFGState: """ Adds a new SDFG state after an existing state, reconnecting it to the successors instead. :param state: The state to append the new state after. :param label: State label. - :param is_start_state: If True, resets SDFG starting state to this state. + :param is_start_block: If True, resets scope block starting state to this state. + :param condition: Transition condition of the newly created edge between state and the new state. + :param assignments: Assignments to perform upon transition. :return: A new SDFGState object. """ - new_state = self.add_state(label, is_start_state) + new_state = self.add_state(label, is_start_block=is_start_block, is_start_state=is_start_state) # Reconnect for e in self.out_edges(state): self.remove_edge(e) self.add_edge(new_state, e.dst, e.data) - # Add unconditional connection between the current and the new state - self.add_edge(state, new_state, dace.sdfg.InterstateEdge()) + # Add the new edge + self.add_edge(state, new_state, dace.sdfg.InterstateEdge(condition=condition, assignments=assignments)) return new_state @abc.abstractmethod From 78759b56b537930a5fd3d4bdd64048960765adf1 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Tue, 16 Apr 2024 16:08:17 +0200 Subject: [PATCH 04/76] Distributed Compilation as an option to DaCe Program (#1555) Option to activate/deactivate Distributed Compilation. This small PR is based on the following comment (DAPP/DaCe Mattermost channel): _I have an unexpected behaviour in DaCe distributed compilation. Currently, if you have an MPI program, distributed compilation is the default behaviour (as seen in [this file](https://github.com/spcl/dace/blob/master/dace/frontend/python/parser.py#L452)). I was expecting that after the loading of the compiled sdfg every rank would do symbol specialization. Although, this is not the case, i.e. every rank uses the compiled sdfg from rank 0, which specializes its symbols with the values corresponding to rank 0. Therefore, the compiled sdfg loaded by all the other ranks use a wrong sdfg (symbols are not specialized with the values of the correct rank). To validate this behaviour, I have de-activated the distributed compilation and set `dace.config.Config.set("cache", value="unique")`. Indeed, this approach works without any issue. Is there a way to change this unexpected behaviour, i.e. to have by default the distributed compilation but every rank to perform symbol specialization. 
To give a bit more context, I am generating an sdfg that uses closures heavily, i.e. all the gt4py fields are defined externally to the sdfg (could that be an issue)?_ --- dace/frontend/python/interface.py | 7 ++++++- dace/frontend/python/parser.py | 8 +++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/dace/frontend/python/interface.py b/dace/frontend/python/interface.py index 69e650beaa..ecd0b164d6 100644 --- a/dace/frontend/python/interface.py +++ b/dace/frontend/python/interface.py @@ -42,6 +42,7 @@ def program(f: F, recreate_sdfg: bool = True, regenerate_code: bool = True, recompile: bool = True, + distributed_compilation: bool = False, constant_functions=False, **kwargs) -> Callable[..., parser.DaceProgram]: """ @@ -60,6 +61,9 @@ def program(f: F, it. :param recompile: Whether to recompile the code. If False, the library in the build folder will be used if it exists, without recompiling it. + :param distributed_compilation: Whether to compile the code from rank 0, and broadcast it to all the other ranks. + If False, every rank performs the compilation. In this case, make sure to check the ``cache`` configuration entry + such that no caching or clashes can happen between different MPI processes. :param constant_functions: If True, assumes all external functions that do not depend on internal variables are constant. This will hardcode their return values into the @@ -78,7 +82,8 @@ def program(f: F, constant_functions, recreate_sdfg=recreate_sdfg, regenerate_code=regenerate_code, - recompile=recompile) + recompile=recompile, + distributed_compilation=distributed_compilation) function = program diff --git a/dace/frontend/python/parser.py b/dace/frontend/python/parser.py index 14377c4fe2..34cb8fb4ad 100644 --- a/dace/frontend/python/parser.py +++ b/dace/frontend/python/parser.py @@ -151,6 +151,7 @@ def __init__(self, recreate_sdfg: bool = True, regenerate_code: bool = True, recompile: bool = True, + distributed_compilation: bool = False, method: bool = False): from dace.codegen import compiled_sdfg # Avoid import loops @@ -171,6 +172,7 @@ def __init__(self, self.recreate_sdfg = recreate_sdfg self.regenerate_code = regenerate_code self.recompile = recompile + self.distributed_compilation = distributed_compilation self.global_vars = _get_locals_and_globals(f) self.signature = inspect.signature(f) @@ -449,12 +451,12 @@ def __call__(self, *args, **kwargs): sdfg.simplify() with hooks.invoke_sdfg_call_hooks(sdfg) as sdfg: - if not mpi4py: + if self.distributed_compilation and mpi4py: + binaryobj = distributed_compile(sdfg, mpi4py.MPI.COMM_WORLD, validate=self.validate) + else: # Compile SDFG (note: this is done after symbol inference due to shape # altering transformations such as Vectorization) binaryobj = sdfg.compile(validate=self.validate) - else: - binaryobj = distributed_compile(sdfg, mpi4py.MPI.COMM_WORLD, validate=self.validate) # Recreate key and add to cache cachekey = self._cache.make_key(argtypes, specified, self.closure_array_keys, self.closure_constant_keys, From 5d4dfe9e3b21bb80bdad40d86fdfd284cfbf57fe Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:04:48 -0700 Subject: [PATCH 05/76] Bump idna from 3.4 to 3.7 (#1557) Bumps [idna](https://github.com/kjd/idna) from 3.4 to 3.7.
Release notes

Sourced from idna's releases.

v3.7

What's Changed

  • Fix issue where specially crafted inputs to encode() could take exceptionally long amount of time to process. [CVE-2024-3651]

Thanks to Guido Vranken for reporting the issue.

Full Changelog: https://github.com/kjd/idna/compare/v3.6...v3.7

Changelog

Sourced from idna's changelog.

3.7 (2024-04-11)
++++++++++++++++

  • Fix issue where specially crafted inputs to encode() could take exceptionally long amount of time to process. [CVE-2024-3651]

Thanks to Guido Vranken for reporting the issue.

3.6 (2023-11-25)
++++++++++++++++

  • Fix regression to include tests in source distribution.

3.5 (2023-11-24)
++++++++++++++++

  • Update to Unicode 15.1.0
  • String codec name is now "idna2008" as overriding the system codec "idna" was not working.
  • Fix typing error for codec encoding
  • "setup.cfg" has been added for this release due to some downstream lack of adherence to PEP 517. Should be removed in a future release so please prepare accordingly.
  • Removed reliance on a symlink for the "idna-data" tool to comport with PEP 517 and the Python Packaging User Guide for sdist archives.
  • Added security reporting protocol for project

Thanks Jon Ribbens, Diogo Teles Sant'Anna, Wu Tingfeng for contributions to this release.

Commits
  • 1d365e1 Release v3.7
  • c1b3154 Merge pull request #172 from kjd/optimize-contextj
  • 0394ec7 Merge branch 'master' into optimize-contextj
  • cd58a23 Merge pull request #152 from elliotwutingfeng/dev
  • 5beb28b More efficient resolution of joiner contexts
  • 1b12148 Update ossf/scorecard-action to v2.3.1
  • d516b87 Update Github actions/checkout to v4
  • c095c75 Merge branch 'master' into dev
  • 60a0a4c Fix typo in GitHub Actions workflow key
  • 5918a0e Merge branch 'master' into dev
  • Additional commits viewable in compare view

Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f06f3421cd..1186b87cea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ charset-normalizer==3.1.0 click==8.1.3 dill==0.3.6 fparser==0.1.3 -idna==3.4 +idna==3.7 importlib-metadata==6.6.0 Jinja2==3.1.3 MarkupSafe==2.1.3 From f01b937b50e54c53db07d233a207622d69e91dd0 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Thu, 25 Apr 2024 23:59:55 -0700 Subject: [PATCH 06/76] Fix infinite loops in memlet path when a scope cycle is added (#1559) Fixes #1558 --- dace/sdfg/state.py | 15 +++++++++++++++ tests/sdfg/cycles_test.py | 19 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index 0a93d54c2c..cafea3d754 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -389,7 +389,9 @@ def memlet_path(self, edge: MultiConnectorEdge[mm.Memlet]) -> List[MultiConnecto # Prepend incoming edges until reaching the source node curedge = edge + visited = set() while not isinstance(curedge.src, (nd.CodeNode, nd.AccessNode)): + visited.add(curedge) # Trace through scopes using OUT_# -> IN_# if isinstance(curedge.src, (nd.EntryNode, nd.ExitNode)): if curedge.src_conn is None: @@ -398,10 +400,14 @@ def memlet_path(self, edge: MultiConnectorEdge[mm.Memlet]) -> List[MultiConnecto next_edge = next(e for e in state.in_edges(curedge.src) if e.dst_conn == "IN_" + curedge.src_conn[4:]) result.insert(0, next_edge) curedge = next_edge + if curedge in visited: + raise ValueError('Cycle encountered while reading memlet path') # Append outgoing edges until reaching the sink node curedge = edge + visited.clear() while not isinstance(curedge.dst, (nd.CodeNode, nd.AccessNode)): + visited.add(curedge) # Trace through scope entry using IN_# -> OUT_# if isinstance(curedge.dst, (nd.EntryNode, nd.ExitNode)): if curedge.dst_conn is None: @@ -411,6 +417,8 @@ def memlet_path(self, edge: MultiConnectorEdge[mm.Memlet]) -> List[MultiConnecto next_edge = next(e for e in state.out_edges(curedge.dst) if e.src_conn == "OUT_" + curedge.dst_conn[3:]) result.append(next_edge) curedge = next_edge + if curedge in visited: + raise ValueError('Cycle encountered while reading memlet path') return result @@ -434,16 +442,23 @@ def memlet_tree(self, edge: MultiConnectorEdge) -> mm.MemletTree: # Find tree root curedge = edge + visited = set() if propagate_forward: while (isinstance(curedge.src, nd.EntryNode) and curedge.src_conn is not None): + visited.add(curedge) assert curedge.src_conn.startswith('OUT_') cname = curedge.src_conn[4:] curedge = next(e for e in state.in_edges(curedge.src) if e.dst_conn == 'IN_%s' % cname) + if curedge in visited: + raise ValueError('Cycle encountered while reading memlet path') elif propagate_backward: while (isinstance(curedge.dst, nd.ExitNode) and curedge.dst_conn is not None): + visited.add(curedge) assert curedge.dst_conn.startswith('IN_') cname = curedge.dst_conn[3:] curedge = next(e for e in state.out_edges(curedge.dst) if e.src_conn == 'OUT_%s' % cname) + if curedge in visited: + raise ValueError('Cycle encountered while reading memlet path') tree_root = mm.MemletTree(curedge, downwards=propagate_forward) # Collect children (recursively) diff --git a/tests/sdfg/cycles_test.py b/tests/sdfg/cycles_test.py index 5e94db2eb4..480392ab2d 100644 --- a/tests/sdfg/cycles_test.py +++ b/tests/sdfg/cycles_test.py 
@@ -1,3 +1,4 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. import pytest import dace @@ -13,3 +14,21 @@ def test_cycles(): state.add_edge(access, None, access, None, dace.Memlet.simple("A", "0")) sdfg.validate() + + +def test_cycles_memlet_path(): + with pytest.raises(ValueError, match="Found cycles.*"): + sdfg = dace.SDFG("foo") + state = sdfg.add_state() + sdfg.add_array("bla", shape=(10, ), dtype=dace.float32) + mentry_3, _ = state.add_map("map_3", dict(i="0:9")) + mentry_3.add_in_connector("IN_0") + mentry_3.add_out_connector("OUT_0") + state.add_edge(mentry_3, "OUT_0", mentry_3, "IN_0", dace.Memlet(data="bla", subset='0:9')) + + sdfg.validate() + + +if __name__ == '__main__': + test_cycles() + test_cycles_memlet_path() From a0422c9dcea3d62b40da6bb7c20dcaaf027b34d0 Mon Sep 17 00:00:00 2001 From: BenWeber42 Date: Thu, 2 May 2024 20:32:10 +0200 Subject: [PATCH 07/76] Changed default of serialize_all_fields to False (#1564) For the 0.16 release, we want to introduce the change to the default of `serialize_all_fields` to `False`. This reverts PR "Changed default of serialize_all_fields to True #1470". This reverts commit bfe4163f1297e049921c40e2a1bcb208fccc076b. --- dace/config_schema.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dace/config_schema.yml b/dace/config_schema.yml index 737862cacc..b26e96e920 100644 --- a/dace/config_schema.yml +++ b/dace/config_schema.yml @@ -945,10 +945,10 @@ required: serialize_all_fields: type: bool - default: true + default: false title: Serialize all unmodified fields in SDFG files description: > - If False, saving an SDFG keeps only the modified non-default properties. If True, + If False (default), saving an SDFG keeps only the modified non-default properties. If True, saves all fields. ############################################# From 9e1cb4a8f8638ed961568d7374f9eac67b7ac986 Mon Sep 17 00:00:00 2001 From: alexnick83 <31545860+alexnick83@users.noreply.github.com> Date: Fri, 3 May 2024 01:57:41 +0200 Subject: [PATCH 08/76] Adds support for ArrayView to the Python Frontend (#1565) The refactoring of Views in PR #1504 led to the creation of the ArrayView type. This PR addresses an issue in the Python ProgramVisitor, where ArrayViews are not recognized properly as Views (of Arrays), leading to a NotImplementedError. The fix is simple: when checking if a container is an Array or a View (of an Array), instead of making a direct equality comparison to Array or View, a subclass comparison against Array is performed. The latter returns true if the container is an Array or any Array subclass, including ArrayViews. 
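For illustration, a minimal standalone sketch of the check (toy classes standing in for the `dace.data` hierarchy, not the real definitions):

```python
class Data: pass
class Array(Data): pass             # stands in for dace.data.Array
class View: pass                    # stands in for the View mixin
class ArrayView(Array, View): pass  # stands in for the type added in #1504

arr_type = ArrayView
print(arr_type in (Array, View))    # False: the equality check misses subclasses
print(issubclass(arr_type, Array))  # True: accepts Array and every Array subclass
```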
--- dace/frontend/python/newast.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py index 3d2ec5c09d..fda2bd2e23 100644 --- a/dace/frontend/python/newast.py +++ b/dace/frontend/python/newast.py @@ -823,7 +823,7 @@ def _add_access( arr_type = type(parent_array) if arr_type == data.Scalar: self.sdfg.add_scalar(var_name, dtype) - elif arr_type in (data.Array, data.View): + elif issubclass(arr_type, data.Array): self.sdfg.add_array(var_name, shape, dtype, strides=strides) elif arr_type == data.Stream: self.sdfg.add_stream(var_name, dtype) @@ -3116,7 +3116,7 @@ def _add_access( arr_type = data.Scalar if arr_type == data.Scalar: self.sdfg.add_scalar(var_name, dtype) - elif arr_type in (data.Array, data.View): + elif issubclass(arr_type, data.Array): if non_squeezed: strides = [parent_array.strides[d] for d in non_squeezed] else: From 91f3f1f7b6d4aef1c19fbdcedd68668fc79d351c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20M=C3=BCller?= <147368808+philip-paul-mueller@users.noreply.github.com> Date: Mon, 6 May 2024 15:04:46 +0200 Subject: [PATCH 09/76] It is now possible to suppress output in `view()` (#1566) Beside making it possible to suppress the output of `sdfv.view`, this commit also changed the default behaviour of `SDFG.view()` by no longer outputting in which file it was saved. --- dace/cli/sdfv.py | 9 ++++++--- dace/sdfg/sdfg.py | 5 +++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/dace/cli/sdfv.py b/dace/cli/sdfv.py index f503775814..49255a1e7e 100644 --- a/dace/cli/sdfv.py +++ b/dace/cli/sdfv.py @@ -23,7 +23,7 @@ class NewCls(cls): return NewCls -def view(sdfg: dace.SDFG, filename: Optional[Union[str, int]] = None): +def view(sdfg: dace.SDFG, filename: Optional[Union[str, int]] = None, verbose: bool = True): """ View an sdfg in the system's HTML viewer @@ -33,6 +33,7 @@ def view(sdfg: dace.SDFG, filename: Optional[Union[str, int]] = None): the generated HTML and related sources will be served using a basic web server on that port, blocking the current thread. + :param verbose: Be verbose. """ # If vscode is open, try to open it inside vscode if filename is None: @@ -71,7 +72,8 @@ def view(sdfg: dace.SDFG, filename: Optional[Union[str, int]] = None): with open(html_filename, "w") as f: f.write(html) - print("File saved at %s" % html_filename) + if(verbose): + print("File saved at %s" % html_filename) if fd is not None: os.close(fd) @@ -83,7 +85,8 @@ def view(sdfg: dace.SDFG, filename: Optional[Union[str, int]] = None): # start the web server handler = partialclass(http.server.SimpleHTTPRequestHandler, directory=dirname) httpd = http.server.HTTPServer(('localhost', filename), handler) - print(f"Serving at localhost:{filename}, press enter to stop...") + if(verbose): + print(f"Serving at localhost:{filename}, press enter to stop...") # start the server in a different thread def serve(): diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 5017a6ff86..0b72924630 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -1547,14 +1547,15 @@ def save(self, filename: str, use_pickle=False, hash=None, exception=None, compr return None - def view(self, filename=None): + def view(self, filename=None, verbose=False): """ View this sdfg in the system's HTML viewer :param filename: the filename to write the HTML to. If `None`, a temporary file will be created. + :param verbose: Be verbose, `False` by default. 
""" from dace.cli.sdfv import view - view(self, filename=filename) + view(self, filename=filename, verbose=verbose) @staticmethod def _from_file(fp: BinaryIO) -> 'SDFG': From e60652c565e8c6362fc3fcf77e613b5d0964148a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 8 May 2024 11:38:11 +0200 Subject: [PATCH 10/76] Bump jinja2 from 3.1.3 to 3.1.4 (#1569) Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.3 to 3.1.4.
Release notes

Sourced from jinja2's releases.

3.1.4

This is the Jinja 3.1.4 security release, which fixes security issues and bugs but does not otherwise change behavior and should not result in breaking changes.

PyPI: https://pypi.org/project/Jinja2/3.1.4/
Changes: https://jinja.palletsprojects.com/en/3.1.x/changes/#version-3-1-4

  • The xmlattr filter does not allow keys with / solidus, > greater-than sign, or = equals sign, in addition to disallowing spaces. Regardless of any validation done by Jinja, user input should never be used as keys to this filter, or must be separately validated first. GHSA-h75v-3vvj-5mfj
Changelog

Sourced from jinja2's changelog.

Version 3.1.4

Released 2024-05-05

  • The xmlattr filter does not allow keys with / solidus, > greater-than sign, or = equals sign, in addition to disallowing spaces. Regardless of any validation done by Jinja, user input should never be used as keys to this filter, or must be separately validated first. :ghsa:h75v-3vvj-5mfj
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 1186b87cea..e98e33fe74 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,7 +7,7 @@ dill==0.3.6
 fparser==0.1.3
 idna==3.7
 importlib-metadata==6.6.0
-Jinja2==3.1.3
+Jinja2==3.1.4
 MarkupSafe==2.1.3
 mpmath==1.3.0
 networkx==3.1

From 5339c71f1f775083ad508fd4100363263b172542 Mon Sep 17 00:00:00 2001
From: alexnick83 <31545860+alexnick83@users.noreply.github.com>
Date: Wed, 8 May 2024 15:04:14 +0200
Subject: [PATCH 11/76] Correction in the docstring of the SDFG class's init method (#1571)

This PR corrects the `SDFG.__init__` docstring to refer to the correct parameter `constants` (compile-time constants) instead of `symbols` (scalars that are immutable in the SDFG scope).

See also #1563

---
 dace/sdfg/sdfg.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py
index 0b72924630..f10e728607 100644
--- a/dace/sdfg/sdfg.py
+++ b/dace/sdfg/sdfg.py
@@ -461,8 +461,8 @@ def __init__(self,
 
         :param name: Name for the SDFG (also used as the filename for
                      the compiled shared library).
-        :param symbols: Additional dictionary of symbol names -> types that the SDFG
-                        defines, apart from symbolic data sizes.
+        :param constants: Additional dictionary of compile-time constants
+                          {name (str): tuple(type (dace.data.Data), value (Any))}.
         :param propagate: If False, disables automatic propagation of memlet
                           subsets from scopes outwards. Saves processing time
                           but disallows certain

From 63adbd780cd3241c05d8db70462ea4da290ffb66 Mon Sep 17 00:00:00 2001
From: Florian Deconinck
Date: Wed, 8 May 2024 14:42:39 -0400
Subject: [PATCH 12/76] Fix Subscript literal evaluation for List (#1570)

Looking at: https://github.com/spcl/dace/issues/1568

The code was blindly calling down to `_visit_potential_constant`, which is written for a single element rather than a collection of them. Unroll the list, as is done for the `dict` in the `if` above.
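The distinction the fix relies on is plain list-indexing semantics: an integer subscript yields a single element, while a slice subscript yields a list, which the single-element visitor cannot handle. A minimal sketch:

```python
elts = ['vapor', 'rain', 'nope']  # stand-in for an ast.List's elts
print(elts[1])    # 'rain' -- a single element
print(elts[0:2])  # ['vapor', 'rain'] -- a list, whose elements must be
                  # visited one by one, as the diff below does
```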
--- dace/frontend/python/preprocessing.py | 18 +++++++++-- tests/python_frontend/unroll_test.py | 46 +++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/dace/frontend/python/preprocessing.py b/dace/frontend/python/preprocessing.py index 90ef506bcd..420346ca88 100644 --- a/dace/frontend/python/preprocessing.py +++ b/dace/frontend/python/preprocessing.py @@ -752,7 +752,7 @@ def visit_Subscript(self, node: ast.Subscript) -> Any: return self.generic_visit(node) # Then query for the right value - if isinstance(node.value, ast.Dict): + if isinstance(node.value, ast.Dict): # Dict for k, v in zip(node.value.keys, node.value.values): try: gkey = astutils.evalnode(k, self.globals) @@ -760,8 +760,20 @@ def visit_Subscript(self, node: ast.Subscript) -> Any: continue if gkey == gslice: return self._visit_potential_constant(v, True) - else: # List or Tuple - return self._visit_potential_constant(node.value.elts[gslice], True) + elif isinstance(node.value, (ast.List, ast.Tuple)): # List & Tuple + # Loop over the list if slicing makes it a list + if isinstance(node.value.elts[gslice], List): + visited_list = astutils.copy_tree(node.value) + visited_list.elts.clear() + for v in node.value.elts[gslice]: + visited_cst = self._visit_potential_constant(v, True) + visited_list.elts.append(visited_cst) + node.value = visited_list + return node + else: + return self._visit_potential_constant(node.value.elts[gslice], True) + else: # Catch-all + return self._visit_potential_constant(node, True) return self._visit_potential_constant(node, True) diff --git a/tests/python_frontend/unroll_test.py b/tests/python_frontend/unroll_test.py index 98c81156a0..bf2b1e7c91 100644 --- a/tests/python_frontend/unroll_test.py +++ b/tests/python_frontend/unroll_test.py @@ -169,6 +169,52 @@ def tounroll(A: dace.float64[3]): assert np.allclose(a, np.array([1, 2, 3])) +def test_list_global_enumerate(): + tracer_variables = ["vapor", "rain", "nope"] + + @dace.program + def enumerate_parsing( + A, + tracers: dace.compiletime, # Dict[str, np.float64] + ): + for i, q in enumerate(tracer_variables[0:2]): + tracers[q][:] = A # type:ignore + + a = np.ones([3]) + q = { + "vapor": np.zeros([3]), + "rain": np.zeros([3]), + "nope": np.zeros([3]), + } + enumerate_parsing(a, q) + assert np.allclose(q["vapor"], np.array([1, 1, 1])) + assert np.allclose(q["rain"], np.array([1, 1, 1])) + assert np.allclose(q["nope"], np.array([0, 0, 0])) + + +def test_tuple_global_enumerate(): + tracer_variables = ("vapor", "rain", "nope") + + @dace.program + def enumerate_parsing( + A, + tracers: dace.compiletime, # Dict[str, np.float64] + ): + for i, q in enumerate(tracer_variables[0:2]): + tracers[q][:] = A # type:ignore + + a = np.ones([3]) + q = { + "vapor": np.zeros([3]), + "rain": np.zeros([3]), + "nope": np.zeros([3]), + } + enumerate_parsing(a, q) + assert np.allclose(q["vapor"], np.array([1, 1, 1])) + assert np.allclose(q["rain"], np.array([1, 1, 1])) + assert np.allclose(q["nope"], np.array([0, 0, 0])) + + def test_tuple_elements_zip(): a1 = [2, 3, 4] a2 = (4, 5, 6) From ee5a6dfe695f329c3882105b087f3563a0c80b81 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Thu, 9 May 2024 17:07:10 -0400 Subject: [PATCH 13/76] NOAA/NASA pyFV3 CI on every commit (#1478) Follow up of #1460 - [x] Fixed the `ci` script (including `git checkout issues` around selecting the correct `dace`) - [x] Move `D_SW` to execute only on rank 0 to avoid rebuild - [x] Swapped Rieman Solver on C-grid for D-grid for better coverage ~~WARNING: this 
PR is blocked by #1477~~ ~~WARNING: this PR is blocked by #1568~~ --------- Co-authored-by: Tal Ben-Nun --- .github/workflows/pace-build-ci.yml | 75 ----------------------- .github/workflows/pyFV3-ci.yml | 94 +++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 75 deletions(-) delete mode 100644 .github/workflows/pace-build-ci.yml create mode 100644 .github/workflows/pyFV3-ci.yml diff --git a/.github/workflows/pace-build-ci.yml b/.github/workflows/pace-build-ci.yml deleted file mode 100644 index 672c891a55..0000000000 --- a/.github/workflows/pace-build-ci.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: NASA/NOAA Pace repository build test - -on: - workflow_dispatch: - -defaults: - run: - shell: bash - -jobs: - build_pace: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [3.8.10] - - steps: - - uses: actions/checkout@v2 - with: - repository: 'git@github.com:GEOS-ESM/pace.git' - ref: 'ci/DaCe' - submodules: 'recursive' - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies & pull correct DaCe - run: | - cd pace - python -m pip install --upgrade pip wheel setuptools - cd external/dace - git checkout ${{ github.sha }} - cd ../.. - pip install -e external/gt4py - pip install -e external/dace - pip install -r requirements_dev.txt - - name: Download data - run: | - cd pace - mkdir -p test_data - cd test_data - wget https://portal.nccs.nasa.gov/datashare/astg/smt/pace-regression-data/8.1.3_c12_6_ranks_standard.D_SW.tar.gz - tar -xzvf 8.1.3_c12_6_ranks_standard.D_SW.tar.gz - wget https://portal.nccs.nasa.gov/datashare/astg/smt/pace-regression-data/8.1.3_c12_6_ranks_standard.RiemSolverC.tar.gz - tar -xzvf 8.1.3_c12_6_ranks_standard.RiemSolverC.tar.gz - wget https://portal.nccs.nasa.gov/datashare/astg/smt/pace-regression-data/8.1.3_c12_6_ranks_standard.Remapping.tar.gz - tar -xzvf 8.1.3_c12_6_ranks_standard.Remapping.tar.gz - cd ../.. 
- - name: "Regression test: Riemman Solver on C-grid" - run: | - export FV3_DACEMODE=BuildAndRun - export PACE_CONSTANTS=GFS - cd pace - pytest -v -s --data_path=./test_data/8.1.3/c12_6ranks_standard/dycore \ - --backend=dace:cpu --which_modules=Riem_Solver_C \ - --threshold_overrides_file=./fv3core/tests/savepoint/translate/overrides/standard.yaml \ - ./fv3core/tests/savepoint - - name: "Regression test: D-grid shallow water lagrangian dynamics (D_SW)" - run: | - export FV3_DACEMODE=BuildAndRun - export PACE_CONSTANTS=GFS - cd pace - pytest -v -s --data_path=./test_data/8.1.3/c12_6ranks_standard/dycore \ - --backend=dace:cpu --which_modules=D_SW \ - --threshold_overrides_file=./fv3core/tests/savepoint/translate/overrides/standard.yaml \ - ./fv3core/tests/savepoint - - name: "Regression test: Remapping (on rank 0 only)" - run: | - export FV3_DACEMODE=BuildAndRun - export PACE_CONSTANTS=GFS - cd pace - pytest -v -s --data_path=./test_data/8.1.3/c12_6ranks_standard/dycore \ - --backend=dace:cpu --which_modules=Remapping --which_rank=0 \ - --threshold_overrides_file=./fv3core/tests/savepoint/translate/overrides/standard.yaml \ - ./fv3core/tests/savepoint diff --git a/.github/workflows/pyFV3-ci.yml b/.github/workflows/pyFV3-ci.yml new file mode 100644 index 0000000000..f50f424bb8 --- /dev/null +++ b/.github/workflows/pyFV3-ci.yml @@ -0,0 +1,94 @@ +name: NASA/NOAA pyFV3 repository build test + +on: + push: + branches: [ master, ci-fix ] + pull_request: + branches: [ master, ci-fix ] + +defaults: + run: + shell: bash + +jobs: + build_and_validate_pyFV3: + if: "!contains(github.event.pull_request.labels.*.name, 'no-ci')" + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.11.7] + + steps: + - uses: actions/checkout@v2 + with: + repository: 'NOAA-GFDL/PyFV3' + ref: 'ci/DaCe' + submodules: 'recursive' + path: 'pyFV3' + - uses: actions/checkout@v2 + with: + path: 'dace' + submodules: 'recursive' + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install library dependencies + run: | + sudo apt-get install libopenmpi-dev libboost-all-dev gcc-13 + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-13 13 + gcc --version + # Because Github doesn't allow us to do a git checkout in code + # we use a trick to checkout DaCe first (not using the external submodule) + # install the full suite via requirements_dev, then re-install the correct DaCe + - name: Install Python packages + run: | + python -m pip install --upgrade pip wheel setuptools + pip install -e ./pyFV3[develop] + pip install -e ./dace + - name: Download data + run: | + cd pyFV3 + mkdir -p test_data + cd test_data + wget https://portal.nccs.nasa.gov/datashare/astg/smt/pace-regression-data/8.1.3_c12_6ranks_standard.D_SW.tar.gz + tar -xzvf 8.1.3_c12_6ranks_standard.D_SW.tar.gz + wget https://portal.nccs.nasa.gov/datashare/astg/smt/pace-regression-data/8.1.3_c12_6ranks_standard.RiemSolver3.tar.gz + tar -xzvf 8.1.3_c12_6ranks_standard.RiemSolver3.tar.gz + wget https://portal.nccs.nasa.gov/datashare/astg/smt/pace-regression-data/8.1.3_c12_6ranks_standard.Remapping.tar.gz + tar -xzvf 8.1.3_c12_6ranks_standard.Remapping.tar.gz + cd ../.. 
+ # Clean up caches between run for stale un-expanded SDFG to trip the build system (NDSL side issue) + - name: "Regression test: Riemman Solver on D-grid (RiemSolver3)" + env: + FV3_DACEMODE: BuildAndRun + PACE_CONSTANTS: GFS + PACE_LOGLEVEL: Debug + run: | + pytest -v -s --data_path=./pyFV3/test_data/8.1.3/c12_6ranks_standard/dycore \ + --backend=dace:cpu --which_modules=Riem_Solver3 \ + --threshold_overrides_file=./pyFV3/tests/savepoint/translate/overrides/standard.yaml \ + ./pyFV3/tests/savepoint + rm -r ./.gt_cache_FV3_A + - name: "Regression test: Shallow water lagrangian dynamics on D-grid (D_SW) (on rank 0 only)" + env: + FV3_DACEMODE: BuildAndRun + PACE_CONSTANTS: GFS + PACE_LOGLEVEL: Debug + run: | + pytest -v -s --data_path=./pyFV3/test_data/8.1.3/c12_6ranks_standard/dycore \ + --backend=dace:cpu --which_modules=D_SW --which_rank=0 \ + --threshold_overrides_file=./pyFV3/tests/savepoint/translate/overrides/standard.yaml \ + ./pyFV3/tests/savepoint + rm -r ./.gt_cache_FV3_A + - name: "Regression test: Remapping (on rank 0 only)" + env: + FV3_DACEMODE: BuildAndRun + PACE_CONSTANTS: GFS + PACE_LOGLEVEL: Debug + run: | + pytest -v -s --data_path=./pyFV3/test_data/8.1.3/c12_6ranks_standard/dycore \ + --backend=dace:cpu --which_modules=Remapping --which_rank=0 \ + --threshold_overrides_file=./pyFV3/tests/savepoint/translate/overrides/standard.yaml \ + ./pyFV3/tests/savepoint + rm -r ./.gt_cache_FV3_A From b6fbd768d1fd250d5de1f35e7a78a7809f81c4d6 Mon Sep 17 00:00:00 2001 From: BenWeber42 Date: Tue, 28 May 2024 16:11:47 +0200 Subject: [PATCH 14/76] Removed GCC 13 from Pace CI (#1575) Fixes various smaller issues of the Pace CI: - Removed installation command for non-existent `gcc-13` package - Adds Pace CI to merge queue (see: https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/configuring-pull-request-merges/managing-a-merge-queue#triggering-merge-group-checks-with-github-actions) - Adds `apt-get update` before `apt-get install` - Adds `-y` flag to `apt-get install` --- .github/workflows/pyFV3-ci.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pyFV3-ci.yml b/.github/workflows/pyFV3-ci.yml index f50f424bb8..2b98327381 100644 --- a/.github/workflows/pyFV3-ci.yml +++ b/.github/workflows/pyFV3-ci.yml @@ -5,6 +5,8 @@ on: branches: [ master, ci-fix ] pull_request: branches: [ master, ci-fix ] + merge_group: + branches: [ master, ci-fix ] defaults: run: @@ -35,8 +37,8 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install library dependencies run: | - sudo apt-get install libopenmpi-dev libboost-all-dev gcc-13 - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-13 13 + sudo apt-get update + sudo apt-get install -y libopenmpi-dev libboost-all-dev gcc --version # Because Github doesn't allow us to do a git checkout in code # we use a trick to checkout DaCe first (not using the external submodule) From 72147393d3b39477723f7b368ca3dfcc9bf22e38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20M=C3=BCller?= <147368808+philip-paul-mueller@users.noreply.github.com> Date: Wed, 29 May 2024 09:39:33 +0200 Subject: [PATCH 15/76] `SDFG.save()` now performs tilde expansion. (#1578) I noticed that it would be cool if I could write `sdfg.save("~/tmp/faulty.sdfg")` and it would save it into my home directory, instead in a directory `./~` in some random directory. 
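The fix relies on the standard library's tilde expansion; a quick sketch of its behaviour (the expanded home path shown is illustrative):

```python
import os

print(os.path.expanduser("~/tmp/faulty.sdfg"))  # e.g. '/home/user/tmp/faulty.sdfg'
print(os.path.expanduser("tmp/faulty.sdfg"))    # unchanged: no leading '~'
```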
--- dace/sdfg/sdfg.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index f10e728607..b43ff2a7bf 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -1520,6 +1520,8 @@ def save(self, filename: str, use_pickle=False, hash=None, exception=None, compr :param compress: If True, uses gzip to compress the file upon saving. :return: The hash of the SDFG, or None if failed/not requested. """ + filename = os.path.expanduser(filename) + if compress: fileopen = lambda file, mode: gzip.open(file, mode + 't') else: From 0020c6c72d8b7c57a05b32ba2d8d7f8ab3f05b06 Mon Sep 17 00:00:00 2001 From: Philipp Schaad Date: Thu, 30 May 2024 16:46:04 +0200 Subject: [PATCH 16/76] Control Flow Block Constraints (#1476) This PR implements pre- and post-conditions on control flow blocks, as well as invariants. This is a feature that was discussed in the last DaCe Workshop of 2023 but has not been implemented yet since then. These invariants serve as helpers in analysis of the SDFG and may in the future be used to add runtime checks / assertions (optionally). A pass can be used to attempt automatic derivation of such constraints, or they can be manually set as properties. This PR adds the scaffolding for this and for now implements a single auto-constraint-derivation criterium, which states that parameters used to determine data container sizes are always `>= 0` and optionally assumed to always be `<= MAX_N`, where `MAX_N` is a configurable analysis pass parameter. --- dace/sdfg/state.py | 7 +++ dace/transformation/passes/analysis.py | 37 +++++++++++++- .../passes/sdfg_constraint_derivation_test.py | 49 +++++++++++++++++++ 3 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 tests/passes/sdfg_constraint_derivation_test.py diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index cafea3d754..429fbbd690 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -1092,6 +1092,10 @@ class ControlFlowBlock(BlockGraphView, abc.ABC): is_collapsed = Property(dtype=bool, desc='Show this block as collapsed', default=False) + pre_conditions = DictProperty(key_type=str, value_type=list, desc='Pre-conditions for this block') + post_conditions = DictProperty(key_type=str, value_type=list, desc='Post-conditions for this block') + invariant_conditions = DictProperty(key_type=str, value_type=list, desc='Invariant conditions for this block') + _label: str def __init__(self, @@ -1104,6 +1108,9 @@ def __init__(self, self._sdfg = sdfg self._parent_graph = parent self.is_collapsed = False + self.pre_conditions = {} + self.post_conditions = {} + self.invariant_conditions = {} def set_default_lineinfo(self, lineinfo: dace.dtypes.DebugInfo): """ diff --git a/dace/transformation/passes/analysis.py b/dace/transformation/passes/analysis.py index cccfbf10a3..82cae6e470 100644 --- a/dace/transformation/passes/analysis.py +++ b/dace/transformation/passes/analysis.py @@ -2,7 +2,7 @@ from collections import defaultdict from dace.transformation import pass_pipeline as ppl -from dace import SDFG, SDFGState, properties, InterstateEdge, Memlet, data as dt +from dace import SDFG, SDFGState, properties, InterstateEdge, Memlet, data as dt, symbolic from dace.sdfg.graph import Edge from dace.sdfg import nodes as nd from dace.sdfg.analysis import cfg @@ -583,3 +583,38 @@ def apply_pass(self, top_sdfg: SDFG, _) -> Dict[int, Dict[str, Set[Union[Memlet, result[anode.data].add(e.data) top_result[sdfg.cfg_id] = result return top_result + + +@properties.make_properties +class 
DeriveSDFGConstraints(ppl.Pass): + + CATEGORY: str = 'Analysis' + + assume_max_data_size = properties.Property(dtype=int, default=None, allow_none=True, + desc='Assume that all data containers have no dimension larger than ' + + 'this value. If None, no assumption is made.') + + def modifies(self) -> ppl.Modifies: + return ppl.Modifies.Nothing + + def should_reapply(self, modified: ppl.Modifies) -> bool: + # If anything was modified, reapply + return modified & ppl.Modifies.Everything + + def _derive_parameter_datasize_constraints(self, sdfg: SDFG, invariants: Dict[str, Set[str]]) -> None: + handled = set() + for arr in sdfg.arrays.values(): + for dim in arr.shape: + if isinstance(dim, symbolic.symbol) and not dim in handled: + ds = str(dim) + if ds not in invariants: + invariants[ds] = set() + invariants[ds].add(f'{ds} > 0') + if self.assume_max_data_size is not None: + invariants[ds].add(f'{ds} <= {self.assume_max_data_size}') + handled.add(ds) + + def apply_pass(self, sdfg: SDFG, _) -> Tuple[Dict[str, Set[str]], Dict[str, Set[str]], Dict[str, Set[str]]]: + invariants: Dict[str, Set[str]] = {} + self._derive_parameter_datasize_constraints(sdfg, invariants) + return {}, invariants, {} diff --git a/tests/passes/sdfg_constraint_derivation_test.py b/tests/passes/sdfg_constraint_derivation_test.py new file mode 100644 index 0000000000..868548da7f --- /dev/null +++ b/tests/passes/sdfg_constraint_derivation_test.py @@ -0,0 +1,49 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. + +import dace +from dace.transformation.passes.analysis import DeriveSDFGConstraints + + +def test_infer_data_dim_constraints_nomax(): + N = dace.symbol('N') + + @dace.program + def matmul(A: dace.float64[N, N], B: dace.float64[N, N], C: dace.float64[N, N]): + for i in range(N): + for j in range(N): + for k in range(N): + C[i, j] += A[i, k] * B[k, j] + + sdfg = matmul.to_sdfg() + + derive_pass = DeriveSDFGConstraints() + _, inv, _ = derive_pass.apply_pass(sdfg, {}) + + assert 'N' in inv + assert 'N > 0' in inv['N'] + + +def test_infer_data_dim_constraints_withmax(): + N = dace.symbol('N') + + @dace.program + def matmul(A: dace.float64[N, N], B: dace.float64[N, N], C: dace.float64[N, N]): + for i in range(N): + for j in range(N): + for k in range(N): + C[i, j] += A[i, k] * B[k, j] + + sdfg = matmul.to_sdfg() + + derive_pass = DeriveSDFGConstraints() + derive_pass.assume_max_data_size = 128 + _, inv, _ = derive_pass.apply_pass(sdfg, {}) + + assert 'N' in inv + assert 'N > 0' in inv['N'] + assert 'N <= 128' in inv['N'] + + +if __name__ == "__main__": + test_infer_data_dim_constraints_nomax() + test_infer_data_dim_constraints_withmax() From 8632b8babaa248c6e13d584cee21c2bfc8f9cbcf Mon Sep 17 00:00:00 2001 From: Philipp Schaad Date: Fri, 31 May 2024 01:09:20 +0200 Subject: [PATCH 17/76] Updated SDFV and Corresponding HTML Template (#1580) SDFV has been updated to include the distribution / compiled files again through a separate `dist` branch. This way pulling DaCe or installing it through pip does not require any additional builds to make use of the viewer. Additionally, paths have been corrected (the HTML template used by the `sdfv` CLI command was severely outdated). 
(Closes #100)

---
 dace/viewer/templates/sdfv.html | 127 ++++++++++++++++++++++----------
 dace/viewer/webclient           |   2 +-
 2 files changed, 89 insertions(+), 40 deletions(-)

diff --git a/dace/viewer/templates/sdfv.html b/dace/viewer/templates/sdfv.html
index fd62ae5288..ea93b7e0d3 100644
--- a/dace/viewer/templates/sdfv.html
+++ b/dace/viewer/templates/sdfv.html
@@ -1,64 +1,113 @@
[diff body not recoverable: the HTML markup of the updated "SDFV: SDFG Viewer" template was stripped during extraction, leaving only bare +/- markers and a few surviving Jinja tags such as {% block scripts_after %} / {% endblock %}]

diff --git a/dace/viewer/templates/sdfv_base.html b/dace/viewer/templates/sdfv_base.html
new file mode 100644
index 0000000000..8d8361f20e
--- /dev/null
+++ b/dace/viewer/templates/sdfv_base.html
@@ -0,0 +1,127 @@
[new-file body not recoverable: a Jinja base template titled "SDFV: SDFG Viewer" defining the {% block head %}, {% block title %}, {% block scripts_head %}, {% block content %}, and {% block scripts_after %} blocks; its HTML markup was stripped during extraction]
diff --git a/dace/viewer/templates/sdfv_diff_view.html b/dace/viewer/templates/sdfv_diff_view.html
new file mode 100644
index 0000000000..30c6f72f4a
--- /dev/null
+++ b/dace/viewer/templates/sdfv_diff_view.html
@@ -0,0 +1,32 @@
+{% extends "sdfv_base.html" %}
+
+{% block scripts_after %}
[script contents not recoverable: stripped during extraction]
+{% endblock %}
+

diff --git a/dace/viewer/webclient b/dace/viewer/webclient
index ee843101e8..27174b1918 160000
--- a/dace/viewer/webclient
+++ b/dace/viewer/webclient
@@ -1 +1 @@
-Subproject commit ee843101e8b1b664153f74cee7280b7488ee43f2
+Subproject commit 27174b19180d6cf41e70a77a3a63bfef67ef6983

diff --git a/setup.py b/setup.py
index 614d168c41..6e8635bdf6 100644
--- a/setup.py
+++ b/setup.py
@@ -86,6 +86,7 @@
             'dacelab = dace.cli.dacelab:main',
             'sdfv = dace.cli.sdfv:main',
             'sdfgcc = dace.cli.sdfgcc:main',
+            'sdfg-diff = dace.cli.sdfg_diff:main',
             'fcfd = dace.cli.fcdc:main',
             'daceprof = dace.cli.daceprof:main',
         ],

From 95c65beb55eb2113b70db6042d6c87703df94923 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20M=C3=BCller?= <147368808+philip-paul-mueller@users.noreply.github.com>
Date: Sun, 15 Sep 2024 08:50:52 +0200
Subject: [PATCH 52/76] Made the `SDFGState.add_mapped_tasklet()` more convenient (#1655)

Previously, if the user wanted to supply input and output nodes, they had to pass a `dict` that maps each data name to its access node. However, because of the rules of a valid SDFG, the key of that `dict` is always the same as the data of the access node, so this information is redundant. This commit therefore also allows passing just the access nodes.

---
 dace/sdfg/state.py       | 13 +++++++++++--
 tests/sdfg/state_test.py | 24 ++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py
index 6dca3d186e..e8a8161747 100644
--- a/dace/sdfg/state.py
+++ b/dace/sdfg/state.py
@@ -1736,8 +1736,12 @@ def add_mapped_tasklet(self,
                            language=dtypes.Language.Python,
                            debuginfo=None,
                            external_edges=False,
-                           input_nodes: Optional[Dict[str, nd.AccessNode]] = None,
-                           output_nodes: Optional[Dict[str, nd.AccessNode]] = None,
+                           input_nodes: Optional[Union[Dict[str, nd.AccessNode],
+                                                       List[nd.AccessNode],
+                                                       Set[nd.AccessNode]]] = None,
+                           output_nodes: Optional[Union[Dict[str, nd.AccessNode],
+                                                        List[nd.AccessNode],
+                                                        Set[nd.AccessNode]]] = None,
                            propagate=True) -> Tuple[nd.Tasklet, nd.MapEntry, nd.MapExit]:
         """ Convenience function that adds a map entry, tasklet, map exit, and the respective edges to external arrays.
@@ -1777,6 +1781,11 @@ def add_mapped_tasklet(self,
         tinputs = {k: None for k, v in inputs.items()}
         toutputs = {k: None for k, v in outputs.items()}
 
+        if isinstance(input_nodes, (list, set)):
+            input_nodes = {input_node.data: input_node for input_node in input_nodes}
+        if isinstance(output_nodes, (list, set)):
+            output_nodes = {output_node.data: output_node for output_node in output_nodes}
+
         tasklet = nd.Tasklet(
             name,
             tinputs,

diff --git a/tests/sdfg/state_test.py b/tests/sdfg/state_test.py
index eb4e97ba66..7ba43ac4c0 100644
--- a/tests/sdfg/state_test.py
+++ b/tests/sdfg/state_test.py
@@ -58,7 +58,31 @@ def double_loop(arr: dace.float32[N]):
     sdfg.validate()
 
 
+def test_add_mapped_tasklet():
+    sdfg = dace.SDFG("test_add_mapped_tasklet")
+    state = sdfg.add_state(is_start_block=True)
+
+    for name in "AB":
+        sdfg.add_array(name, (10, 10), dace.float64)
+    A, B = (state.add_access(name) for name in "AB")
+
+    tsklt, me, mx = state.add_mapped_tasklet(
+        "test_map",
+        map_ranges={"i": "0:10", "j": "0:10"},
+        inputs={"__in": dace.Memlet("A[i, j]")},
+        code="__out = math.sin(__in)",
+        outputs={"__out": dace.Memlet("B[j, i]")},
+        external_edges=True,
+        output_nodes=[B],
+        input_nodes={A},
+    )
+    sdfg.validate()
+    assert all(out_edge.dst is B for out_edge in state.out_edges(mx))
+    assert all(in_edge.src is A for in_edge in state.in_edges(me))
+
+
 if __name__ == '__main__':
     test_read_write_set()
     test_read_write_set_y_formation()
     test_deepcopy_state()
+    test_add_mapped_tasklet()

From d31dd7b2990396ac6f76a5cbaa34c131372b54b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20M=C3=BCller?= <147368808+philip-paul-mueller@users.noreply.github.com>
Date: Sun, 15 Sep 2024 08:58:17 +0200
Subject: [PATCH 53/76] Maps With Zero Parameters (#1649)

Before, a map without any parameters was not considered invalid: it would pass validation, but compilation would most likely fail (unless it is a serial map).

This PR:
- Disallows such maps (see the sketch after this list).
- Fixes a small bug in the constructor of the `Map` object.
- Updates `TrivialMapElimination` such that it correctly handles maps with dynamic map ranges.
- Removes the `TrivialMapRangeElimination` transformation, as it is redundant and contained a bug.
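A minimal sketch of the new behaviour (the SDFG construction below is illustrative and not taken from the PR):

```python
import dace

sdfg = dace.SDFG('zero_param_map')
state = sdfg.add_state()
# A map with an empty parameter list; previously this slipped through validation.
me, mx = state.add_map('empty_map', {})
state.add_nedge(me, mx, dace.Memlet())  # empty memlet connecting entry to exit

try:
    sdfg.validate()
except Exception as exc:
    # Expected: 'There must be at least one parameter in a map.'
    print(exc)
```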
--------- Co-authored-by: Tal Ben-Nun --- dace/sdfg/nodes.py | 9 +- dace/transformation/dataflow/__init__.py | 1 - .../dataflow/trivial_map_elimination.py | 106 ++++++++++++------ .../dataflow/trivial_map_range_elimination.py | 48 -------- tests/trivial_map_elimination_test.py | 67 ++++++++++- tests/trivial_map_range_elimination_test.py | 58 ---------- 6 files changed, 142 insertions(+), 147 deletions(-) delete mode 100644 dace/transformation/dataflow/trivial_map_range_elimination.py delete mode 100644 tests/trivial_map_range_elimination_test.py diff --git a/dace/sdfg/nodes.py b/dace/sdfg/nodes.py index 143b60a30f..409d30c57a 100644 --- a/dace/sdfg/nodes.py +++ b/dace/sdfg/nodes.py @@ -932,7 +932,7 @@ def __init__(self, self.label = label self.schedule = schedule self.unroll = unroll - self.collapse = 1 + self.collapse = collapse self.params = params self.range = ndrange self.debuginfo = debuginfo @@ -948,7 +948,12 @@ def __repr__(self): def validate(self, sdfg, state, node): if not dtypes.validate_name(self.label): - raise NameError('Invalid map name "%s"' % self.label) + raise NameError(f'Invalid map name "{self.label}"') + if self.get_param_num() == 0: + raise ValueError('There must be at least one parameter in a map.') + if self.get_param_num() != self.range.dims(): + raise ValueError(f'There are {self.get_param_num()} parameters but the range' + f' has {self.range.dims()} dimensions.') def get_param_num(self): """ Returns the number of map dimension parameters/symbols. """ diff --git a/dace/transformation/dataflow/__init__.py b/dace/transformation/dataflow/__init__.py index db4c928481..4ed7fd6283 100644 --- a/dace/transformation/dataflow/__init__.py +++ b/dace/transformation/dataflow/__init__.py @@ -12,7 +12,6 @@ from .map_fission import MapFission from .map_unroll import MapUnroll from .trivial_map_elimination import TrivialMapElimination -from .trivial_map_range_elimination import TrivialMapRangeElimination from .otf_map_fusion import OTFMapFusion # Data movement diff --git a/dace/transformation/dataflow/trivial_map_elimination.py b/dace/transformation/dataflow/trivial_map_elimination.py index 9387cfce23..69f445fd96 100644 --- a/dace/transformation/dataflow/trivial_map_elimination.py +++ b/dace/transformation/dataflow/trivial_map_elimination.py @@ -1,6 +1,7 @@ # Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. """ Contains classes that implement the trivial-map-elimination transformation. """ +import dace from dace.sdfg import nodes from dace.sdfg import utils as sdutil from dace.transformation import transformation @@ -10,12 +11,17 @@ @make_properties class TrivialMapElimination(transformation.SingleStateTransformation): - """ Implements the Trivial-Map Elimination pattern. + """Implements the Trivial-Map Elimination pattern. - Trivial-Map Elimination removes all dimensions containing only one - element from a map. If this applies to all ranges the map is removed. - Example: Map[i=0:I,j=7] -> Map[i=0:I] - Example: Map[i=0 ,j=7] -> nothing + Trivial-Map Elimination removes all dimensions containing only one + element from a map. If this applies to all ranges the map is removed. + Example: Map[i=0:I,j=7] -> Map[i=0:I] + Example: Map[i=0 ,j=7] -> nothing + + There are some special cases: + - GPU maps are ignored as they are syntactically needed. + - If all map ranges are trivial and the map has dynamic map ranges, + the map is not removed, and one map parameter is retained. 
""" map_entry = transformation.PatternNode(nodes.MapEntry) @@ -26,52 +32,78 @@ def expressions(cls): def can_be_applied(self, graph, expr_index, sdfg, permissive=False): map_entry = self.map_entry - return any(r[0] == r[1] for r in map_entry.map.range) + + if map_entry.map.schedule in (dace.dtypes.GPU_SCHEDULES + [dace.ScheduleType.GPU_Default]): + return False + if not any(r[0] == r[1] for r in map_entry.map.range): + return False + if (map_entry.map.get_param_num()) == 1 and ( + any(not e.dst_conn.startswith("IN_") for e in graph.in_edges(map_entry) if not e.data.is_empty()) + ): + # There is only one map parameter and there are dynamic map ranges, this can not be resolved. + return False + return True def apply(self, graph, sdfg): map_entry = self.map_entry - map_exit = graph.exit_node(map_entry) remaining_ranges = [] remaining_params = [] + scope = graph.scope_subgraph(map_entry) for map_param, ranges in zip(map_entry.map.params, map_entry.map.range.ranges): map_from, map_to, _ = ranges if map_from == map_to: # Replace the map index variable with the value it obtained - scope = graph.scope_subgraph(map_entry) scope.replace(map_param, map_from) else: remaining_ranges.append(ranges) remaining_params.append(map_param) - map_entry.map.range.ranges = remaining_ranges + map_entry.map.range = remaining_ranges map_entry.map.params = remaining_params - if len(remaining_ranges) == 0: - # Redirect map entry's out edges - write_only_map = True - for edge in graph.out_edges(map_entry): - path = graph.memlet_path(edge) - index = path.index(edge) - - if not edge.data.is_empty(): - # Add an edge directly from the previous source connector to the destination - graph.add_edge(path[index - 1].src, path[index - 1].src_conn, edge.dst, edge.dst_conn, edge.data) - write_only_map = False - - # Redirect map exit's in edges. - for edge in graph.in_edges(map_exit): - path = graph.memlet_path(edge) - index = path.index(edge) - - # Add an edge directly from the source to the next destination connector - if len(path) > index + 1: - graph.add_edge(edge.src, edge.src_conn, path[index + 1].dst, path[index + 1].dst_conn, edge.data) - if write_only_map: - outer_exit = path[index+1].dst - outer_entry = graph.entry_node(outer_exit) - if outer_entry is not None: - graph.add_edge(outer_entry, None, edge.src, None, Memlet()) - - # Remove map - graph.remove_nodes_from([map_entry, map_exit]) + if len(remaining_params) != 0: + # There are still some dimensions left, so no need to remove the map + pass + + elif any(not e.dst_conn.startswith("IN_") for e in graph.in_edges(map_entry) if not e.data.is_empty()): + # The map has dynamic map ranges, thus we can not remove the map. + # Instead we add one dimension back to keep the SDFG valid. + map_entry.map.params = [map_param] + map_entry.map.range = [ranges] + + else: + # The map is empty and there are no dynamic map ranges. + self.remove_empty_map(graph, sdfg) + + def remove_empty_map(self, graph, sdfg): + map_entry = self.map_entry + map_exit = graph.exit_node(map_entry) + + # Redirect map entry's out edges + write_only_map = True + for edge in graph.out_edges(map_entry): + if edge.data.is_empty(): + continue + # Add an edge directly from the previous source connector to the destination + path = graph.memlet_path(edge) + index = path.index(edge) + graph.add_edge(path[index - 1].src, path[index - 1].src_conn, edge.dst, edge.dst_conn, edge.data) + write_only_map = False + + # Redirect map exit's in edges. 
+ for edge in graph.in_edges(map_exit): + path = graph.memlet_path(edge) + index = path.index(edge) + + # Add an edge directly from the source to the next destination connector + if len(path) > index + 1: + graph.add_edge(edge.src, edge.src_conn, path[index + 1].dst, path[index + 1].dst_conn, edge.data) + if write_only_map: + outer_exit = path[index+1].dst + outer_entry = graph.entry_node(outer_exit) + if outer_entry is not None: + graph.add_edge(outer_entry, None, edge.src, None, Memlet()) + + # Remove map + graph.remove_nodes_from([map_entry, map_exit]) diff --git a/dace/transformation/dataflow/trivial_map_range_elimination.py b/dace/transformation/dataflow/trivial_map_range_elimination.py deleted file mode 100644 index 1de1f0de90..0000000000 --- a/dace/transformation/dataflow/trivial_map_range_elimination.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. -""" Contains classes that implement the trivial map range elimination transformation. """ - -from dace import registry -from dace.sdfg import nodes -from dace.sdfg import utils as sdutil -from dace.transformation import transformation -from dace.properties import make_properties - - -@make_properties -class TrivialMapRangeElimination(transformation.SingleStateTransformation): - """ Implements the Trivial Map Range Elimination pattern. - - Trivial Map Range Elimination takes a multi-dimensional map with - a range containing one element and removes the corresponding dimension. - Example: Map[i=0:I,j=0] -> Map[i=0:I] - """ - - map_entry = transformation.PatternNode(nodes.MapEntry) - - @classmethod - def expressions(cls): - return [sdutil.node_path_graph(cls.map_entry)] - - def can_be_applied(self, graph, expr_index, sdfg, permissive=False): - map_entry = self.map_entry - if len(map_entry.map.range) <= 1: - return False # only acts on multi-dimensional maps - return any(frm == to for frm, to, _ in map_entry.map.range) - - def apply(self, graph, sdfg): - map_entry = self.map_entry - - remaining_ranges = [] - remaining_params = [] - for map_param, ranges in zip(map_entry.map.params, map_entry.map.range.ranges): - map_from, map_to, _ = ranges - if map_from == map_to: - # Replace the map index variable with the value it obtained - scope = graph.scope_subgraph(map_entry) - scope.replace(map_param, map_from) - else: - remaining_ranges.append(ranges) - remaining_params.append(map_param) - - map_entry.map.range.ranges = remaining_ranges - map_entry.map.params = remaining_params diff --git a/tests/trivial_map_elimination_test.py b/tests/trivial_map_elimination_test.py index 52ab4c1557..f159dc6e6a 100644 --- a/tests/trivial_map_elimination_test.py +++ b/tests/trivial_map_elimination_test.py @@ -52,6 +52,37 @@ def trivial_map_init_sdfg(): return sdfg +def trivial_map_with_dynamic_map_range_sdfg(): + sdfg = dace.SDFG("trivial_map_with_dynamic_map_range") + state = sdfg.add_state("state1", is_start_block=True) + + for name in "ABC": + sdfg.add_scalar(name, dtype=dace.float32, transient=False) + A, B, C = (state.add_access(name) for name in "ABC") + + _, me, _ = state.add_mapped_tasklet( + name="MAP", + map_ranges=[("__i", "0:1"), ("__j", "10:11")], + inputs={"__in": dace.Memlet("A[0]")}, + input_nodes={"A": A}, + code="__out = __in + 1", + outputs={"__out": dace.Memlet("B[0]")}, + output_nodes={"B": B}, + external_edges=True, + ) + state.add_edge( + C, + None, + me, + "dynamic_variable", + dace.Memlet("C[0]"), + ) + me.add_in_connector("dynamic_variable") + sdfg.validate() + + return sdfg + + 
def trivial_map_pseudo_init_sdfg(): sdfg = dace.SDFG('trivial_map_range_expanded') sdfg.add_array('A', [5, 1], dace.float64) @@ -160,7 +191,6 @@ def test_can_be_applied(self): count = graph.apply_transformations(TrivialMapElimination, validate=False, validate_all=False) graph.validate() - #graph.view() self.assertGreater(count, 0) @@ -188,5 +218,40 @@ def test_reconnects_edges(self): self.assertEqual(len(state.out_edges(map_entries[0])), 1) +class TrivialMapEliminationWithDynamicMapRangesTest(unittest.TestCase): + """ + Tests the case where the map has trivial ranges and dynamic map ranges. + """ + + def test_can_be_applied(self): + graph = trivial_map_with_dynamic_map_range_sdfg() + + count = graph.apply_transformations(TrivialMapElimination) + graph.validate() + + self.assertEqual(count, 1) + + + def test_removes_map(self): + graph = trivial_map_with_dynamic_map_range_sdfg() + + graph.apply_transformations(TrivialMapElimination) + + state = graph.nodes()[0] + map_entries = [n for n in state.nodes() if isinstance(n, dace.sdfg.nodes.MapEntry)] + self.assertEqual(len(map_entries), 1) + self.assertEqual(state.in_degree(map_entries[0]), 2) + self.assertTrue(any(e.dst_conn.startswith("IN_") for e in state.in_edges(map_entries[0]))) + self.assertTrue(any(not e.dst_conn.startswith("IN_") for e in state.in_edges(map_entries[0]))) + + def test_not_remove_dynamic_map_range(self): + graph = trivial_map_with_dynamic_map_range_sdfg() + + count1 = graph.apply_transformations(TrivialMapElimination) + self.assertEqual(count1, 1) + + count2 = graph.apply_transformations(TrivialMapElimination) + self.assertEqual(count2, 0) + if __name__ == '__main__': unittest.main() diff --git a/tests/trivial_map_range_elimination_test.py b/tests/trivial_map_range_elimination_test.py deleted file mode 100644 index 5be1e6a2bf..0000000000 --- a/tests/trivial_map_range_elimination_test.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. 
-import dace -from dace.sdfg import nodes -from dace.transformation.dataflow import TrivialMapRangeElimination -import unittest - - -def trivial_map_range_sdfg(): - sdfg = dace.SDFG('trivial_map_range') - sdfg.add_array('A', [5], dace.float64) - sdfg.add_array('B', [5], dace.float64) - state = sdfg.add_state() - - # Nodes - read = state.add_read('A') - map_entry, map_exit = state.add_map('map', dict(i='0:1', j='0:5')) - tasklet = state.add_tasklet('tasklet', {'a'}, {'b'}, 'b = a') - write = state.add_write('B') - - # Edges - state.add_memlet_path(read, map_entry, tasklet, memlet=dace.Memlet.simple('A', '0'), dst_conn='a') - state.add_memlet_path(tasklet, map_exit, write, memlet=dace.Memlet.simple('B', 'i'), src_conn='b') - - sdfg.validate() - return sdfg - - -class TrivialMapRangeEliminationTest(unittest.TestCase): - def test_can_be_applied(self): - graph = trivial_map_range_sdfg() - - count = graph.apply_transformations(TrivialMapRangeElimination) - - self.assertGreater(count, 0) - - def test_transforms_map(self): - graph = trivial_map_range_sdfg() - - graph.apply_transformations(TrivialMapRangeElimination) - - state = graph.nodes()[0] - map_entry = [n for n in state.nodes() if isinstance(n, dace.sdfg.nodes.MapEntry)][0] - self.assertEqual(map_entry.map.params, ['j']) - self.assertEqual(map_entry.map.range, dace.subsets.Range([(0, 4, 1)])) - - def test_raplaces_map_params_in_scope(self): - graph = trivial_map_range_sdfg() - - graph.apply_transformations(TrivialMapRangeElimination) - - state = graph.nodes()[0] - map_exit = [n for n in state.nodes() if isinstance(n, dace.sdfg.nodes.MapExit)][0] - out_memlet = state.in_edges(map_exit)[0] - self.assertEqual(out_memlet.data.subset, dace.subsets.Range([(0, 0, 1)])) - - -if __name__ == '__main__': - unittest.main() From b0699ed83d04923adf4825eb8aacce72ecc9a376 Mon Sep 17 00:00:00 2001 From: Philipp Schaad Date: Mon, 16 Sep 2024 17:56:50 +0200 Subject: [PATCH 54/76] Update SDFV (#1656) --- dace/viewer/webclient | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/viewer/webclient b/dace/viewer/webclient index 27174b1918..c6b8fe4fd2 160000 --- a/dace/viewer/webclient +++ b/dace/viewer/webclient @@ -1 +1 @@ -Subproject commit 27174b19180d6cf41e70a77a3a63bfef67ef6983 +Subproject commit c6b8fe4fd2c3616b0480ead4c24d8012b91a31fd From 829687cef20fe901a720ec5ab8b1b3f78535649e Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Tue, 17 Sep 2024 23:44:03 -0700 Subject: [PATCH 55/76] Bug in constant propagation with multiple constants (#1658) Propagating multiple constants symbolically at the same time is not a good idea if propagated symbol A can change a value that affects propagated symbol B. This PR adds a failing test and hopefully a fix. 
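To make the failure mode concrete, here is a hand-written sketch in plain
Python (standing in for the pass's symbolic machinery; only `_UnknownValue` is
a real name from the pass, everything else is illustrative):

    # Earlier edges established that `t` currently equals `irev`.
    known = {'t': 'irev'}
    # The current edge reassigns `irev`.
    assignments = {'irev': 'irev - 1'}

    # Substituting t -> irev past this edge would silently bind `t` to the
    # *new* irev. The fix: any constant whose defining expression mentions a
    # symbol reassigned on this edge becomes unknown.
    for name, expr in list(known.items()):
        if any(sym in expr.split() for sym in assignments):
            known[name] = None  # stands in for _UnknownValue
    print(known)  # {'t': None}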
@luigifusco @phschaad --------- Co-authored-by: Luigi Fusco --- .../dataflow/stream_transient.py | 1 - .../passes/constant_propagation.py | 5 +- dace/transformation/transformation.py | 2 +- tests/passes/constant_propagation_test.py | 81 ++++++++++++++++++- 4 files changed, 82 insertions(+), 7 deletions(-) diff --git a/dace/transformation/dataflow/stream_transient.py b/dace/transformation/dataflow/stream_transient.py index b8c0f5820c..d4df0b6855 100644 --- a/dace/transformation/dataflow/stream_transient.py +++ b/dace/transformation/dataflow/stream_transient.py @@ -6,7 +6,6 @@ from dace.symbolic import symstr import warnings -from numpy.core.numeric import outer from dace import data, dtypes, registry, symbolic, subsets from dace.frontend.operations import detect_reduction_type from dace.properties import SymbolicProperty, make_properties, Property diff --git a/dace/transformation/passes/constant_propagation.py b/dace/transformation/passes/constant_propagation.py index 9006ae3c10..b2c3df3ce8 100644 --- a/dace/transformation/passes/constant_propagation.py +++ b/dace/transformation/passes/constant_propagation.py @@ -214,7 +214,10 @@ def _add_nested_datanames(name: str, desc: data.Structure): for aname, aval in constants.items(): # If something was assigned more than once (to a different value), it's not a constant - if aname in assignments and aval != assignments[aname]: + # If a symbol appearing in the replacing expression of a constant is modified, + # the constant is not valid anymore + if ((aname in assignments and aval != assignments[aname]) or + symbolic.free_symbols_and_functions(aval) & edge.data.assignments.keys()): assignments[aname] = _UnknownValue else: assignments[aname] = aval diff --git a/dace/transformation/transformation.py b/dace/transformation/transformation.py index d9cd798f0c..727ec5555b 100644 --- a/dace/transformation/transformation.py +++ b/dace/transformation/transformation.py @@ -1092,7 +1092,7 @@ def _subgraph_transformation_extract_sdfg_arg(*args) -> SDFG: raise TypeError('Unrecognized graph type "%s"' % type(subgraph).__name__) -def single_level_sdfg_only(cls: ppl.Pass): +def single_level_sdfg_only(cls: PassT) -> PassT: for function_name in ['apply_pass', 'apply_to']: _make_function_blocksafe(cls, function_name, lambda *args: args[1]) diff --git a/tests/passes/constant_propagation_test.py b/tests/passes/constant_propagation_test.py index 89b7e7ed5c..3420403b49 100644 --- a/tests/passes/constant_propagation_test.py +++ b/tests/passes/constant_propagation_test.py @@ -1,4 +1,4 @@ -# Copyright 2019-2022 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. 
+# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved.
 import pytest
 
 import dace
@@ -359,8 +359,8 @@ def test_for_with_conditional_assignment():
     sdfg.add_symbol('i', dace.int64)
     sdfg.add_symbol('check', dace.bool)
     sdfg.add_symbol('__tmp1', dace.bool)
-    sdfg.add_array('__return', {1,}, dace.bool)
-    sdfg.add_array('in_arr', {N,}, dace.bool)
+    sdfg.add_array('__return', {1}, dace.bool)
+    sdfg.add_array('in_arr', {N}, dace.bool)
 
     init = sdfg.add_state('init')
     guard = sdfg.add_state('guard')
@@ -473,7 +473,7 @@ def test_for_with_external_init_nested_start_with_guard():
 def test_skip_branch():
     sdfg = dace.SDFG('skip_branch')
     sdfg.add_symbol('k', dace.int32)
-    sdfg.add_array('__return', (1,), dace.int32)
+    sdfg.add_array('__return', (1, ), dace.int32)
     init = sdfg.add_state('init')
     if_guard = sdfg.add_state('if_guard')
     if_state = sdfg.add_state('if_state')
@@ -501,6 +501,78 @@ def test_skip_branch():
     assert (rval_2[0] == 1)
 
 
+def test_dependency_change():
+    """
+    Tests a regression in constant propagation that stems from a variable's
+    dependency being set in the same edge where the pre-propagated symbol was
+    also a right-hand side expression. The original SDFG is semantically sound,
+    but the propagated one may update ``t`` to be ``t + <the updated irev>``
+    instead of the older ``irev``.
+    """
+
+    sdfg = dace.SDFG('tester')
+    sdfg.add_symbol('N', dace.int64)
+    sdfg.add_array('a', [1], dace.int64)
+    init = sdfg.add_state()
+    entry = sdfg.add_state('entry')
+    body = sdfg.add_state('body')
+    body2 = sdfg.add_state('body2')
+    exiting = sdfg.add_state('exiting')
+    latch = sdfg.add_state('latch')
+    final = sdfg.add_state('final')
+
+    sdfg.add_edge(init, entry, dace.InterstateEdge(assignments=dict(i='0', t='0', irev='2500')))
+    sdfg.add_edge(entry, body, dace.InterstateEdge())
+    sdfg.add_edge(
+        body, body2,
+        dace.InterstateEdge(assignments=dict(t_next='(t + irev)',
+                                             irev_next='(irev + (- 1))',
+                                             i_next='i + 1'), ))
+    sdfg.add_edge(
+        body2, exiting,
+        dace.InterstateEdge(assignments=dict(cont='i_next == 2500'), ))
+    sdfg.add_edge(exiting, final, dace.InterstateEdge('cont'))
+    sdfg.add_edge(exiting, latch, dace.InterstateEdge('not cont', dict(
+        irev='irev_next',
+        i='i_next',
+    )))
+    sdfg.add_edge(latch, body, dace.InterstateEdge(assignments=dict(t='t_next')))
+
+    t = body.add_tasklet('add', {'inp'}, {'out'}, 'out = inp + t')
+    body.add_edge(body.add_read('a'), None, t, 'inp', dace.Memlet('a[0]'))
+    body.add_edge(t, 'out', body.add_write('a'), None, dace.Memlet('a[0]'))
+
+    ConstantPropagation().apply_pass(sdfg, {})
+
+    # Python code equivalent of the above SDFG
+    ref = 0
+
+    i = 0
+    t = 0
+    irev = 2500
+    while True:
+        # body
+        ref += t
+
+        # exiting state
+        t_next = t + irev
+        irev_next = (irev + (-1))
+        i_next = i + 1
+        cont = (i_next == 2500)
+        if not cont:
+            irev = irev_next
+            i = i_next
+            #
+            t = t_next
+            continue
+        else:
+            break
+
+    a = np.zeros([1], np.int64)
+    sdfg(a=a)
+    assert a[0] == ref
+
+
 if __name__ == '__main__':
     test_simple_constants()
     test_nested_constants()
@@ -519,3 +591,4 @@
     test_for_with_external_init_nested()
     test_for_with_external_init_nested_start_with_guard()
     test_skip_branch()
+    test_dependency_change()

From c2bacca575f88d88b0e4ea4659f6029ac7de833d Mon Sep 17 00:00:00 2001
From: BenWeber42
Date: Wed, 18 Sep 2024 16:19:33 +0200
Subject: [PATCH 56/76] Changed more `==` to `equal_valued` (#1633)

Follow-up to #1620. Replaced more `==` with `equal_valued`, since SymPy 1.13
changed the semantics of `==` for its symbolic expressions.
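As a brief illustration of the change in semantics (a minimal sketch assuming
SymPy >= 1.13; `equal_valued` is the helper this series relies on):

    import sympy
    from dace import symbolic

    beta = sympy.Float(1.0)
    # SymPy 1.13 made `==` type-strict between Float and Integer:
    print(beta == sympy.Integer(1))        # False
    # `equal_valued` compares by numeric value instead:
    print(symbolic.equal_valued(1, beta))  # True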
--- dace/frontend/common/einsum.py | 14 +++++++------- dace/libraries/blas/nodes/gemm.py | 22 +++++++++++----------- tests/numpy/einsum_test.py | 10 +++++----- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/dace/frontend/common/einsum.py b/dace/frontend/common/einsum.py index 18e40d57f0..e2cc2be88b 100644 --- a/dace/frontend/common/einsum.py +++ b/dace/frontend/common/einsum.py @@ -122,7 +122,7 @@ def create_batch_gemm_sdfg(dtype, strides, alpha, beta): BATCH, sAM, sAK, sAB, sBK, sBN, sBB, sCM, sCN, sCB = (symbolic.symbol(s) if symbolic.issymbolic( strides[s]) else strides[s] for s in ['BATCH', 'sAM', 'sAK', 'sAB', 'sBK', 'sBN', 'sBB', 'sCM', 'sCN', 'sCB']) - batched = strides['BATCH'] != 1 + batched = not symbolic.equal_valued(1, strides['BATCH']) _, xarr = sdfg.add_array('X', dtype=dtype, @@ -198,7 +198,7 @@ def _create_einsum_internal(sdfg: SDFG, raise ValueError('Invalid number of arrays for einsum expression') if init_output is None: - init_output = (beta != 1.0) + init_output = not symbolic.equal_valued(1, beta) if alpha is None: alpha = 1.0 @@ -284,7 +284,7 @@ def _create_einsum_internal(sdfg: SDFG, rnode = Reduce('einsum_reduce') rnode.axes = axes rnode.wcr = 'lambda a, b: a + b' - if beta == 0: + if symbolic.equal_valued(0, beta): rnode.identity = 0 c = state.add_write(output) @@ -301,7 +301,7 @@ def _create_einsum_internal(sdfg: SDFG, # Add state before this one to initialize the output value if to_init: init_state = sdfg.add_state_before(state) - if beta == 0.0: + if symbolic.equal_valued(0, beta): inputs = {} inputs_scalar = set() code = f'out_{output} = 0' @@ -321,12 +321,12 @@ def _create_einsum_internal(sdfg: SDFG, onode = init_state.add_write(output) init_state.add_edge(t, 'out_%s' % output, onode, None, Memlet.simple(output, '0')) - if beta != 0.0: + if not symbolic.equal_valued(0, beta): inode = init_state.add_read(output) init_state.add_edge(inode, None, t, 'inp_%s' % output, Memlet.simple(output, '0')) wcr = 'lambda a,b: a+b' if is_conflicted else None - alphacode = '' if alpha == 1.0 else f'{alpha} * ' + alphacode = '' if symbolic.equal_valued(1, alpha) else f'{alpha} * ' # Pure einsum map state.add_mapped_tasklet( 'einsum', {k: '0:%s' % v @@ -376,7 +376,7 @@ def _create_einsum_internal(sdfg: SDFG, strides['sCB'] = strides['sCM'] = strides['N'] # Transposed output, swap order - if strides['sCM'] == 1: + if symbolic.equal_valued(1, strides['sCM']): strides['sCM'], strides['sCN'] = strides['sCN'], strides['sCM'] strides['M'], strides['N'] = strides['N'], strides['M'] (strides['sAM'], strides['sAK'], strides['sAB'], strides['sBK'], strides['sBN'], strides['sBB']) = \ diff --git a/dace/libraries/blas/nodes/gemm.py b/dace/libraries/blas/nodes/gemm.py index 1f11c5dc17..ac8732d106 100644 --- a/dace/libraries/blas/nodes/gemm.py +++ b/dace/libraries/blas/nodes/gemm.py @@ -395,7 +395,7 @@ def expansion(cls, node, state, sdfg): nstate.add_edge(tasklet, '_conn_c', gc, None, dace.Memlet.from_array('_c_gpu', cdesc)) nstate.add_nedge(gc, c, dace.Memlet.from_array('_c', cdesc)) - if node.beta != 0.0: + if not equal_valued(0, node.beta): rc = nstate.add_read('_c') rgc = nstate.add_access('_c_gpu') tasklet.add_in_connector('_conn_cin') @@ -461,7 +461,7 @@ def expansion(node, state, sdfg): (_, adesc, ashape, astrides), (_, bdesc, bshape, bstrides), _ = _get_matmul_operands(node, state, sdfg) dtype = adesc.dtype.base_type - if node.beta != 0: + if not equal_valued(0, node.beta): raise NotImplementedError M = ashape[0] @@ -588,7 +588,7 @@ def expansion(node, parent_state, 
parent_sdfg, num_pes=32, tile_size_m=None): new_sdfg.add_array("_b", shape_b, dtype_b, strides=strides_b, storage=outer_array_b.storage) new_sdfg.add_array("_c", shape_c, dtype_c, strides=strides_c, storage=outer_array_c.storage) - if node.beta != 0: + if not equal_valued(0, node.beta): new_sdfg.add_array("_cin", shape_c, dtype_c, strides=strides_c, storage=outer_array_c.storage) def make_read_A(state): @@ -672,7 +672,7 @@ def make_write_C(state): # Receives the results and adds it to C pipe = state.add_read("C_pipe") - if node.beta != 0: + if not equal_valued(0, node.beta): mem_read = state.add_read("_cin") mem = state.add_write("_c") @@ -688,15 +688,15 @@ def make_write_C(state): # deal with out-of-bound accesses - mul_accumulated = f"{node.alpha} * from_kernel" if node.alpha != 1.0 else "from_kernel" - if node.beta != 0: - if node.beta != 1.0: + mul_accumulated = f"{node.alpha} * from_kernel" if not equal_valued(1, node.alpha) else "from_kernel" + if not equal_valued(0, node.beta): + if not equal_valued(1, node.beta): add_prev_c = f" + {node.beta} * prev_c" else: add_prev_c = " + prev_c" else: add_prev_c = "" - tasklet_inputs = {"from_kernel", "prev_c"} if node.beta != 0 else {"from_kernel"} + tasklet_inputs = {"from_kernel", "prev_c"} if not equal_valued(0, node.beta) else {"from_kernel"} tasklet = state.add_tasklet( "write_C", tasklet_inputs, {"to_memory"}, f"""\ if tm * {T} + m < {M} and n0 * {P} + n1 < {N} : @@ -707,7 +707,7 @@ def make_write_C(state): tasklet, dst_conn="from_kernel", memlet=dace.Memlet(f"C_pipe[{P}-1]")) - if node.beta != 0: + if not equal_valued(0, node.beta): state.add_memlet_path(mem_read, entry_map, tasklet, @@ -998,7 +998,7 @@ class Gemm(dace.sdfg.nodes.LibraryNode): def __init__(self, name, location=None, transA=False, transB=False, alpha=1, beta=0, cin=True): super().__init__(name, location=location, - inputs=({"_a", "_b", "_cin"} if beta != 0 and cin else {"_a", "_b"}), + inputs=({"_a", "_b", "_cin"} if not equal_valued(0, beta) and cin else {"_a", "_b"}), outputs={"_c"}) self.transA = True if transA else False self.transB = True if transB else False @@ -1091,7 +1091,7 @@ def gemm_libnode(pv: 'ProgramVisitor', state.add_edge(B_in, None, libnode, '_b', mm.Memlet(B)) state.add_edge(libnode, '_c', C_out, None, mm.Memlet(C)) - if beta != 0: + if not equal_valued(0, beta): C_in = state.add_read(C) state.add_edge(C_in, None, libnode, '_cin', mm.Memlet(C)) diff --git a/tests/numpy/einsum_test.py b/tests/numpy/einsum_test.py index 89ab253fd2..48e0d47b93 100644 --- a/tests/numpy/einsum_test.py +++ b/tests/numpy/einsum_test.py @@ -268,12 +268,12 @@ def tester(A, B): assert np.allclose(sdfg(A, B), C) -@pytest.mark.parametrize('symbolic', (False, True)) -def test_lift_einsum_alpha_beta(symbolic): +@pytest.mark.parametrize('symbolic_alpha', (False, True)) +def test_lift_einsum_alpha_beta(symbolic_alpha): from dace.libraries.blas.nodes.einsum import Einsum from dace.transformation.dataflow import LiftEinsum - alph = dace.symbol('alph') if symbolic else 2 + alph = dace.symbol('alph') if symbolic_alpha else 2 @dace.program def tester(A, B): @@ -296,9 +296,9 @@ def tester(A, B): if isinstance(node, Einsum): assert node.einsum_str == 'ij,jk->ik' assert node.alpha == alph - assert node.beta == 1.0 + assert symbolic.equal_valued(1, node.beta) - if not symbolic: + if not symbolic_alpha: C = 1 + 2 * A @ B assert np.allclose(sdfg(A, B), C) From d0dcf1ca8407f02f691816eed7102057df2d8149 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20M=C3=BCller?= 
<147368808+philip-paul-mueller@users.noreply.github.com>
Date: Fri, 20 Sep 2024 11:01:38 +0200
Subject: [PATCH 57/76] Fixed `PruneConnectors` (#1660)

There was a bug in the `PruneConnectors` transformation: the apply function
did not prune the sets correctly. I also made some additional changes.

---
 .../dataflow/prune_connectors.py              | 167 +++----------
 .../transformations/prune_connectors_test.py  | 155 ++++++++++++++--
 2 files changed, 166 insertions(+), 156 deletions(-)

diff --git a/dace/transformation/dataflow/prune_connectors.py b/dace/transformation/dataflow/prune_connectors.py
index 499f488448..a8371047df 100644
--- a/dace/transformation/dataflow/prune_connectors.py
+++ b/dace/transformation/dataflow/prune_connectors.py
@@ -11,67 +11,66 @@
 
 @properties.make_properties
 class PruneConnectors(pm.SingleStateTransformation):
-    """ Removes unused connectors from nested SDFGs, as well as their memlets
-        in the outer scope, replacing them with empty memlets if necessary.
+    """
+    Removes unused connectors from nested SDFGs, as well as their memlets in the outer scope.
 
-        Optionally: after pruning, removes the unused containers from parent SDFG.
+    The transformation will not apply if this would remove all inputs and outputs.
     """
 
    nsdfg = pm.PatternNode(nodes.NestedSDFG)
 
-    remove_unused_containers = properties.Property(dtype=bool,
-                                                   default=False,
-                                                   desc='If True, remove unused containers from parent SDFG.')
-
     @classmethod
     def expressions(cls):
         return [utils.node_path_graph(cls.nsdfg)]
 
     def can_be_applied(self, graph: SDFGState, expr_index: int, sdfg: SDFG, permissive: bool = False) -> bool:
+        prune_in, prune_out = self._get_prune_sets(graph)
+        if not prune_in and not prune_out:
+            return False
+
+        return True
+
+    def _get_prune_sets(self, state: SDFGState) -> Tuple[Set[str], Set[str]]:
+        """Computes the sets of input and output connectors that can be removed.
+
+        Returns:
+            A tuple of two sets; the first contains the names of all input
+            connectors that can be removed, and the second the names of all
+            output connectors that can be removed.
+        """
         nsdfg = self.nsdfg
 
+        # From the input connectors (i.e., data containers on the inside), remove
+        # all those that are not used for reading, and from the output connectors
+        # remove those that are not used for writing.
+        # NOTE: If a data container is used for both reading and writing, only the
+        # output connector is retained, unless the output is a WCR, in which case
+        # the input is also retained.
         read_set, write_set = nsdfg.sdfg.read_and_write_sets()
         prune_in = nsdfg.in_connectors.keys() - read_set
         prune_out = nsdfg.out_connectors.keys() - write_set
 
-        # Take into account symbol mappings
-        strs = tuple(nsdfg.symbol_mapping.values())
-        syms = tuple(symbolic.pystr_to_symbolic(s) for s in strs)
-        symnames = tuple(s.name if hasattr(s, 'name') else '' for s in syms)
-        for conn in list(prune_in):
-            if conn in syms or conn in symnames or conn in nsdfg.sdfg.symbols:
-                prune_in.remove(conn)
-
-        # Add WCR outputs to "do not prune" input list
-        for e in graph.out_edges(nsdfg):
+        for e in state.out_edges(nsdfg):
             if e.data.wcr is not None and e.src_conn in prune_in:
                 prune_in.remove(e.src_conn)
 
-        if not prune_in and not prune_out:
-            return False
-
-        return True
+        return prune_in, prune_out
 
     def apply(self, state: SDFGState, sdfg: SDFG):
         nsdfg = self.nsdfg
 
+        # Determine which connectors can be removed.
+ prune_in, prune_out = self._get_prune_sets(state) + # Fission subgraph around nsdfg into its own state to avoid data races nsdfg_state = helpers.state_fission_after(state, nsdfg) - read_set, write_set = nsdfg.sdfg.read_and_write_sets() - prune_in = nsdfg.in_connectors.keys() - read_set - prune_out = nsdfg.out_connectors.keys() - write_set - # Detect which nodes are used, so we can delete unused nodes after the # connectors have been pruned + read_set, write_set = nsdfg.sdfg.read_and_write_sets() all_data_used = read_set | write_set - # Add WCR outputs to "do not prune" input list - for e in nsdfg_state.out_edges(nsdfg): - if e.data.wcr is not None and e.src_conn in prune_in: - prune_in.remove(e.src_conn) - for conn in prune_in: for e in nsdfg_state.in_edges_by_connector(nsdfg, conn): nsdfg_state.remove_memlet_path(e, remove_orphans=True) @@ -89,18 +88,6 @@ def apply(self, state: SDFGState, sdfg: SDFG): # If the data is now unused, we can purge it from the SDFG nsdfg.sdfg.remove_data(conn) - if self.remove_unused_containers: - # Remove unused containers from parent SDFGs - containers = list(sdfg.arrays.keys()) - for name in containers: - s = nsdfg.sdfg - while s.parent_sdfg: - s = s.parent_sdfg - try: - s.remove_data(name) - except ValueError: - break - class PruneSymbols(pm.SingleStateTransformation): """ @@ -177,99 +164,3 @@ def apply(self, graph: SDFGState, sdfg: SDFG): # If not used in SDFG, remove from symbols as well if helpers.is_symbol_unused(nsdfg.sdfg, candidate): nsdfg.sdfg.remove_symbol(candidate) - - -class PruneUnusedOutputs(pm.SingleStateTransformation): - """ - Removes unused symbol mappings from nested SDFGs, as well as internal - symbols if necessary. - """ - - nsdfg = pm.PatternNode(nodes.NestedSDFG) - - @classmethod - def expressions(cls): - return [utils.node_path_graph(cls.nsdfg)] - - @classmethod - def _candidates(cls, nsdfg: nodes.NestedSDFG) -> Tuple[Set[str], Set[Tuple[SDFGState, nodes.AccessNode]]]: - # Start with all non-transient arrays - candidates = set(conn for conn in nsdfg.out_connectors.keys()) - candidate_nodes: Set[Tuple[SDFGState, nodes.AccessNode]] = set() - - # Remove candidates that are used more than once in the outer SDFG - state = nsdfg.sdfg.parent - sdfg = nsdfg.sdfg.parent_sdfg - for e in state.out_edges(nsdfg): - if e.data.is_empty(): - continue - outer_desc = sdfg.arrays[e.data.data] - if isinstance(outer_desc, dt.View): - candidates.remove(e.src_conn) - continue - if not outer_desc.transient: - candidates.remove(e.src_conn) - continue - if not isinstance(state.memlet_path(e)[-1].dst, nodes.AccessNode): - candidates.remove(e.src_conn) - continue - - all_access_nodes = [(s, n) for s in sdfg.nodes() for n in s.data_nodes() if n.data == e.data.data] - if len(all_access_nodes) > 1: - candidates.remove(e.src_conn) - continue - if all_access_nodes[0][0].out_degree(all_access_nodes[0][1]) > 0: - candidates.remove(e.src_conn) - continue - - if not candidates: - return set(), set() - - # Remove candidates that are used in the nested SDFG - for nstate in nsdfg.sdfg.states(): - for node in nstate.data_nodes(): - if node.data in candidates: - # If used in nested SDFG - if nstate.out_degree(node) > 0: - candidates.remove(node.data) - continue - # If a result of a code node - if any(not isinstance(nstate.memlet_path(e)[0].src, nodes.AccessNode) - for e in nstate.in_edges(node)): - candidates.remove(node.data) - continue - - # Add node for later use - candidate_nodes.add((nstate, node)) - - # Any array that is used in interstate edges is removed - for e in 
nsdfg.sdfg.all_interstate_edges():
-            candidates -= (set(map(str, symbolic.symbols_in_ast(e.data.condition.code[0]))))
-            for assign in e.data.assignments.values():
-                candidates -= (symbolic.free_symbols_and_functions(assign))
-
-        candidate_nodes = {n for n in candidate_nodes if n[1].data in candidates}
-
-        return candidates, candidate_nodes
-
-    def can_be_applied(self, graph: SDFGState, expr_index: int, sdfg: SDFG, permissive: bool = False) -> bool:
-        nsdfg: nodes.NestedSDFG = self.nsdfg
-        candidates, _ = self._candidates(nsdfg)
-        if len(candidates) > 0:
-            return True
-
-        return False
-
-    def apply(self, state: SDFGState, sdfg: SDFG):
-        nsdfg = self.nsdfg
-
-        candidates, candidate_nodes = self._candidates(nsdfg)
-        for outer_edge in state.out_edges(nsdfg):
-            if outer_edge.src_conn in candidates:
-                state.remove_memlet_path(outer_edge)
-                sdfg.remove_data(outer_edge.data.data, validate=False)
-        for nstate, node in candidate_nodes:
-            for ie in nstate.in_edges(node):
-                nstate.remove_memlet_path(ie)
-        for cand in candidates:
-            nsdfg.sdfg.remove_data(cand, validate=False)
diff --git a/tests/transformations/prune_connectors_test.py b/tests/transformations/prune_connectors_test.py
index 59e1b125ff..4026ec3e1c 100644
--- a/tests/transformations/prune_connectors_test.py
+++ b/tests/transformations/prune_connectors_test.py
@@ -4,6 +4,8 @@
 import os
 import copy
 import pytest
+from typing import Tuple
+
 import dace
 from dace.transformation.dataflow import PruneConnectors
 from dace.transformation.helpers import nest_state_subgraph
@@ -137,17 +139,109 @@ def make_sdfg():
     return sdfg_outer
 
 
-@pytest.mark.parametrize("remove_unused_containers", [False, True])
-def test_prune_connectors(remove_unused_containers, n=None):
+def _make_read_write_sdfg(
+    conforming_memlet: bool,
+) -> Tuple[dace.SDFG, dace.nodes.NestedSDFG]:
+    """Creates an SDFG for the `test_read_write_{1, 2}` tests.
+
+    The SDFG is rather synthetic: it takes an input `in_arg`, adds 10 to every
+    element, and stores the result in array `A` through access node `A1`. From
+    this access node the data flows into a nested SDFG. There, however, the data
+    is not read but overwritten, through a map that writes through access node
+    `inner_A`. That access node then writes into container `inner_B`. Both
+    `inner_A` and `inner_B` are outputs of the nested SDFG and are written back
+    into the data containers `A` and `B`.
+
+    Depending on `conforming_memlet`, the memlet that copies `inner_A` into
+    `inner_B` is associated with either `inner_A` (`True`) or `inner_B` (`False`).
+    This choice determines whether the transformation can apply or not.
+
+    Notes:
+        This is most likely a bug, see [issue#1643](https://github.com/spcl/dace/issues/1643);
+        however, it is the historical behaviour.
+    """
+
+    # Creating the outer SDFG.
+    osdfg = dace.SDFG("Outer_sdfg")
+    ostate = osdfg.add_state(is_start_block=True)
+
+    osdfg.add_array("in_arg", dtype=dace.float64, shape=(4, 4), transient=False)
+    osdfg.add_array("A", dtype=dace.float64, shape=(4, 4), transient=False)
+    osdfg.add_array("B", dtype=dace.float64, shape=(4, 4), transient=False)
+    in_arg, A1, A2, B = (ostate.add_access(name) for name in ["in_arg", "A", "A", "B"])
+
+    ostate.add_mapped_tasklet(
+        "producer",
+        map_ranges={"i": "0:4", "j": "0:4"},
+        inputs={"__in": dace.Memlet("in_arg[i, j]")},
+        code="__out = __in + 10.",
+        outputs={"__out": dace.Memlet("A[i, j]")},
+        input_nodes={in_arg},
+        output_nodes={A1},
+        external_edges=True,
+    )
+
+    # Creating the inner SDFG
+    isdfg = dace.SDFG("Inner_sdfg")
+    istate = isdfg.add_state(is_start_block=True)
+
+    isdfg.add_array("inner_A", dtype=dace.float64, shape=(4, 4), transient=False)
+    isdfg.add_array("inner_B", dtype=dace.float64, shape=(4, 4), transient=False)
+    inner_A, inner_B = (istate.add_access(name) for name in ["inner_A", "inner_B"])
+
+    istate.add_mapped_tasklet(
+        "inner_consumer",
+        map_ranges={"i": "0:4", "j": "0:4"},
+        inputs={},
+        code="__out = 10",
+        outputs={"__out": dace.Memlet("inner_A[i, j]")},
+        output_nodes={inner_A},
+        external_edges=True,
+    )
+
+    # Depending on which data container this memlet is associated with,
+    # the transformation will or will not apply.
+    if conforming_memlet:
+        # Because the `data` field of the incoming and outgoing memlets is in
+        # both cases set to `inner_A`, the read of `inner_A` will be removed
+        # and the transformation can apply.
+        istate.add_nedge(
+            inner_A,
+            inner_B,
+            dace.Memlet("inner_A[0:4, 0:4] -> 0:4, 0:4"),
+        )
+    else:
+        # Because the `data` field of the involved memlets differs, the read of
+        # `inner_A` will not be removed, and thus the transformation cannot
+        # remove the incoming `inner_A`.
+        istate.add_nedge(
+            inner_A,
+            inner_B,
+            dace.Memlet("inner_B[0:4, 0:4] -> 0:4, 0:4"),
+        )
+
+    # Add the nested SDFG
+    nsdfg = ostate.add_nested_sdfg(
+        sdfg=isdfg,
+        parent=osdfg,
+        inputs={"inner_A"},
+        outputs={"inner_A", "inner_B"},
+    )
+
+    # Connecting the nested SDFG
+    ostate.add_edge(A1, None, nsdfg, "inner_A", dace.Memlet("A[0:4, 0:4]"))
+    ostate.add_edge(nsdfg, "inner_A", A2, None, dace.Memlet("A[0:4, 0:4]"))
+    ostate.add_edge(nsdfg, "inner_B", B, None, dace.Memlet("B[0:4, 0:4]"))
+
+    return osdfg, nsdfg
+
+
+def test_prune_connectors(n=None):
     if n is None:
         n = 64
 
     sdfg = make_sdfg()
 
-    if sdfg.apply_transformations_repeated(PruneConnectors,
-                                           options=[{
-                                               'remove_unused_containers': remove_unused_containers
-                                           }]) != 3:
+    if sdfg.apply_transformations_repeated(PruneConnectors) != 3:
         raise RuntimeError("PruneConnectors was not applied.")
 
     arr_in = np.zeros((n, n), dtype=np.uint16)
@@ -158,18 +252,16 @@
     except FileNotFoundError:
         pass
 
-    if remove_unused_containers:
-        sdfg(read_used=arr_in, write_used=arr_out, N=n)
-    else:
-        sdfg(read_used=arr_in,
-             read_unused=arr_in,
-             read_used_outer=arr_in,
-             read_unused_outer=arr_in,
-             write_used=arr_out,
-             write_unused=arr_out,
-             write_used_outer=arr_out,
-             write_unused_outer=arr_out,
-             N=n)
+    # Pruning removes the connectors but not the data containers, so they still have to be supplied.
+    sdfg(read_used=arr_in,
+         read_unused=arr_in,
+         read_used_outer=arr_in,
+         read_unused_outer=arr_in,
+         write_used=arr_out,
+         write_unused=arr_out,
+         write_used_outer=arr_out,
+         write_unused_outer=arr_out,
+         N=n)
 
     assert np.allclose(arr_out, arr_in + 1)
 
@@ -318,6 +420,21 @@
     assert np.allclose(np_d, np_d_)
 
 
+def test_read_write_1():
+    # Because the memlet is conforming, we can apply the transformation.
+    sdfg, nsdfg = _make_read_write_sdfg(True)
+
+    assert PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False)
+    sdfg.apply_transformations_repeated(PruneConnectors, validate=True, validate_all=True)
+
+
+def test_read_write_2():
+    # Because the memlet is not conforming, we cannot apply the transformation.
+    sdfg, nsdfg = _make_read_write_sdfg(False)
+
+    assert not PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False)
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--N", default=64)
@@ -330,3 +447,5 @@
     test_unused_retval()
     test_unused_retval_2()
     test_prune_connectors_with_dependencies()
+    test_read_write_1()
+    test_read_write_2()

From 7df09c7fc32a33e2ed396ccb8c397972d3329718 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20M=C3=BCller?=
 <147368808+philip-paul-mueller@users.noreply.github.com>
Date: Tue, 24 Sep 2024 15:18:31 +0200
Subject: [PATCH 58/76] Better Name Validation (#1661)

This PR adds checks to the SDFG to ensure that the names of symbols, data
descriptors, and so on are unique. Furthermore, it ensures that `NestedSDFG`
nodes validate correctly and that no symbols can be written.

---
 dace/data.py                   |  21 -----
 dace/frontend/common/distr.py  |  26 +++---
 dace/frontend/python/newast.py |  11 ++-
 dace/sdfg/nodes.py             |   9 +-
 dace/sdfg/sdfg.py              | 159 ++++++++++++++++++++-----------
 dace/sdfg/validation.py        |  28 ++++++
 6 files changed, 163 insertions(+), 91 deletions(-)

diff --git a/dace/data.py b/dace/data.py
index 04bdc93357..a07fe42083 100644
--- a/dace/data.py
+++ b/dace/data.py
@@ -136,27 +136,6 @@ def create_datadescriptor(obj, no_custom_desc=False):
                         'adaptor method to the type hint or object itself.')
 
 
-def find_new_name(name: str, existing_names: Sequence[str]) -> str:
-    """
-    Returns a name that matches the given ``name`` as a prefix, but does not
-    already exist in the given existing name set. The behavior is typically
-    to append an underscore followed by a unique (increasing) number. If the
-    name does not already exist in the set, it is returned as-is.
-
-    :param name: The given name to find.
-    :param existing_names: The set of existing names.
-    :return: A new name that is not in existing_names.
- """ - if name not in existing_names: - return name - cur_offset = 0 - new_name = name + '_' + str(cur_offset) - while new_name in existing_names: - cur_offset += 1 - new_name = name + '_' + str(cur_offset) - return new_name - - def _prod(sequence): return functools.reduce(lambda a, b: a * b, sequence, 1) diff --git a/dace/frontend/common/distr.py b/dace/frontend/common/distr.py index d6f22da358..88a6b0c54a 100644 --- a/dace/frontend/common/distr.py +++ b/dace/frontend/common/distr.py @@ -42,9 +42,9 @@ def _cart_create(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, dims: Shape state.add_node(tasklet) # Pseudo-writing to a dummy variable to avoid removal of Dummy node by transformations. - _, scal = sdfg.add_scalar(pgrid_name, dace.int32, transient=True) - wnode = state.add_write(pgrid_name) - state.add_edge(tasklet, '__out', wnode, None, Memlet.from_array(pgrid_name, scal)) + scal_name, scal = sdfg.add_scalar(pgrid_name, dace.int32, transient=True, find_new_name=True) + wnode = state.add_write(scal_name) + state.add_edge(tasklet, '__out', wnode, None, Memlet.from_array(scal_name, scal)) return pgrid_name @@ -97,9 +97,9 @@ def _cart_sub(pv: 'ProgramVisitor', state.add_node(tasklet) # Pseudo-writing to a dummy variable to avoid removal of Dummy node by transformations. - _, scal = sdfg.add_scalar(pgrid_name, dace.int32, transient=True) - wnode = state.add_write(pgrid_name) - state.add_edge(tasklet, '__out', wnode, None, Memlet.from_array(pgrid_name, scal)) + scal_name, scal = sdfg.add_scalar(pgrid_name, dace.int32, transient=True, find_new_name=True) + wnode = state.add_write(scal_name) + state.add_edge(tasklet, '__out', wnode, None, Memlet.from_array(scal_name, scal)) return pgrid_name @@ -196,7 +196,7 @@ def _intracomm_bcast(pv: 'ProgramVisitor', if comm_obj == MPI.COMM_WORLD: return _bcast(pv, sdfg, state, buffer, root) # NOTE: Highly experimental - sdfg.add_scalar(comm_name, dace.int32) + scal_name, _ = sdfg.add_scalar(comm_name, dace.int32, find_new_name=True) return _bcast(pv, sdfg, state, buffer, root, fcomm=comm_name) @@ -941,9 +941,9 @@ def _subarray(pv: ProgramVisitor, state.add_node(tasklet) # Pseudo-writing to a dummy variable to avoid removal of Dummy node by transformations. 
-    _, scal = sdfg.add_scalar(subarray_name, dace.int32, transient=True)
-    wnode = state.add_write(subarray_name)
-    state.add_edge(tasklet, '__out', wnode, None, Memlet.from_array(subarray_name, scal))
+    scal_name, scal = sdfg.add_scalar(subarray_name, dace.int32, transient=True, find_new_name=True)
+    wnode = state.add_write(scal_name)
+    state.add_edge(tasklet, '__out', wnode, None, Memlet.from_array(scal_name, scal))
 
     return subarray_name
 
@@ -1078,9 +1078,9 @@ def _redistribute(pv: ProgramVisitor, sdfg: SDFG, state: SDFGState, in_buffer: s
                        f'int* {rdistrarray_name}_self_size;'
     ])
     state.add_node(tasklet)
-    _, scal = sdfg.add_scalar(rdistrarray_name, dace.int32, transient=True)
-    wnode = state.add_write(rdistrarray_name)
-    state.add_edge(tasklet, '__out', wnode, None, Memlet.from_array(rdistrarray_name, scal))
+    scal_name, scal = sdfg.add_scalar(rdistrarray_name, dace.int32, transient=True, find_new_name=True)
+    wnode = state.add_write(scal_name)
+    state.add_edge(tasklet, '__out', wnode, None, Memlet.from_array(scal_name, scal))
 
     libnode = Redistribute('_Redistribute_', rdistrarray_name)
 
diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py
index 1b11fb00c6..60469919f5 100644
--- a/dace/frontend/python/newast.py
+++ b/dace/frontend/python/newast.py
@@ -3302,6 +3302,7 @@ def _visit_assign(self, node, node_target, op, dtype=None, is_return=False):
         tokens = name.split('.')
         name = tokens[0]
         true_name = None
+        true_array = None
         if name in defined_vars:
             true_name = defined_vars[name]
             if len(tokens) > 1:
@@ -3356,7 +3357,7 @@ def _visit_assign(self, node, node_target, op, dtype=None, is_return=False):
             new_data, rng = None, None
             dtype_keys = tuple(dtypes.dtype_to_typeclass().keys())
             if not (result in self.sdfg.symbols or symbolic.issymbolic(result) or isinstance(result, dtype_keys) or
-                    (isinstance(result, str) and result in self.sdfg.arrays)):
+                    (isinstance(result, str) and any(result in x for x in [self.sdfg.arrays, self.sdfg._pgrids, self.sdfg._subarrays, self.sdfg._rdistrarrays]))):
                 raise DaceSyntaxError(
                     self, node, "In assignments, the rhs may only be "
                     "data, numerical/boolean constants "
@@ -3380,6 +3381,14 @@ def _visit_assign(self, node, node_target, op, dtype=None, is_return=False):
                     _, new_data = self.sdfg.add_scalar(true_name, ttype, transient=True)
                 self.variables[name] = true_name
                 defined_vars[name] = true_name
+                if any(result in x for x in [self.sdfg._pgrids, self.sdfg._rdistrarrays, self.sdfg._subarrays]):
+                    # NOTE: In previous versions, some `pgrid`- and subgrid-related replacement functions
+                    # (see `dace/frontend/common/distr.py`) created dummy variables with the same names as
+                    # the entities (such as process grids) that they created, so the frontend would find
+                    # them. Since this is now disallowed, we have to handle this case explicitly.
+                    self.variables[name] = result
+                    defined_vars[name] = result
+                    continue
             elif isinstance(result, str) and result in self.sdfg.arrays:
                 result_data = self.sdfg.arrays[result]
                 if (name.startswith('__return') and isinstance(result_data, data.Scalar)):
diff --git a/dace/sdfg/nodes.py b/dace/sdfg/nodes.py
index 409d30c57a..4ae91d5ea0 100644
--- a/dace/sdfg/nodes.py
+++ b/dace/sdfg/nodes.py
@@ -618,6 +618,7 @@ def used_symbols(self, all_symbols: bool) -> Set[str]:
             internally_used_symbols = self.sdfg.used_symbols(all_symbols=False)
             keys_to_use &= internally_used_symbols
 
+        # Translate the internal symbols back to their external counterparts.
free_syms |= set().union(*(map(str,
                                       pystr_to_symbolic(v).free_symbols) for k, v in self.symbol_mapping.items()
                                   if k in keys_to_use))
 
@@ -662,6 +663,10 @@ def validate(self, sdfg, state, references: Optional[Set[int]] = None, **context
 
         connectors = self.in_connectors.keys() | self.out_connectors.keys()
         for conn in connectors:
+            if conn in self.sdfg.symbols:
+                raise ValueError(
+                    f'Connector "{conn}" was given, but it refers to a symbol, which is not allowed. '
+                    'To pass symbols, use "symbol_mapping".')
             if conn not in self.sdfg.arrays:
                 raise NameError(
                     f'Connector "{conn}" was given but is not a registered data descriptor in the nested SDFG. '
@@ -795,10 +800,8 @@ def new_symbols(self, sdfg, state, symbols) -> Dict[str, dtypes.typeclass]:
         for p, rng in zip(self._map.params, self._map.range):
             result[p] = dtypes.result_type_of(infer_expr_type(rng[0], symbols), infer_expr_type(rng[1], symbols))
 
-        # Add dynamic inputs
+        # Handle the dynamic map ranges.
         dyn_inputs = set(c for c in self.in_connectors if not c.startswith('IN_'))
-
-        # Try to get connector type from connector
         for e in state.in_edges(self):
             if e.dst_conn in dyn_inputs:
                 result[e.dst_conn] = (self.in_connectors[e.dst_conn] or sdfg.arrays[e.data.data].dtype)
diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py
index 84d7189ebd..5e5df1b0a2 100644
--- a/dace/sdfg/sdfg.py
+++ b/dace/sdfg/sdfg.py
@@ -746,17 +746,32 @@ def replace_dict(self,
 
         super().replace_dict(repldict, symrepl, replace_in_graph, replace_keys)
 
-    def add_symbol(self, name, stype):
+    def add_symbol(self, name, stype, find_new_name: bool = False):
         """ Adds a symbol to the SDFG.
 
            :param name: Symbol name.
            :param stype: Symbol type.
+           :param find_new_name: If True, and the name is already taken, finds a new, unused name.
        """
-        if name in self.symbols:
-            raise FileExistsError('Symbol "%s" already exists in SDFG' % name)
+        if find_new_name:
+            name = self._find_new_name(name)
+        else:
+            # We do not check against constants, because constants are
+            # linked to the data descriptors.
+            if name in self.symbols:
+                raise FileExistsError(f'Symbol "{name}" already exists in SDFG')
+            if name in self.arrays:
+                raise FileExistsError(f'Can not create symbol "{name}", the name is used by a data descriptor.')
+            if name in self._subarrays:
+                raise FileExistsError(f'Can not create symbol "{name}", the name is used by a subarray.')
+            if name in self._rdistrarrays:
+                raise FileExistsError(f'Can not create symbol "{name}", the name is used by a RedistrArray.')
+            if name in self._pgrids:
+                raise FileExistsError(f'Can not create symbol "{name}", the name is used by a ProcessGrid.')
        if not isinstance(stype, dtypes.typeclass):
            stype = dtypes.dtype_to_typeclass(stype)
        self.symbols[name] = stype
+        return name
 
     def remove_symbol(self, name):
         """ Removes a symbol from the SDFG.
@@ -1159,14 +1174,23 @@ def cast(dtype: dt.Data, value: Any):
         return result
 
     def add_constant(self, name: str, value: Any, dtype: dt.Data = None):
-        """ Adds/updates a new compile-time constant to this SDFG. A constant
-            may either be a scalar or a numpy ndarray thereof.
+        """
+        Adds/updates a new compile-time constant to this SDFG.
 
-            :param name: The name of the constant.
-            :param value: The constant value.
-            :param dtype: Optional data type of the symbol, or None to deduce
-                          automatically.
+        A constant may either be a scalar or a numpy ndarray thereof. It is not an
+        error if there is already a symbol or an array with the same name inside
+        the SDFG. However, the data descriptors must refer to the same type.
+
+        :param name: The name of the constant.
+        :param value: The constant value.
+        :param dtype: Optional data type of the constant, or None to deduce automatically.
        """
+        if name in self._subarrays:
+            raise FileExistsError(f'Can not create constant "{name}", the name is used by a subarray.')
+        if name in self._rdistrarrays:
+            raise FileExistsError(f'Can not create constant "{name}", the name is used by a RedistrArray.')
+        if name in self._pgrids:
+            raise FileExistsError(f'Can not create constant "{name}", the name is used by a ProcessGrid.')
        self.constants_prop[name] = (dtype or dt.create_datadescriptor(value), value)
 
     @property
@@ -1598,36 +1622,44 @@ def _find_new_name(self, name: str):
        """ Tries to find a new name by adding an underscore and a number. """
 
        names = (self._arrays.keys() | self.constants_prop.keys() | self._pgrids.keys() | self._subarrays.keys()
-                 | self._rdistrarrays.keys())
+                 | self._rdistrarrays.keys() | self.symbols.keys())
        return dt.find_new_name(name, names)
 
+    def is_name_used(self, name: str) -> bool:
+        """ Checks if `name` is already used inside the SDFG. """
+        if name in self._arrays:
+            return True
+        if name in self.symbols:
+            return True
+        if name in self.constants_prop:
+            return True
+        if name in self._pgrids:
+            return True
+        if name in self._subarrays:
+            return True
+        if name in self._rdistrarrays:
+            return True
+        return False
+
+    def is_name_free(self, name: str) -> bool:
+        """ Tests if `name` is free, i.e., not used by anything else. """
+        return not self.is_name_used(name)
+
     def find_new_constant(self, name: str):
        """
-        Tries to find a new constant name by adding an underscore and a number.
+        Tries to find a new name for a constant.
        """
-        constants = self.constants
-        if name not in constants:
+        if self.is_name_free(name):
            return name
-
-        index = 0
-        while (name + ('_%d' % index)) in constants:
-            index += 1
-
-        return name + ('_%d' % index)
+        return self._find_new_name(name)
 
     def find_new_symbol(self, name: str):
        """ Tries to find a new symbol name by adding an underscore and a
            number. """
-        symbols = self.symbols
-        if name not in symbols:
+        if self.is_name_free(name):
            return name
-
-        index = 0
-        while (name + ('_%d' % index)) in symbols:
-            index += 1
-
-        return name + ('_%d' % index)
+        return self._find_new_name(name)
 
     def add_array(self,
                   name: str,
@@ -1856,13 +1888,14 @@ def add_transient(self,
 
     def temp_data_name(self):
        """ Returns a temporary data descriptor name that can be used in this SDFG. """
-
        name = '__tmp%d' % self._temp_transients
+
+        # NOTE: Consider switching to `_find_new_name`;
+        # the frontend seems to access this variable directly.
-        while name in self._arrays:
+        while self.is_name_used(name):
            self._temp_transients += 1
            name = '__tmp%d' % self._temp_transients
        self._temp_transients += 1
-
        return name
 
     def add_temp_transient(self,
@@ -1917,29 +1950,47 @@ def add_datadesc(self, name: str, datadesc: dt.Data, find_new_name=False) -> str:
        """
        if not isinstance(name, str):
            raise TypeError("Data descriptor name must be a string. Got %s" % type(name).__name__)
-        # If exists, fail
-        while name in self._arrays:
-            if find_new_name:
-                name = self._find_new_name(name)
-            else:
-                raise NameError(f'Array or Stream with name "{name}" already exists in SDFG')
-        # NOTE: Remove illegal characters, such as dots. Such characters may be introduced when creating views to
-        # members of Structures.
-        name = name.replace('.', '_')
-        assert name not in self._arrays
-        self._arrays[name] = datadesc
 
-        def _add_symbols(desc: dt.Data):
+        if find_new_name:
+            # These characters might be introduced through the creation of views to members
+            # of structures.
+            # NOTE: If `find_new_name` is `True` and the name (understood as a sequence of
+            # any characters) is not used, i.e. `assert self.is_name_free(name)`, then it
+            # is still "cleaned", i.e. dots are replaced with underscores. However, if
+            # `find_new_name` is `False`, then this cleaning is not applied and it is possible
+            # to create names that are formally invalid. The code below reproduces the exact
+            # same behaviour and is maintained for compatibility. This behaviour is
+            # triggered by `tests/python_frontend/structures/structure_python_test.py::test_rgf`.
- name = name.replace('.', '_') - assert name not in self._arrays - self._arrays[name] = datadesc - def _add_symbols(desc: dt.Data): + if find_new_name: + # These characters might be introduced through the creation of views to members + # of strictures. + # NOTES: If `find_new_name` is `True` and the name (understood as a sequence of + # any characters) is not used, i.e. `assert self.is_name_free(name)`, then it + # is still "cleaned", i.e. dots are replaced with underscores. However, if + # `find_new_name` is `False` then this cleaning is not applied and it is possible + # to create names that are formally invalid. The above code reproduces the exact + # same behaviour and is maintained for compatibility. This behaviour is + # triggered by tests/python_frontend/structures/structure_python_test.py::test_rgf`. + name = self._find_new_name(name) + name = name.replace('.', '_') + if self.is_name_used(name): + name = self._find_new_name(name) + else: + # We do not check for data constant, because there is a link between the constants and + # the data descriptors. + if name in self.arrays: + raise FileExistsError(f'Data descriptor "{name}" already exists in SDFG') + if name in self.symbols: + raise FileExistsError(f'Can not create data descriptor "{name}", the name is used by a symbol.') + if name in self._subarrays: + raise FileExistsError(f'Can not create data descriptor "{name}", the name is used by a subarray.') + if name in self._rdistrarrays: + raise FileExistsError(f'Can not create data descriptor "{name}", the name is used by a RedistrArray.') + if name in self._pgrids: + raise FileExistsError(f'Can not create data descriptor "{name}", the name is used by a ProcessGrid.') + + def _add_symbols(sdfg: SDFG, desc: dt.Data): if isinstance(desc, dt.Structure): for v in desc.members.values(): if isinstance(v, dt.Data): - _add_symbols(v) + _add_symbols(sdfg, v) for sym in desc.free_symbols: - if sym.name not in self.symbols: - self.add_symbol(sym.name, sym.dtype) + if sym.name not in sdfg.symbols: + sdfg.add_symbol(sym.name, sym.dtype) - # Add free symbols to the SDFG global symbol storage - _add_symbols(datadesc) + # Add the data descriptor to the SDFG and all symbols that are not yet known. + self._arrays[name] = datadesc + _add_symbols(self, datadesc) return name @@ -2044,9 +2095,10 @@ def add_subarray(self, newshape.append(dace.symbolic.pystr_to_symbolic(s)) subshape = newshape + # No need to ensure unique test. subarray_name = self._find_new_name('__subarray') - self._subarrays[subarray_name] = SubArray(subarray_name, dtype, shape, subshape, pgrid, correspondence) + self._subarrays[subarray_name] = SubArray(subarray_name, dtype, shape, subshape, pgrid, correspondence) self.append_init_code(self._subarrays[subarray_name].init_code()) self.append_exit_code(self._subarrays[subarray_name].exit_code()) @@ -2060,12 +2112,13 @@ def add_rdistrarray(self, array_a: str, array_b: str): :param array_b: Output sub-array descriptor. :return: Name of the new redistribution descriptor. """ + # No need to ensure unique test. 
+        name = self._find_new_name('__rdistrarray')
-        rdistrarray_name = self._find_new_name('__rdistrarray')
-        self._rdistrarrays[rdistrarray_name] = RedistrArray(rdistrarray_name, array_a, array_b)
-        self.append_init_code(self._rdistrarrays[rdistrarray_name].init_code(self))
-        self.append_exit_code(self._rdistrarrays[rdistrarray_name].exit_code(self))
-        return rdistrarray_name
+        self._rdistrarrays[name] = RedistrArray(name, array_a, array_b)
+        self.append_init_code(self._rdistrarrays[name].init_code(self))
+        self.append_exit_code(self._rdistrarrays[name].exit_code(self))
+        return name

     def add_loop(
             self,
diff --git a/dace/sdfg/validation.py b/dace/sdfg/validation.py
index dd936850f0..2869743dcb 100644
--- a/dace/sdfg/validation.py
+++ b/dace/sdfg/validation.py
@@ -207,6 +207,34 @@ def validate_sdfg(sdfg: 'dace.sdfg.SDFG', references: Set[int] = None, **context
     if len(blocks) != len(set([s.label for s in blocks])):
         raise InvalidSDFGError('Found multiple blocks with the same name in ' + cfg.name, sdfg, None)

+    # Check that the names of data descriptors, symbols, subarrays, and rdistrarrays are unique.
+    seen_names: Set[str] = set()
+    for obj_names in [
+            sdfg.arrays.keys(), sdfg.symbols.keys(), sdfg._rdistrarrays.keys(), sdfg._subarrays.keys()
+    ]:
+        if not seen_names.isdisjoint(obj_names):
+            raise InvalidSDFGError(
+                f'Found duplicated names: "{seen_names.intersection(obj_names)}". Please ensure '
+                'that the names of symbols, data descriptors, subarrays and rdistrarrays are unique.', sdfg, None)
+        seen_names.update(obj_names)
+
+    # Ensure that every constant is also mentioned as either an array or a symbol.
+    for const_name, (const_type, _) in sdfg.constants_prop.items():
+        if const_name in sdfg.arrays:
+            if const_type != sdfg.arrays[const_name].dtype:
+                # This should actually be an error, but a lot of code depends on it.
+                warnings.warn(
+                    f'Mismatch between constant and data descriptor of "{const_name}", '
+                    f'expected to find "{const_type}" but found "{sdfg.arrays[const_name]}".')
+        elif const_name in sdfg.symbols:
+            if const_type != sdfg.symbols[const_name]:
+                # This should actually be an error, but a lot of code depends on it.
+                warnings.warn(
+                    f'Mismatch between constant and symbol type of "{const_name}", '
+                    f'expected to find "{const_type}" but found "{sdfg.symbols[const_name]}".')
+        else:
+            warnings.warn(f'Found constant "{const_name}" that does not refer to an array or a symbol.')
+
     # Validate data descriptors
     for name, desc in sdfg._arrays.items():
         if id(desc) in references:
From 9945f48c9ff8b7deeb218cfe1b8cdbebd923be60 Mon Sep 17 00:00:00 2001
From: BenWeber42
Date: Tue, 24 Sep 2024 22:36:03 +0200
Subject: [PATCH 59/76] Fix array indirection to memlet subset promotion
 (#1406)

The current solution is rather hacky. I want to run the tests first to see
the impact of this change.

Additionally, there is no test yet, because validation doesn't catch the
erroneous SDFG yet.

Overall, it is currently not clear how to solve the issue, and the PR might
change as we progress...
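For illustration, the problematic pattern is an access whose subscript is
itself read from an array. Below is a minimal hand-written sketch of such a
program (hypothetical, written for this description only; the regression test
added in this patch instead constructs the SDFG explicitly, using a transient
scalar index):

```python
import dace
import numpy as np

N = 10

@dace.program
def indirect(inp: dace.float64[N], indices: dace.int64[N], out: dace.float64[N]):
    for i in dace.map[0:N]:
        # The inner subscript reads from an array, so its result must never be
        # promoted into a memlet subset by scalar-to-symbol promotion.
        out[i] = inp[indices[i]]

A = np.random.rand(N)
idx = np.arange(N - 1, -1, -1)  # reversed permutation as an indirection table
res = np.zeros(N)
indirect(A, idx, res)
assert np.allclose(res, A[idx])
```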
--- .../transformation/passes/scalar_to_symbol.py | 3 +- tests/passes/scalar_to_symbol_test.py | 46 +++++++++++++++++-- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/dace/transformation/passes/scalar_to_symbol.py b/dace/transformation/passes/scalar_to_symbol.py index 8b4f2a9be3..a0cb08ea0c 100644 --- a/dace/transformation/passes/scalar_to_symbol.py +++ b/dace/transformation/passes/scalar_to_symbol.py @@ -323,6 +323,7 @@ def __init__(self, in_edges: Dict[str, mm.Memlet], out_edges: Dict[str, mm.Memle def visit_Subscript(self, node: ast.Subscript) -> Any: # Convert subscript to symbol name + node = self.generic_visit(node) node_name = astutils.rname(node) if node_name in self.in_edges: self.latest[node_name] += 1 @@ -346,7 +347,7 @@ def visit_Subscript(self, node: ast.Subscript) -> Any: return ast.copy_location(ast.Name(id=new_name, ctx=ast.Store()), node) else: self.do_not_remove.add(node_name) - return self.generic_visit(node) + return node def _range_is_promotable(subset: subsets.Range, defined: Set[str]) -> bool: diff --git a/tests/passes/scalar_to_symbol_test.py b/tests/passes/scalar_to_symbol_test.py index 140ec105f7..7fdfbdf737 100644 --- a/tests/passes/scalar_to_symbol_test.py +++ b/tests/passes/scalar_to_symbol_test.py @@ -1,14 +1,12 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. """ Tests the scalar to symbol promotion functionality. """ import dace from dace.transformation.passes import scalar_to_symbol -from dace.sdfg.state import SDFGState from dace.transformation import transformation as xf, interstate as isxf from dace.transformation.interstate import loop_detection as ld -from dace import registry -from dace.transformation import helpers as xfh import collections +from sympy import core as sympy_core import numpy as np import pytest @@ -692,6 +690,45 @@ def test_ternary_expression(compile_time_evaluatable): sdfg.compile() +def test_double_index_bug(): + + sdfg = dace.SDFG('test_') + state = sdfg.add_state() + + sdfg.add_array('A', shape=(10, ), dtype=dace.float64) + sdfg.add_array('table', shape=(10, 2), dtype=dace.int64) + sdfg.add_array('B', shape=(10, ), dtype=dace.float64) + sdfg.add_scalar('idx', dace.int64, transient=True) + idx_node = state.add_access('idx') + set_tlet = state.add_tasklet('set_idx', code="_idx=0", inputs={}, outputs={"_idx"}) + state.add_mapped_tasklet('map', + map_ranges={'i': "0:10"}, + inputs={ + 'inp': dace.Memlet("A[0:10]"), + '_idx': dace.Memlet('idx[0]'), + 'indices': dace.Memlet('table[0:10, 0:2]') + }, + code="out = inp[indices[i,_idx]]", + outputs={'out': dace.Memlet("B[i]")}, + external_edges=True, + input_nodes={'idx': idx_node}) + + state.add_edge(set_tlet, '_idx', idx_node, None, dace.Memlet('idx[0]')) + + sdfg.simplify() + + # Check that `indices` (which is an array) is not used in a memlet subset + for state in sdfg.states(): + for memlet in state.edges(): + subset = memlet.data.subset + if not isinstance(subset, dace.subsets.Range): + continue + for range in subset.ranges: + for part in range: + for sympy_node in sympy_core.preorder_traversal(part): + assert getattr(sympy_node, "name", None) != "indices" + + if __name__ == '__main__': test_find_promotable() test_promote_simple() @@ -715,3 +752,4 @@ def test_ternary_expression(compile_time_evaluatable): test_dynamic_mapind() test_ternary_expression(False) test_ternary_expression(True) + test_double_index_bug() From 1dc9bc505940e8d58cfbf685390681644ee45bde Mon 
Sep 17 00:00:00 2001
From: Philipp Schaad
Date: Fri, 27 Sep 2024 16:10:07 +0200
Subject: [PATCH 60/76] Conditional Blocks (#1666)

This is a continuation of https://github.com/spcl/dace/pull/1617 (superseded
and closed by this PR), with a lot of the work being done by
@luca-patrignani.

# Conditional Blocks

This PR implements Conditional Blocks, which are a native way of semantically
expressing conditional branching in an SDFG. This replaces the traditional
"state machine only" way of expressing conditional branching, with two main
goals:

1. **Simplify SDFG analysis and optimization by clearly exposing conditional
   branching.** Previously, detecting and treating conditional branches
   required expensive analysis of the control flow graph structure, which had
   to be performed repeatedly and was error-prone. By contrast, Conditional
   Blocks can be generated by a frontend using semantic information from the
   source language, entirely circumventing this step.
2. **Address code generation issues.** Code generation relies on a series of
   control flow detections to generate appropriate code that is not full of
   `goto` statements for each state transition. However, just as in the above
   issue, this process is error-prone and often leads to invalid code being
   generated for complex control flow constructs (e.g., conditionals inside of
   loops with conditional break, continue, return, etc.). By exposing _all_
   regular control flow (i.e., loops and conditional branching) with native
   SDFG constructs, this step can be skipped in code generation.

### Anatomy of Conditional Blocks

`ConditionalBlock`s are a type of `ControlFlowBlock` which contains a series
of **branches**. Each branch is represented by a full `ControlFlowRegion` and
has a condition in the form of a `CodeBlock` attached to it. When a
`ConditionalBlock` is executed, the conditions are checked in the insertion
order of the branches, and if a matching condition is found, that branch (and
only that branch) is executed. When the chosen branch finishes executing,
control continues with the `ConditionalBlock`'s successor. If no condition
matches, no branch is executed. At most one branch's condition may be `None`,
which represents a wildcard or `else` case that is executed if no other
condition matches.

### Code Generation Changes

Code generation (when using this feature) is drastically simplified with
respect to control flow: no control flow detection is performed anymore.
Instead, regular control flow constructs are only generated from the new
native SDFG constructs ([`LoopRegion`s](https://github.com/spcl/dace/pull/1475)
and `ConditionalBlock`s), and any other state transition is either only used
for sequential ordering (unconditional transitions to a single, direct
successor) or leads to a `goto`. This makes code generation significantly
less error-prone and simpler to work with.

### Compatibility

This feature is implemented in a minimally invasive way and with full
backwards compatibility for now. Just as with
[`LoopRegion`s](https://github.com/spcl/dace/pull/1475), this feature is only
used if an explicit `use_experimental_cfg_blocks` flag is set to `True` in
compatible frontends (currently only the Python frontend; Fortran frontend
integration is coming soon). If an SDFG makes use of these experimental
blocks, some passes and transformations will no longer be applied
automatically in pipelines. Transformations that handle these blocks
correctly can be explicitly marked with
`@transformation.experimental_cfg_block_compatible` to apply them on SDFGs
with experimental blocks.
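For reference, a branch is attached by appending a `(condition, region)` pair
to `ConditionalBlock.branches`, with a `None` condition serving as the `else`
case. The following minimal sketch condenses the new
`tests/sdfg/conditional_region_test.py` added in this PR (names shortened,
otherwise the same API):

```python
import dace
import numpy as np
from dace.properties import CodeBlock
from dace.sdfg.sdfg import InterstateEdge
from dace.sdfg.state import ConditionalBlock, ControlFlowRegion

sdfg = dace.SDFG('conditional_example')
sdfg.add_array('A', (1,), dace.float32)
sdfg.add_symbol('i', dace.int32)
state0 = sdfg.add_state('state0', is_start_block=True)

# A conditional block holds (condition, body-region) pairs, checked in
# insertion order.
if1 = ConditionalBlock('if1')
sdfg.add_node(if1)
sdfg.add_edge(state0, if1, InterstateEdge())

if_body = ControlFlowRegion('if_body', sdfg=sdfg)
if1.branches.append((CodeBlock('i == 1'), if_body))
state1 = if_body.add_state('state1', is_start_block=True)
t1 = state1.add_tasklet('t1', None, {'a'}, 'a = 100')
state1.add_edge(t1, 'a', state1.add_access('A'), None, dace.Memlet('A[0]'))

A = np.ones((1,), dtype=np.float32)
sdfg(i=1, A=A)  # branch taken: A[0] == 100
sdfg(i=0, A=A)  # no condition matches: A stays unchanged
```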
### Inlining Conditional blocks can be inlined through a utility function to traditional SDFG state machines. This is automatically done by compatible frontends if the experimental CFG blocks feature is turned off. ### Visualization Components The visualization components are being worked on separately in https://github.com/spcl/dace-webclient/pull/173. This PR does not depend on the visualization components to be merged. --------- Co-authored-by: Luca Patrignani Co-authored-by: luca-patrignani <92518571+luca-patrignani@users.noreply.github.com> --- dace/codegen/control_flow.py | 137 ++++++----- dace/codegen/targets/framecode.py | 2 +- dace/frontend/common/einsum.py | 19 +- dace/frontend/python/astutils.py | 42 ++++ dace/frontend/python/interface.py | 6 +- dace/frontend/python/newast.py | 102 ++++----- dace/frontend/python/parser.py | 5 +- dace/sdfg/analysis/cfg.py | 9 +- dace/sdfg/analysis/cutout.py | 19 +- dace/sdfg/sdfg.py | 13 +- dace/sdfg/state.py | 213 ++++++++++++++++-- dace/sdfg/utils.py | 19 +- dace/sdfg/validation.py | 6 +- .../conditional_regions_test.py | 92 ++++++++ tests/sdfg/conditional_region_test.py | 94 ++++++++ 15 files changed, 617 insertions(+), 161 deletions(-) create mode 100644 tests/python_frontend/conditional_regions_test.py create mode 100644 tests/sdfg/conditional_region_test.py diff --git a/dace/codegen/control_flow.py b/dace/codegen/control_flow.py index ae9351fc43..7701a19ec2 100644 --- a/dace/codegen/control_flow.py +++ b/dace/codegen/control_flow.py @@ -62,7 +62,7 @@ import sympy as sp from dace import dtypes from dace.sdfg.analysis import cfg as cfg_analysis -from dace.sdfg.state import (BreakBlock, ContinueBlock, ControlFlowBlock, ControlFlowRegion, LoopRegion, +from dace.sdfg.state import (BreakBlock, ConditionalBlock, ContinueBlock, ControlFlowBlock, ControlFlowRegion, LoopRegion, ReturnBlock, SDFGState) from dace.sdfg.sdfg import SDFG, InterstateEdge from dace.sdfg.graph import Edge @@ -236,14 +236,18 @@ def first_block(self) -> ReturnBlock: @dataclass -class GeneralBlock(ControlFlow): - """ - General (or unrecognized) control flow block with gotos between blocks. - """ +class RegionBlock(ControlFlow): # The control flow region that this block corresponds to (may be the SDFG in the absence of hierarchical regions). region: Optional[ControlFlowRegion] + +@dataclass +class GeneralBlock(RegionBlock): + """ + General (or unrecognized) control flow block with gotos between blocks. + """ + # List of children control flow blocks elements: List[ControlFlow] @@ -270,7 +274,7 @@ def as_cpp(self, codegen, symbols) -> str: for i, elem in enumerate(self.elements): expr += elem.as_cpp(codegen, symbols) # In a general block, emit transitions and assignments after each individual block or region. - if isinstance(elem, BasicCFBlock) or (isinstance(elem, GeneralBlock) and elem.region): + if isinstance(elem, BasicCFBlock) or (isinstance(elem, RegionBlock) and elem.region): cfg = elem.state.parent_graph if isinstance(elem, BasicCFBlock) else elem.region.parent_graph sdfg = cfg if isinstance(cfg, SDFG) else cfg.sdfg out_edges = cfg.out_edges(elem.state) if isinstance(elem, BasicCFBlock) else cfg.out_edges(elem.region) @@ -514,10 +518,9 @@ def children(self) -> List[ControlFlow]: @dataclass -class GeneralLoopScope(ControlFlow): +class GeneralLoopScope(RegionBlock): """ General loop block based on a loop control flow region. 
""" - loop: LoopRegion body: ControlFlow def as_cpp(self, codegen, symbols) -> str: @@ -565,6 +568,10 @@ def as_cpp(self, codegen, symbols) -> str: return expr + @property + def loop(self) -> LoopRegion: + return self.region + @property def first_block(self) -> ControlFlowBlock: return self.loop.start_block @@ -601,6 +608,46 @@ def children(self) -> List[ControlFlow]: return list(self.cases.values()) +@dataclass +class GeneralConditionalScope(RegionBlock): + """ General conditional block based on a conditional control flow region. """ + + branch_bodies: List[Tuple[Optional[CodeBlock], ControlFlow]] + + def as_cpp(self, codegen, symbols) -> str: + sdfg = self.conditional.sdfg + expr = '' + for i in range(len(self.branch_bodies)): + branch = self.branch_bodies[i] + if branch[0] is not None: + cond = unparse_interstate_edge(branch[0].code, sdfg, codegen=codegen, symbols=symbols) + cond = cond.strip(';') + if i == 0: + expr += f'if ({cond}) {{\n' + else: + expr += f'}} else if ({cond}) {{\n' + else: + if i < len(self.branch_bodies) - 1 or i == 0: + raise RuntimeError('Missing branch condition for non-final conditional branch') + expr += '} else {\n' + expr += branch[1].as_cpp(codegen, symbols) + if i == len(self.branch_bodies) - 1: + expr += '}\n' + return expr + + @property + def conditional(self) -> ConditionalBlock: + return self.region + + @property + def first_block(self) -> ControlFlowBlock: + return self.conditional + + @property + def children(self) -> List[ControlFlow]: + return [b for _, b in self.branch_bodies] + + def _loop_from_structure(sdfg: SDFG, guard: SDFGState, enter_edge: Edge[InterstateEdge], leave_edge: Edge[InterstateEdge], back_edges: List[Edge[InterstateEdge]], dispatch_state: Callable[[SDFGState], @@ -973,7 +1020,6 @@ def _structured_control_flow_traversal_with_regions(cfg: ControlFlowRegion, if branch_merges is None: branch_merges = cfg_analysis.branch_merges(cfg) - if ptree is None: ptree = cfg_analysis.block_parent_tree(cfg, with_loops=False) @@ -1004,6 +1050,14 @@ def make_empty_block(): cfg_block = ContinueCFBlock(dispatch_state, parent_block, True, node) elif isinstance(node, ReturnBlock): cfg_block = ReturnCFBlock(dispatch_state, parent_block, True, node) + elif isinstance(node, ConditionalBlock): + cfg_block = GeneralConditionalScope(dispatch_state, parent_block, False, node, []) + for cond, branch in node.branches: + if branch is not None: + body = make_empty_block() + body.parent = cfg_block + _structured_control_flow_traversal_with_regions(branch, dispatch_state, body) + cfg_block.branch_bodies.append((cond, body)) elif isinstance(node, ControlFlowRegion): if isinstance(node, LoopRegion): body = make_empty_block() @@ -1027,69 +1081,8 @@ def make_empty_block(): stack.append(oe[0].dst) parent_block.elements.append(cfg_block) continue - - # Potential branch or loop - if node in branch_merges: - mergeblock = branch_merges[node] - - # Add branching node and ignore outgoing edges - parent_block.elements.append(cfg_block) - parent_block.gotos_to_ignore.extend(oe) # TODO: why? - parent_block.assignments_to_ignore.extend(oe) # TODO: why? 
- cfg_block.last_block = True - - # Parse all outgoing edges recursively first - cblocks: Dict[Edge[InterstateEdge], GeneralBlock] = {} - for branch in oe: - if branch.dst is mergeblock: - # If we hit the merge state (if without else), defer to end of branch traversal - continue - cblocks[branch] = make_empty_block() - _structured_control_flow_traversal_with_regions(cfg=cfg, - dispatch_state=dispatch_state, - parent_block=cblocks[branch], - start=branch.dst, - stop=mergeblock, - generate_children_of=node, - branch_merges=branch_merges, - ptree=ptree, - visited=visited) - - # Classify branch type: - branch_block = None - # If there are 2 out edges, one negation of the other: - # * if/else in case both branches are not merge state - # * if without else in case one branch is merge state - if (len(oe) == 2 and oe[0].data.condition_sympy() == sp.Not(oe[1].data.condition_sympy())): - if oe[0].dst is mergeblock: - # If without else - branch_block = IfScope(dispatch_state, parent_block, False, node, oe[1].data.condition, - cblocks[oe[1]]) - elif oe[1].dst is mergeblock: - branch_block = IfScope(dispatch_state, parent_block, False, node, oe[0].data.condition, - cblocks[oe[0]]) - else: - branch_block = IfScope(dispatch_state, parent_block, False, node, oe[0].data.condition, - cblocks[oe[0]], cblocks[oe[1]]) - else: - # If there are 2 or more edges (one is not the negation of the - # other): - switch = _cases_from_branches(oe, cblocks) - if switch: - # If all edges are of form "x == y" for a single x and - # integer y, it is a switch/case - branch_block = SwitchCaseScope(dispatch_state, parent_block, False, node, switch[0], switch[1]) - else: - # Otherwise, create if/else if/.../else goto exit chain - branch_block = IfElseChain(dispatch_state, parent_block, False, node, - [(e.data.condition, cblocks[e] if e in cblocks else make_empty_block()) - for e in oe]) - # End of branch classification - parent_block.elements.append(branch_block) - if mergeblock != stop: - stack.append(mergeblock) - - else: # No merge state: Unstructured control flow + else: + # Unstructured control flow. parent_block.sequential = False parent_block.elements.append(cfg_block) stack.extend([e.dst for e in oe]) diff --git a/dace/codegen/targets/framecode.py b/dace/codegen/targets/framecode.py index da25816f9b..488c1c7fbd 100644 --- a/dace/codegen/targets/framecode.py +++ b/dace/codegen/targets/framecode.py @@ -483,7 +483,7 @@ def dispatch_state(state: SDFGState) -> str: states_generated.add(state) # For sanity check return stream.getvalue() - if sdfg.root_sdfg.using_experimental_blocks: + if sdfg.root_sdfg.recheck_using_experimental_blocks(): # Use control flow blocks embedded in the SDFG to generate control flow. 
cft = cflow.structured_control_flow_tree_with_regions(sdfg, dispatch_state) elif config.Config.get_bool('optimizer', 'detect_control_flow'): diff --git a/dace/frontend/common/einsum.py b/dace/frontend/common/einsum.py index e2cc2be88b..407e9eb91c 100644 --- a/dace/frontend/common/einsum.py +++ b/dace/frontend/common/einsum.py @@ -3,7 +3,9 @@ from functools import reduce from itertools import chain from string import ascii_letters -from typing import Dict, Optional +from typing import Dict, List, Optional + +import numpy as np import dace from dace import dtypes, subsets, symbolic @@ -180,6 +182,19 @@ def create_einsum_sdfg(pv: 'dace.frontend.python.newast.ProgramVisitor', beta=beta)[0] +def _build_einsum_views(tensors: str, dimension_dict: dict) -> List[np.ndarray]: + """ + Function taken and adjusted from opt_einsum package version 3.3.0 following unexpected removal in vesion 3.4.0. + Reference: https://github.com/dgasmith/opt_einsum/blob/v3.3.0/opt_einsum/helpers.py#L18 + """ + views = [] + terms = tensors.split('->')[0].split(',') + for term in terms: + dims = [dimension_dict[x] for x in term] + views.append(np.random.rand(*dims)) + return views + + def _create_einsum_internal(sdfg: SDFG, state: SDFGState, einsum_string: str, @@ -231,7 +246,7 @@ def _create_einsum_internal(sdfg: SDFG, # Create optimal contraction path # noinspection PyTypeChecker - _, path_info = oe.contract_path(einsum_string, *oe.helpers.build_views(einsum_string, chardict)) + _, path_info = oe.contract_path(einsum_string, *_build_einsum_views(einsum_string, chardict)) input_nodes = nodes or {arr: state.add_read(arr) for arr in arrays} result_node = None diff --git a/dace/frontend/python/astutils.py b/dace/frontend/python/astutils.py index c9a400e5f1..425e94cd9f 100644 --- a/dace/frontend/python/astutils.py +++ b/dace/frontend/python/astutils.py @@ -384,6 +384,48 @@ def negate_expr(node): return ast.fix_missing_locations(newexpr) +def and_expr(node_a, node_b): + """ Generates the logical AND of two AST expressions. + """ + if type(node_a) is not type(node_b): + raise ValueError('Node types do not match') + + # Support for SymPy expressions + if isinstance(node_a, sympy.Basic): + return sympy.And(node_a, node_b) + # Support for numerical constants + if isinstance(node_a, (numbers.Number, numpy.bool_)): + return str(node_a and node_b) + # Support for strings (most likely dace.Data.Scalar names) + if isinstance(node_a, str): + return f'({node_a}) and ({node_b})' + + from dace.properties import CodeBlock # Avoid import loop + if isinstance(node_a, CodeBlock): + node_a = node_a.code + node_b = node_b.code + + if hasattr(node_a, "__len__"): + if len(node_a) > 1: + raise ValueError("and_expr only expects single expressions, got: {}".format(node_a)) + if len(node_b) > 1: + raise ValueError("and_expr only expects single expressions, got: {}".format(node_b)) + expr_a = node_a[0] + expr_b = node_b[0] + else: + expr_a = node_a + expr_b = node_b + + if isinstance(expr_a, ast.Expr): + expr_a = expr_a.value + if isinstance(expr_b, ast.Expr): + expr_b = expr_b.value + + newexpr = ast.Expr(value=ast.BinOp(left=copy_tree(expr_a), op=ast.And, right=copy_tree(expr_b))) + newexpr = ast.copy_location(newexpr, expr_a) + return ast.fix_missing_locations(newexpr) + + def copy_tree(node: ast.AST) -> ast.AST: """ Copies an entire AST without copying the non-AST parts (e.g., constant values). 
diff --git a/dace/frontend/python/interface.py b/dace/frontend/python/interface.py index 790f2de506..14164054d3 100644 --- a/dace/frontend/python/interface.py +++ b/dace/frontend/python/interface.py @@ -44,6 +44,7 @@ def program(f: F, recompile: bool = True, distributed_compilation: bool = False, constant_functions=False, + use_experimental_cfg_blocks=False, **kwargs) -> Callable[..., parser.DaceProgram]: """ Entry point to a data-centric program. For methods and ``classmethod``s, use @@ -68,6 +69,8 @@ def program(f: F, not depend on internal variables are constant. This will hardcode their return values into the resulting program. + :param use_experimental_cfg_blocks: If True, makes use of experimental CFG blocks susch as loop and conditional + regions. :note: If arguments are defined with type hints, the program can be compiled ahead-of-time with ``.compile()``. """ @@ -83,7 +86,8 @@ def program(f: F, recreate_sdfg=recreate_sdfg, regenerate_code=regenerate_code, recompile=recompile, - distributed_compilation=distributed_compilation) + distributed_compilation=distributed_compilation, + use_experimental_cfg_blocks=use_experimental_cfg_blocks) function = program diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py index 60469919f5..0d40e13282 100644 --- a/dace/frontend/python/newast.py +++ b/dace/frontend/python/newast.py @@ -3,7 +3,6 @@ from collections import OrderedDict import copy import itertools -import inspect import networkx as nx import re import sys @@ -25,14 +24,14 @@ from dace.frontend.python.astutils import ExtNodeVisitor, ExtNodeTransformer from dace.frontend.python.astutils import rname from dace.frontend.python import nested_call, replacements, preprocessing -from dace.frontend.python.memlet_parser import (DaceSyntaxError, parse_memlet, pyexpr_to_symbolic, ParseMemlet, - inner_eval_ast, MemletExpr) -from dace.sdfg import nodes, utils as sdutil +from dace.frontend.python.memlet_parser import DaceSyntaxError, parse_memlet, ParseMemlet, inner_eval_ast, MemletExpr +from dace.sdfg import nodes from dace.sdfg.propagation import propagate_memlet, propagate_subset, propagate_states from dace.memlet import Memlet from dace.properties import LambdaProperty, CodeBlock from dace.sdfg import SDFG, SDFGState -from dace.sdfg.state import BreakBlock, ContinueBlock, ControlFlowBlock, FunctionCallRegion, LoopRegion, ControlFlowRegion, NamedRegion +from dace.sdfg.state import (BreakBlock, ConditionalBlock, ContinueBlock, ControlFlowBlock, FunctionCallRegion, + LoopRegion, ControlFlowRegion, NamedRegion) from dace.sdfg.replace import replace_datadesc_names from dace.symbolic import pystr_to_symbolic, inequal_symbols @@ -1301,7 +1300,7 @@ def _views_to_data(state: SDFGState, nodes: List[dace.nodes.AccessNode]) -> List return new_nodes # Map view access nodes to their respective data - for state in self.sdfg.states(): + for state in self.sdfg.all_states(): # NOTE: We need to support views of views nodes = list(state.data_nodes()) while nodes: @@ -2371,7 +2370,7 @@ def visit_For(self, node: ast.For): extra_symbols=extra_syms, parent=loop_region, unconnected_last_block=False) loop_region.start_block = loop_region.node_id(first_subblock) - + self._connect_break_blocks(loop_region) # Handle else clause if node.orelse: # Continue visiting body @@ -2509,14 +2508,17 @@ def visit_While(self, node: ast.While): self._generate_orelse(loop_region, postloop_block) self.last_block = loop_region + self._connect_break_blocks(loop_region) + + def _connect_break_blocks(self, loop_region: 
LoopRegion): + for node, parent in loop_region.all_nodes_recursive(lambda n, _: not isinstance(n, (LoopRegion, SDFGState))): + if isinstance(node, BreakBlock): + for in_edge in parent.in_edges(node): + in_edge.data.assignments['__dace_did_break_' + loop_region.label] = '1' def _generate_orelse(self, loop_region: LoopRegion, postloop_block: ControlFlowBlock): - did_break_symbol = 'did_break_' + loop_region.label + did_break_symbol = '__dace_did_break_' + loop_region.label self.sdfg.add_symbol(did_break_symbol, dace.int32) - for n in loop_region.nodes(): - if isinstance(n, BreakBlock): - for iedge in loop_region.in_edges(n): - iedge.data.assignments[did_break_symbol] = '1' for iedge in self.cfg_target.in_edges(loop_region): iedge.data.assignments[did_break_symbol] = '0' oedges = self.cfg_target.out_edges(loop_region) @@ -2525,61 +2527,59 @@ def _generate_orelse(self, loop_region: LoopRegion, postloop_block: ControlFlowB intermediate = self.cfg_target.add_state(f'{loop_region.label}_normal_exit') self.cfg_target.add_edge(loop_region, intermediate, - dace.InterstateEdge(condition=f"(not {did_break_symbol} == 1)")) + dace.InterstateEdge(condition=f'(not {did_break_symbol} == 1)')) oedge = oedges[0] self.cfg_target.add_edge(intermediate, oedge.dst, copy.deepcopy(oedge.data)) self.cfg_target.remove_edge(oedge) - self.cfg_target.add_edge(loop_region, postloop_block, dace.InterstateEdge(condition=f"{did_break_symbol} == 1")) + self.cfg_target.add_edge(loop_region, postloop_block, dace.InterstateEdge(condition=f'{did_break_symbol} == 1')) + + def _has_loop_ancestor(self, node: ControlFlowBlock) -> bool: + while node is not None and node is not self.sdfg: + if isinstance(node, LoopRegion): + return True + node = node.parent_graph + return False + def visit_Break(self, node: ast.Break): - if isinstance(self.cfg_target, LoopRegion): - self._on_block_added(self.cfg_target.add_break(f'break_{self.cfg_target.label}_{node.lineno}')) - else: - error_msg = "'break' is only supported inside loops " - if self.nested: - error_msg += ("('break' is not supported in Maps and cannot be used in nested DaCe program calls to " - " break out of loops of outer scopes)") - raise DaceSyntaxError(self, node, error_msg) + if not self._has_loop_ancestor(self.cfg_target): + raise DaceSyntaxError(self, node, "Break block outside loop region") + break_block = BreakBlock(f'break_{node.lineno}') + self.cfg_target.add_node(break_block, ensure_unique_name=True) + self._on_block_added(break_block) def visit_Continue(self, node: ast.Continue): - if isinstance(self.cfg_target, LoopRegion): - self._on_block_added(self.cfg_target.add_continue(f'continue_{self.cfg_target.label}_{node.lineno}')) - else: - error_msg = ("'continue' is only supported inside loops ") - if self.nested: - error_msg += ("('continue' is not supported in Maps and cannot be used in nested DaCe program calls to " - " continue loops of outer scopes)") - raise DaceSyntaxError(self, node, error_msg) + if not self._has_loop_ancestor(self.cfg_target): + raise DaceSyntaxError(self, node, 'Continue block outside loop region') + continue_block = ContinueBlock(f'continue_{node.lineno}') + self.cfg_target.add_node(continue_block, ensure_unique_name=True) + self._on_block_added(continue_block) def visit_If(self, node: ast.If): - # Add a guard state - self._add_state('if_guard') - self.last_block.debuginfo = self.current_lineinfo - # Generate conditions - cond, cond_else, _ = self._visit_test(node.test) + cond, _, _ = self._visit_test(node.test) - # Visit recursively - 
laststate, first_if_state, last_if_state, return_stmt = \ - self._recursive_visit(node.body, 'if', node.lineno, self.cfg_target, True) - end_if_state = self.last_block + # Add conditional region + cond_block = ConditionalBlock(f'if_{node.lineno}') + self.cfg_target.add_node(cond_block) + self._on_block_added(cond_block) - # Connect the states - self.cfg_target.add_edge(laststate, first_if_state, dace.InterstateEdge(cond)) - self.cfg_target.add_edge(last_if_state, end_if_state, dace.InterstateEdge(condition=f"{not return_stmt}")) + if_body = ControlFlowRegion(cond_block.label + '_body', sdfg=self.sdfg) + cond_block.branches.append((CodeBlock(cond), if_body)) + if_body.parent_graph = self.cfg_target + + # Visit recursively + self._recursive_visit(node.body, 'if', node.lineno, if_body, False) # Process 'else'/'elif' statements if len(node.orelse) > 0: + else_body = ControlFlowRegion(f'{cond_block.label}_else_{node.orelse[0].lineno}', + sdfg=self.sdfg) + #cond_block.branches.append((CodeBlock(cond_else), else_body)) + cond_block.branches.append((None, else_body)) + else_body.parent_graph = self.cfg_target # Visit recursively - _, first_else_state, last_else_state, return_stmt = \ - self._recursive_visit(node.orelse, 'else', node.lineno, self.cfg_target, False) - - # Connect the states - self.cfg_target.add_edge(laststate, first_else_state, dace.InterstateEdge(cond_else)) - self.cfg_target.add_edge(last_else_state, end_if_state, dace.InterstateEdge(condition=f"{not return_stmt}")) - else: - self.cfg_target.add_edge(laststate, end_if_state, dace.InterstateEdge(cond_else)) - self.last_block = end_if_state + self._recursive_visit(node.orelse, 'else', node.lineno, else_body, False) def _parse_tasklet(self, state: SDFGState, node: TaskletType, name=None): diff --git a/dace/frontend/python/parser.py b/dace/frontend/python/parser.py index e55829933c..b0ef56907f 100644 --- a/dace/frontend/python/parser.py +++ b/dace/frontend/python/parser.py @@ -494,8 +494,9 @@ def _parse(self, args, kwargs, simplify=None, save=False, validate=False) -> SDF sdfg, cached = self._generate_pdp(args, kwargs, simplify=simplify) if not self.use_experimental_cfg_blocks: - sdutils.inline_loop_blocks(sdfg) - sdutils.inline_control_flow_regions(sdfg) + for nsdfg in sdfg.all_sdfgs_recursive(): + sdutils.inline_conditional_blocks(nsdfg) + sdutils.inline_control_flow_regions(nsdfg) sdfg.using_experimental_blocks = self.use_experimental_cfg_blocks # Apply simplification pass automatically diff --git a/dace/sdfg/analysis/cfg.py b/dace/sdfg/analysis/cfg.py index 1d5b1e50eb..c96ef5aff0 100644 --- a/dace/sdfg/analysis/cfg.py +++ b/dace/sdfg/analysis/cfg.py @@ -6,7 +6,7 @@ import sympy as sp from typing import Dict, Iterator, List, Optional, Set -from dace.sdfg.state import ControlFlowBlock, ControlFlowRegion +from dace.sdfg.state import ConditionalBlock, ControlFlowBlock, ControlFlowRegion def acyclic_dominance_frontier(cfg: ControlFlowRegion, idom=None) -> Dict[ControlFlowBlock, Set[ControlFlowBlock]]: @@ -374,6 +374,13 @@ def blockorder_topological_sort(cfg: ControlFlowRegion, yield block if recursive: yield from blockorder_topological_sort(block, recursive, ignore_nonstate_blocks) + elif isinstance(block, ConditionalBlock): + if not ignore_nonstate_blocks: + yield block + for _, branch in block.branches: + if not ignore_nonstate_blocks: + yield branch + yield from blockorder_topological_sort(branch, recursive, ignore_nonstate_blocks) elif isinstance(block, SDFGState): yield block else: diff --git a/dace/sdfg/analysis/cutout.py 
b/dace/sdfg/analysis/cutout.py index 50272167bb..5d2eae7c6f 100644 --- a/dace/sdfg/analysis/cutout.py +++ b/dace/sdfg/analysis/cutout.py @@ -13,7 +13,7 @@ from dace.sdfg import nodes as nd, SDFG, SDFGState, utils as sdutil, InterstateEdge from dace.memlet import Memlet from dace.sdfg.graph import Edge, MultiConnectorEdge -from dace.sdfg.state import StateSubgraphView, SubgraphView +from dace.sdfg.state import ControlFlowBlock, StateSubgraphView, SubgraphView from dace.transformation.transformation import (MultiStateTransformation, PatternTransformation, SubgraphTransformation, @@ -321,7 +321,8 @@ def singlestate_cutout(cls, @classmethod def multistate_cutout(cls, *states: SDFGState, - make_side_effects_global: bool = True) -> Union['SDFGCutout', SDFG]: + make_side_effects_global: bool = True, + override_start_block: Optional[ControlFlowBlock] = None) -> Union['SDFGCutout', SDFG]: """ Cut out a multi-state subgraph from an SDFG to run separately for localized testing or optimization. @@ -336,6 +337,9 @@ def multistate_cutout(cls, :param make_side_effects_global: If True, all transient data containers which are read inside the cutout but may be written to _before_ the cutout, or any data containers which are written to inside the cutout but may be read _after_ the cutout, are made global. + :param override_start_block: If set, explicitly force a given control flow block to be the start block. If left + None (default), the start block is automatically determined based on domination + relationships in the original graph. :return: The created SDFGCutout or the original SDFG where no smaller cutout could be obtained. """ create_element = copy.deepcopy @@ -350,10 +354,13 @@ def multistate_cutout(cls, # Determine the start state and ensure there IS a unique start state. If there is no unique start state, keep # adding states from the predecessor frontier in the state machine until a unique start state can be determined. start_state: Optional[SDFGState] = None - for state in cutout_states: - if state == sdfg.start_state: - start_state = state - break + if override_start_block is not None: + start_state = override_start_block + else: + for state in cutout_states: + if state == sdfg.start_state: + start_state = state + break if start_state is None: bfs_queue: Deque[Tuple[Set[SDFGState], Set[Edge[InterstateEdge]]]] = deque() diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 5e5df1b0a2..71b37ea7b7 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -23,7 +23,7 @@ from dace.config import Config from dace.frontend.python import astutils from dace.sdfg import nodes as nd -from dace.sdfg.state import ControlFlowBlock, SDFGState, ControlFlowRegion +from dace.sdfg.state import ConditionalBlock, ControlFlowBlock, SDFGState, ControlFlowRegion from dace.distr_types import ProcessGrid, SubArray, RedistrArray from dace.dtypes import validate_name from dace.properties import (DebugInfoProperty, EnumProperty, ListProperty, make_properties, Property, CodeProperty, @@ -1512,6 +1512,17 @@ def shared_transients(self, check_toplevel: bool = True, include_nested_data: bo seen[sym] = interstate_edge shared.append(sym) + # The same goes for the conditions of conditional blocks. 
+ for block in self.all_control_flow_blocks(): + if isinstance(block, ConditionalBlock): + for cond, _ in block.branches: + if cond is not None: + cond_symbols = set(map(str, dace.symbolic.symbols_in_ast(cond.code[0]))) + for sym in cond_symbols: + if sym in self.arrays and self.arrays[sym].transient: + seen[sym] = block + shared.append(sym) + # If transient is accessed in more than one state, it is shared for state in self.states(): for node in state.data_nodes(): diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index e8a8161747..8d443e6beb 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -11,7 +11,10 @@ from typing import (TYPE_CHECKING, Any, AnyStr, Callable, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Union, overload) +import sympy + import dace +from dace.frontend.python import astutils import dace.serialize from dace import data as dt from dace import dtypes @@ -22,8 +25,8 @@ from dace.properties import (CodeBlock, DebugInfoProperty, DictProperty, EnumProperty, Property, SubsetProperty, SymbolicProperty, CodeProperty, make_properties) from dace.sdfg import nodes as nd -from dace.sdfg.graph import (MultiConnectorEdge, OrderedMultiDiConnectorGraph, SubgraphView, OrderedDiGraph, Edge, - generate_element_id) +from dace.sdfg.graph import (MultiConnectorEdge, NodeNotFoundError, OrderedMultiDiConnectorGraph, SubgraphView, + OrderedDiGraph, Edge, generate_element_id) from dace.sdfg.propagation import propagate_memlet from dace.sdfg.validation import validate_state from dace.subsets import Range, Subset @@ -1140,6 +1143,11 @@ def set_default_lineinfo(self, lineinfo: dace.dtypes.DebugInfo): """ self._default_lineinfo = lineinfo + def view(self): + from dace.sdfg.analysis.cutout import SDFGCutout + cutout = SDFGCutout.multistate_cutout(self, make_side_effects_global=False, override_start_block=self) + cutout.view() + def to_json(self, parent=None): tmp = { 'type': self.__class__.__name__, @@ -2561,21 +2569,21 @@ def inline(self) -> Tuple[bool, Any]: """ parent = self.parent_graph if parent: - end_state = parent.add_state(self.label + '_end') # Add all region states and make sure to keep track of all the ones that need to be connected in the end. to_connect: Set[SDFGState] = set() block_to_state_map: Dict[ControlFlowBlock, SDFGState] = dict() for node in self.nodes(): node.label = self.label + '_' + node.label - parent.add_node(node, ensure_unique_name=True) if isinstance(node, ReturnBlock) and isinstance(parent, dace.SDFG): # If a return block is being inlined into an SDFG, convert it into a regular state. Otherwise it # remains as-is. newnode = parent.add_state(node.label) block_to_state_map[node] = newnode - elif self.out_degree(node) == 0: - to_connect.add(node) + else: + parent.add_node(node, ensure_unique_name=True) + if self.out_degree(node) == 0 and not isinstance(node, (BreakBlock, ContinueBlock, ReturnBlock)): + to_connect.add(node) # Add all region edges. for edge in self.edges(): @@ -2587,14 +2595,26 @@ def inline(self) -> Tuple[bool, Any]: for b_edge in parent.in_edges(self): parent.add_edge(b_edge.src, self.start_block, b_edge.data) parent.remove_edge(b_edge) - # Redirect all edges exiting the region to instead exit the end state. 
- for a_edge in parent.out_edges(self): - parent.add_edge(end_state, a_edge.dst, a_edge.data) - parent.remove_edge(a_edge) - - for node in to_connect: - parent.add_edge(node, end_state, dace.InterstateEdge()) - + + end_state = None + if len(to_connect) > 0: + end_state = parent.add_state(self.label + '_end') + # Redirect all edges exiting the region to instead exit the end state. + for a_edge in parent.out_edges(self): + parent.add_edge(end_state, a_edge.dst, a_edge.data) + parent.remove_edge(a_edge) + + for node in to_connect: + parent.add_edge(node, end_state, dace.InterstateEdge()) + else: + # TODO: Move this to dead state elimination. + dead_blocks = [succ for succ in parent.successors(self) if parent.in_degree(succ) == 1] + while dead_blocks: + layer = list(dead_blocks) + dead_blocks.clear() + for u in layer: + dead_blocks.extend([succ for succ in parent.successors(u) if parent.in_degree(succ) == 1]) + parent.remove_node(u) # Remove the original control flow region (self) from the parent graph. parent.remove_node(self) @@ -2741,6 +2761,9 @@ def all_control_flow_regions(self, recursive=False) -> Iterator['ControlFlowRegi yield from node.sdfg.all_control_flow_regions(recursive=recursive) elif isinstance(block, ControlFlowRegion): yield from block.all_control_flow_regions(recursive=recursive) + elif isinstance(block, ConditionalBlock): + for _, branch in block.branches: + yield from branch.all_control_flow_regions(recursive=recursive) def all_sdfgs_recursive(self) -> Iterator['SDFG']: """ Iterate over this and all nested SDFGs. """ @@ -2755,6 +2778,9 @@ def all_states(self) -> Iterator[SDFGState]: yield block elif isinstance(block, ControlFlowRegion): yield from block.all_states() + elif isinstance(block, ConditionalBlock): + for _, region in block.branches: + yield from region.all_states() def all_control_flow_blocks(self, recursive=False) -> Iterator[ControlFlowBlock]: """ Iterate over all control flow blocks in this control flow graph. """ @@ -2788,7 +2814,7 @@ def _used_symbols_internal(self, for block in ordered_blocks: state_symbols = set() - if isinstance(block, ControlFlowRegion): + if isinstance(block, (ControlFlowRegion, ConditionalBlock)): b_free_syms, b_defined_syms, b_used_before_syms = block._used_symbols_internal(all_symbols, defined_syms, free_syms, @@ -3020,7 +3046,7 @@ def inline(self) -> Tuple[bool, Any]: # and return are inlined correctly. 
def recursive_inline_cf_regions(region: ControlFlowRegion) -> None: for block in region.nodes(): - if isinstance(block, ControlFlowRegion) and not isinstance(block, LoopRegion): + if (isinstance(block, ControlFlowRegion) or isinstance(block, ConditionalBlock)) and not isinstance(block, LoopRegion): recursive_inline_cf_regions(block) block.inline() recursive_inline_cf_regions(self) @@ -3189,16 +3215,165 @@ def has_return(self) -> bool: return True return False + +@make_properties +class ConditionalBlock(ControlFlowBlock, ControlGraphView): + + _branches: List[Tuple[Optional[CodeBlock], ControlFlowRegion]] + + def __init__(self, label: str = '', sdfg: Optional['SDFG'] = None, parent: Optional['ControlFlowRegion'] = None): + super().__init__(label, sdfg, parent) + self._branches = [] + + def __str__(self): + return self._label + + def __repr__(self) -> str: + return f'ConditionalBlock ({self.label})' + + @property + def branches(self) -> List[Tuple[Optional[CodeBlock], ControlFlowRegion]]: + return self._branches + + def nodes(self) -> List['ControlFlowBlock']: + return [node for _, node in self._branches if node is not None] + + def edges(self) -> List[Edge['dace.sdfg.InterstateEdge']]: + return [] + + def _used_symbols_internal(self, + all_symbols: bool, + defined_syms: Optional[Set] = None, + free_syms: Optional[Set] = None, + used_before_assignment: Optional[Set] = None, + keep_defined_in_mapping: bool = False) -> Tuple[Set[str], Set[str], Set[str]]: + defined_syms = set() if defined_syms is None else defined_syms + free_syms = set() if free_syms is None else free_syms + used_before_assignment = set() if used_before_assignment is None else used_before_assignment + + for condition, region in self._branches: + if condition is not None: + free_syms |= condition.get_free_symbols(defined_syms) + b_free_symbols, b_defined_symbols, b_used_before_assignment = region._used_symbols_internal( + all_symbols, defined_syms, free_syms, used_before_assignment, keep_defined_in_mapping) + free_syms |= b_free_symbols + defined_syms |= b_defined_symbols + used_before_assignment |= b_used_before_assignment + + defined_syms -= used_before_assignment + free_syms -= defined_syms + + return free_syms, defined_syms, used_before_assignment + + def replace_dict(self, + repl: Dict[str, str], + symrepl: Optional[Dict[symbolic.SymbolicType, symbolic.SymbolicType]] = None, + replace_in_graph: bool = True, + replace_keys: bool = True): + if replace_keys: + from dace.sdfg.replace import replace_properties_dict + replace_properties_dict(self, repl, symrepl) + + for _, region in self._branches: + region.replace_dict(repl, symrepl, replace_in_graph) + + def to_json(self, parent=None): + json = super().to_json(parent) + json['branches'] = [(condition.to_json() if condition is not None else None, cfg.to_json()) + for condition, cfg in self._branches] + return json + + @classmethod + def from_json(cls, json_obj, context=None): + context = context or {'sdfg': None, 'parent_graph': None} + _type = json_obj['type'] + if _type != cls.__name__: + raise TypeError('Class type mismatch') + + ret = cls(label=json_obj['label'], sdfg=context['sdfg']) + + dace.serialize.set_properties_from_json(ret, json_obj) + + for condition, region in json_obj['branches']: + if condition is not None: + ret._branches.append((CodeBlock.from_json(condition), ControlFlowRegion.from_json(region, context))) + else: + ret._branches.append((None, ControlFlowRegion.from_json(region, context))) + return ret + + def inline(self) -> Tuple[bool, Any]: + """ + 
Inlines the conditional region into its parent control flow region. + + :return: True if the inlining succeeded, false otherwise. + """ + parent = self.parent_graph + if not parent: + raise RuntimeError('No top-level SDFG present to inline into') + + # Add all boilerplate states necessary for the structure. + guard_state = parent.add_state(self.label + '_guard') + end_state = parent.add_state(self.label + '_end') + + # Redirect all edges to the region to the init state. + for b_edge in parent.in_edges(self): + parent.add_edge(b_edge.src, guard_state, b_edge.data) + parent.remove_edge(b_edge) + # Redirect all edges exiting the region to instead exit the end state. + for a_edge in parent.out_edges(self): + parent.add_edge(end_state, a_edge.dst, a_edge.data) + parent.remove_edge(a_edge) + + from dace.sdfg.sdfg import InterstateEdge + else_branch = None + full_cond_expression: Optional[List[ast.AST]] = None + for condition, region in self._branches: + if condition is None: + else_branch = region + else: + if full_cond_expression is None: + full_cond_expression = condition.code[0] + else: + full_cond_expression = astutils.and_expr(full_cond_expression, condition.code[0]) + parent.add_node(region) + parent.add_edge(guard_state, region, InterstateEdge(condition=condition)) + parent.add_edge(region, end_state, InterstateEdge()) + if full_cond_expression is not None: + negative_full_cond = astutils.negate_expr(full_cond_expression) + negative_cond = CodeBlock([negative_full_cond]) + else: + negative_cond = CodeBlock('1') + + if else_branch is not None: + parent.add_node(else_branch) + parent.add_edge(guard_state, else_branch, InterstateEdge(condition=negative_cond)) + parent.add_edge(region, end_state, InterstateEdge()) + else: + parent.add_edge(guard_state, end_state, InterstateEdge(condition=negative_cond)) + + parent.remove_node(self) + + sdfg = parent if isinstance(parent, dace.SDFG) else parent.sdfg + sdfg.reset_cfg_list() + + return True, (guard_state, end_state) + + @make_properties class NamedRegion(ControlFlowRegion): + debuginfo = DebugInfoProperty() + def __init__(self, label: str, sdfg: Optional['SDFG']=None, debuginfo: Optional[dtypes.DebugInfo]=None): super().__init__(label, sdfg) self.debuginfo = debuginfo @make_properties -class FunctionCallRegion(ControlFlowRegion): +class FunctionCallRegion(NamedRegion): + arguments = DictProperty(str, str) - def __init__(self, label: str, arguments: Dict[str, str] = {}, sdfg: 'SDFG' = None): - super().__init__(label, sdfg) + + def __init__(self, label: str, arguments: Dict[str, str] = {}, sdfg: 'SDFG' = None, + debuginfo: Optional[dtypes.DebugInfo]=None): + super().__init__(label, sdfg, debuginfo) self.arguments = arguments diff --git a/dace/sdfg/utils.py b/dace/sdfg/utils.py index a90a232aeb..5b9ce1a431 100644 --- a/dace/sdfg/utils.py +++ b/dace/sdfg/utils.py @@ -13,7 +13,7 @@ from dace.sdfg.graph import MultiConnectorEdge from dace.sdfg.sdfg import SDFG from dace.sdfg.nodes import Node, NestedSDFG -from dace.sdfg.state import SDFGState, StateSubgraphView, LoopRegion, ControlFlowRegion +from dace.sdfg.state import ConditionalBlock, SDFGState, StateSubgraphView, LoopRegion, ControlFlowRegion from dace.sdfg.scope import ScopeSubgraphView from dace.sdfg import nodes as nd, graph as gr, propagation from dace import config, data as dt, dtypes, memlet as mm, subsets as sbs @@ -1262,11 +1262,10 @@ def inline_loop_blocks(sdfg: SDFG, permissive: bool = False, progress: bool = No def inline_control_flow_regions(sdfg: SDFG, permissive: bool = False, 
progress: bool = None) -> int: - blocks = [n for n, _ in sdfg.all_nodes_recursive() - if isinstance(n, ControlFlowRegion) and not isinstance(n, (LoopRegion, SDFG))] + blocks = [n for n, _ in sdfg.all_nodes_recursive() if isinstance(n, ControlFlowRegion)] count = 0 - for _block in optional_progressbar(reversed(blocks), title='Inlining control flow blocks', + for _block in optional_progressbar(reversed(blocks), title='Inlining control flow regions', n=len(blocks), progress=progress): block: ControlFlowRegion = _block if block.inline()[0]: @@ -1274,6 +1273,18 @@ def inline_control_flow_regions(sdfg: SDFG, permissive: bool = False, progress: return count +def inline_conditional_blocks(sdfg: SDFG, permissive: bool = False, progress: bool = None) -> int: + blocks = [n for n, _ in sdfg.all_nodes_recursive() if isinstance(n, ConditionalBlock)] + count = 0 + + for _block in optional_progressbar(reversed(blocks), title='Inlining conditional blocks', + n=len(blocks), progress=progress): + block: ConditionalBlock = _block + if block.inline()[0]: + count += 1 + + return count + def inline_sdfgs(sdfg: SDFG, permissive: bool = False, progress: bool = None, multistate: bool = True) -> int: """ diff --git a/dace/sdfg/validation.py b/dace/sdfg/validation.py index 2869743dcb..f305affb80 100644 --- a/dace/sdfg/validation.py +++ b/dace/sdfg/validation.py @@ -34,7 +34,7 @@ def validate_control_flow_region(sdfg: 'SDFG', symbols: dict, references: Set[int] = None, **context: bool): - from dace.sdfg.state import SDFGState, ControlFlowRegion + from dace.sdfg.state import SDFGState, ControlFlowRegion, ConditionalBlock from dace.sdfg.scope import is_in_scope if len(region.source_nodes()) > 1 and region.start_block is None: @@ -118,6 +118,10 @@ def validate_control_flow_region(sdfg: 'SDFG', if isinstance(edge.dst, SDFGState): validate_state(edge.dst, region.node_id(edge.dst), sdfg, symbols, initialized_transients, references, **context) + elif isinstance(edge.dst, ConditionalBlock): + for _, r in edge.dst.branches: + if r is not None: + validate_control_flow_region(sdfg, r, initialized_transients, symbols, references, **context) elif isinstance(edge.dst, ControlFlowRegion): validate_control_flow_region(sdfg, edge.dst, initialized_transients, symbols, references, **context) # End of block DFS diff --git a/tests/python_frontend/conditional_regions_test.py b/tests/python_frontend/conditional_regions_test.py new file mode 100644 index 0000000000..07e214653c --- /dev/null +++ b/tests/python_frontend/conditional_regions_test.py @@ -0,0 +1,92 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. 
+ +import dace +import numpy as np +from dace.sdfg.state import ConditionalBlock + + +def test_dataflow_if_check(): + + @dace.program + def dataflow_if_check(A: dace.int32[10], i: dace.int64): + if A[i] < 10: + return 0 + elif A[i] == 10: + return 10 + return 100 + + dataflow_if_check.use_experimental_cfg_blocks = True + sdfg = dataflow_if_check.to_sdfg() + + assert any(isinstance(x, ConditionalBlock) for x in sdfg.nodes()) + + A = np.zeros((10,), np.int32) + A[4] = 10 + A[5] = 100 + assert sdfg(A, 0)[0] == 0 + assert sdfg(A, 4)[0] == 10 + assert sdfg(A, 5)[0] == 100 + assert sdfg(A, 6)[0] == 0 + + +def test_nested_if_chain(): + + @dace.program + def nested_if_chain(i: dace.int64): + if i < 2: + return 0 + else: + if i < 4: + return 1 + else: + if i < 6: + return 2 + else: + if i < 8: + return 3 + else: + return 4 + + nested_if_chain.use_experimental_cfg_blocks = True + sdfg = nested_if_chain.to_sdfg() + + assert any(isinstance(x, ConditionalBlock) for x in sdfg.nodes()) + + assert nested_if_chain(0)[0] == 0 + assert nested_if_chain(2)[0] == 1 + assert nested_if_chain(4)[0] == 2 + assert nested_if_chain(7)[0] == 3 + assert nested_if_chain(15)[0] == 4 + + +def test_elif_chain(): + + @dace.program + def elif_chain(i: dace.int64): + if i < 2: + return 0 + elif i < 4: + return 1 + elif i < 6: + return 2 + elif i < 8: + return 3 + else: + return 4 + + elif_chain.use_experimental_cfg_blocks = True + sdfg = elif_chain.to_sdfg() + + assert any(isinstance(x, ConditionalBlock) for x in sdfg.nodes()) + + assert elif_chain(0)[0] == 0 + assert elif_chain(2)[0] == 1 + assert elif_chain(4)[0] == 2 + assert elif_chain(7)[0] == 3 + assert elif_chain(15)[0] == 4 + + +if __name__ == '__main__': + test_dataflow_if_check() + test_nested_if_chain() + test_elif_chain() diff --git a/tests/sdfg/conditional_region_test.py b/tests/sdfg/conditional_region_test.py new file mode 100644 index 0000000000..4e4eda3f44 --- /dev/null +++ b/tests/sdfg/conditional_region_test.py @@ -0,0 +1,94 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. 
+ +import numpy as np +import dace +from dace.properties import CodeBlock +from dace.sdfg.sdfg import SDFG, InterstateEdge +from dace.sdfg.state import ConditionalBlock, ControlFlowRegion +import dace.serialize + + +def test_cond_region_if(): + sdfg = dace.SDFG('regular_if') + sdfg.add_array("A", (1,), dace.float32) + sdfg.add_symbol("i", dace.int32) + state0 = sdfg.add_state('state0', is_start_block=True) + + if1 = ConditionalBlock("if1") + sdfg.add_node(if1) + sdfg.add_edge(state0, if1, InterstateEdge()) + + if_body = ControlFlowRegion("if_body", sdfg=sdfg) + if1.branches.append((CodeBlock("i == 1"), if_body)) + + state1 = if_body.add_state("state1", is_start_block=True) + acc_a = state1.add_access('A') + t1 = state1.add_tasklet("t1", None, {"a"}, "a = 100") + state1.add_edge(t1, 'a', acc_a, None, dace.Memlet('A[0]')) + + assert sdfg.is_valid() + A = np.ones((1,), dtype=np.float32) + sdfg(i=1, A=A) + assert A[0] == 100 + + A = np.ones((1,), dtype=np.float32) + sdfg(i=0, A=A) + assert A[0] == 1 + +def test_serialization(): + sdfg = SDFG("test_serialization") + cond_region = ConditionalBlock("cond_region") + sdfg.add_node(cond_region, is_start_block=True) + sdfg.add_symbol("i", dace.int32) + + for j in range(10): + cfg = ControlFlowRegion(f"cfg_{j}", sdfg) + cond_region.branches.append((CodeBlock(f"i == {j}"), cfg)) + + assert sdfg.is_valid() + + new_sdfg = SDFG.from_json(sdfg.to_json()) + assert new_sdfg.is_valid() + new_cond_region: ConditionalBlock = new_sdfg.nodes()[0] + for j in range(10): + condition, cfg = new_cond_region.branches[j] + assert condition == CodeBlock(f"i == {j}") + assert cfg.label == f"cfg_{j}" + +def test_if_else(): + sdfg = dace.SDFG('regular_if_else') + sdfg.add_array("A", (1,), dace.float32) + sdfg.add_symbol("i", dace.int32) + state0 = sdfg.add_state('state0', is_start_block=True) + + if1 = ConditionalBlock("if1") + sdfg.add_node(if1) + sdfg.add_edge(state0, if1, InterstateEdge()) + + if_body = ControlFlowRegion("if_body", sdfg=sdfg) + state1 = if_body.add_state("state1", is_start_block=True) + acc_a = state1.add_access('A') + t1 = state1.add_tasklet("t1", None, {"a"}, "a = 100") + state1.add_edge(t1, 'a', acc_a, None, dace.Memlet('A[0]')) + if1.branches.append((CodeBlock("i == 1"), if_body)) + + else_body = ControlFlowRegion("else_body", sdfg=sdfg) + state2 = else_body.add_state("state1", is_start_block=True) + acc_a2 = state2.add_access('A') + t2 = state2.add_tasklet("t2", None, {"a"}, "a = 200") + state2.add_edge(t2, 'a', acc_a2, None, dace.Memlet('A[0]')) + if1.branches.append((CodeBlock("i == 0"), else_body)) + + assert sdfg.is_valid() + A = np.ones((1,), dtype=np.float32) + sdfg(i=1, A=A) + assert A[0] == 100 + + A = np.ones((1,), dtype=np.float32) + sdfg(i=0, A=A) + assert A[0] == 200 + +if __name__ == '__main__': + test_cond_region_if() + test_serialization() + test_if_else() From 74a31cb71e624fe541a32b3b097e46b193d08114 Mon Sep 17 00:00:00 2001 From: BenWeber42 Date: Sun, 29 Sep 2024 09:16:14 +0200 Subject: [PATCH 61/76] Renamed `graph.bfs_edges` to `edge_bfs` (#1604) `networkx` has similar methods `bfs_edges` and `edge_bfs` for their graph classes. There is a slight and documented difference between the two: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.traversal.edgebfs.edge_bfs.html#networkx.algorithms.traversal.edgebfs.edge_bfs Unfortunately, our `graph.bfs_edges` behaves differently to `networkx`'s `bfs_edges`. In fact, it behaves exactly like `networkx`'s `edge_bfs`. 
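As a minimal, made-up illustration of the difference (relying only on
`networkx`'s documented behavior):

```python
import networkx as nx

# A triangle: edge (1, 2) is reachable from 0, but it is not a BFS-tree edge.
G = nx.DiGraph([(0, 1), (0, 2), (1, 2)])

print(list(nx.bfs_edges(G, 0)))  # [(0, 1), (0, 2)] -- BFS-tree edges only
print(list(nx.edge_bfs(G, 0)))   # [(0, 1), (0, 2), (1, 2)] -- every reachable edge
```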
To avoid future confusion (just happened to me), I propose renaming our `bfs_edges` to `edge_bfs` to be consistent with `networkx`. --- dace/sdfg/graph.py | 7 +++++-- dace/sdfg/sdfg.py | 4 ++-- .../dataflow/gpu_transform_local_storage.py | 4 ++-- dace/transformation/helpers.py | 2 +- tests/graph_test.py | 20 +++++++++---------- 5 files changed, 20 insertions(+), 17 deletions(-) diff --git a/dace/sdfg/graph.py b/dace/sdfg/graph.py index 778027f663..5ec4bbb029 100644 --- a/dace/sdfg/graph.py +++ b/dace/sdfg/graph.py @@ -310,9 +310,12 @@ def __len__(self) -> int: """ Returns the total number of nodes in the graph (nx compatibility)""" return self.number_of_nodes() - def bfs_edges(self, node: Union[NodeT, Sequence[NodeT]], reverse: bool = False) -> Iterable[Edge[EdgeT]]: + def edge_bfs(self, node: Union[NodeT, Sequence[NodeT]], reverse: bool = False) -> Iterable[Edge[EdgeT]]: """Returns a generator over edges in the graph originating from the - passed node in BFS order""" + passed node in BFS order. + + :note: All reachable edges are yielded including back edges + """ if isinstance(node, (tuple, list)): queue = deque(node) else: diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 71b37ea7b7..77ad8b31b5 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -2403,12 +2403,12 @@ def fill_scope_connectors(self): def predecessor_state_transitions(self, state): """ Yields paths (lists of edges) that the SDFG can pass through before computing the given state. """ - return self.bfs_edges(state, reverse=True) + return self.edge_bfs(state, reverse=True) def predecessor_states(self, state): """ Returns a list of unique states that the SDFG can pass through before computing the given state. """ - return (e.src for e in self.bfs_edges(state, reverse=True)) + return (e.src for e in self.edge_bfs(state, reverse=True)) def validate(self, references: Optional[Set[int]] = None, **context: bool) -> None: validate_sdfg(self, references, **context) diff --git a/dace/transformation/dataflow/gpu_transform_local_storage.py b/dace/transformation/dataflow/gpu_transform_local_storage.py index bac603d14e..7957175e70 100644 --- a/dace/transformation/dataflow/gpu_transform_local_storage.py +++ b/dace/transformation/dataflow/gpu_transform_local_storage.py @@ -376,7 +376,7 @@ def apply(self, graph: SDFGState, sdfg): graph.add_edge(node, None, edge.dst, edge.dst_conn, newmemlet) - for e in graph.bfs_edges(edge.dst, reverse=False): + for e in graph.edge_bfs(edge.dst, reverse=False): parent, _, _child, _, memlet = e if parent != edge.dst and not in_scope(graph, parent, edge.dst): break @@ -454,7 +454,7 @@ def apply(self, graph: SDFGState, sdfg): graph.add_edge(edge.src, edge.src_conn, node, None, newmemlet) end_node = graph.entry_node(edge.src) - for e in graph.bfs_edges(edge.src, reverse=True): + for e in graph.edge_bfs(edge.src, reverse=True): parent, _, _child, _, memlet = e if parent == end_node: break diff --git a/dace/transformation/helpers.py b/dace/transformation/helpers.py index 0d583236cb..74a3d2ee12 100644 --- a/dace/transformation/helpers.py +++ b/dace/transformation/helpers.py @@ -709,7 +709,7 @@ def state_fission_after(state: SDFGState, node: nodes.Node, label: Optional[str] orig_edges.add(e) # Collect nodes_to_move - for edge in state.bfs_edges(node): + for edge in state.edge_bfs(node): nodes_to_move.add(edge.dst) orig_edges.add(edge) diff --git a/tests/graph_test.py b/tests/graph_test.py index 9313b2e3cc..6d6b5f49ac 100644 --- a/tests/graph_test.py +++ b/tests/graph_test.py @@ -95,16 +95,16 @@ def 
test_ordered_multidigraph(self): e6 = h.add_edge(6, 7, None) e7 = h.add_edge(6, 8, None) e8 = h.add_edge(2, 6, None) - bfs_edges = h.bfs_edges(0) - self.assertEqual(next(bfs_edges), e0) - self.assertEqual(next(bfs_edges), e1) - self.assertEqual(next(bfs_edges), e2) - self.assertEqual(next(bfs_edges), e4) - self.assertEqual(next(bfs_edges), e5) - self.assertEqual(next(bfs_edges), e8) - self.assertEqual(next(bfs_edges), e3) - self.assertEqual(next(bfs_edges), e6) - self.assertEqual(next(bfs_edges), e7) + edge_bfs = h.edge_bfs(0) + self.assertEqual(next(edge_bfs), e0) + self.assertEqual(next(edge_bfs), e1) + self.assertEqual(next(edge_bfs), e2) + self.assertEqual(next(edge_bfs), e4) + self.assertEqual(next(edge_bfs), e5) + self.assertEqual(next(edge_bfs), e8) + self.assertEqual(next(edge_bfs), e3) + self.assertEqual(next(edge_bfs), e6) + self.assertEqual(next(edge_bfs), e7) def test_dfs_edges(self): From 51871a792c2a2fd542b461b56855848abd05c09a Mon Sep 17 00:00:00 2001 From: alexnick83 <31545860+alexnick83@users.noreply.github.com> Date: Thu, 3 Oct 2024 19:15:59 +0200 Subject: [PATCH 62/76] WCRToAugAssign (#1098) Introduces a transformation that converts WCR to an augmented assignment. --------- Co-authored-by: Philipp Schaad --- dace/transformation/dataflow/__init__.py | 2 +- .../transformation/dataflow/wcr_conversion.py | 91 +++++++++++++++++-- .../transformations/wcr_to_augassign_test.py | 45 +++++++++ 3 files changed, 131 insertions(+), 7 deletions(-) create mode 100644 tests/transformations/wcr_to_augassign_test.py diff --git a/dace/transformation/dataflow/__init__.py b/dace/transformation/dataflow/__init__.py index 4ed7fd6283..6fa274f041 100644 --- a/dace/transformation/dataflow/__init__.py +++ b/dace/transformation/dataflow/__init__.py @@ -38,7 +38,7 @@ RedundantArrayCopying3) from .merge_arrays import InMergeArrays, OutMergeArrays, MergeSourceSinkArrays from .prune_connectors import PruneConnectors, PruneSymbols -from .wcr_conversion import AugAssignToWCR +from .wcr_conversion import AugAssignToWCR, WCRToAugAssign from .tasklet_fusion import TaskletFusion from .trivial_tasklet_elimination import TrivialTaskletElimination diff --git a/dace/transformation/dataflow/wcr_conversion.py b/dace/transformation/dataflow/wcr_conversion.py index 60da5d3939..443f7734c8 100644 --- a/dace/transformation/dataflow/wcr_conversion.py +++ b/dace/transformation/dataflow/wcr_conversion.py @@ -1,13 +1,14 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. """ Transformations to convert subgraphs to write-conflict resolutions. 
""" import ast +import copy import re import copy -from dace import registry, nodes, dtypes, Memlet -from dace.transformation import transformation, helpers as xfh -from dace.sdfg import graph as gr, utils as sdutil -from dace import SDFG, SDFGState -from dace.sdfg.state import StateSubgraphView +from dace import nodes, dtypes, Memlet +from dace.frontend.python import astutils +from dace.transformation import transformation +from dace.sdfg import utils as sdutil +from dace import Memlet, SDFG, SDFGState from dace.transformation import helpers from dace.sdfg.propagation import propagate_memlets_state @@ -268,3 +269,81 @@ def apply(self, state: SDFGState, sdfg: SDFG): outedge.data.wcr = f'lambda a,b: a {op} b' # At this point we are leading to an access node again and can # traverse further up + + +class WCRToAugAssign(transformation.SingleStateTransformation): + """ + Converts a tasklet with a write-conflict resolution to an augmented assignment subgraph (e.g., "a = a + b"). + """ + tasklet = transformation.PatternNode(nodes.Tasklet) + output = transformation.PatternNode(nodes.AccessNode) + map_exit = transformation.PatternNode(nodes.MapExit) + + _EXPRESSIONS = ['+', '-', '*', '^', '%'] #, '/'] + _EXPR_MAP = {'-': ('+', '-({expr})'), '/': ('*', '((decltype({expr}))1)/({expr})')} + _PYOP_MAP = {ast.Add: '+', ast.Sub: '-', ast.Mult: '*', ast.BitXor: '^', ast.Mod: '%', ast.Div: '/'} + + @classmethod + def expressions(cls): + return [ + sdutil.node_path_graph(cls.tasklet, cls.output), + sdutil.node_path_graph(cls.tasklet, cls.map_exit, cls.output) + ] + + def can_be_applied(self, graph, expr_index, sdfg, permissive=False): + if expr_index == 0: + edges = graph.edges_between(self.tasklet, self.output) + else: + edges = graph.edges_between(self.tasklet, self.map_exit) + if len(edges) != 1: + return False + if edges[0].data.wcr is None: + return False + + # If the access subset on the WCR edge is overapproximated (i.e., the access may be dynamic), we do not support + # swapping to an augmented assignment pattern with this transformation. 
+ if edges[0].data.subset.num_elements() > edges[0].data.volume or edges[0].data.dynamic is True: + return False + + return True + + def apply(self, state: SDFGState, sdfg: SDFG): + if self.expr_index == 0: + edge = state.edges_between(self.tasklet, self.output)[0] + wcr = ast.parse(edge.data.wcr).body[0].value.body + if isinstance(wcr, ast.BinOp): + wcr.left.id = '__in1' + wcr.right.id = '__in2' + code = astutils.unparse(wcr) + else: + raise NotImplementedError + edge.data.wcr = None + in_access = state.add_access(self.output.data) + new_tasklet = state.add_tasklet('augassign', {'__in1', '__in2'}, {'__out'}, f"__out = {code}") + scal_name, scal_desc = sdfg.add_scalar('tmp', sdfg.arrays[self.output.data].dtype, transient=True, + find_new_name=True) + state.add_edge(self.tasklet, edge.src_conn, new_tasklet, '__in1', Memlet.from_array(scal_name, scal_desc)) + state.add_edge(in_access, None, new_tasklet, '__in2', copy.deepcopy(edge.data)) + state.add_edge(new_tasklet, '__out', self.output, edge.dst_conn, edge.data) + state.remove_edge(edge) + else: + edge = state.edges_between(self.tasklet, self.map_exit)[0] + map_entry = state.entry_node(self.map_exit) + wcr = ast.parse(edge.data.wcr).body[0].value.body + if isinstance(wcr, ast.BinOp): + wcr.left.id = '__in1' + wcr.right.id = '__in2' + code = astutils.unparse(wcr) + else: + raise NotImplementedError + for e in state.memlet_path(edge): + e.data.wcr = None + in_access = state.add_access(self.output.data) + new_tasklet = state.add_tasklet('augassign', {'__in1', '__in2'}, {'__out'}, f"__out = {code}") + scal_name, scal_desc = sdfg.add_scalar('tmp', sdfg.arrays[self.output.data].dtype, transient=True, + find_new_name=True) + state.add_edge(self.tasklet, edge.src_conn, new_tasklet, '__in1', Memlet.from_array(scal_name, scal_desc)) + state.add_memlet_path(in_access, map_entry, new_tasklet, memlet=copy.deepcopy(edge.data), dst_conn='__in2') + state.add_edge(new_tasklet, '__out', self.map_exit, edge.dst_conn, edge.data) + state.remove_edge(edge) + \ No newline at end of file diff --git a/tests/transformations/wcr_to_augassign_test.py b/tests/transformations/wcr_to_augassign_test.py new file mode 100644 index 0000000000..111ef135eb --- /dev/null +++ b/tests/transformations/wcr_to_augassign_test.py @@ -0,0 +1,45 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. +""" Tests WCRToAugAssign. """ + +import dace +import numpy as np +from dace.transformation.dataflow import WCRToAugAssign + + +def test_tasklet(): + + @dace.program + def test(): + a = np.zeros((10,)) + for i in dace.map[1:9]: + a[i-1] += 1 + return a + + sdfg = test.to_sdfg(simplify=False) + sdfg.apply_transformations(WCRToAugAssign) + + val = sdfg() + ref = test.f() + assert(np.allclose(val, ref)) + + +def test_mapped_tasklet(): + + @dace.program + def test(): + a = np.zeros((10,)) + for i in dace.map[1:9]: + a[i-1] += 1 + return a + + sdfg = test.to_sdfg(simplify=True) + sdfg.apply_transformations(WCRToAugAssign) + + val = sdfg() + ref = test.f() + assert(np.allclose(val, ref)) + + +if __name__ == '__main__': + test_tasklet() + test_mapped_tasklet() From 6525bc531cf33a344e8f6a03345b84bfe33c1c8d Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Thu, 3 Oct 2024 11:32:57 -0700 Subject: [PATCH 63/76] Inter-state edge assignment race test (#1672) Add a validation test for race conditions induced by assigning and reading the same value in a single interstate edge. 
For example: `i = j + 1; j = ...` --- dace/sdfg/validation.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dace/sdfg/validation.py b/dace/sdfg/validation.py index f305affb80..e75099276f 100644 --- a/dace/sdfg/validation.py +++ b/dace/sdfg/validation.py @@ -108,6 +108,16 @@ def validate_control_flow_region(sdfg: 'SDFG', f'Trying to read an inaccessible data container "{container}" ' f'(Storage: {sdfg.arrays[container].storage}) in host code interstate edge', sdfg, eid) + # Check for race conditions on edge assignments + for aname, aval in edge.data.assignments.items(): + syms = symbolic.free_symbols_and_functions(aval) + also_assigned = (syms & edge.data.assignments.keys()) - {aname} + if also_assigned: + eid = region.edge_id(edge) + raise InvalidSDFGInterstateEdgeError(f'Race condition: inter-state assignment {aname} = {aval} uses ' + f'variables {also_assigned}, which are also modified in the same ' + 'edge.', sdfg, eid) + # Add edge symbols into defined symbols symbols.update(issyms) From e6440a687c4ea851b8661d18d9490604b116d440 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Fri, 11 Oct 2024 11:17:09 -0700 Subject: [PATCH 64/76] Fix race conditions in Constant Propagation and Reference-To-View (#1679) * Fixes a case where constant propagation would cause an inter-state edge assignment race condition * Fixes reference-to-view disconnecting a state graph and causing a race condition * More informative error message in code generation for copy dispatching --- dace/codegen/dispatcher.py | 5 ++ .../passes/constant_propagation.py | 16 +++++- .../passes/reference_reduction.py | 37 ++++++++----- tests/passes/constant_propagation_test.py | 55 +++++++++++++++++++ tests/sdfg/reference_test.py | 47 ++++++++++++++++ 5 files changed, 144 insertions(+), 16 deletions(-) diff --git a/dace/codegen/dispatcher.py b/dace/codegen/dispatcher.py index 3ac9e097f8..9bec33b4ef 100644 --- a/dace/codegen/dispatcher.py +++ b/dace/codegen/dispatcher.py @@ -598,6 +598,8 @@ def dispatch_copy(self, src_node: nodes.Node, dst_node: nodes.Node, edge: MultiC cfg: ControlFlowRegion, dfg: StateSubgraphView, state_id: int, function_stream: CodeIOStream, output_stream: CodeIOStream) -> None: """ Dispatches a code generator for a memory copy operation. """ + if edge.data.is_empty(): + return state = cfg.state(state_id) target = self.get_copy_dispatcher(src_node, dst_node, edge, sdfg, state) if target is None: @@ -616,6 +618,9 @@ def dispatch_output_definition(self, src_node: nodes.Node, dst_node: nodes.Node, """ state = cfg.state(state_id) target = self.get_copy_dispatcher(src_node, dst_node, edge, sdfg, state) + if target is None: + raise ValueError( + f'Could not dispatch copy code generator for {src_node} -> {dst_node} in state {state.label}') # Dispatch self._used_targets.add(target) diff --git a/dace/transformation/passes/constant_propagation.py b/dace/transformation/passes/constant_propagation.py index b2c3df3ce8..bfa0928415 100644 --- a/dace/transformation/passes/constant_propagation.py +++ b/dace/transformation/passes/constant_propagation.py @@ -177,7 +177,7 @@ def _add_nested_datanames(name: str, desc: data.Structure): # TODO: How are we handling this? 
pass arrays.add(f'{name}.{k}') - + for name, desc in sdfg.arrays.items(): if isinstance(desc, data.Structure): _add_nested_datanames(name, desc) @@ -222,6 +222,20 @@ def _add_nested_datanames(name: str, desc: data.Structure): else: assignments[aname] = aval + for edge in sdfg.out_edges(state): + for aname, aval in assignments.items(): + # If the specific replacement would result in the value + # being both used and reassigned on the same inter-state + # edge, remove it from consideration. + replacements = symbolic.free_symbols_and_functions(aval) + used_in_assignments = { + k + for k, v in edge.data.assignments.items() if aname in symbolic.free_symbols_and_functions(v) + } + reassignments = replacements & edge.data.assignments.keys() + if reassignments and (used_in_assignments - reassignments): + assignments[aname] = _UnknownValue + if state not in result: # Condition may evaluate to False when state is the start-state result[state] = {} redo |= self._propagate(result[state], assignments) diff --git a/dace/transformation/passes/reference_reduction.py b/dace/transformation/passes/reference_reduction.py index 5bee098c55..dc5ae1eb7d 100644 --- a/dace/transformation/passes/reference_reduction.py +++ b/dace/transformation/passes/reference_reduction.py @@ -166,21 +166,28 @@ def remove_refsets( affected_nodes = set() for e in state.in_edges_by_connector(node, 'set'): # This is a reference set edge. Consider scope and neighbors and remove set - edges_to_remove.add(e) - affected_nodes.add(e.src) - affected_nodes.add(e.dst) - - # If source node does not have any other neighbors, it can be removed - if all(ee is e or ee.data.is_empty() for ee in state.all_edges(e.src)): - nodes_to_remove.add(e.src) - # If set reference does not have any other neighbors, it can be removed - if all(ee is e or ee.data.is_empty() for ee in state.all_edges(node)): - nodes_to_remove.add(node) - - # If in a scope, ensure reference node will not be disconnected - scope = state.entry_node(node) - if scope is not None and node not in nodes_to_remove: - edges_to_add.append((scope, None, node, None, Memlet())) + if state.out_degree(e.dst) == 0: + edges_to_remove.add(e) + affected_nodes.add(e.src) + affected_nodes.add(e.dst) + + # If source node does not have any other neighbors, it can be removed + if all(ee is e or ee.data.is_empty() for ee in state.all_edges(e.src)): + nodes_to_remove.add(e.src) + # If set reference does not have any other neighbors, it can be removed + if all(ee is e or ee.data.is_empty() for ee in state.all_edges(node)): + nodes_to_remove.add(node) + + # If in a scope, ensure reference node will not be disconnected + scope = state.entry_node(node) + if scope is not None and node not in nodes_to_remove: + edges_to_add.append((scope, None, node, None, Memlet())) + else: # Node has other neighbors, modify edge to become an empty memlet instead + e.dst_conn = None + e.dst.remove_in_connector('set') + e.data = Memlet() + + # Modify the state graph as necessary for e in edges_to_remove: diff --git a/tests/passes/constant_propagation_test.py b/tests/passes/constant_propagation_test.py index 3420403b49..acb1033554 100644 --- a/tests/passes/constant_propagation_test.py +++ b/tests/passes/constant_propagation_test.py @@ -573,6 +573,59 @@ def test_dependency_change(): assert a[0] == ref +@pytest.mark.parametrize('extra_state', (False, True)) +def test_dependency_change_same_edge(extra_state): + """ + Tests a regression in constant propagation that stems from a variable's + dependency being set in the same edge where 
the pre-propagated symbol was
+    also a right-hand side expression. In this case, ``i61`` is incorrectly
+    propagated to ``i60`` and ``i17`` is set to ``i61``, which is also updated
+    on the same inter-state edge.
+    """
+
+    sdfg = dace.SDFG('tester')
+    sdfg.add_symbol('N', dace.int64)
+    sdfg.add_array('a', [1], dace.int64)
+    sdfg.add_scalar('cont', dace.int64, transient=True)
+    init = sdfg.add_state()
+    entry = sdfg.add_state('entry')
+    body = sdfg.add_state('body')
+    latch = sdfg.add_state('latch')
+    final = sdfg.add_state('final')
+
+    sdfg.add_edge(init, entry, dace.InterstateEdge(assignments=dict(i60='0')))
+    sdfg.add_edge(entry, body, dace.InterstateEdge(assignments=dict(i61='i60 + 1', i17='i60 * 12')))
+    sdfg.add_edge(body, final, dace.InterstateEdge('cont'))
+    sdfg.add_edge(body, latch, dace.InterstateEdge('not cont', dict(i60='i61')))
+    if not extra_state:
+        sdfg.add_edge(latch, body, dace.InterstateEdge(assignments=dict(i61='i60 + 1', i17='i60 * 12')))
+    else:
+        # Test that the multi-value definition is not propagated to following edges
+        extra = sdfg.add_state('extra')
+        sdfg.add_edge(latch, extra, dace.InterstateEdge(assignments=dict(i61='i60 + 1', i17='i60 * 12')))
+        sdfg.add_edge(extra, body, dace.InterstateEdge(assignments=dict(i18='i60 + i61')))
+
+    t = body.add_tasklet('add', {'inp'}, {'out', 'c'}, 'out = inp + i17; c = i61 == 10')
+    body.add_edge(body.add_read('a'), None, t, 'inp', dace.Memlet('a[0]'))
+    body.add_edge(t, 'out', body.add_write('a'), None, dace.Memlet('a[0]'))
+    body.add_edge(t, 'c', body.add_write('cont'), None, dace.Memlet('cont[0]'))
+
+    ConstantPropagation().apply_pass(sdfg, {})
+
+    sdfg.validate()
+
+    # Python code equivalent of the above SDFG
+    ref = 0
+    i60 = 0
+    for i60 in range(0, 10):
+        i17 = i60 * 12
+        ref += i17
+
+    a = np.zeros([1], np.int64)
+    sdfg(a=a)
+    assert a[0] == ref
+
+
 if __name__ == '__main__':
     test_simple_constants()
     test_nested_constants()
@@ -592,3 +645,5 @@ def test_dependency_change():
     test_for_with_external_init_nested_start_with_guard()
     test_skip_branch()
     test_dependency_change()
+    test_dependency_change_same_edge(False)
+    test_dependency_change_same_edge(True)
diff --git a/tests/sdfg/reference_test.py b/tests/sdfg/reference_test.py
index 6c4d1eda1f..d712c653c9 100644
--- a/tests/sdfg/reference_test.py
+++ b/tests/sdfg/reference_test.py
@@ -7,6 +7,7 @@
 from dace.transformation.passes.reference_reduction import ReferenceToView
 import numpy as np
 import pytest
+import networkx as nx
 
 
 def test_unset_reference():
@@ -636,6 +637,51 @@ def test_ref2view_refset_in_scope(array_outside_scope, depends_on_iterate):
     assert np.allclose(B, ref)
 
 
+def test_ref2view_reconnection():
+    """
+    Tests a regression in which ReferenceToView disconnects an existing weakly-connected state,
+    thus creating a race condition.
+ """ + sdfg = dace.SDFG('reftest') + sdfg.add_array('A', [2], dace.float64) + sdfg.add_array('B', [1], dace.float64) + sdfg.add_reference('ref', [1], dace.float64) + + state = sdfg.add_state() + a2 = state.add_access('A') + ref = state.add_access('ref') + b = state.add_access('B') + + t2 = state.add_tasklet('addone', {'inp'}, {'out'}, 'out = inp + 1') + state.add_edge(ref, None, t2, 'inp', dace.Memlet('ref[0]')) + state.add_edge(t2, 'out', b, None, dace.Memlet('B[0]')) + state.add_edge(a2, None, ref, 'set', dace.Memlet('A[1]')) + + t1 = state.add_tasklet('addone', {'inp'}, {'out'}, 'out = inp + 1') + a1 = state.add_access('A') + state.add_edge(a1, None, t1, 'inp', dace.Memlet('A[1]')) + state.add_edge(t1, 'out', a2, None, dace.Memlet('A[1]')) + + # Test correctness before pass + A = np.random.rand(2) + B = np.random.rand(1) + ref = (A[1] + 2) + sdfg(A=A, B=B) + assert np.allclose(B, ref) + + # Test reference-to-view + result = Pipeline([ReferenceToView()]).apply_pass(sdfg, {}) + assert result['ReferenceToView'] == {'ref'} + + # Pass should not break order + assert len(list(nx.weakly_connected_components(state.nx))) == 1 + + # Test correctness after pass + ref = (A[1] + 2) + sdfg(A=A, B=B) + assert np.allclose(B, ref) + + if __name__ == '__main__': test_unset_reference() test_reference_branch() @@ -662,3 +708,4 @@ def test_ref2view_refset_in_scope(array_outside_scope, depends_on_iterate): test_ref2view_refset_in_scope(False, True) test_ref2view_refset_in_scope(True, False) test_ref2view_refset_in_scope(True, True) + test_ref2view_reconnection() From 64c54abbb0aacb6c674c2ff51bf524e2ec8d62e5 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Sat, 12 Oct 2024 02:27:56 -0700 Subject: [PATCH 65/76] Improve memlet label and string initialization (#1680) Follow up on the discussion in #1678. Supports `src[expr] -> dst[expr]`, `src[expr] -> [expr]`, and `[expr] -> dst[expr]` initializations for memlets. Also improves memlet label printouts. @philip-paul-mueller @phschaad the expression mentioned in the other PR will now be printed as `[0, 0] -> B[0]` for clarity and can be reparsed. 
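A minimal sketch of the three initialization forms (the array names and
subsets here are illustrative only):

```python
import dace

# Source array with an explicit destination subset: `A` is the memlet data.
m1 = dace.Memlet('A[0:10] -> [2:12]')
# Destination array named: `B` is the memlet data.
m2 = dace.Memlet('[2:12] -> B[0:10]')
# Both sides named: the source array `A` is preferred as the memlet data.
m3 = dace.Memlet('A[0:10] -> B[0:10]')
```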
--- dace/frontend/fortran/fortran_parser.py | 4 +- dace/frontend/python/replacements.py | 6 +-- dace/memlet.py | 45 +++++++++++++------ dace/transformation/dataflow/bank_split.py | 6 +-- tests/codegen/dependency_edge_test.py | 4 +- tests/fpga/multibank_copy_fpga_test.py | 8 ++-- tests/inlining_test.py | 6 +-- tests/passes/access_ranges_test.py | 2 +- tests/sdfg/reference_test.py | 6 +-- .../transformations/prune_connectors_test.py | 4 +- 10 files changed, 55 insertions(+), 36 deletions(-) diff --git a/dace/frontend/fortran/fortran_parser.py b/dace/frontend/fortran/fortran_parser.py index 28143f715a..52344c141f 100644 --- a/dace/frontend/fortran/fortran_parser.py +++ b/dace/frontend/fortran/fortran_parser.py @@ -536,8 +536,8 @@ def subroutine2sdfg(self, node: ast_internal_classes.Subroutine_Subprogram_Node, for i, s in zip(all_indices, array.shape)]) smallsubset = subsets.Range([(0, s - 1, 1) for s in shape]) - memlet = Memlet(f'{array_name}[{subset}]->{smallsubset}') - memlet2 = Memlet(f'{viewname}[{smallsubset}]->{subset}') + memlet = Memlet(f'{array_name}[{subset}]->[{smallsubset}]') + memlet2 = Memlet(f'{viewname}[{smallsubset}]->[{subset}]') wv = None rv = None if local_name.name in read_names: diff --git a/dace/frontend/python/replacements.py b/dace/frontend/python/replacements.py index ce35d7c9a1..5e6118a34b 100644 --- a/dace/frontend/python/replacements.py +++ b/dace/frontend/python/replacements.py @@ -453,7 +453,7 @@ def _numpy_flip(pv: ProgramVisitor, sdfg: SDFG, state: SDFGState, arr: str, axis # acpy, _ = sdfg.add_temp_transient(desc.shape, desc.dtype, desc.storage) # vnode = state.add_read(view) # anode = state.add_read(acpy) - # state.add_edge(vnode, None, anode, None, Memlet(f'{view}[{sset}] -> {dset}')) + # state.add_edge(vnode, None, anode, None, Memlet(f'{view}[{sset}] -> [{dset}]')) arr_copy, _ = sdfg.add_temp_transient_like(desc) inpidx = ','.join([f'__i{i}' for i in range(ndim)]) @@ -3934,7 +3934,7 @@ def implement_ufunc_accumulate(visitor: ProgramVisitor, ast_node: ast.Call, sdfg init_state = nested_sdfg.add_state(label="init") r = init_state.add_read(inpconn) w = init_state.add_write(outconn) - init_state.add_nedge(r, w, dace.Memlet("{a}[{i}] -> {oi}".format(a=inpconn, i='0', oi='0'))) + init_state.add_nedge(r, w, dace.Memlet("{a}[{i}] -> [{oi}]".format(a=inpconn, i='0', oi='0'))) body_state = nested_sdfg.add_state(label="body") r1 = body_state.add_read(inpconn) @@ -4189,7 +4189,7 @@ def view(pv: ProgramVisitor, sdfg: SDFG, state: SDFGState, arr: str, dtype, type find_new_name=True) # Register view with DaCe program visitor - # NOTE: We do not create here a Memlet of the form `A[subset] -> osubset` + # NOTE: We do not create here a Memlet of the form `A[subset] -> [osubset]` # because the View can be of a different dtype. Adding `other_subset` in # such cases will trigger validation error. pv.views[newarr] = (arr, Memlet.from_array(arr, desc)) diff --git a/dace/memlet.py b/dace/memlet.py index d50c6c77f7..1e39b4179d 100644 --- a/dace/memlet.py +++ b/dace/memlet.py @@ -75,7 +75,9 @@ def __init__(self, of use API. Must follow one of the following forms: 1. ``ARRAY``, 2. ``ARRAY[SUBSET]``, - 3. ``ARRAY[SUBSET] -> OTHER_SUBSET``. + 3. ``ARRAY[SUBSET] -> [OTHER_SUBSET]``, + 4. ``[OTHER_SUBSET] -> ARRAY[SUBSET]``, + 5. ``SRC_ARRAY[SRC_SUBSET] -> DST_ARRAY[DST_SUBSET]``. :param data: Data descriptor name attached to this memlet. :param subset: The subset to take from the data attached to the edge, represented either as a string or a Subset object. 
@@ -330,6 +332,10 @@ def _parse_from_subexpr(self, expr: str): raise SyntaxError('Invalid memlet syntax "%s"' % expr) return expr, None + # [subset] syntax + if expr.startswith('['): + return None, SubsetProperty.from_string(expr[1:-1]) + # array[subset] syntax arrname, subset_str = expr[:-1].split('[') if not dtypes.validate_name(arrname): @@ -342,27 +348,40 @@ def _parse_memlet_from_str(self, expr: str): or the _data,_subset fields. :param expr: A string expression of the this memlet, given as an ease - of use API. Must follow one of the following forms: - 1. ``ARRAY``, - 2. ``ARRAY[SUBSET]``, - 3. ``ARRAY[SUBSET] -> OTHER_SUBSET``. - Note that modes 2 and 3 are deprecated and will leave - the memlet uninitialized until inserted into an SDFG. + of use API. Must follow one of the following forms: + 1. ``ARRAY``, + 2. ``ARRAY[SUBSET]``, + 3. ``ARRAY[SUBSET] -> [OTHER_SUBSET]``, + 4. ``[OTHER_SUBSET] -> ARRAY[SUBSET]``, + 5. ``SRC_ARRAY[SRC_SUBSET] -> DST_ARRAY[DST_SUBSET]``. + Note that options 1-2 will leave the memlet uninitialized + until added into an SDFG. """ expr = expr.strip() if '->' not in expr: # Options 1 and 2 self.data, self.subset = self._parse_from_subexpr(expr) return - # Option 3 + # Options 3-5 src_expr, dst_expr = expr.split('->') src_expr = src_expr.strip() dst_expr = dst_expr.strip() - if '[' not in src_expr and not dtypes.validate_name(src_expr): - raise SyntaxError('Expression without data name not yet allowed') - self.data, self.subset = self._parse_from_subexpr(src_expr) - self.other_subset = SubsetProperty.from_string(dst_expr) + src_data, src_subset = self._parse_from_subexpr(src_expr) + dst_data, dst_subset = self._parse_from_subexpr(dst_expr) + if src_data is None and dst_data is None: + raise SyntaxError('At least one data name needs to be given') + + if src_data is not None: # Prefer src[subset] -> [other_subset] + self.data = src_data + self.subset = src_subset + self.other_subset = dst_subset + self._is_data_src = True + else: + self.data = dst_data + self.subset = dst_subset + self.other_subset = src_subset + self._is_data_src = False def try_initialize(self, sdfg: 'dace.sdfg.SDFG', state: 'dace.sdfg.SDFGState', edge: 'dace.sdfg.graph.MultiConnectorEdge'): @@ -660,7 +679,7 @@ def _label(self, shape): if self.other_subset is not None: if self._is_data_src is False: - result += ' <- [%s]' % str(self.other_subset) + result = f'[{self.other_subset}] -> {result}' else: result += ' -> [%s]' % str(self.other_subset) return result diff --git a/dace/transformation/dataflow/bank_split.py b/dace/transformation/dataflow/bank_split.py index 89fbcc8697..ed7bf26b6f 100644 --- a/dace/transformation/dataflow/bank_split.py +++ b/dace/transformation/dataflow/bank_split.py @@ -162,8 +162,8 @@ def apply(self, graph: SDFGState, sdfg: SDFG) -> Union[Any, None]: target_offset_str = ", ".join([f"({x}):({x}+{y})" for x, y in zip(target_offset, target_size)]) if collect_src: copy_memlet = memlet.Memlet(f"{src.data}[{target_hbm_bank_str}, {target_size_str}]->" - f"{target_offset_str}") + f"[{target_offset_str}]") else: - copy_memlet = memlet.Memlet(f"{src.data}[{target_offset_str}]->{target_hbm_bank_str}, " - f"{target_size_str}") + copy_memlet = memlet.Memlet(f"{src.data}[{target_offset_str}]->[{target_hbm_bank_str}, " + f"{target_size_str}]") graph.add_edge(src, None, dst, None, copy_memlet) diff --git a/tests/codegen/dependency_edge_test.py b/tests/codegen/dependency_edge_test.py index 5fca6fed22..a6d994bfe0 100644 --- a/tests/codegen/dependency_edge_test.py +++ 
b/tests/codegen/dependency_edge_test.py @@ -38,12 +38,12 @@ def test_mapped_dependency_edge(reverse): state.add_edge(map_entry, "OUT_A", tmp_A, None, dace.Memlet("A[i]")) state.add_edge(map_entry, "OUT_B", tmp_B, None, dace.Memlet("B[i]")) - state.add_edge(tmp_A, None, A2, None, dace.Memlet("tmp_A[0] -> ((i+1)%2)")) + state.add_edge(tmp_A, None, A2, None, dace.Memlet("tmp_A[0] -> [((i+1)%2)]")) if not reverse: state.add_edge(A2, None, tmp_B, None, dace.Memlet()) # Dependency Edge state.add_edge(A2, None, map_exit, "IN_A", dace.Memlet("A[0:2]")) - state.add_edge(tmp_B, None, A3, None, dace.Memlet("tmp_B[0] -> ((i+1)%2)")) + state.add_edge(tmp_B, None, A3, None, dace.Memlet("tmp_B[0] -> [((i+1)%2)]")) if reverse: state.add_edge(A3, None, tmp_A, None, dace.Memlet()) # Dependency Edge state.add_edge(A3, None, map_exit, "IN_A", dace.Memlet("A[0:2]")) diff --git a/tests/fpga/multibank_copy_fpga_test.py b/tests/fpga/multibank_copy_fpga_test.py index 4e8c586a99..5f48e4373a 100644 --- a/tests/fpga/multibank_copy_fpga_test.py +++ b/tests/fpga/multibank_copy_fpga_test.py @@ -75,9 +75,9 @@ def copy_multibank_1_mem_type(mem_type): s, a, _ = mkc(sdfg, None, "a", "x", StorageType.Default, StorageType.FPGA_Global, [3, 4, 4], [3, 4, 4], "a", None, (mem_type, "0:3")) s, _, _ = mkc(sdfg, s, "x", "y", None, StorageType.FPGA_Global, None, [2, 4, 4, 4], - "x[1, 1:4, 1:4]->1, 1:4, 1:4, 1", None, (mem_type, "3:5")) + "x[1, 1:4, 1:4]->[1, 1:4, 1:4, 1]", None, (mem_type, "3:5")) s, _, _ = mkc(sdfg, s, "y", "z", None, StorageType.FPGA_Global, None, [1, 4, 4, 4], - "y[1, 0:4, 0:4, 0:4]->0, 0:4, 0:4, 0:4", None, (mem_type, "5:6")) + "y[1, 0:4, 0:4, 0:4]->[0, 0:4, 0:4, 0:4]", None, (mem_type, "5:6")) s, _, _ = mkc(sdfg, s, "z", "w", None, StorageType.FPGA_Global, None, [1, 4, 4, 4], "z", None, (mem_type, "6:7")) s, _, c = mkc(sdfg, s, "w", "c", None, StorageType.Default, None, [1, 4, 4, 4], "w") @@ -97,9 +97,9 @@ def copy_multibank_2_mem_type(mem_type_1, mem_type_2): sdfg = dace.SDFG("copy_multibank_2_mem_type_" + mem_type_1 + "_" + mem_type_2) s, a, _ = mkc(sdfg, None, "a", "x", StorageType.Default, StorageType.FPGA_Global, [3, 5, 5], [3, 5, 5], "a", None, (mem_type_1, "0:3")) - s, _, _ = mkc(sdfg, s, "x", "d1", None, StorageType.FPGA_Global, None, [3, 5, 5], "x[2, 0:5, 0:5]->1, 0:5, 0:5", + s, _, _ = mkc(sdfg, s, "x", "d1", None, StorageType.FPGA_Global, None, [3, 5, 5], "x[2, 0:5, 0:5]->[1, 0:5, 0:5]", None, (mem_type_2, "1:4")) - s, _, _ = mkc(sdfg, s, "d1", "y", None, StorageType.FPGA_Global, None, [1, 7, 7], "d1[1, 0:5,0:5]->0, 2:7, 2:7", + s, _, _ = mkc(sdfg, s, "d1", "y", None, StorageType.FPGA_Global, None, [1, 7, 7], "d1[1, 0:5,0:5]->[0, 2:7, 2:7]", None, (mem_type_1, "3:4")) s, _, c = mkc(sdfg, s, "y", "c", None, StorageType.Default, None, [1, 7, 7], "y") diff --git a/tests/inlining_test.py b/tests/inlining_test.py index 7c3510daed..c6d8fa8d9f 100644 --- a/tests/inlining_test.py +++ b/tests/inlining_test.py @@ -54,8 +54,8 @@ def test_regression_reshape_unsqueeze(): A = nstate.add_access("view") W = nstate.add_write("output") - mm1 = dace.Memlet("input[0:3, 0:3] -> 0:3, 0:3") - mm2 = dace.Memlet("view[0:3, 0:2] -> 3:9") + mm1 = dace.Memlet("input[0:3, 0:3] -> [0:3, 0:3]") + mm2 = dace.Memlet("view[0:3, 0:2] -> [3:9]") nstate.add_edge(R, None, A, None, mm1) nstate.add_edge(A, None, W, None, mm2) @@ -405,7 +405,7 @@ def test_regression_inline_subset(): nsdfg.add_array("input", [96, 32], dace.float64) nsdfg.add_array("output", [32, 32], dace.float64) nstate.add_edge(nstate.add_read("input"), None, 
nstate.add_write("output"), None,
-                    dace.Memlet("input[32:64, 0:32] -> 0:32, 0:32"))
+                    dace.Memlet("input[32:64, 0:32] -> [0:32, 0:32]"))
 
     @dace.program
     def test(A: dace.float64[96, 32]):
diff --git a/tests/passes/access_ranges_test.py b/tests/passes/access_ranges_test.py
index 263cb2243d..3bab2e9ab0 100644
--- a/tests/passes/access_ranges_test.py
+++ b/tests/passes/access_ranges_test.py
@@ -47,7 +47,7 @@ def tester(A: dace.float64[N, N], B: dace.float64[20, 20]):
     # Construct read/write memlets
     memlet1 = dace.Memlet('A[0:N, 0:N]')
     memlet1._is_data_src = False
-    memlet2 = dace.Memlet('A[1:21, 1:21] -> 0:20, 0:20')
+    memlet2 = dace.Memlet('A[1:21, 1:21] -> [0:20, 0:20]')
     memlet2._is_data_src = False
     memlet3 = dace.Memlet('A[0, 0]')
     memlet4 = dace.Memlet('A[0, 0]')
diff --git a/tests/sdfg/reference_test.py b/tests/sdfg/reference_test.py
index d712c653c9..da5c4a0111 100644
--- a/tests/sdfg/reference_test.py
+++ b/tests/sdfg/reference_test.py
@@ -159,7 +159,7 @@ def _create_scoped_sdfg():
     inp = state.add_read('B')
     t = state.add_tasklet('doit', {'r'}, {'w'}, 'w = r + 1')
     out = state.add_write('A')
-    state.add_memlet_path(inp, me, ref, memlet=dace.Memlet('B[1, i] -> i'))
+    state.add_memlet_path(inp, me, ref, memlet=dace.Memlet('B[1, i] -> [i]'))
     state.add_edge(ref, None, t, 'r', dace.Memlet('ref[i]'))
     state.add_edge_pair(mx, t, out, internal_connector='w', internal_memlet=dace.Memlet('A[10, i]'))
 
@@ -250,7 +250,7 @@ def _create_loop_nonfree_symbols_sdfg():
     sdfg.add_loop(istate, state, after, 'i', '0', 'i < 20', 'i + 1')
 
     # Reference set inside loop
-    state.add_edge(state.add_read('A'), None, state.add_write('ref'), 'set', dace.Memlet('A[i] -> 0'))
+    state.add_edge(state.add_read('A'), None, state.add_write('ref'), 'set', dace.Memlet('A[i] -> [0]'))
 
     # Use outisde loop
     t = after.add_tasklet('setone', {}, {'out'}, 'out = 1')
@@ -519,7 +519,7 @@ def test_reference_loop_nonfree():
     assert len(sources) == 1  # There is only one SDFG
     sources = sources[0]
     assert len(sources) == 1
-    assert sources['ref'] == {dace.Memlet('A[i] -> 0')}
+    assert sources['ref'] == {dace.Memlet('A[i] -> [0]')}
 
     # Test loop-to-map - should fail to apply
     from dace.transformation.interstate import LoopToMap
diff --git a/tests/transformations/prune_connectors_test.py b/tests/transformations/prune_connectors_test.py
index 4026ec3e1c..63bbe5843f 100644
--- a/tests/transformations/prune_connectors_test.py
+++ b/tests/transformations/prune_connectors_test.py
@@ -207,7 +207,7 @@ def _make_read_write_sdfg(
     istate.add_nedge(
         inner_A,
         inner_B,
-        dace.Memlet("inner_A[0:4, 0:4] -> 0:4, 0:4"),
+        dace.Memlet("inner_A[0:4, 0:4] -> [0:4, 0:4]"),
     )
 else:
     # Because the `data` filed of the involved memlets differs the read to
@@ -216,7 +216,7 @@ def _make_read_write_sdfg(
     istate.add_nedge(
         inner_A,
         inner_B,
-        dace.Memlet("inner_B[0:4, 0:4] -> 0:4, 0:4"),
+        dace.Memlet("inner_B[0:4, 0:4] -> [0:4, 0:4]"),
     )
 
     # Add the nested SDFG

From 073b61373a57990a08d78d1843b3bb5ae0af5d0d Mon Sep 17 00:00:00 2001
From: Philipp Schaad
Date: Sat, 12 Oct 2024 21:13:19 +0200
Subject: [PATCH 66/76] Control Flow Raising (#1657)

This PR mainly provides control flow raising passes for the new intrinsic
control flow constructs (Branches and loops) in SDFGs. In addition to raising,
the state and control flow reachability passes have been adjusted to
faithfully work with the intrinsic control flow constructs.

Along with the raising and reachability passes, a few important bugfixes and
a general cleanup are included in the PR, but no other functionality is
changed.
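As a rough sketch of the target representation, a raised loop lives in a single
intrinsic `LoopRegion` instead of a guard/body/latch state machine. The
constructor arguments below are assumptions based on the `LoopRegion` API in
`dace.sdfg.state`, not something introduced by this PR:

```python
import dace
from dace.sdfg.state import LoopRegion

sdfg = dace.SDFG('raised')
# A single region carries the loop variable, condition, init, and update.
loop = LoopRegion('loop', condition_expr='i < 10', loop_var='i',
                  initialize_expr='i = 0', update_expr='i = i + 1')
sdfg.add_node(loop, is_start_block=True)
body = loop.add_state('body', is_start_block=True)
```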
--- dace/codegen/control_flow.py | 37 +- dace/codegen/targets/framecode.py | 24 +- dace/frontend/python/newast.py | 7 +- dace/frontend/python/parser.py | 2 + dace/sdfg/analysis/schedule_tree/treenodes.py | 15 +- .../analysis/writeset_underapproximation.py | 397 +++++++++--------- dace/sdfg/propagation.py | 54 +-- dace/sdfg/state.py | 38 +- dace/transformation/helpers.py | 4 +- .../interstate/loop_detection.py | 300 ++++++++++--- .../transformation/interstate/loop_lifting.py | 99 +++++ dace/transformation/pass_pipeline.py | 3 +- .../passes/analysis/__init__.py | 1 + .../passes/{ => analysis}/analysis.py | 141 +++++-- .../passes/analysis/loop_analysis.py | 116 +++++ .../simplification/control_flow_raising.py | 96 +++++ dace/transformation/subgraph/expansion.py | 9 +- dace/transformation/subgraph/helpers.py | 17 +- .../control_flow_raising_test.py | 98 +++++ .../writeset_underapproximation_test.py | 102 +++-- tests/sdfg/conditional_region_test.py | 50 +-- tests/sdfg/loop_region_test.py | 51 +++ .../interstate/loop_lifting_test.py | 217 ++++++++++ tests/transformations/loop_detection_test.py | 51 ++- 24 files changed, 1468 insertions(+), 461 deletions(-) create mode 100644 dace/transformation/interstate/loop_lifting.py create mode 100644 dace/transformation/passes/analysis/__init__.py rename dace/transformation/passes/{ => analysis}/analysis.py (81%) create mode 100644 dace/transformation/passes/analysis/loop_analysis.py create mode 100644 dace/transformation/passes/simplification/control_flow_raising.py create mode 100644 tests/passes/simplification/control_flow_raising_test.py create mode 100644 tests/transformations/interstate/loop_lifting_test.py diff --git a/dace/codegen/control_flow.py b/dace/codegen/control_flow.py index 7701a19ec2..f5559984e7 100644 --- a/dace/codegen/control_flow.py +++ b/dace/codegen/control_flow.py @@ -275,9 +275,13 @@ def as_cpp(self, codegen, symbols) -> str: expr += elem.as_cpp(codegen, symbols) # In a general block, emit transitions and assignments after each individual block or region. if isinstance(elem, BasicCFBlock) or (isinstance(elem, RegionBlock) and elem.region): - cfg = elem.state.parent_graph if isinstance(elem, BasicCFBlock) else elem.region.parent_graph + if isinstance(elem, BasicCFBlock): + g_elem = elem.state + else: + g_elem = elem.region + cfg = g_elem.parent_graph sdfg = cfg if isinstance(cfg, SDFG) else cfg.sdfg - out_edges = cfg.out_edges(elem.state) if isinstance(elem, BasicCFBlock) else cfg.out_edges(elem.region) + out_edges = cfg.out_edges(g_elem) for j, e in enumerate(out_edges): if e not in self.gotos_to_ignore: # Skip gotos to immediate successors @@ -532,26 +536,27 @@ def as_cpp(self, codegen, symbols) -> str: expr = '' if self.loop.update_statement and self.loop.init_statement and self.loop.loop_variable: - # Initialize to either "int i = 0" or "i = 0" depending on whether the type has been defined. 
- defined_vars = codegen.dispatcher.defined_vars - if not defined_vars.has(self.loop.loop_variable): - try: - init = f'{symbols[self.loop.loop_variable]} ' - except KeyError: - init = 'auto ' - symbols[self.loop.loop_variable] = None - init += unparse_interstate_edge(self.loop.init_statement.code[0], sdfg, codegen=codegen, symbols=symbols) + init = unparse_interstate_edge(self.loop.init_statement.code[0], sdfg, codegen=codegen, symbols=symbols) init = init.strip(';') update = unparse_interstate_edge(self.loop.update_statement.code[0], sdfg, codegen=codegen, symbols=symbols) update = update.strip(';') if self.loop.inverted: - expr += f'{init};\n' - expr += 'do {\n' - expr += _clean_loop_body(self.body.as_cpp(codegen, symbols)) - expr += f'{update};\n' - expr += f'\n}} while({cond});\n' + if self.loop.update_before_condition: + expr += f'{init};\n' + expr += 'do {\n' + expr += _clean_loop_body(self.body.as_cpp(codegen, symbols)) + expr += f'{update};\n' + expr += f'}} while({cond});\n' + else: + expr += f'{init};\n' + expr += 'while (1) {\n' + expr += _clean_loop_body(self.body.as_cpp(codegen, symbols)) + expr += f'if (!({cond}))\n' + expr += 'break;\n' + expr += f'{update};\n' + expr += '}\n' else: expr += f'for ({init}; {cond}; {update}) {{\n' expr += _clean_loop_body(self.body.as_cpp(codegen, symbols)) diff --git a/dace/codegen/targets/framecode.py b/dace/codegen/targets/framecode.py index 488c1c7fbd..d71ea40fee 100644 --- a/dace/codegen/targets/framecode.py +++ b/dace/codegen/targets/framecode.py @@ -15,12 +15,14 @@ from dace.codegen.prettycode import CodeIOStream from dace.codegen.common import codeblock_to_cpp, sym2cpp from dace.codegen.targets.target import TargetCodeGenerator +from dace.codegen.tools.type_inference import infer_expr_type +from dace.frontend.python import astutils from dace.sdfg import SDFG, SDFGState, nodes from dace.sdfg import scope as sdscope from dace.sdfg import utils from dace.sdfg.analysis import cfg as cfg_analysis -from dace.sdfg.state import ControlFlowRegion -from dace.transformation.passes.analysis import StateReachability +from dace.sdfg.state import ControlFlowRegion, LoopRegion +from dace.transformation.passes.analysis import StateReachability, loop_analysis def _get_or_eval_sdfg_first_arg(func, sdfg): @@ -916,6 +918,24 @@ def generate_code(self, interstate_symbols.update(symbols) global_symbols.update(symbols) + if isinstance(cfr, LoopRegion) and cfr.loop_variable is not None and cfr.init_statement is not None: + init_assignment = cfr.init_statement.code[0] + update_assignment = cfr.update_statement.code[0] + if isinstance(init_assignment, astutils.ast.Assign): + init_assignment = init_assignment.value + if isinstance(update_assignment, astutils.ast.Assign): + update_assignment = update_assignment.value + if not cfr.loop_variable in interstate_symbols: + l_end = loop_analysis.get_loop_end(cfr) + l_start = loop_analysis.get_init_assignment(cfr) + l_step = loop_analysis.get_loop_stride(cfr) + sym_type = dtypes.result_type_of(infer_expr_type(l_start, global_symbols), + infer_expr_type(l_step, global_symbols), + infer_expr_type(l_end, global_symbols)) + interstate_symbols[cfr.loop_variable] = sym_type + if not cfr.loop_variable in global_symbols: + global_symbols[cfr.loop_variable] = interstate_symbols[cfr.loop_variable] + for isvarName, isvarType in interstate_symbols.items(): if isvarType is None: raise TypeError(f'Type inference failed for symbol {isvarName}') diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py index 
0d40e13282..cacf15d785 100644 --- a/dace/frontend/python/newast.py +++ b/dace/frontend/python/newast.py @@ -2565,8 +2565,7 @@ def visit_If(self, node: ast.If): self._on_block_added(cond_block) if_body = ControlFlowRegion(cond_block.label + '_body', sdfg=self.sdfg) - cond_block.branches.append((CodeBlock(cond), if_body)) - if_body.parent_graph = self.cfg_target + cond_block.add_branch(CodeBlock(cond), if_body) # Visit recursively self._recursive_visit(node.body, 'if', node.lineno, if_body, False) @@ -2575,9 +2574,7 @@ def visit_If(self, node: ast.If): if len(node.orelse) > 0: else_body = ControlFlowRegion(f'{cond_block.label}_else_{node.orelse[0].lineno}', sdfg=self.sdfg) - #cond_block.branches.append((CodeBlock(cond_else), else_body)) - cond_block.branches.append((None, else_body)) - else_body.parent_graph = self.cfg_target + cond_block.add_branch(None, else_body) # Visit recursively self._recursive_visit(node.orelse, 'else', node.lineno, else_body, False) diff --git a/dace/frontend/python/parser.py b/dace/frontend/python/parser.py index b0ef56907f..d99be1265d 100644 --- a/dace/frontend/python/parser.py +++ b/dace/frontend/python/parser.py @@ -499,6 +499,8 @@ def _parse(self, args, kwargs, simplify=None, save=False, validate=False) -> SDF sdutils.inline_control_flow_regions(nsdfg) sdfg.using_experimental_blocks = self.use_experimental_cfg_blocks + sdfg.reset_cfg_list() + # Apply simplification pass automatically if not cached and (simplify == True or (simplify is None and Config.get_bool('optimizer', 'automatic_simplification'))): diff --git a/dace/sdfg/analysis/schedule_tree/treenodes.py b/dace/sdfg/analysis/schedule_tree/treenodes.py index 619b71b770..3b447fa15a 100644 --- a/dace/sdfg/analysis/schedule_tree/treenodes.py +++ b/dace/sdfg/analysis/schedule_tree/treenodes.py @@ -162,10 +162,17 @@ def as_string(self, indent: int = 0): loop = self.header.loop if loop.update_statement and loop.init_statement and loop.loop_variable: if loop.inverted: - pre_header = indent * INDENTATION + f'{loop.init_statement.as_string}\n' - header = indent * INDENTATION + 'do:\n' - pre_footer = (indent + 1) * INDENTATION + f'{loop.update_statement.as_string}\n' - footer = indent * INDENTATION + f'while {loop.loop_condition.as_string}' + if loop.update_before_condition: + pre_header = indent * INDENTATION + f'{loop.init_statement.as_string}\n' + header = indent * INDENTATION + 'do:\n' + pre_footer = (indent + 1) * INDENTATION + f'{loop.update_statement.as_string}\n' + footer = indent * INDENTATION + f'while {loop.loop_condition.as_string}' + else: + pre_header = indent * INDENTATION + f'{loop.init_statement.as_string}\n' + header = indent * INDENTATION + 'while True:\n' + pre_footer = (indent + 1) * INDENTATION + f'if (not {loop.loop_condition.as_string}):\n' + pre_footer += (indent + 2) * INDENTATION + 'break\n' + footer = (indent + 1) * INDENTATION + f'{loop.update_statement.as_string}\n' return pre_header + header + super().as_string(indent) + '\n' + pre_footer + footer else: result = (indent * INDENTATION + diff --git a/dace/sdfg/analysis/writeset_underapproximation.py b/dace/sdfg/analysis/writeset_underapproximation.py index bfd5f4cb00..a0f84e93a6 100644 --- a/dace/sdfg/analysis/writeset_underapproximation.py +++ b/dace/sdfg/analysis/writeset_underapproximation.py @@ -1,42 +1,36 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. """ -Pass derived from ``propagation.py`` that under-approximates write-sets of for-loops and Maps in -an SDFG. 
+Pass derived from ``propagation.py`` that under-approximates write-sets of for-loops and Maps in an SDFG. """ -from collections import defaultdict import copy +from dataclasses import dataclass, field import itertools +import sys import warnings -from typing import Any, Dict, List, Set, Tuple, Type, Union +from collections import defaultdict +from typing import Dict, List, Set, Tuple, Union + +if sys.version_info >= (3, 8): + from typing import TypedDict +else: + from typing_extensions import TypedDict + import sympy import dace +from dace import SDFG, Memlet, data, dtypes, registry, subsets, symbolic +from dace.sdfg import SDFGState +from dace.sdfg import graph +from dace.sdfg import graph as gr +from dace.sdfg import nodes, scope +from dace.sdfg.analysis import cfg as cfg_analysis +from dace.sdfg.nodes import AccessNode, NestedSDFG +from dace.sdfg.state import LoopRegion from dace.symbolic import issymbolic, pystr_to_symbolic, simplify -from dace.transformation.pass_pipeline import Modifies, Pass -from dace import registry, subsets, symbolic, dtypes, data, SDFG, Memlet -from dace.sdfg.nodes import NestedSDFG, AccessNode -from dace.sdfg import nodes, SDFGState, graph as gr -from dace.sdfg.analysis import cfg from dace.transformation import pass_pipeline as ppl -from dace.sdfg import graph -from dace.sdfg import scope - -# dictionary mapping each edge to a copy of the memlet of that edge with its write set -# underapproximated -approximation_dict: Dict[graph.Edge, Memlet] = {} -# dictionary that maps loop headers to "border memlets" that are written to in the -# corresponding loop -loop_write_dict: Dict[SDFGState, Dict[str, Memlet]] = {} -# dictionary containing information about the for loops in the SDFG -loop_dict: Dict[SDFGState, Tuple[SDFGState, SDFGState, - List[SDFGState], str, subsets.Range]] = {} -# dictionary mapping each nested SDFG to the iteration variables surrounding it -iteration_variables: Dict[SDFG, Set[str]] = {} -# dictionary mapping each state to the iteration variables surrounding it -# (including the ones from surrounding SDFGs) -ranges_per_state: Dict[SDFGState, - Dict[str, subsets.Range]] = defaultdict(lambda: {}) +from dace.transformation import transformation +from dace.transformation.pass_pipeline import Modifies @registry.make_registry @@ -81,7 +75,7 @@ def can_be_applied(self, expressions, variable_context, node_range, orig_edges): # Return False if iteration variable appears in multiple dimensions # or if two iteration variables appear in the same dimension - if not self._iteration_variables_appear_multiple_times(data_dims, expressions, other_params, params): + if not self._iteration_variables_appear_only_once(data_dims, expressions, other_params, params): return False node_range = self._make_range(node_range) @@ -89,27 +83,25 @@ def can_be_applied(self, expressions, variable_context, node_range, orig_edges): for dim in range(data_dims): dexprs = [] for expr in expressions: - if isinstance(expr[dim], symbolic.SymExpr): - dexprs.append(expr[dim].expr) - elif isinstance(expr[dim], tuple): - dexprs.append( - (expr[dim][0].expr if isinstance(expr[dim][0], symbolic.SymExpr) else - expr[dim][0], expr[dim][1].expr if isinstance( - expr[dim][1], symbolic.SymExpr) else expr[dim][1], expr[dim][2].expr - if isinstance(expr[dim][2], symbolic.SymExpr) else expr[dim][2])) + expr_dim = expr[dim] + if isinstance(expr_dim, symbolic.SymExpr): + dexprs.append(expr_dim.expr) + elif isinstance(expr_dim, tuple): + dexprs.append((expr_dim[0].expr if isinstance(expr_dim[0], 
symbolic.SymExpr) else expr_dim[0], + expr_dim[1].expr if isinstance(expr_dim[1], symbolic.SymExpr) else expr_dim[1], + expr_dim[2].expr if isinstance(expr_dim[2], symbolic.SymExpr) else expr_dim[2])) else: - dexprs.append(expr[dim]) + dexprs.append(expr_dim) for pattern_class in SeparableUnderapproximationMemletPattern.extensions().keys(): smpattern = pattern_class() - if smpattern.can_be_applied(dexprs, variable_context, node_range, orig_edges, dim, - data_dims): + if smpattern.can_be_applied(dexprs, variable_context, node_range, orig_edges, dim, data_dims): self.patterns_per_dim[dim] = smpattern break return None not in self.patterns_per_dim - def _iteration_variables_appear_multiple_times(self, data_dims, expressions, other_params, params): + def _iteration_variables_appear_only_once(self, data_dims, expressions, other_params, params): for expr in expressions: for param in params: occured_before = False @@ -146,8 +138,7 @@ def _iteration_variables_appear_multiple_times(self, data_dims, expressions, oth def _make_range(self, node_range): return subsets.Range([(rb.expr if isinstance(rb, symbolic.SymExpr) else rb, - re.expr if isinstance( - re, symbolic.SymExpr) else re, + re.expr if isinstance(re, symbolic.SymExpr) else re, rs.expr if isinstance(rs, symbolic.SymExpr) else rs) for rb, re, rs in node_range]) @@ -160,19 +151,16 @@ def propagate(self, array, expressions, node_range): dexprs = [] for expr in expressions: - if isinstance(expr[i], symbolic.SymExpr): - dexprs.append(expr[i].expr) - elif isinstance(expr[i], tuple): - dexprs.append(( - expr[i][0].expr if isinstance( - expr[i][0], symbolic.SymExpr) else expr[i][0], - expr[i][1].expr if isinstance( - expr[i][1], symbolic.SymExpr) else expr[i][1], - expr[i][2].expr if isinstance( - expr[i][2], symbolic.SymExpr) else expr[i][2], - expr.tile_sizes[i])) + expr_i = expr[i] + if isinstance(expr_i, symbolic.SymExpr): + dexprs.append(expr_i.expr) + elif isinstance(expr_i, tuple): + dexprs.append((expr_i[0].expr if isinstance(expr_i[0], symbolic.SymExpr) else expr_i[0], + expr_i[1].expr if isinstance(expr_i[1], symbolic.SymExpr) else expr_i[1], + expr_i[2].expr if isinstance(expr_i[2], symbolic.SymExpr) else expr_i[2], + expr.tile_sizes[i])) else: - dexprs.append(expr[i]) + dexprs.append(expr_i) result[i] = smpattern.propagate(array, dexprs, node_range) @@ -417,7 +405,7 @@ def _find_unconditionally_executed_states(sdfg: SDFG) -> Set[SDFGState]: sdfg.add_edge(sink_node, dummy_sink, dace.sdfg.InterstateEdge()) # get all the nodes that are executed unconditionally in the state-machine a.k.a nodes # that dominate the sink states - dominators = cfg.all_dominators(sdfg) + dominators = cfg_analysis.all_dominators(sdfg) states = dominators[dummy_sink] # remove dummy state sdfg.remove_node(dummy_sink) @@ -689,21 +677,44 @@ def _merge_subsets(subset_a: subsets.Subset, subset_b: subsets.Subset) -> subset return subset_b +@dataclass +class UnderapproximateWritesDict: + approximation: Dict[graph.Edge, Memlet] = field(default_factory=dict) + loop_approximation: Dict[SDFGState, Dict[str, Memlet]] = field(default_factory=dict) + loops: Dict[SDFGState, + Tuple[SDFGState, SDFGState, List[SDFGState], str, subsets.Range]] = field(default_factory=dict) + + +@transformation.experimental_cfg_block_compatible class UnderapproximateWrites(ppl.Pass): + # Dictionary mapping each edge to a copy of the memlet of that edge with its write set underapproximated. 
+ approximation_dict: Dict[graph.Edge, Memlet] + # Dictionary that maps loop headers to "border memlets" that are written to in the corresponding loop. + loop_write_dict: Dict[SDFGState, Dict[str, Memlet]] + # Dictionary containing information about the for loops in the SDFG. + loop_dict: Dict[SDFGState, Tuple[SDFGState, SDFGState, List[SDFGState], str, subsets.Range]] + # Dictionary mapping each nested SDFG to the iteration variables surrounding it. + iteration_variables: Dict[SDFG, Set[str]] + # Mapping of state to the iteration variables surrounding them, including the ones from surrounding SDFGs. + ranges_per_state: Dict[SDFGState, Dict[str, subsets.Range]] + + def __init__(self): + super().__init__() + self.approximation_dict = {} + self.loop_write_dict = {} + self.loop_dict = {} + self.iteration_variables = {} + self.ranges_per_state = defaultdict(lambda: {}) + def modifies(self) -> Modifies: - return ppl.Modifies.Everything + return ppl.Modifies.States def should_reapply(self, modified: ppl.Modifies) -> bool: - # If anything was modified, reapply - return modified & ppl.Modifies.States | ppl.Modifies.Edges | ppl.Modifies.Symbols | ppl.Modifies.Nodes - - def apply_pass( - self, sdfg: dace.SDFG, pipeline_results: Dict[str, Any] - ) -> Dict[str, Union[ - Dict[graph.Edge, Memlet], - Dict[SDFGState, Dict[str, Memlet]], - Dict[SDFGState, Tuple[SDFGState, SDFGState, List[SDFGState], str, subsets.Range]]]]: + # If anything was modified, reapply. + return modified & ppl.Modifies.Everything + + def apply_pass(self, top_sdfg: dace.SDFG, _) -> Dict[int, UnderapproximateWritesDict]: """ Applies the pass to the given SDFG. @@ -725,55 +736,71 @@ def apply_pass( :notes: The only modification this pass performs on the SDFG is splitting interstate edges. """ - # clear the global dictionaries - approximation_dict.clear() - loop_write_dict.clear() - loop_dict.clear() - iteration_variables.clear() - ranges_per_state.clear() - - # fill the approximation dictionary with the original edges as keys and the edges with the - # approximated memlets as values - for (edge, parent) in sdfg.all_edges_recursive(): - if isinstance(parent, SDFGState): - approximation_dict[edge] = copy.deepcopy(edge.data) - if not isinstance(approximation_dict[edge].subset, - subsets.SubsetUnion) and approximation_dict[edge].subset: - approximation_dict[edge].subset = subsets.SubsetUnion( - [approximation_dict[edge].subset]) - if not isinstance(approximation_dict[edge].dst_subset, - subsets.SubsetUnion) and approximation_dict[edge].dst_subset: - approximation_dict[edge].dst_subset = subsets.SubsetUnion( - [approximation_dict[edge].dst_subset]) - if not isinstance(approximation_dict[edge].src_subset, - subsets.SubsetUnion) and approximation_dict[edge].src_subset: - approximation_dict[edge].src_subset = subsets.SubsetUnion( - [approximation_dict[edge].src_subset]) - - self._underapproximate_writes_sdfg(sdfg) - - # Replace None with empty SubsetUnion in each Memlet - for entry in approximation_dict.values(): - if entry.subset is None: - entry.subset = subsets.SubsetUnion([]) - return { - "approximation": approximation_dict, - "loop_approximation": loop_write_dict, - "loops": loop_dict - } + result = defaultdict(lambda: UnderapproximateWritesDict()) + + for sdfg in top_sdfg.all_sdfgs_recursive(): + # Clear the global dictionaries. 
+ self.approximation_dict = {} + self.loop_write_dict = {} + self.loop_dict = {} + self.iteration_variables = {} + self.ranges_per_state = defaultdict(lambda: {}) + + # fill the approximation dictionary with the original edges as keys and the edges with the + # approximated memlets as values + for (edge, parent) in sdfg.all_edges_recursive(): + if isinstance(parent, SDFGState): + self.approximation_dict[edge] = copy.deepcopy(edge.data) + if not isinstance(self.approximation_dict[edge].subset, + subsets.SubsetUnion) and self.approximation_dict[edge].subset: + self.approximation_dict[edge].subset = subsets.SubsetUnion([ + self.approximation_dict[edge].subset + ]) + if not isinstance(self.approximation_dict[edge].dst_subset, + subsets.SubsetUnion) and self.approximation_dict[edge].dst_subset: + self.approximation_dict[edge].dst_subset = subsets.SubsetUnion([ + self.approximation_dict[edge].dst_subset + ]) + if not isinstance(self.approximation_dict[edge].src_subset, + subsets.SubsetUnion) and self.approximation_dict[edge].src_subset: + self.approximation_dict[edge].src_subset = subsets.SubsetUnion([ + self.approximation_dict[edge].src_subset + ]) + + self._underapproximate_writes_sdfg(sdfg) + + # Replace None with empty SubsetUnion in each Memlet + for entry in self.approximation_dict.values(): + if entry.subset is None: + entry.subset = subsets.SubsetUnion([]) + + result[sdfg.cfg_id].approximation = self.approximation_dict + result[sdfg.cfg_id].loop_approximation = self.loop_write_dict + result[sdfg.cfg_id].loops = self.loop_dict + + return result def _underapproximate_writes_sdfg(self, sdfg: SDFG): """ Underapproximates write-sets of loops, maps and nested SDFGs in the given SDFG. """ from dace.transformation.helpers import split_interstate_edges + from dace.transformation.passes.analysis import loop_analysis split_interstate_edges(sdfg) loops = self._find_for_loops(sdfg) - loop_dict.update(loops) + self.loop_dict.update(loops) + + for region in sdfg.all_control_flow_regions(): + if isinstance(region, LoopRegion): + start = loop_analysis.get_init_assignment(region) + stop = loop_analysis.get_loop_end(region) + stride = loop_analysis.get_loop_stride(region) + for state in region.all_states(): + self.ranges_per_state[state][region.loop_variable] = subsets.Range([(start, stop, stride)]) - for state in sdfg.nodes(): - self._underapproximate_writes_state(sdfg, state) + for state in region.all_states(): + self._underapproximate_writes_state(sdfg, state) self._underapproximate_writes_loops(loops, sdfg) @@ -792,8 +819,8 @@ def _find_for_loops(self, """ # We import here to avoid cyclic imports. 
- from dace.transformation.interstate.loop_detection import find_for_loop from dace.sdfg import utils as sdutils + from dace.transformation.interstate.loop_detection import find_for_loop # dictionary mapping loop headers to beginstate, loopstates, looprange identified_loops = {} @@ -885,13 +912,12 @@ def _find_for_loops(self, sources=[begin], condition=lambda _, child: child != guard) - if itvar not in ranges_per_state[begin]: + if itvar not in self.ranges_per_state[begin]: for loop_state in loop_states: - ranges_per_state[loop_state][itervar] = subsets.Range([ - rng]) + self.ranges_per_state[loop_state][itervar] = subsets.Range([rng]) loop_state_list.append(loop_state) - ranges_per_state[guard][itervar] = subsets.Range([rng]) + self.ranges_per_state[guard][itervar] = subsets.Range([rng]) identified_loops[guard] = (begin, last_loop_state, loop_state_list, itvar, subsets.Range([rng])) @@ -934,8 +960,11 @@ def _underapproximate_writes_state(self, sdfg: SDFG, state: SDFGState): # approximation_dict # First, propagate nested SDFGs in a bottom-up fashion + dnodes: Set[nodes.AccessNode] = set() for node in state.nodes(): - if isinstance(node, nodes.NestedSDFG): + if isinstance(node, AccessNode): + dnodes.add(node) + elif isinstance(node, nodes.NestedSDFG): self._find_live_iteration_variables(node, sdfg, state) # Propagate memlets inside the nested SDFG. @@ -947,6 +976,15 @@ def _underapproximate_writes_state(self, sdfg: SDFG, state: SDFGState): # Process scopes from the leaves upwards self._underapproximate_writes_scope(sdfg, state, state.scope_leaves()) + # Make sure any scalar writes are also added if they have not been processed yet. + for dn in dnodes: + desc = sdfg.data(dn.data) + if isinstance(desc, data.Scalar) or (isinstance(desc, data.Array) and desc.total_size == 1): + for iedge in state.in_edges(dn): + if not iedge in self.approximation_dict: + self.approximation_dict[iedge] = copy.deepcopy(iedge.data) + self.approximation_dict[iedge]._edge = iedge + def _find_live_iteration_variables(self, nsdfg: nodes.NestedSDFG, sdfg: SDFG, @@ -963,15 +1001,14 @@ def symbol_map(mapping, symbol): return None map_iteration_variables = _collect_iteration_variables(state, nsdfg) - sdfg_iteration_variables = iteration_variables[ - sdfg] if sdfg in iteration_variables else set() - state_iteration_variables = ranges_per_state[state].keys() + sdfg_iteration_variables = self.iteration_variables[sdfg] if sdfg in self.iteration_variables else set() + state_iteration_variables = self.ranges_per_state[state].keys() iteration_variables_local = (map_iteration_variables | sdfg_iteration_variables | state_iteration_variables) mapped_iteration_variables = set( map(lambda x: symbol_map(nsdfg.symbol_mapping, x), iteration_variables_local)) if mapped_iteration_variables: - iteration_variables[nsdfg.sdfg] = mapped_iteration_variables + self.iteration_variables[nsdfg.sdfg] = mapped_iteration_variables def _underapproximate_writes_nested_sdfg( self, @@ -1025,12 +1062,11 @@ def _init_border_memlet(template_memlet: Memlet, # Collect all memlets belonging to this access node memlets = [] for edge in edges: - inside_memlet = approximation_dict[edge] + inside_memlet = self.approximation_dict[edge] memlets.append(inside_memlet) # initialize border memlet if it does not exist already if border_memlet is None: - border_memlet = _init_border_memlet( - inside_memlet, node.label) + border_memlet = _init_border_memlet(inside_memlet, node.label) # Given all of this access nodes' memlets union all the subsets to one SubsetUnion if 
len(memlets) > 0: @@ -1042,18 +1078,16 @@ def _init_border_memlet(template_memlet: Memlet, border_memlet.subset, subset) # collect the memlets for each loop in the NSDFG - if state in loop_write_dict: - for node_label, loop_memlet in loop_write_dict[state].items(): + if state in self.loop_write_dict: + for node_label, loop_memlet in self.loop_write_dict[state].items(): if node_label not in border_memlets: continue border_memlet = border_memlets[node_label] # initialize border memlet if it does not exist already if border_memlet is None: - border_memlet = _init_border_memlet( - loop_memlet, node_label) + border_memlet = _init_border_memlet(loop_memlet, node_label) # compute the union of the ranges to merge the subsets. - border_memlet.subset = _merge_subsets( - border_memlet.subset, loop_memlet.subset) + border_memlet.subset = _merge_subsets(border_memlet.subset, loop_memlet.subset) # Make sure any potential NSDFG symbol mapping is correctly reversed # when propagating out. @@ -1068,17 +1102,16 @@ def _init_border_memlet(template_memlet: Memlet, # Propagate the inside 'border' memlets outside the SDFG by # offsetting, and unsqueezing if necessary. for edge in parent_state.out_edges(nsdfg_node): - out_memlet = approximation_dict[edge] + out_memlet = self.approximation_dict[edge] if edge.src_conn in border_memlets: internal_memlet = border_memlets[edge.src_conn] if internal_memlet is None: out_memlet.subset = None out_memlet.dst_subset = None - approximation_dict[edge] = out_memlet + self.approximation_dict[edge] = out_memlet continue - out_memlet = _unsqueeze_memlet_subsetunion(internal_memlet, out_memlet, parent_sdfg, - nsdfg_node) - approximation_dict[edge] = out_memlet + out_memlet = _unsqueeze_memlet_subsetunion(internal_memlet, out_memlet, parent_sdfg, nsdfg_node) + self.approximation_dict[edge] = out_memlet def _underapproximate_writes_loop(self, sdfg: SDFG, @@ -1099,9 +1132,7 @@ def _underapproximate_writes_loop(self, propagate_memlet_loop will be called recursively on the outermost loopheaders """ - def _init_border_memlet(template_memlet: Memlet, - node_label: str - ): + def _init_border_memlet(template_memlet: Memlet, node_label: str): ''' Creates a Memlet with the same data as the template_memlet, stores it in the border_memlets dictionary and returns it. 
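For context, a minimal sketch of how the reworked pass is driven from user code (illustrative only; the module path is an assumption, since the file header lies outside this excerpt). Per this diff, results are returned per control-flow graph, keyed by ``cfg_id``, rather than accumulated in module-level globals:

    import dace
    # Assumed module path for the pass shown in this diff:
    from dace.sdfg.analysis.writeset_underapproximation import (UnderapproximateWrites,
                                                                UnderapproximateWritesDict)

    @dace.program
    def prog(A: dace.float64[20]):
        for i in range(20):
            A[i] = 0.0

    sdfg = prog.to_sdfg()
    # The pipeline-results argument is unused by this pass, so an empty dict suffices.
    results = UnderapproximateWrites().apply_pass(sdfg, {})
    per_cfg: UnderapproximateWritesDict = results[sdfg.cfg_id]
    for edge, memlet in per_cfg.approximation.items():
        print(edge, memlet.subset)  # under-approximated write subset of each memlet edge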
@@ -1111,8 +1142,7 @@ def _init_border_memlet(template_memlet: Memlet, border_memlets[node_label] = border_memlet return border_memlet - def filter_subsets(itvar: str, itrange: subsets.Range, - memlet: Memlet) -> List[subsets.Subset]: + def filter_subsets(itvar: str, itrange: subsets.Range, memlet: Memlet) -> List[subsets.Subset]: # helper method that filters out subsets that do not depend on the iteration variable # if the iteration range is symbolic @@ -1134,7 +1164,7 @@ def filter_subsets(itvar: str, itrange: subsets.Range, if rng.num_elements() == 0: return # make sure there is no break out of the loop - dominators = cfg.all_dominators(sdfg) + dominators = cfg_analysis.all_dominators(sdfg) if any(begin not in dominators[s] and not begin is s for s in loop_states): return border_memlets = defaultdict(None) @@ -1159,7 +1189,7 @@ def filter_subsets(itvar: str, itrange: subsets.Range, # collect all the subsets of the incoming memlets for the current access node for edge in edges: - inside_memlet = copy.copy(approximation_dict[edge]) + inside_memlet = copy.copy(self.approximation_dict[edge]) # filter out subsets that could become empty depending on assignments # of symbols filtered_subsets = filter_subsets( @@ -1177,35 +1207,27 @@ def filter_subsets(itvar: str, itrange: subsets.Range, self._underapproximate_writes_loop_subset(sdfg, memlets, border_memlet, sdfg.arrays[node.label], itvar, rng) - if state not in loop_write_dict: + if state not in self.loop_write_dict: continue # propagate the border memlets of nested loop - for node_label, other_border_memlet in loop_write_dict[state].items(): + for node_label, other_border_memlet in self.loop_write_dict[state].items(): # filter out subsets that could become empty depending on symbol assignments - filtered_subsets = filter_subsets( - itvar, rng, other_border_memlet) + filtered_subsets = filter_subsets(itvar, rng, other_border_memlet) if not filtered_subsets: continue - other_border_memlet.subset = subsets.SubsetUnion( - filtered_subsets) + other_border_memlet.subset = subsets.SubsetUnion(filtered_subsets) border_memlet = border_memlets.get(node_label) if border_memlet is None: - border_memlet = _init_border_memlet( - other_border_memlet, node_label) + border_memlet = _init_border_memlet(other_border_memlet, node_label) self._underapproximate_writes_loop_subset(sdfg, [other_border_memlet], border_memlet, sdfg.arrays[node_label], itvar, rng) - loop_write_dict[loop_header] = border_memlets + self.loop_write_dict[loop_header] = border_memlets - def _underapproximate_writes_loop_subset(self, - sdfg: dace.SDFG, - memlets: List[Memlet], - dst_memlet: Memlet, - arr: dace.data.Array, - itvar: str, - rng: subsets.Subset, + def _underapproximate_writes_loop_subset(self, sdfg: dace.SDFG, memlets: List[Memlet], dst_memlet: Memlet, + arr: dace.data.Array, itvar: str, rng: subsets.Subset, loop_nest_itvars: Union[Set[str], None] = None): """ Helper function that takes a list of (border) memlets, propagates them out of a @@ -1223,16 +1245,11 @@ def _underapproximate_writes_loop_subset(self, if len(memlets) > 0: params = [itvar] # get all the other iteration variables surrounding this memlet - surrounding_itvars = iteration_variables[sdfg] if sdfg in iteration_variables else set( - ) + surrounding_itvars = self.iteration_variables[sdfg] if sdfg in self.iteration_variables else set() if loop_nest_itvars: surrounding_itvars |= loop_nest_itvars - subset = self._underapproximate_subsets(memlets, - arr, - params, - rng, - use_dst=True, + subset = 
self._underapproximate_subsets(memlets, arr, params, rng, use_dst=True, surrounding_itvars=surrounding_itvars).subset if subset is None or len(subset.subset_list) == 0: @@ -1240,9 +1257,7 @@ def _underapproximate_writes_loop_subset(self, # compute the union of the ranges to merge the subsets. dst_memlet.subset = _merge_subsets(dst_memlet.subset, subset) - def _underapproximate_writes_scope(self, - sdfg: SDFG, - state: SDFGState, + def _underapproximate_writes_scope(self, sdfg: SDFG, state: SDFGState, scopes: Union[scope.ScopeTree, List[scope.ScopeTree]]): """ Propagate memlets from the given scopes outwards. @@ -1253,8 +1268,7 @@ def _underapproximate_writes_scope(self, """ # for each map scope find the iteration variables of surrounding maps - surrounding_map_vars: Dict[scope.ScopeTree, - Set[str]] = _collect_itvars_scope(scopes) + surrounding_map_vars: Dict[scope.ScopeTree, Set[str]] = _collect_itvars_scope(scopes) if isinstance(scopes, scope.ScopeTree): scopes_to_process = [scopes] else: @@ -1272,8 +1286,7 @@ def _underapproximate_writes_scope(self, sdfg, state, surrounding_map_vars) - self._underapproximate_writes_node( - state, scope_node.exit, surrounding_iteration_variables) + self._underapproximate_writes_node(state, scope_node.exit, surrounding_iteration_variables) # Add parent to next frontier next_scopes.add(scope_node.parent) scopes_to_process = next_scopes @@ -1286,9 +1299,8 @@ def _collect_iteration_variables_scope_node(self, surrounding_map_vars: Dict[scope.ScopeTree, Set[str]]) -> Set[str]: map_iteration_variables = surrounding_map_vars[ scope_node] if scope_node in surrounding_map_vars else set() - sdfg_iteration_variables = iteration_variables[ - sdfg] if sdfg in iteration_variables else set() - loop_iteration_variables = ranges_per_state[state].keys() + sdfg_iteration_variables = self.iteration_variables[sdfg] if sdfg in self.iteration_variables else set() + loop_iteration_variables = self.ranges_per_state[state].keys() surrounding_iteration_variables = (map_iteration_variables | sdfg_iteration_variables | loop_iteration_variables) @@ -1308,12 +1320,8 @@ def _underapproximate_writes_node(self, :param surrounding_itvars: Iteration variables that surround the map scope """ if isinstance(node, nodes.EntryNode): - internal_edges = [ - e for e in dfg_state.out_edges(node) if e.src_conn and e.src_conn.startswith('OUT_') - ] - external_edges = [ - e for e in dfg_state.in_edges(node) if e.dst_conn and e.dst_conn.startswith('IN_') - ] + internal_edges = [e for e in dfg_state.out_edges(node) if e.src_conn and e.src_conn.startswith('OUT_')] + external_edges = [e for e in dfg_state.in_edges(node) if e.dst_conn and e.dst_conn.startswith('IN_')] def geticonn(e): return e.src_conn[4:] @@ -1323,12 +1331,8 @@ def geteconn(e): use_dst = False else: - internal_edges = [ - e for e in dfg_state.in_edges(node) if e.dst_conn and e.dst_conn.startswith('IN_') - ] - external_edges = [ - e for e in dfg_state.out_edges(node) if e.src_conn and e.src_conn.startswith('OUT_') - ] + internal_edges = [e for e in dfg_state.in_edges(node) if e.dst_conn and e.dst_conn.startswith('IN_')] + external_edges = [e for e in dfg_state.out_edges(node) if e.src_conn and e.src_conn.startswith('OUT_')] def geticonn(e): return e.dst_conn[3:] @@ -1339,21 +1343,17 @@ def geteconn(e): use_dst = True for edge in external_edges: - if approximation_dict[edge].is_empty(): + if self.approximation_dict[edge].is_empty(): new_memlet = Memlet() else: internal_edge = next( e for e in internal_edges if geticonn(e) == geteconn(edge)) 
- aligned_memlet = self._align_memlet( - dfg_state, internal_edge, dst=use_dst) - new_memlet = self._underapproximate_memlets(dfg_state, - aligned_memlet, - node, - True, - connector=geteconn( - edge), + aligned_memlet = self._align_memlet(dfg_state, internal_edge, dst=use_dst) + new_memlet = self._underapproximate_memlets(dfg_state, aligned_memlet, node, True, + connector=geteconn(edge), surrounding_itvars=surrounding_itvars) - approximation_dict[edge] = new_memlet + new_memlet._edge = edge + self.approximation_dict[edge] = new_memlet def _align_memlet(self, state: SDFGState, @@ -1373,16 +1373,16 @@ def _align_memlet(self, is_src = edge.data._is_data_src # Memlet is already aligned if is_src is None or (is_src and not dst) or (not is_src and dst): - res = approximation_dict[edge] + res = self.approximation_dict[edge] return res # Data<->Code memlets always have one data container mpath = state.memlet_path(edge) if not isinstance(mpath[0].src, AccessNode) or not isinstance(mpath[-1].dst, AccessNode): - return approximation_dict[edge] + return self.approximation_dict[edge] # Otherwise, find other data container - result = copy.deepcopy(approximation_dict[edge]) + result = copy.deepcopy(self.approximation_dict[edge]) if dst: node = mpath[-1].dst else: @@ -1390,8 +1390,8 @@ def _align_memlet(self, # Fix memlet fields result.data = node.data - result.subset = approximation_dict[edge].other_subset - result.other_subset = approximation_dict[edge].subset + result.subset = self.approximation_dict[edge].other_subset + result.other_subset = self.approximation_dict[edge].subset result._is_data_src = not is_src return result @@ -1448,9 +1448,9 @@ def _underapproximate_memlets(self, # and union their subsets if union_inner_edges: aggdata = [ - approximation_dict[e] + self.approximation_dict[e] for e in neighboring_edges - if approximation_dict[e].data == memlet.data and approximation_dict[e] != memlet + if self.approximation_dict[e].data == memlet.data and self.approximation_dict[e] != memlet ] else: aggdata = [] @@ -1459,8 +1459,7 @@ def _underapproximate_memlets(self, if arr is None: if memlet.data not in sdfg.arrays: - raise KeyError('Data descriptor (Array, Stream) "%s" not defined in SDFG.' % - memlet.data) + raise KeyError('Data descriptor (Array, Stream) "%s" not defined in SDFG.' % memlet.data) # FIXME: A memlet alone (without an edge) cannot figure out whether it is data<->data or data<->code # so this test cannot be used diff --git a/dace/sdfg/propagation.py b/dace/sdfg/propagation.py index 1c038dd2e4..f62bb6eb58 100644 --- a/dace/sdfg/propagation.py +++ b/dace/sdfg/propagation.py @@ -4,21 +4,22 @@ from internal memory accesses and scope ranges). 
""" -from collections import deque import copy -from dace.symbolic import issymbolic, pystr_to_symbolic, simplify -import itertools import functools +import itertools +import warnings +from collections import deque +from typing import List, Set + import sympy -from sympy import ceiling, Symbol +from sympy import Symbol, ceiling from sympy.concrete.summations import Sum -import warnings -import networkx as nx -from dace import registry, subsets, symbolic, dtypes, data +from dace import data, dtypes, registry, subsets, symbolic from dace.memlet import Memlet -from dace.sdfg import nodes, graph as gr -from typing import List, Set +from dace.sdfg import graph as gr +from dace.sdfg import nodes +from dace.symbolic import issymbolic, pystr_to_symbolic, simplify @registry.make_registry @@ -61,17 +62,17 @@ def can_be_applied(self, expressions, variable_context, node_range, orig_edges): for rb, re, rs in node_range]) for dim in range(data_dims): - dexprs = [] for expr in expressions: - if isinstance(expr[dim], symbolic.SymExpr): - dexprs.append(expr[dim].approx) - elif isinstance(expr[dim], tuple): - dexprs.append((expr[dim][0].approx if isinstance(expr[dim][0], symbolic.SymExpr) else expr[dim][0], - expr[dim][1].approx if isinstance(expr[dim][1], symbolic.SymExpr) else expr[dim][1], - expr[dim][2].approx if isinstance(expr[dim][2], symbolic.SymExpr) else expr[dim][2])) + expr_dim = expr[dim] + if isinstance(expr_dim, symbolic.SymExpr): + dexprs.append(expr_dim.approx) + elif isinstance(expr_dim, tuple): + dexprs.append((expr_dim[0].approx if isinstance(expr_dim[0], symbolic.SymExpr) else expr_dim[0], + expr_dim[1].approx if isinstance(expr_dim[1], symbolic.SymExpr) else expr_dim[1], + expr_dim[2].approx if isinstance(expr_dim[2], symbolic.SymExpr) else expr_dim[2])) else: - dexprs.append(expr[dim]) + dexprs.append(expr_dim) for pattern_class in SeparableMemletPattern.extensions().keys(): smpattern = pattern_class() @@ -93,15 +94,16 @@ def propagate(self, array, expressions, node_range): dexprs = [] for expr in expressions: - if isinstance(expr[i], symbolic.SymExpr): - dexprs.append(expr[i].approx) - elif isinstance(expr[i], tuple): - dexprs.append((expr[i][0].approx if isinstance(expr[i][0], symbolic.SymExpr) else expr[i][0], - expr[i][1].approx if isinstance(expr[i][1], symbolic.SymExpr) else expr[i][1], - expr[i][2].approx if isinstance(expr[i][2], symbolic.SymExpr) else expr[i][2], + expr_i = expr[i] + if isinstance(expr_i, symbolic.SymExpr): + dexprs.append(expr_i.approx) + elif isinstance(expr_i, tuple): + dexprs.append((expr_i[0].approx if isinstance(expr_i[0], symbolic.SymExpr) else expr_i[0], + expr_i[1].approx if isinstance(expr_i[1], symbolic.SymExpr) else expr_i[1], + expr_i[2].approx if isinstance(expr_i[2], symbolic.SymExpr) else expr_i[2], expr.tile_sizes[i])) else: - dexprs.append(expr[i]) + dexprs.append(expr_i) result[i] = smpattern.propagate(array, dexprs, overapprox_range) @@ -569,8 +571,8 @@ def _annotate_loop_ranges(sdfg, unannotated_cycle_states): """ # We import here to avoid cyclic imports. - from dace.transformation.interstate.loop_detection import find_for_loop from dace.sdfg import utils as sdutils + from dace.transformation.interstate.loop_detection import find_for_loop condition_edges = {} @@ -739,8 +741,8 @@ def propagate_states(sdfg, concretize_dynamic_unbounded=False) -> None: # We import here to avoid cyclic imports. 
from dace.sdfg import InterstateEdge - from dace.transformation.helpers import split_interstate_edges from dace.sdfg.analysis import cfg + from dace.transformation.helpers import split_interstate_edges # Reset the state edge annotations (which may have changed due to transformations) reset_state_annotations(sdfg) diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index 8d443e6beb..2ae6109b31 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -2987,35 +2987,52 @@ class LoopRegion(ControlFlowRegion): inverted = Property(dtype=bool, default=False, desc='If True, the loop condition is checked after the first iteration.') + update_before_condition = Property(dtype=bool, + default=True, + desc='If False, the loop condition is checked before the update statement is' + + ' executed. This only applies to inverted loops, turning them from a typical ' + + 'do-while style into a while(true) with a break before the update (at the end ' + + 'of an iteration) if the condition no longer holds.') loop_variable = Property(dtype=str, default='', desc='The loop variable, if given') def __init__(self, label: str, - condition_expr: Optional[str] = None, + condition_expr: Optional[Union[str, CodeBlock]] = None, loop_var: Optional[str] = None, - initialize_expr: Optional[str] = None, - update_expr: Optional[str] = None, + initialize_expr: Optional[Union[str, CodeBlock]] = None, + update_expr: Optional[Union[str, CodeBlock]] = None, inverted: bool = False, - sdfg: Optional['SDFG'] = None): + sdfg: Optional['SDFG'] = None, + update_before_condition = True): super(LoopRegion, self).__init__(label, sdfg) if initialize_expr is not None: - self.init_statement = CodeBlock(initialize_expr) + if isinstance(initialize_expr, CodeBlock): + self.init_statement = initialize_expr + else: + self.init_statement = CodeBlock(initialize_expr) else: self.init_statement = None if condition_expr: - self.loop_condition = CodeBlock(condition_expr) + if isinstance(condition_expr, CodeBlock): + self.loop_condition = condition_expr + else: + self.loop_condition = CodeBlock(condition_expr) else: self.loop_condition = CodeBlock('True') if update_expr is not None: - self.update_statement = CodeBlock(update_expr) + if isinstance(update_expr, CodeBlock): + self.update_statement = update_expr + else: + self.update_statement = CodeBlock(update_expr) else: self.update_statement = None self.loop_variable = loop_var or '' self.inverted = inverted + self.update_before_condition = update_before_condition def inline(self) -> Tuple[bool, Any]: """ @@ -3234,7 +3251,12 @@ def __repr__(self) -> str: @property def branches(self) -> List[Tuple[Optional[CodeBlock], ControlFlowRegion]]: return self._branches - + + def add_branch(self, condition: Optional[CodeBlock], branch: ControlFlowRegion): + self._branches.append([condition, branch]) + branch.parent_graph = self.parent_graph + branch.sdfg = self.sdfg + def nodes(self) -> List['ControlFlowBlock']: return [node for _, node in self._branches if node is not None] diff --git a/dace/transformation/helpers.py b/dace/transformation/helpers.py index 74a3d2ee12..6ca4602079 100644 --- a/dace/transformation/helpers.py +++ b/dace/transformation/helpers.py @@ -379,7 +379,7 @@ def nest_state_subgraph(sdfg: SDFG, SDFG. :raise ValueError: The subgraph is contained in more than one scope. 
""" - if state.parent != sdfg: + if state.sdfg != sdfg: raise KeyError('State does not belong to given SDFG') if subgraph is not state and subgraph.graph is not state: raise KeyError('Subgraph does not belong to given state') @@ -433,7 +433,7 @@ def nest_state_subgraph(sdfg: SDFG, # top-level graph) data_in_subgraph = set(n.data for n in subgraph.nodes() if isinstance(n, nodes.AccessNode)) # Find other occurrences in SDFG - other_nodes = set(n.data for s in sdfg.nodes() for n in s.nodes() + other_nodes = set(n.data for s in sdfg.states() for n in s.nodes() if isinstance(n, nodes.AccessNode) and n not in subgraph.nodes()) subgraph_transients = set() for data in data_in_subgraph: diff --git a/dace/transformation/interstate/loop_detection.py b/dace/transformation/interstate/loop_detection.py index 93c2f6ea1c..8081447132 100644 --- a/dace/transformation/interstate/loop_detection.py +++ b/dace/transformation/interstate/loop_detection.py @@ -1,9 +1,9 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. """ Loop detection transformation """ import sympy as sp import networkx as nx -from typing import AnyStr, Optional, Tuple, List, Set +from typing import AnyStr, Iterable, Optional, Tuple, List, Set from dace import sdfg as sd, symbolic from dace.sdfg import graph as gr, utils as sdutil, InterstateEdge @@ -29,6 +29,9 @@ class DetectLoop(transformation.PatternTransformation): # Available for rotated and self loops entry_state = transformation.PatternNode(sd.SDFGState) + # Available for explicit-latch rotated loops + loop_break = transformation.PatternNode(sd.SDFGState) + @classmethod def expressions(cls): # Case 1: Loop with one state @@ -69,7 +72,46 @@ def expressions(cls): ssdfg.add_edge(cls.loop_begin, cls.loop_begin, sd.InterstateEdge()) ssdfg.add_edge(cls.loop_begin, cls.exit_state, sd.InterstateEdge()) - return [sdfg, msdfg, rsdfg, rmsdfg, ssdfg] + # Case 6: Rotated multi-state loop with explicit exiting and latch states + mlrmsdfg = gr.OrderedDiGraph() + mlrmsdfg.add_nodes_from([cls.entry_state, cls.loop_break, cls.loop_latch, cls.loop_begin, cls.exit_state]) + mlrmsdfg.add_edge(cls.entry_state, cls.loop_begin, sd.InterstateEdge()) + mlrmsdfg.add_edge(cls.loop_latch, cls.loop_begin, sd.InterstateEdge()) + mlrmsdfg.add_edge(cls.loop_break, cls.exit_state, sd.InterstateEdge()) + mlrmsdfg.add_edge(cls.loop_break, cls.loop_latch, sd.InterstateEdge()) + + # Case 7: Rotated single-state loop with explicit exiting and latch states + mlrsdfg = gr.OrderedDiGraph() + mlrsdfg.add_nodes_from([cls.entry_state, cls.loop_latch, cls.loop_begin, cls.exit_state]) + mlrsdfg.add_edge(cls.entry_state, cls.loop_begin, sd.InterstateEdge()) + mlrsdfg.add_edge(cls.loop_latch, cls.loop_begin, sd.InterstateEdge()) + mlrsdfg.add_edge(cls.loop_begin, cls.exit_state, sd.InterstateEdge()) + mlrsdfg.add_edge(cls.loop_begin, cls.loop_latch, sd.InterstateEdge()) + + # Case 8: Guarded rotated multi-state loop with explicit exiting and latch states (modification of case 6) + gmlrmsdfg = gr.OrderedDiGraph() + gmlrmsdfg.add_nodes_from([cls.entry_state, cls.loop_break, cls.loop_latch, cls.loop_begin, cls.exit_state]) + gmlrmsdfg.add_edge(cls.entry_state, cls.loop_begin, sd.InterstateEdge()) + gmlrmsdfg.add_edge(cls.loop_latch, cls.loop_begin, sd.InterstateEdge()) + gmlrmsdfg.add_edge(cls.loop_begin, cls.loop_break, sd.InterstateEdge()) + gmlrmsdfg.add_edge(cls.loop_break, cls.exit_state, sd.InterstateEdge()) + 
gmlrmsdfg.add_edge(cls.loop_break, cls.loop_latch, sd.InterstateEdge()) + + return [sdfg, msdfg, rsdfg, rmsdfg, ssdfg, mlrmsdfg, mlrsdfg, gmlrmsdfg] + + @property + def inverted(self) -> bool: + """ + Whether the loop matched a pattern of an inverted (do-while style) loop. + """ + return self.expr_index in (2, 3, 5, 6, 7) + + @property + def first_loop_block(self) -> ControlFlowBlock: + """ + The first control flow block executed in each loop iteration. + """ + return self.loop_guard if self.expr_index <= 1 else self.loop_begin def can_be_applied(self, graph: ControlFlowRegion, @@ -77,19 +119,26 @@ def can_be_applied(self, sdfg: sd.SDFG, permissive: bool = False) -> bool: if expr_index == 0: - return self.detect_loop(graph, False) is not None + return self.detect_loop(graph, multistate_loop=False, accept_missing_itvar=permissive) is not None elif expr_index == 1: - return self.detect_loop(graph, True) is not None + return self.detect_loop(graph, multistate_loop=True, accept_missing_itvar=permissive) is not None elif expr_index == 2: - return self.detect_rotated_loop(graph, False) is not None + return self.detect_rotated_loop(graph, multistate_loop=False, accept_missing_itvar=permissive) is not None elif expr_index == 3: - return self.detect_rotated_loop(graph, True) is not None + return self.detect_rotated_loop(graph, multistate_loop=True, accept_missing_itvar=permissive) is not None elif expr_index == 4: - return self.detect_self_loop(graph) is not None + return self.detect_self_loop(graph, accept_missing_itvar=permissive) is not None + elif expr_index in (5, 7): + return self.detect_rotated_loop(graph, multistate_loop=True, accept_missing_itvar=permissive, + separate_latch=True) is not None + elif expr_index == 6: + return self.detect_rotated_loop(graph, multistate_loop=False, accept_missing_itvar=permissive, + separate_latch=True) is not None raise ValueError(f'Invalid expression index {expr_index}') - def detect_loop(self, graph: ControlFlowRegion, multistate_loop: bool) -> Optional[str]: + def detect_loop(self, graph: ControlFlowRegion, multistate_loop: bool, + accept_missing_itvar: bool = False) -> Optional[str]: """ Detects a loop of the form: @@ -159,13 +208,19 @@ def detect_loop(self, graph: ControlFlowRegion, multistate_loop: bool) -> Option # The backedge must reassign the iteration variable itvar &= backedge.data.assignments.keys() if len(itvar) != 1: - # Either no consistent iteration variable found, or too many - # consistent iteration variables found - return None + if not accept_missing_itvar: + # Either no consistent iteration variable found, or too many consistent iteration variables found + return None + else: + if len(itvar) == 0: + return '' + else: + return None return next(iter(itvar)) - def detect_rotated_loop(self, graph: ControlFlowRegion, multistate_loop: bool) -> Optional[str]: + def detect_rotated_loop(self, graph: ControlFlowRegion, multistate_loop: bool, + accept_missing_itvar: bool = False, separate_latch: bool = False) -> Optional[str]: """ Detects a loop of the form: @@ -181,6 +236,9 @@ def detect_rotated_loop(self, graph: ControlFlowRegion, multistate_loop: bool) - :return: The loop variable or ``None`` if not detected. 
""" latch = self.loop_latch + ltest = self.loop_latch + if separate_latch: + ltest = self.loop_break if multistate_loop else self.loop_begin begin = self.loop_begin # A for-loop start has at least two incoming edges (init and increment) @@ -188,18 +246,14 @@ def detect_rotated_loop(self, graph: ControlFlowRegion, multistate_loop: bool) - if len(begin_inedges) < 2: return None # A for-loop latch only has two outgoing edges (loop condition and exit-loop) - latch_outedges = graph.out_edges(latch) + latch_outedges = graph.out_edges(ltest) if len(latch_outedges) != 2: return None - # All incoming edges to the start of the loop must set the same variable - itvar = None - for iedge in begin_inedges: - if itvar is None: - itvar = set(iedge.data.assignments.keys()) - else: - itvar &= iedge.data.assignments.keys() - if itvar is None: + # A for-loop latch can further only have one incoming edge (the increment edge). A while-loop, i.e., a loop + # with no explicit iteration variable, may have more than that. + latch_inedges = graph.in_edges(latch) + if not accept_missing_itvar and len(latch_inedges) != 1: return None # Outgoing edges must be a negation of each other @@ -208,8 +262,13 @@ def detect_rotated_loop(self, graph: ControlFlowRegion, multistate_loop: bool) - # All nodes inside loop must be dominated by loop start dominators = nx.dominance.immediate_dominators(graph.nx, graph.start_block) - loop_nodes = list(sdutil.dfs_conditional(graph, sources=[begin], condition=lambda _, child: child != latch)) - loop_nodes += [latch] + if begin is ltest: + loop_nodes = [begin] + else: + loop_nodes = list(sdutil.dfs_conditional(graph, sources=[begin], condition=lambda _, child: child != ltest)) + loop_nodes.append(latch) + if ltest is not latch and ltest is not begin: + loop_nodes.append(ltest) backedge = None for node in loop_nodes: for e in graph.out_edges(node): @@ -231,16 +290,9 @@ def detect_rotated_loop(self, graph: ControlFlowRegion, multistate_loop: bool) - if backedge is None: return None - # The backedge must reassign the iteration variable - itvar &= backedge.data.assignments.keys() - if len(itvar) != 1: - # Either no consistent iteration variable found, or too many - # consistent iteration variables found - return None + return rotated_loop_find_itvar(begin_inedges, latch_inedges, backedge, ltest, accept_missing_itvar)[0] - return next(iter(itvar)) - - def detect_self_loop(self, graph: ControlFlowRegion) -> Optional[str]: + def detect_self_loop(self, graph: ControlFlowRegion, accept_missing_itvar: bool = False) -> Optional[str]: """ Detects a loop of the form: @@ -288,9 +340,14 @@ def detect_self_loop(self, graph: ControlFlowRegion) -> Optional[str]: # The backedge must reassign the iteration variable itvar &= backedge.data.assignments.keys() if len(itvar) != 1: - # Either no consistent iteration variable found, or too many - # consistent iteration variables found - return None + if not accept_missing_itvar: + # Either no consistent iteration variable found, or too many consistent iteration variables found + return None + else: + if len(itvar) == 0: + return '' + else: + return None return next(iter(itvar)) @@ -310,9 +367,10 @@ def loop_information( if self.expr_index <= 1: guard = self.loop_guard return find_for_loop(guard.parent_graph, guard, entry, itervar) - elif self.expr_index in (2, 3): + elif self.expr_index in (2, 3, 5, 6, 7): latch = self.loop_latch - return find_rotated_for_loop(latch.parent_graph, latch, entry, itervar) + return find_rotated_for_loop(latch.parent_graph, latch, entry, 
itervar,
+                                         separate_latch=(self.expr_index in (5, 6, 7)))
         elif self.expr_index == 4:
             return find_rotated_for_loop(entry.parent_graph, entry, entry, itervar)
 
@@ -334,6 +392,14 @@ def loop_body(self) -> List[ControlFlowBlock]:
             return loop_nodes
         elif self.expr_index == 4:
             return [begin]
+        elif self.expr_index in (5, 7):
+            ltest = self.loop_break
+            latch = self.loop_latch
+            loop_nodes = list(sdutil.dfs_conditional(graph, sources=[begin], condition=lambda _, child: child != ltest))
+            loop_nodes += [ltest, latch]
+            return loop_nodes
+        elif self.expr_index == 6:
+            return [begin, self.loop_latch]
 
         return []
 
@@ -343,8 +409,10 @@ def loop_meta_states(self) -> List[ControlFlowBlock]:
         """
         if self.expr_index in (0, 1):
             return [self.loop_guard]
-        if self.expr_index in (2, 3):
+        if self.expr_index in (2, 3, 6):
             return [self.loop_latch]
+        if self.expr_index in (5, 7):
+            return [self.loop_break, self.loop_latch]
         return []
 
     def loop_init_edge(self) -> gr.Edge[InterstateEdge]:
@@ -357,7 +425,7 @@ def loop_init_edge(self) -> gr.Edge[InterstateEdge]:
             guard = self.loop_guard
             body = self.loop_body()
             return next(e for e in graph.in_edges(guard) if e.src not in body)
-        elif self.expr_index in (2, 3):
+        elif self.expr_index in (2, 3, 5, 6, 7):
             latch = self.loop_latch
             return next(e for e in graph.in_edges(begin) if e.src is not latch)
         elif self.expr_index == 4:
@@ -377,9 +445,12 @@ def loop_exit_edge(self) -> gr.Edge[InterstateEdge]:
         elif self.expr_index in (2, 3):
             latch = self.loop_latch
             return graph.edges_between(latch, exitstate)[0]
-        elif self.expr_index == 4:
+        elif self.expr_index in (4, 6):
             begin = self.loop_begin
             return graph.edges_between(begin, exitstate)[0]
+        elif self.expr_index in (5, 7):
+            ltest = self.loop_break
+            return graph.edges_between(ltest, exitstate)[0]
 
         raise ValueError(f'Invalid expression index {self.expr_index}')
 
@@ -398,6 +469,10 @@ def loop_condition_edge(self) -> gr.Edge[InterstateEdge]:
         elif self.expr_index == 4:
             begin = self.loop_begin
             return graph.edges_between(begin, begin)[0]
+        elif self.expr_index in (5, 6, 7):
+            latch = self.loop_latch
+            ltest = self.loop_break if self.expr_index in (5, 7) else self.loop_begin
+            return graph.edges_between(ltest, latch)[0]
 
         raise ValueError(f'Invalid expression index {self.expr_index}')
 
@@ -411,15 +486,93 @@ def loop_increment_edge(self) -> gr.Edge[InterstateEdge]:
             guard = self.loop_guard
             body = self.loop_body()
             return next(e for e in graph.in_edges(guard) if e.src in body)
-        elif self.expr_index in (2, 3):
-            body = self.loop_body()
-            return next(e for e in graph.in_edges(begin) if e.src in body)
+        elif self.expr_index in (2, 3, 5, 6, 7):
+            _, step_edge = rotated_loop_find_itvar(graph.in_edges(begin), graph.in_edges(self.loop_latch),
+                                                   graph.edges_between(self.loop_latch, begin)[0], self.loop_latch)
+            return step_edge
         elif self.expr_index == 4:
             return graph.edges_between(begin, begin)[0]
 
         raise ValueError(f'Invalid expression index {self.expr_index}')
 
 
+def rotated_loop_find_itvar(begin_inedges: List[gr.Edge[InterstateEdge]],
+                            latch_inedges: List[gr.Edge[InterstateEdge]],
+                            backedge: gr.Edge[InterstateEdge], latch: ControlFlowBlock,
+                            accept_missing_itvar: bool = False) -> Tuple[Optional[str],
+                                                                         Optional[gr.Edge[InterstateEdge]]]:
+    # The iteration variable must be assigned (initialized) on all edges leading into the beginning block, which
+    # are not the backedge. Gather all variables for which that holds - they are all candidates for the iteration
+    # variable (Phase 1). 
Said iteration variable must then be incremented: + # EITHER: On the backedge, in which case the increment is only executed if the loop does not exit. This + # corresponds to a while(true) loop that checks the condition at the end of the loop body and breaks + # if it does not hold before incrementing. (Scenario 1) + # OR: On the edge(s) leading into the latch, in which case the increment is executed BEFORE the condition is + # checked - which corresponds to a do-while loop. (Scenario 2) + # For either case, the iteration variable may only be incremented on one of these places. Filter the candidates + # down to each variable for which this condition holds (Phase 2). If there is exactly one candidate remaining, + # that is the iteration variable. Otherwise it cannot be determined. + + # Phase 1: Gather iteration variable candidates. + itvar_candidates = None + for e in begin_inedges: + if e is backedge: + continue + if itvar_candidates is None: + itvar_candidates = set(e.data.assignments.keys()) + else: + itvar_candidates &= set(e.data.assignments.keys()) + + # Phase 2: Filter down the candidates according to incrementation edges. + step_edge = None + filtered_candidates = set() + backedge_incremented = set(backedge.data.assignments.keys()) + latch_incremented = None + if backedge.src is not backedge.dst: + # If this is a self loop, there are no edges going into the latch to be considered. The only incoming edges are + # from outside the loop. + for e in latch_inedges: + if e is backedge: + continue + if latch_incremented is None: + latch_incremented = set(e.data.assignments.keys()) + else: + latch_incremented &= set(e.data.assignments.keys()) + if latch_incremented is None: + latch_incremented = set() + for cand in itvar_candidates: + if cand in backedge_incremented: + # Scenario 1. + + # Note, only allow this scenario if the backedge leads directly from the latch to the entry, i.e., there is + # no intermediate block on the backedge path. + if backedge.src is not latch: + continue + + if cand not in latch_incremented: + filtered_candidates.add(cand) + elif cand in latch_incremented: + # Scenario 2. + if cand not in backedge_incremented: + filtered_candidates.add(cand) + if len(filtered_candidates) != 1: + if not accept_missing_itvar: + # Either no consistent iteration variable found, or too many consistent iteration variables found + return None, None + else: + if len(filtered_candidates) == 0: + return '', None + else: + return None, None + else: + itvar = next(iter(filtered_candidates)) + if itvar in backedge_incremented: + step_edge = backedge + elif len(latch_inedges) == 1: + step_edge = latch_inedges[0] + return itvar, step_edge + + def find_for_loop( graph: ControlFlowRegion, guard: sd.SDFGState, @@ -520,6 +673,10 @@ def find_for_loop( match = condition.match(itersym >= a) if match: end = match[a] + if end is None: + match = condition.match(sp.Ne(itersym + stride, a)) + if match: + end = match[a] - stride if end is None: # No match found return None @@ -531,14 +688,14 @@ def find_rotated_for_loop( graph: ControlFlowRegion, latch: sd.SDFGState, entry: sd.SDFGState, - itervar: Optional[str] = None + itervar: Optional[str] = None, + separate_latch: bool = False, ) -> Optional[Tuple[AnyStr, Tuple[symbolic.SymbolicType, symbolic.SymbolicType, symbolic.SymbolicType], Tuple[ List[sd.SDFGState], sd.SDFGState]]]: """ Finds rotated loop range from state machine. - :param latch: State from which the outgoing edges detect whether to exit - the loop or not. 
+ :param latch: State from which the outgoing edges detect whether to reenter the loop or not. :param entry: First state in the loop body. :param itervar: An optional field that overrides the analyzed iteration variable. :return: (iteration variable, (start, end, stride), @@ -547,20 +704,19 @@ def find_rotated_for_loop( """ # Extract state transition edge information entry_inedges = graph.in_edges(entry) - condition_edge = graph.edges_between(latch, entry)[0] - - # All incoming edges to the loop entry must set the same variable + if separate_latch: + condition_edge = graph.in_edges(latch)[0] + backedge = graph.edges_between(latch, entry)[0] + else: + condition_edge = graph.edges_between(latch, entry)[0] + backedge = condition_edge + latch_inedges = graph.in_edges(latch) + + self_loop = latch is entry + step_edge = None if itervar is None: - itervars = None - for iedge in entry_inedges: - if itervars is None: - itervars = set(iedge.data.assignments.keys()) - else: - itervars &= iedge.data.assignments.keys() - if itervars and len(itervars) == 1: - itervar = next(iter(itervars)) - else: - # Ambiguous or no iteration variable + itervar, step_edge = rotated_loop_find_itvar(entry_inedges, latch_inedges, backedge, latch) + if itervar is None: return None condition = condition_edge.data.condition_sympy() @@ -570,18 +726,12 @@ def find_rotated_for_loop( # have one assignment. init_edges = [] init_assignment = None - step_edge = None itersym = symbolic.symbol(itervar) for iedge in entry_inedges: + if iedge is condition_edge: + continue assignment = iedge.data.assignments[itervar] - if itersym in symbolic.pystr_to_symbolic(assignment).free_symbols: - if step_edge is None: - step_edge = iedge - else: - # More than one edge with the iteration variable as a free - # symbol, which is not legal. Invalid for loop. - return None - else: + if itersym not in symbolic.pystr_to_symbolic(assignment).free_symbols: if init_assignment is None: init_assignment = assignment init_edges.append(iedge) @@ -591,10 +741,16 @@ def find_rotated_for_loop( return None else: init_edges.append(iedge) - if step_edge is None or len(init_edges) == 0 or init_assignment is None: + if len(init_edges) == 0 or init_assignment is None: # Less than two assignment variations, can't be a valid for loop. return None + if self_loop: + step_edge = condition_edge + else: + if step_edge is None: + return None + # Get the init expression and the stride. start = symbolic.pystr_to_symbolic(init_assignment) stride = (symbolic.pystr_to_symbolic(step_edge.data.assignments[itervar]) - itersym) @@ -626,6 +782,10 @@ def find_rotated_for_loop( match = condition.match(itersym >= a) if match: end = match[a] + if end is None: + match = condition.match(sp.Ne(itersym + stride, a)) + if match: + end = match[a] - stride if end is None: # No match found return None diff --git a/dace/transformation/interstate/loop_lifting.py b/dace/transformation/interstate/loop_lifting.py new file mode 100644 index 0000000000..072c2519ed --- /dev/null +++ b/dace/transformation/interstate/loop_lifting.py @@ -0,0 +1,99 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. 
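In practice, the new transformation is meant to be applied exhaustively so that every detectable state-machine loop (including while-style loops without an iteration variable, thanks to the permissive detection above) is replaced by a ``LoopRegion``. A brief usage sketch, assuming an already-constructed SDFG ``sdfg``:

    from dace.transformation.interstate.loop_lifting import LoopLifting

    # Lift all recognizable loops; repeats until no more matches are found.
    sdfg.apply_transformations_repeated([LoopLifting])
    # Each lifted loop now appears as a control-flow block roughly equivalent to:
    #   LoopRegion('loop_<first-state>', condition_expr='i < N', loop_var='i',
    #              initialize_expr='i = 0', update_expr='i = (i + 1)')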
+ +from dace import properties +from dace.sdfg.sdfg import SDFG, InterstateEdge +from dace.sdfg.state import ControlFlowRegion, LoopRegion +from dace.transformation import transformation +from dace.transformation.interstate.loop_detection import DetectLoop + + +@properties.make_properties +@transformation.experimental_cfg_block_compatible +class LoopLifting(DetectLoop, transformation.MultiStateTransformation): + + def can_be_applied(self, graph: transformation.ControlFlowRegion, expr_index: int, sdfg: transformation.SDFG, + permissive: bool = False) -> bool: + # Check loop detection with permissive = True, which allows loops where no iteration variable could be detected. + # We want this to detect while loops. + if not super().can_be_applied(graph, expr_index, sdfg, permissive=True): + return False + + # Check that there's a condition edge, that's the only requirement to lift it into loop. + cond_edge = self.loop_condition_edge() + if not cond_edge or cond_edge.data.condition is None: + return False + return True + + def apply(self, graph: ControlFlowRegion, sdfg: SDFG): + first_state = self.first_loop_block + after = self.exit_state + + loop_info = self.loop_information() + + body = self.loop_body() + meta = self.loop_meta_states() + full_body = set(body) + full_body.update(meta) + cond_edge = self.loop_condition_edge() + incr_edge = self.loop_increment_edge() + inverted = self.inverted + init_edge = self.loop_init_edge() + exit_edge = self.loop_exit_edge() + + label = 'loop_' + first_state.label + if loop_info is None: + itvar = None + init_expr = None + incr_expr = None + else: + incr_expr = f'{loop_info[0]} = {incr_edge.data.assignments[loop_info[0]]}' + init_expr = f'{loop_info[0]} = {init_edge.data.assignments[loop_info[0]]}' + itvar = loop_info[0] + + left_over_assignments = {} + for k in init_edge.data.assignments.keys(): + if k != itvar: + left_over_assignments[k] = init_edge.data.assignments[k] + left_over_incr_assignments = {} + if incr_edge is not None: + for k in incr_edge.data.assignments.keys(): + if k != itvar: + left_over_incr_assignments[k] = incr_edge.data.assignments[k] + + if inverted and incr_edge is cond_edge: + update_before_condition = False + else: + update_before_condition = True + + loop = LoopRegion(label, condition_expr=cond_edge.data.condition, loop_var=itvar, initialize_expr=init_expr, + update_expr=incr_expr, inverted=inverted, sdfg=sdfg, + update_before_condition=update_before_condition) + + graph.add_node(loop) + graph.add_edge(init_edge.src, loop, + InterstateEdge(condition=init_edge.data.condition, assignments=left_over_assignments)) + graph.add_edge(loop, after, InterstateEdge(assignments=exit_edge.data.assignments)) + + loop.add_node(first_state, is_start_block=True) + added = set() + for e in graph.all_edges(*full_body): + if e.src in full_body and e.dst in full_body: + if not e in added: + added.add(e) + if e is incr_edge: + if left_over_incr_assignments != {}: + dst = loop.add_state(label + '_tail') if not inverted else e.dst + loop.add_edge(e.src, dst, InterstateEdge(assignments=left_over_incr_assignments)) + elif e is cond_edge: + if not inverted: + e.data.condition = properties.CodeBlock('1') + loop.add_edge(e.src, e.dst, e.data) + else: + loop.add_edge(e.src, e.dst, e.data) + + # Remove old loop. 
+ for n in full_body: + graph.remove_node(n) + + sdfg.root_sdfg.using_experimental_blocks = True + sdfg.reset_cfg_list() diff --git a/dace/transformation/pass_pipeline.py b/dace/transformation/pass_pipeline.py index 494f9c39ae..9a8154df90 100644 --- a/dace/transformation/pass_pipeline.py +++ b/dace/transformation/pass_pipeline.py @@ -29,7 +29,8 @@ class Modifies(Flag): Memlets = auto() #: Memlets' existence, contents, or properties were modified Nodes = AccessNodes | Scopes | Tasklets | NestedSDFGs #: Modification of any dataflow node (contained in an SDFG state) was made Edges = InterstateEdges | Memlets #: Any edge (memlet or inter-state) was modified - Everything = Descriptors | Symbols | States | InterstateEdges | Nodes | Memlets #: Modification to arbitrary parts of SDFGs (nodes, edges, or properties) + CFG = States | InterstateEdges #: A CFG (any level) was modified (connectivity or number of control flow blocks, but not their contents) + Everything = Descriptors | Symbols | CFG | Nodes | Memlets #: Modification to arbitrary parts of SDFGs (nodes, edges, or properties) @properties.make_properties diff --git a/dace/transformation/passes/analysis/__init__.py b/dace/transformation/passes/analysis/__init__.py new file mode 100644 index 0000000000..5bc1f6e3f3 --- /dev/null +++ b/dace/transformation/passes/analysis/__init__.py @@ -0,0 +1 @@ +from .analysis import * diff --git a/dace/transformation/passes/analysis.py b/dace/transformation/passes/analysis/analysis.py similarity index 81% rename from dace/transformation/passes/analysis.py rename to dace/transformation/passes/analysis/analysis.py index c8bb0b7a9c..095319f807 100644 --- a/dace/transformation/passes/analysis.py +++ b/dace/transformation/passes/analysis/analysis.py @@ -1,7 +1,8 @@ -# Copyright 2019-2022 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. from collections import defaultdict -from dace.transformation import pass_pipeline as ppl +from dace.sdfg.state import ControlFlowBlock, ControlFlowRegion, LoopRegion +from dace.transformation import pass_pipeline as ppl, transformation from dace import SDFG, SDFGState, properties, InterstateEdge, Memlet, data as dt, symbolic from dace.sdfg.graph import Edge from dace.sdfg import nodes as nd @@ -16,6 +17,7 @@ @properties.make_properties +@transformation.experimental_cfg_block_compatible class StateReachability(ppl.Pass): """ Evaluates state reachability (which other states can be executed after each state). @@ -28,25 +30,106 @@ def modifies(self) -> ppl.Modifies: def should_reapply(self, modified: ppl.Modifies) -> bool: # If anything was modified, reapply - return modified & ppl.Modifies.States + return modified & ppl.Modifies.CFG + + def depends_on(self): + return {ControlFlowBlockReachability} - def apply_pass(self, top_sdfg: SDFG, _) -> Dict[int, Dict[SDFGState, Set[SDFGState]]]: + def apply_pass(self, top_sdfg: SDFG, pipeline_res: Dict) -> Dict[int, Dict[SDFGState, Set[SDFGState]]]: """ :return: A dictionary mapping each state to its other reachable states. """ + # Ensure control flow block reachability is run if not run within a pipeline. 
+        if pipeline_res is None or not ControlFlowBlockReachability.__name__ in pipeline_res:
+            cf_block_reach_dict = ControlFlowBlockReachability().apply_pass(top_sdfg, {})
+        else:
+            cf_block_reach_dict = pipeline_res[ControlFlowBlockReachability.__name__]
         reachable: Dict[int, Dict[SDFGState, Set[SDFGState]]] = {}
         for sdfg in top_sdfg.all_sdfgs_recursive():
-            result: Dict[SDFGState, Set[SDFGState]] = {}
+            result: Dict[SDFGState, Set[SDFGState]] = defaultdict(set)
+            for state in sdfg.states():
+                for reached in cf_block_reach_dict[state.parent_graph.cfg_id][state]:
+                    if isinstance(reached, SDFGState):
+                        result[state].add(reached)
+            reachable[sdfg.cfg_id] = result
+        return reachable
+
+
+@properties.make_properties
+@transformation.experimental_cfg_block_compatible
+class ControlFlowBlockReachability(ppl.Pass):
+    """
+    Evaluates control flow block reachability (which control flow blocks can be executed after each control flow block).
+    """
+
+    CATEGORY: str = 'Analysis'
+
+    contain_to_single_level = properties.Property(dtype=bool, default=False)
+
+    def __init__(self, contain_to_single_level=False) -> None:
+        super().__init__()
+
+        self.contain_to_single_level = contain_to_single_level
 
+    def modifies(self) -> ppl.Modifies:
+        return ppl.Modifies.Nothing
+
+    def should_reapply(self, modified: ppl.Modifies) -> bool:
+        return modified & ppl.Modifies.CFG
+
+    def _region_closure(self, region: ControlFlowRegion,
+                        block_reach: Dict[int, Dict[ControlFlowBlock, Set[ControlFlowBlock]]]) -> Set[SDFGState]:
+        closure: Set[SDFGState] = set()
+        if isinstance(region, LoopRegion):
+            # Any point inside the loop may reach any other point inside the loop again.
+            # TODO(later): This is an overapproximation. A branch terminating in a break is excluded from this.
+            closure.update(region.all_control_flow_blocks())
+
+        # Add all states that this region can reach in its parent graph to the closure.
+        for reached_block in block_reach[region.parent_graph.cfg_id][region]:
+            if isinstance(reached_block, ControlFlowRegion):
+                closure.update(reached_block.all_control_flow_blocks())
+            closure.add(reached_block)
+
+        # Walk up the parent tree.
+        pivot = region.parent_graph
+        while pivot and not isinstance(pivot, SDFG):
+            closure.update(self._region_closure(pivot, block_reach))
+            pivot = pivot.parent_graph
+        return closure
+
+    def apply_pass(self, top_sdfg: SDFG, _) -> Dict[int, Dict[ControlFlowBlock, Set[ControlFlowBlock]]]:
+        """
+        :return: For each control flow region, a dictionary mapping each control flow block to its other reachable
+                 control flow blocks in the same region.
+        """
+        single_level_reachable: Dict[int, Dict[ControlFlowBlock, Set[ControlFlowBlock]]] = defaultdict(
+            lambda: defaultdict(set)
+        )
+        for cfg in top_sdfg.all_control_flow_regions(recursive=True):
             # In networkx this is currently implemented naively for directed graphs. 
# The implementation below is faster # tc: nx.DiGraph = nx.transitive_closure(sdfg.nx) + for n, v in reachable_nodes(cfg.nx): + single_level_reachable[cfg.cfg_id][n] = set(v) + if isinstance(cfg, LoopRegion): + single_level_reachable[cfg.cfg_id][n].update(cfg.nodes()) - for n, v in reachable_nodes(sdfg.nx): - result[n] = set(v) - - reachable[sdfg.cfg_id] = result + if self.contain_to_single_level: + return single_level_reachable + reachable: Dict[int, Dict[ControlFlowBlock, Set[ControlFlowBlock]]] = {} + for sdfg in top_sdfg.all_sdfgs_recursive(): + for cfg in sdfg.all_control_flow_regions(): + result: Dict[ControlFlowBlock, Set[ControlFlowBlock]] = defaultdict(set) + for block in cfg.nodes(): + for reached in single_level_reachable[block.parent_graph.cfg_id][block]: + if isinstance(reached, ControlFlowRegion): + result[block].update(reached.all_control_flow_blocks()) + result[block].add(reached) + if block.parent_graph is not sdfg: + result[block].update(self._region_closure(block.parent_graph, single_level_reachable)) + reachable[cfg.cfg_id] = result return reachable @@ -99,6 +182,7 @@ def reachable_nodes(G): @properties.make_properties +@transformation.experimental_cfg_block_compatible class SymbolAccessSets(ppl.Pass): """ Evaluates symbol access sets (which symbols are read/written in each state or interstate edge). @@ -116,25 +200,27 @@ def should_reapply(self, modified: ppl.Modifies) -> bool: def apply_pass(self, top_sdfg: SDFG, _) -> Dict[int, Dict[Union[SDFGState, Edge[InterstateEdge]], Tuple[Set[str], Set[str]]]]: """ - :return: A dictionary mapping each state to a tuple of its (read, written) data descriptors. + :return: A dictionary mapping each state and interstate edge to a tuple of its (read, written) symbols. """ - top_result: Dict[int, Dict[SDFGState, Tuple[Set[str], Set[str]]]] = {} + top_result: Dict[int, Dict[Union[SDFGState, Edge[InterstateEdge]], Tuple[Set[str], Set[str]]]] = {} for sdfg in top_sdfg.all_sdfgs_recursive(): - adesc = set(sdfg.arrays.keys()) - result: Dict[SDFGState, Tuple[Set[str], Set[str]]] = {} - for state in sdfg.nodes(): - readset = state.free_symbols - # No symbols may be written to inside states. - result[state] = (readset, set()) - for oedge in sdfg.out_edges(state): - edge_readset = oedge.data.read_symbols() - adesc - edge_writeset = set(oedge.data.assignments.keys()) - result[oedge] = (edge_readset, edge_writeset) - top_result[sdfg.cfg_id] = result + for cfg in sdfg.all_control_flow_regions(): + adesc = set(sdfg.arrays.keys()) + result: Dict[SDFGState, Tuple[Set[str], Set[str]]] = {} + for block in cfg.nodes(): + if isinstance(block, SDFGState): + # No symbols may be written to inside states. + result[block] = (block.free_symbols, set()) + for oedge in cfg.out_edges(block): + edge_readset = oedge.data.read_symbols() - adesc + edge_writeset = set(oedge.data.assignments.keys()) + result[oedge] = (edge_readset, edge_writeset) + top_result[cfg.cfg_id] = result return top_result @properties.make_properties +@transformation.experimental_cfg_block_compatible class AccessSets(ppl.Pass): """ Evaluates memory access sets (which arrays/data descriptors are read/written in each state). @@ -179,6 +265,7 @@ def apply_pass(self, top_sdfg: SDFG, _) -> Dict[int, Dict[SDFGState, Tuple[Set[s @properties.make_properties +@transformation.experimental_cfg_block_compatible class FindAccessStates(ppl.Pass): """ For each data descriptor, creates a set of states in which access nodes of that data are used. 
@@ -201,13 +288,13 @@ def apply_pass(self, top_sdfg: SDFG, _) -> Dict[int, Dict[str, Set[SDFGState]]]:
         for sdfg in top_sdfg.all_sdfgs_recursive():
             result: Dict[str, Set[SDFGState]] = defaultdict(set)
 
-            for state in sdfg.nodes():
+            for state in sdfg.states():
                 for anode in state.data_nodes():
                     result[anode.data].add(state)
 
             # Edges that read from arrays add to both ends' access sets
             anames = sdfg.arrays.keys()
-            for e in sdfg.edges():
+            for e in sdfg.all_interstate_edges():
                 fsyms = e.data.free_symbols & anames
                 for access in fsyms:
                     result[access].update({e.src, e.dst})
@@ -217,6 +304,7 @@ def apply_pass(self, top_sdfg: SDFG, _) -> Dict[int, Dict[str, Set[SDFGState]]]:
 
 
 @properties.make_properties
+@transformation.experimental_cfg_block_compatible
 class FindAccessNodes(ppl.Pass):
     """
     For each data descriptor, creates a dictionary mapping states to all read and write access nodes with the given
@@ -242,7 +330,7 @@ def apply_pass(self, top_sdfg: SDFG,
         for sdfg in top_sdfg.all_sdfgs_recursive():
             result: Dict[str, Dict[SDFGState, Tuple[Set[nd.AccessNode], Set[nd.AccessNode]]]] = defaultdict(
                 lambda: defaultdict(lambda: [set(), set()]))
-            for state in sdfg.nodes():
+            for state in sdfg.states():
                 for anode in state.data_nodes():
                     if state.in_degree(anode) > 0:
                         result[anode.data][state][1].add(anode)
@@ -508,6 +596,7 @@ def apply_pass(self, top_sdfg: SDFG, pipeline_results: Dict[str, Any]) -> Dict[i
 
 
 @properties.make_properties
+@transformation.experimental_cfg_block_compatible
 class AccessRanges(ppl.Pass):
     """
     For each data descriptor, finds all memlets used to access it (read/write ranges).
@@ -544,6 +633,7 @@ def apply_pass(self, top_sdfg: SDFG, _) -> Dict[int, Dict[str, Set[Memlet]]]:
 
 
 @properties.make_properties
+@transformation.experimental_cfg_block_compatible
 class FindReferenceSources(ppl.Pass):
     """
     For each Reference data descriptor, finds all memlets used to set it. If a Tasklet was used
@@ -586,6 +676,7 @@ def apply_pass(self, top_sdfg: SDFG, _) -> Dict[int, Dict[str, Set[Union[Memlet,
 
 
 @properties.make_properties
+@transformation.experimental_cfg_block_compatible
 class DeriveSDFGConstraints(ppl.Pass):
 
     CATEGORY: str = 'Analysis'
diff --git a/dace/transformation/passes/analysis/loop_analysis.py b/dace/transformation/passes/analysis/loop_analysis.py
new file mode 100644
index 0000000000..3d15f73c73
--- /dev/null
+++ b/dace/transformation/passes/analysis/loop_analysis.py
@@ -0,0 +1,116 @@
+# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved.
+"""
+Various analyses concerning LoopRegions, and utility functions to get information about LoopRegions for other passes.
+"""
+
+import ast
+from typing import Any, Dict, Optional
+from dace.frontend.python import astutils
+
+import sympy
+
+from dace import symbolic
+from dace.sdfg.state import LoopRegion
+
+
+class FindAssignment(ast.NodeVisitor):
+
+    assignments: Dict[str, str]
+    multiple: bool
+
+    def __init__(self):
+        self.assignments = {}
+        self.multiple = False
+
+    def visit_Assign(self, node: ast.Assign) -> Any:
+        for tgt in node.targets:
+            if isinstance(tgt, ast.Name):
+                if tgt.id in self.assignments:
+                    self.multiple = True
+                self.assignments[tgt.id] = astutils.unparse(node.value)
+        return self.generic_visit(node)
+
+
+def get_loop_end(loop: LoopRegion) -> Optional[symbolic.SymbolicType]:
+    """
+    Parse a loop region to identify the end value of the iteration variable under normal loop termination (no break).
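+
+    As a sketch of the matching performed below: a condition ``i < N`` yields an end value of ``N - 1``,
+    ``i <= N`` yields ``N``, and conditions fitting none of the four canonical comparison patterns yield ``None``.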
+ """ + end: Optional[symbolic.SymbolicType] = None + a = sympy.Wild('a') + condition = symbolic.pystr_to_symbolic(loop.loop_condition.as_string) + itersym = symbolic.pystr_to_symbolic(loop.loop_variable) + match = condition.match(itersym < a) + if match: + end = match[a] - 1 + if end is None: + match = condition.match(itersym <= a) + if match: + end = match[a] + if end is None: + match = condition.match(itersym > a) + if match: + end = match[a] + 1 + if end is None: + match = condition.match(itersym >= a) + if match: + end = match[a] + return end + + +def get_init_assignment(loop: LoopRegion) -> Optional[symbolic.SymbolicType]: + """ + Parse a loop region's init statement to identify the exact init assignment expression. + """ + init_stmt = loop.init_statement + if init_stmt is None: + return None + + init_codes_list = init_stmt.code if isinstance(init_stmt.code, list) else [init_stmt.code] + assignments: Dict[str, str] = {} + for code in init_codes_list: + visitor = FindAssignment() + visitor.visit(code) + if visitor.multiple: + return None + for assign in visitor.assignments: + if assign in assignments: + return None + assignments[assign] = visitor.assignments[assign] + + if loop.loop_variable in assignments: + return symbolic.pystr_to_symbolic(assignments[loop.loop_variable]) + + return None + + +def get_update_assignment(loop: LoopRegion) -> Optional[symbolic.SymbolicType]: + """ + Parse a loop region's update statement to identify the exact update assignment expression. + """ + update_stmt = loop.update_statement + if update_stmt is None: + return None + + update_codes_list = update_stmt.code if isinstance(update_stmt.code, list) else [update_stmt.code] + assignments: Dict[str, str] = {} + for code in update_codes_list: + visitor = FindAssignment() + visitor.visit(code) + if visitor.multiple: + return None + for assign in visitor.assignments: + if assign in assignments: + return None + assignments[assign] = visitor.assignments[assign] + + if loop.loop_variable in assignments: + return symbolic.pystr_to_symbolic(assignments[loop.loop_variable]) + + return None + + +def get_loop_stride(loop: LoopRegion) -> Optional[symbolic.SymbolicType]: + update_assignment = get_update_assignment(loop) + if update_assignment: + return update_assignment - symbolic.pystr_to_symbolic(loop.loop_variable) + return None diff --git a/dace/transformation/passes/simplification/control_flow_raising.py b/dace/transformation/passes/simplification/control_flow_raising.py new file mode 100644 index 0000000000..abe305f12c --- /dev/null +++ b/dace/transformation/passes/simplification/control_flow_raising.py @@ -0,0 +1,96 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. + +from typing import Optional, Tuple +import networkx as nx +from dace import properties +from dace.sdfg.analysis import cfg as cfg_analysis +from dace.sdfg.sdfg import SDFG, InterstateEdge +from dace.sdfg.state import ConditionalBlock, ControlFlowBlock, ControlFlowRegion +from dace.sdfg.utils import dfs_conditional +from dace.transformation import pass_pipeline as ppl, transformation +from dace.transformation.interstate.loop_lifting import LoopLifting + + +@properties.make_properties +@transformation.experimental_cfg_block_compatible +class ControlFlowRaising(ppl.Pass): + """ + Raises all detectable control flow that can be expressed with native SDFG structures, such as loops and branching. 
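+
+    A typical invocation (mirroring the accompanying tests) wraps the pass in a pipeline, e.g.,
+    ``FixedPointPipeline([ControlFlowRaising()]).apply_pass(sdfg, {})``.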
+ """ + + CATEGORY: str = 'Simplification' + + def modifies(self) -> ppl.Modifies: + return ppl.Modifies.CFG + + def should_reapply(self, modified: ppl.Modifies) -> bool: + return modified & ppl.Modifies.CFG + + def _lift_conditionals(self, sdfg: SDFG) -> int: + cfgs = list(sdfg.all_control_flow_regions()) + n_cond_regions_pre = len([x for x in sdfg.all_control_flow_blocks() if isinstance(x, ConditionalBlock)]) + + for region in cfgs: + sinks = region.sink_nodes() + dummy_exit = region.add_state('__DACE_DUMMY') + for s in sinks: + region.add_edge(s, dummy_exit, InterstateEdge()) + idom = nx.immediate_dominators(region.nx, region.start_block) + alldoms = cfg_analysis.all_dominators(region, idom) + branch_merges = cfg_analysis.branch_merges(region, idom, alldoms) + + for block in region.nodes(): + graph = block.parent_graph + oedges = graph.out_edges(block) + if len(oedges) > 1 and block in branch_merges: + merge_block = branch_merges[block] + + # Construct the branching block. + conditional = ConditionalBlock('conditional_' + block.label, sdfg, graph) + graph.add_node(conditional) + # Connect it. + graph.add_edge(block, conditional, InterstateEdge()) + + # Populate branches. + for i, oe in enumerate(oedges): + branch_name = 'branch_' + str(i) + '_' + block.label + branch = ControlFlowRegion(branch_name, sdfg) + conditional.add_branch(oe.data.condition, branch) + if oe.dst is merge_block: + # Empty branch. + continue + + branch_nodes = set(dfs_conditional(graph, [oe.dst], lambda _, x: x is not merge_block)) + branch_start = branch.add_state(branch_name + '_start', is_start_block=True) + branch.add_nodes_from(branch_nodes) + branch_end = branch.add_state(branch_name + '_end') + branch.add_edge(branch_start, oe.dst, InterstateEdge(assignments=oe.data.assignments)) + added = set() + for e in graph.all_edges(*branch_nodes): + if not (e in added): + added.add(e) + if e is oe: + continue + elif e.dst is merge_block: + branch.add_edge(e.src, branch_end, e.data) + else: + branch.add_edge(e.src, e.dst, e.data) + graph.remove_nodes_from(branch_nodes) + + # Connect to the end of the branch / what happens after. + if merge_block is not dummy_exit: + graph.add_edge(conditional, merge_block, InterstateEdge()) + region.remove_node(dummy_exit) + + n_cond_regions_post = len([x for x in sdfg.all_control_flow_blocks() if isinstance(x, ConditionalBlock)]) + return n_cond_regions_post - n_cond_regions_pre + + def apply_pass(self, top_sdfg: SDFG, _) -> Optional[Tuple[int, int]]: + lifted_loops = 0 + lifted_branches = 0 + for sdfg in top_sdfg.all_sdfgs_recursive(): + lifted_loops += sdfg.apply_transformations_repeated([LoopLifting], validate_all=False, validate=False) + lifted_branches += self._lift_conditionals(sdfg) + if lifted_branches == 0 and lifted_loops == 0: + return None + return lifted_loops, lifted_branches diff --git a/dace/transformation/subgraph/expansion.py b/dace/transformation/subgraph/expansion.py index db1e9b59ab..aa182e8c80 100644 --- a/dace/transformation/subgraph/expansion.py +++ b/dace/transformation/subgraph/expansion.py @@ -1,26 +1,21 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. """ This module contains classes that implement the expansion transformation. 
""" -from dace import dtypes, registry, symbolic, subsets +from dace import dtypes, symbolic, subsets from dace.sdfg import nodes -from dace.memlet import Memlet from dace.sdfg import replace, SDFG, dynamic_map_inputs from dace.sdfg.graph import SubgraphView from dace.transformation import transformation from dace.properties import make_properties, Property -from dace.sdfg.propagation import propagate_memlets_sdfg from dace.transformation.subgraph import helpers from collections import defaultdict from copy import deepcopy as dcpy -from typing import List, Union import itertools -import dace.libraries.standard as stdlib import warnings -import sys def offset_map(state, map_entry): diff --git a/dace/transformation/subgraph/helpers.py b/dace/transformation/subgraph/helpers.py index b2af49c879..0ea1903522 100644 --- a/dace/transformation/subgraph/helpers.py +++ b/dace/transformation/subgraph/helpers.py @@ -1,20 +1,11 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. """ Subgraph Transformation Helper API """ -from dace import dtypes, registry, symbolic, subsets -from dace.sdfg import nodes, utils -from dace.memlet import Memlet -from dace.sdfg import replace, SDFG, SDFGState -from dace.properties import make_properties, Property -from dace.sdfg.propagation import propagate_memlets_sdfg +from dace import subsets +from dace.sdfg import nodes from dace.sdfg.graph import SubgraphView -from collections import defaultdict import copy -from typing import List, Union, Dict, Tuple, Set - -import dace.libraries.standard as stdlib - -import itertools +from typing import List, Dict, Set # **************** # Helper functions diff --git a/tests/passes/simplification/control_flow_raising_test.py b/tests/passes/simplification/control_flow_raising_test.py new file mode 100644 index 0000000000..53e01df12f --- /dev/null +++ b/tests/passes/simplification/control_flow_raising_test.py @@ -0,0 +1,98 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. 
+ +import dace +import numpy as np +from dace.sdfg.state import ConditionalBlock +from dace.transformation.pass_pipeline import FixedPointPipeline, Pipeline +from dace.transformation.passes.simplification.control_flow_raising import ControlFlowRaising + + +def test_dataflow_if_check(): + + @dace.program + def dataflow_if_check(A: dace.int32[10], i: dace.int64): + if A[i] < 10: + return 0 + elif A[i] == 10: + return 10 + return 100 + + sdfg = dataflow_if_check.to_sdfg() + + assert not any(isinstance(x, ConditionalBlock) for x in sdfg.nodes()) + + ppl = FixedPointPipeline([ControlFlowRaising()]) + ppl.__experimental_cfg_block_compatible__ = True + ppl.apply_pass(sdfg, {}) + + assert any(isinstance(x, ConditionalBlock) for x in sdfg.nodes()) + + A = np.zeros((10,), np.int32) + A[4] = 10 + A[5] = 100 + assert sdfg(A, 0)[0] == 0 + assert sdfg(A, 4)[0] == 10 + assert sdfg(A, 5)[0] == 100 + assert sdfg(A, 6)[0] == 0 + + +def test_nested_if_chain(): + + @dace.program + def nested_if_chain(i: dace.int64): + if i < 2: + return 0 + else: + if i < 4: + return 1 + else: + if i < 6: + return 2 + else: + if i < 8: + return 3 + else: + return 4 + + sdfg = nested_if_chain.to_sdfg() + + assert not any(isinstance(x, ConditionalBlock) for x in sdfg.nodes()) + + assert nested_if_chain(0)[0] == 0 + assert nested_if_chain(2)[0] == 1 + assert nested_if_chain(4)[0] == 2 + assert nested_if_chain(7)[0] == 3 + assert nested_if_chain(15)[0] == 4 + + +def test_elif_chain(): + + @dace.program + def elif_chain(i: dace.int64): + if i < 2: + return 0 + elif i < 4: + return 1 + elif i < 6: + return 2 + elif i < 8: + return 3 + else: + return 4 + + elif_chain.use_experimental_cfg_blocks = True + sdfg = elif_chain.to_sdfg() + + assert any(isinstance(x, ConditionalBlock) for x in sdfg.nodes()) + + assert elif_chain(0)[0] == 0 + assert elif_chain(2)[0] == 1 + assert elif_chain(4)[0] == 2 + assert elif_chain(7)[0] == 3 + assert elif_chain(15)[0] == 4 + + +if __name__ == '__main__': + test_dataflow_if_check() + test_nested_if_chain() + test_elif_chain() diff --git a/tests/passes/writeset_underapproximation_test.py b/tests/passes/writeset_underapproximation_test.py index 7d5272d80a..96df87b5e7 100644 --- a/tests/passes/writeset_underapproximation_test.py +++ b/tests/passes/writeset_underapproximation_test.py @@ -1,7 +1,8 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
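+"""Tests for the ``UnderapproximateWrites`` analysis pass (write-set underapproximation)."""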
+from typing import Dict import dace -from dace.sdfg.analysis.writeset_underapproximation import UnderapproximateWrites +from dace.sdfg.analysis.writeset_underapproximation import UnderapproximateWrites, UnderapproximateWritesDict from dace.subsets import Range from dace.transformation.pass_pipeline import Pipeline @@ -9,8 +10,6 @@ M = dace.symbol("M") K = dace.symbol("K") -pipeline = Pipeline([UnderapproximateWrites()]) - def test_2D_map_overwrites_2D_array(): """ @@ -33,9 +32,10 @@ def test_2D_map_overwrites_2D_array(): output_nodes={'B': a1}, external_edges=True) + pipeline = Pipeline([UnderapproximateWrites()]) results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] - result = results['approximation'] + result = results[sdfg.cfg_id].approximation edge = map_state.in_edges(a1)[0] result_subset_list = result[edge].subset.subset_list result_subset = result_subset_list[0] @@ -65,9 +65,10 @@ def test_2D_map_added_indices(): output_nodes={"B": a1}, external_edges=True) + pipeline = Pipeline([UnderapproximateWrites()]) results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] - result = results["approximation"] + result = results[sdfg.cfg_id].approximation edge = map_state.in_edges(a1)[0] assert (len(result[edge].subset.subset_list) == 0) @@ -94,9 +95,10 @@ def test_2D_map_multiplied_indices(): output_nodes={"B": a1}, external_edges=True) + pipeline = Pipeline([UnderapproximateWrites()]) results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] - result = results["approximation"] + result = results[sdfg.cfg_id].approximation edge = map_state.in_edges(a1)[0] assert (len(result[edge].subset.subset_list) == 0) @@ -121,9 +123,10 @@ def test_1D_map_one_index_multiple_dims(): output_nodes={"B": a1}, external_edges=True) + pipeline = Pipeline([UnderapproximateWrites()]) results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] - result = results["approximation"] + result = results[sdfg.cfg_id].approximation edge = map_state.in_edges(a1)[0] assert (len(result[edge].subset.subset_list) == 0) @@ -146,9 +149,10 @@ def test_1D_map_one_index_squared(): output_nodes={"B": a1}, external_edges=True) + pipeline = Pipeline([UnderapproximateWrites()]) results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] - result = results["approximation"] + result = results[sdfg.cfg_id].approximation edge = map_state.in_edges(a1)[0] assert (len(result[edge].subset.subset_list) == 0) @@ -185,9 +189,10 @@ def test_map_tree_full_write(): inner_edge_1 = map_state.add_edge(inner_map_exit_1, "OUT_B", map_exit, "IN_B", dace.Memlet(data="B")) outer_edge = map_state.add_edge(map_exit, "OUT_B", a1, None, dace.Memlet(data="B")) + pipeline = Pipeline([UnderapproximateWrites()]) results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] - result = results["approximation"] + result = results[sdfg.cfg_id].approximation expected_subset_outer_edge = Range.from_string("0:M, 0:N") expected_subset_inner_edge = Range.from_string("0:M, _i") result_inner_edge_0 = result[inner_edge_0].subset.subset_list[0] @@ -230,9 +235,10 @@ def test_map_tree_no_write_multiple_indices(): inner_edge_1 = map_state.add_edge(inner_map_exit_1, "OUT_B", map_exit, "IN_B", dace.Memlet(data="B")) outer_edge = map_state.add_edge(map_exit, "OUT_B", a1, None, dace.Memlet(data="B")) + pipeline = Pipeline([UnderapproximateWrites()]) results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] - result = results["approximation"] + result = results[sdfg.cfg_id].approximation 
result_inner_edge_0 = result[inner_edge_0].subset.subset_list result_inner_edge_1 = result[inner_edge_1].subset.subset_list result_outer_edge = result[outer_edge].subset.subset_list @@ -273,9 +279,10 @@ def test_map_tree_multiple_indices_per_dimension(): inner_edge_1 = map_state.add_edge(inner_map_exit_1, "OUT_B", map_exit, "IN_B", dace.Memlet(data="B")) outer_edge = map_state.add_edge(map_exit, "OUT_B", a1, None, dace.Memlet(data="B")) + pipeline = Pipeline([UnderapproximateWrites()]) results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] - result = results["approximation"] + result = results[sdfg.cfg_id].approximation expected_subset_outer_edge = Range.from_string("0:M, 0:N") expected_subset_inner_edge_1 = Range.from_string("0:M, _i") result_inner_edge_1 = result[inner_edge_1].subset.subset_list[0] @@ -300,11 +307,12 @@ def loop(A: dace.float64[N, M]): sdfg = loop.to_sdfg(simplify=True) + pipeline = Pipeline([UnderapproximateWrites()]) results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] nsdfg = sdfg.cfg_list[1].parent_nsdfg_node map_state = sdfg.states()[0] - result = results["approximation"] + result = results[sdfg.cfg_id].approximation edge = map_state.out_edges(nsdfg)[0] assert (len(result[edge].subset.subset_list) == 0) @@ -323,11 +331,12 @@ def loop(A: dace.float64[N, M]): sdfg = loop.to_sdfg(simplify=True) + pipeline = Pipeline([UnderapproximateWrites()]) results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] map_state = sdfg.states()[0] edge = map_state.in_edges(map_state.data_nodes()[0])[0] - result = results["approximation"] + result = results[sdfg.cfg_id].approximation expected_subset = Range.from_string("0:N, 0:M") assert (str(result[edge].subset.subset_list[0]) == str(expected_subset)) @@ -357,9 +366,10 @@ def test_map_in_loop(): output_nodes={"B": a1}, external_edges=True) + pipeline = Pipeline([UnderapproximateWrites()]) results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] - result = results["loop_approximation"] + result = results[sdfg.cfg_id].loop_approximation expected_subset = Range.from_string("0:N, 0:M") assert (str(result[guard]["B"].subset.subset_list[0]) == str(expected_subset)) @@ -390,9 +400,10 @@ def test_map_in_loop_multiplied_indices_first_dimension(): output_nodes={"B": a1}, external_edges=True) + pipeline = Pipeline([UnderapproximateWrites()]) results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] - result = results["loop_approximation"] + result = results[sdfg.cfg_id].loop_approximation assert (guard not in result.keys() or len(result[guard]) == 0) @@ -421,9 +432,10 @@ def test_map_in_loop_multiplied_indices_second_dimension(): output_nodes={"B": a1}, external_edges=True) + pipeline = Pipeline([UnderapproximateWrites()]) results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] - result = results["loop_approximation"] + result = results[sdfg.cfg_id].loop_approximation assert (guard not in result.keys() or len(result[guard]) == 0) @@ -444,8 +456,9 @@ def nested_loop(A: dace.float64[M, N]): sdfg = nested_loop.to_sdfg(simplify=True) + pipeline = Pipeline([UnderapproximateWrites()]) result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] - write_approx = result["approximation"] + write_approx = result[sdfg.cfg_id].approximation # find write set accessnode = None write_set = None @@ -478,9 +491,10 @@ def nested_loop(A: dace.float64[M, N]): sdfg = nested_loop.to_sdfg(simplify=True) + pipeline = Pipeline([UnderapproximateWrites()]) result = 
pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]
-    write_approx = result["approximation"]
+    write_approx = result[sdfg.cfg_id].approximation
     # find write set
     accessnode = None
     write_set = None
-    for node, _ in sdfg.all_nodes_recursive():
+    for node, parent in sdfg.all_nodes_recursive():
         if isinstance(node, dace.nodes.AccessNode):
-            if node.data == "A":
+            if node.data == "A" and parent.out_degree(node) == 0:
                 accessnode = node
     for edge, memlet in write_approx.items():
         if edge.dst is accessnode:
@@ -510,15 +524,16 @@ def nested_loop(A: dace.float64[M, N]):
 
     sdfg = nested_loop.to_sdfg(simplify=True)
 
+    pipeline = Pipeline([UnderapproximateWrites()])
     result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]
-    write_approx = result["approximation"]
+    write_approx = result[sdfg.cfg_id].approximation
     # find write set
     accessnode = None
     write_set = None
-    for node, _ in sdfg.all_nodes_recursive():
+    for node, parent in sdfg.all_nodes_recursive():
         if isinstance(node, dace.nodes.AccessNode):
-            if node.data == "A":
+            if node.data == "A" and parent.out_degree(node) == 0:
                 accessnode = node
     for edge, memlet in write_approx.items():
         if edge.dst is accessnode:
@@ -531,6 +546,7 @@ def test_nested_sdfg_in_map_branches():
     Nested SDFG that overwrites second dimension of array conditionally.
     --> should approximate write-set of map as empty
     """
+    # NOTE: With control flow regions, this could (and arguably should) instead be approximated precisely.
 
     @dace.program
     def nested_loop(A: dace.float64[M, N]):
@@ -542,15 +558,16 @@ def nested_loop(A: dace.float64[M, N]):
 
     sdfg = nested_loop.to_sdfg(simplify=True)
 
-    result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]
+    pipeline = Pipeline([UnderapproximateWrites()])
+    result: Dict[int, UnderapproximateWritesDict] = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]
 
-    write_approx = result["approximation"]
+    write_approx = result[sdfg.cfg_id].approximation
     # find write set
     accessnode = None
     write_set = None
-    for node, _ in sdfg.all_nodes_recursive():
+    for node, parent in sdfg.all_nodes_recursive():
         if isinstance(node, dace.nodes.AccessNode):
-            if node.data == "A":
+            if node.data == "A" and parent.out_degree(node) == 0:
                 accessnode = node
     for edge, memlet in write_approx.items():
         if edge.dst is accessnode:
@@ -574,9 +591,10 @@ def test_simple_loop_overwrite():
     loop_tasklet = loop_body.add_tasklet("overwrite", {}, {"a"}, "a = 0")
     loop_body.add_edge(loop_tasklet, "a", a0, None, dace.Memlet("A[i]"))
 
-    result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]["loop_approximation"]
+    pipeline = Pipeline([UnderapproximateWrites()])
+    result: UnderapproximateWritesDict = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__][sdfg.cfg_id]
 
-    assert (str(result[guard]["A"].subset) == str(Range.from_array(sdfg.arrays["A"])))
+    assert (str(result.loop_approximation[guard]["A"].subset) == str(Range.from_array(sdfg.arrays["A"])))
 
 
 def test_loop_2D_overwrite():
@@ -598,7 +616,8 @@ def test_loop_2D_overwrite():
     loop_tasklet = loop_body.add_tasklet("overwrite", {}, {"a"}, "a = 0")
     loop_body.add_edge(loop_tasklet, "a", a0, None, dace.Memlet("A[j,i]"))
 
-    result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]["loop_approximation"]
+    pipeline = Pipeline([UnderapproximateWrites()])
+    result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__][sdfg.cfg_id].loop_approximation
 
     assert (str(result[guard1]["A"].subset) == str(Range.from_array(sdfg.arrays["A"])))
     assert (str(result[guard2]["A"].subset) == "j, 0:N")
@@ -629,7 +648,8 @@ def test_loop_2D_propagation_gap_symbolic():
     loop_tasklet = loop_body.add_tasklet("overwrite", {}, {"a"}, "a = 0")
     loop_body.add_edge(loop_tasklet, "a", a0, None, dace.Memlet("A[j,i]"))
 
-    result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]["loop_approximation"]
+    
pipeline = Pipeline([UnderapproximateWrites()]) + result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__][sdfg.cfg_id].loop_approximation assert ("A" not in result[guard1].keys()) assert ("A" not in result[guard2].keys()) @@ -657,7 +677,8 @@ def test_2_loops_overwrite(): loop_tasklet_2 = loop_body_2.add_tasklet("overwrite", {}, {"a"}, "a = 0") loop_body_2.add_edge(loop_tasklet_2, "a", a1, None, dace.Memlet("A[i]")) - result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]["loop_approximation"] + pipeline = Pipeline([UnderapproximateWrites()]) + result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__][sdfg.cfg_id].loop_approximation assert (str(result[guard_1]["A"].subset) == str(Range.from_array(sdfg.arrays["A"]))) assert (str(result[guard_2]["A"].subset) == str(Range.from_array(sdfg.arrays["A"]))) @@ -687,7 +708,8 @@ def test_loop_2D_overwrite_propagation_gap_non_empty(): loop_tasklet = loop_body.add_tasklet("overwrite", {}, {"a"}, "a = 0") loop_body.add_edge(loop_tasklet, "a", a0, None, dace.Memlet("A[j,i]")) - result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]["loop_approximation"] + pipeline = Pipeline([UnderapproximateWrites()]) + result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__][sdfg.cfg_id].loop_approximation assert (str(result[guard1]["A"].subset) == str(Range.from_array(sdfg.arrays["A"]))) assert (str(result[guard2]["A"].subset) == "j, 0:N") @@ -717,7 +739,8 @@ def test_loop_nest_multiplied_indices(): loop_tasklet = loop_body.add_tasklet("overwrite", {}, {"a"}, "a = 0") loop_body.add_edge(loop_tasklet, "a", a0, None, dace.Memlet("A[i,i*j]")) - result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]["loop_approximation"] + pipeline = Pipeline([UnderapproximateWrites()]) + result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__][sdfg.cfg_id].loop_approximation assert (guard1 not in result.keys() or "A" not in result[guard1].keys()) assert (guard2 not in result.keys() or "A" not in result[guard2].keys()) @@ -748,7 +771,8 @@ def test_loop_nest_empty_nested_loop(): loop_tasklet = loop_body.add_tasklet("overwrite", {}, {"a"}, "a = 0") loop_body.add_edge(loop_tasklet, "a", a0, None, dace.Memlet("A[j,i]")) - result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]["loop_approximation"] + pipeline = Pipeline([UnderapproximateWrites()]) + result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__][sdfg.cfg_id].loop_approximation assert (guard1 not in result.keys() or "A" not in result[guard1].keys()) assert (guard2 not in result.keys() or "A" not in result[guard2].keys()) @@ -779,7 +803,8 @@ def test_loop_nest_inner_loop_conditional(): loop_tasklet = loop_body.add_tasklet("overwrite", {}, {"a"}, "a = 0") loop_body.add_edge(loop_tasklet, "a", a0, None, dace.Memlet("A[k]")) - result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__]["loop_approximation"] + pipeline = Pipeline([UnderapproximateWrites()]) + result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__][sdfg.cfg_id].loop_approximation assert (guard1 not in result.keys() or "A" not in result[guard1].keys()) assert (guard2 in result.keys() and "A" in result[guard2].keys() and str(result[guard2]['A'].subset) == "0:N") @@ -799,9 +824,10 @@ def nested_loop(A: dace.float64[M, N]): sdfg = nested_loop.to_sdfg(simplify=True) + pipeline = Pipeline([UnderapproximateWrites()]) result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] - write_approx = 
result["approximation"] + write_approx = result[sdfg.cfg_id].approximation write_set = None accessnode = None for node, _ in sdfg.all_nodes_recursive(): @@ -828,10 +854,11 @@ def nested_loop(A: dace.float64[M, N]): sdfg = nested_loop.to_sdfg(simplify=True) + pipeline = Pipeline([UnderapproximateWrites()]) result = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] # find write set - write_approx = result["approximation"] + write_approx = result[sdfg.cfg_id].approximation accessnode = None write_set = None for node, _ in sdfg.all_nodes_recursive(): @@ -864,9 +891,10 @@ def test_loop_break(): loop_tasklet = loop_body_1.add_tasklet("overwrite", {}, {"a"}, "a = 0") loop_body_1.add_edge(loop_tasklet, "a", a0, None, dace.Memlet("A[i]")) + pipeline = Pipeline([UnderapproximateWrites()]) results = pipeline.apply_pass(sdfg, {})[UnderapproximateWrites.__name__] - result = results["loop_approximation"] + result = results[sdfg.cfg_id].loop_approximation assert (guard3 not in result.keys() or "A" not in result[guard3].keys()) diff --git a/tests/sdfg/conditional_region_test.py b/tests/sdfg/conditional_region_test.py index 4e4eda3f44..0be40f43d3 100644 --- a/tests/sdfg/conditional_region_test.py +++ b/tests/sdfg/conditional_region_test.py @@ -10,20 +10,20 @@ def test_cond_region_if(): sdfg = dace.SDFG('regular_if') - sdfg.add_array("A", (1,), dace.float32) - sdfg.add_symbol("i", dace.int32) + sdfg.add_array('A', (1,), dace.float32) + sdfg.add_symbol('i', dace.int32) state0 = sdfg.add_state('state0', is_start_block=True) - if1 = ConditionalBlock("if1") + if1 = ConditionalBlock('if1') sdfg.add_node(if1) sdfg.add_edge(state0, if1, InterstateEdge()) - if_body = ControlFlowRegion("if_body", sdfg=sdfg) - if1.branches.append((CodeBlock("i == 1"), if_body)) + if_body = ControlFlowRegion('if_body', sdfg=sdfg) + if1.add_branch(CodeBlock('i == 1'), if_body) - state1 = if_body.add_state("state1", is_start_block=True) + state1 = if_body.add_state('state1', is_start_block=True) acc_a = state1.add_access('A') - t1 = state1.add_tasklet("t1", None, {"a"}, "a = 100") + t1 = state1.add_tasklet('t1', None, {'a'}, 'a = 100') state1.add_edge(t1, 'a', acc_a, None, dace.Memlet('A[0]')) assert sdfg.is_valid() @@ -36,14 +36,14 @@ def test_cond_region_if(): assert A[0] == 1 def test_serialization(): - sdfg = SDFG("test_serialization") - cond_region = ConditionalBlock("cond_region") + sdfg = SDFG('test_serialization') + cond_region = ConditionalBlock('cond_region') sdfg.add_node(cond_region, is_start_block=True) - sdfg.add_symbol("i", dace.int32) + sdfg.add_symbol('i', dace.int32) for j in range(10): - cfg = ControlFlowRegion(f"cfg_{j}", sdfg) - cond_region.branches.append((CodeBlock(f"i == {j}"), cfg)) + cfg = ControlFlowRegion(f'cfg_{j}', sdfg) + cond_region.add_branch(CodeBlock(f'i == {j}'), cfg) assert sdfg.is_valid() @@ -52,32 +52,32 @@ def test_serialization(): new_cond_region: ConditionalBlock = new_sdfg.nodes()[0] for j in range(10): condition, cfg = new_cond_region.branches[j] - assert condition == CodeBlock(f"i == {j}") - assert cfg.label == f"cfg_{j}" + assert condition == CodeBlock(f'i == {j}') + assert cfg.label == f'cfg_{j}' def test_if_else(): sdfg = dace.SDFG('regular_if_else') - sdfg.add_array("A", (1,), dace.float32) - sdfg.add_symbol("i", dace.int32) + sdfg.add_array('A', (1,), dace.float32) + sdfg.add_symbol('i', dace.int32) state0 = sdfg.add_state('state0', is_start_block=True) - if1 = ConditionalBlock("if1") + if1 = ConditionalBlock('if1') sdfg.add_node(if1) sdfg.add_edge(state0, if1, 
InterstateEdge()) - if_body = ControlFlowRegion("if_body", sdfg=sdfg) - state1 = if_body.add_state("state1", is_start_block=True) + if_body = ControlFlowRegion('if_body', sdfg=sdfg) + state1 = if_body.add_state('state1', is_start_block=True) acc_a = state1.add_access('A') - t1 = state1.add_tasklet("t1", None, {"a"}, "a = 100") + t1 = state1.add_tasklet('t1', None, {'a'}, 'a = 100') state1.add_edge(t1, 'a', acc_a, None, dace.Memlet('A[0]')) - if1.branches.append((CodeBlock("i == 1"), if_body)) + if1.add_branch(CodeBlock('i == 1'), if_body) - else_body = ControlFlowRegion("else_body", sdfg=sdfg) - state2 = else_body.add_state("state1", is_start_block=True) + else_body = ControlFlowRegion('else_body', sdfg=sdfg) + state2 = else_body.add_state('state1', is_start_block=True) acc_a2 = state2.add_access('A') - t2 = state2.add_tasklet("t2", None, {"a"}, "a = 200") + t2 = state2.add_tasklet('t2', None, {'a'}, 'a = 200') state2.add_edge(t2, 'a', acc_a2, None, dace.Memlet('A[0]')) - if1.branches.append((CodeBlock("i == 0"), else_body)) + if1.add_branch(CodeBlock('i == 0'), else_body) assert sdfg.is_valid() A = np.ones((1,), dtype=np.float32) diff --git a/tests/sdfg/loop_region_test.py b/tests/sdfg/loop_region_test.py index 6aca54f40c..dedafb67ba 100644 --- a/tests/sdfg/loop_region_test.py +++ b/tests/sdfg/loop_region_test.py @@ -86,6 +86,27 @@ def _make_do_for_loop() -> SDFG: return sdfg +def _make_do_for_inverted_cond_loop() -> SDFG: + sdfg = dace.SDFG('do_for_inverted_cond') + sdfg.using_experimental_blocks = True + sdfg.add_symbol('i', dace.int32) + sdfg.add_array('A', [10], dace.float32) + state0 = sdfg.add_state('state0', is_start_block=True) + loop1 = LoopRegion(label='loop1', condition_expr='i < 8', loop_var='i', initialize_expr='i = 0', + update_expr='i = i + 1', inverted=True, update_before_condition=False) + sdfg.add_node(loop1) + state1 = loop1.add_state('state1', is_start_block=True) + acc_a = state1.add_access('A') + t1 = state1.add_tasklet('t1', None, {'a'}, 'a = i') + state1.add_edge(t1, 'a', acc_a, None, dace.Memlet('A[i]')) + state2 = loop1.add_state('state2') + loop1.add_edge(state1, state2, dace.InterstateEdge()) + state3 = sdfg.add_state('state3') + sdfg.add_edge(state0, loop1, dace.InterstateEdge()) + sdfg.add_edge(loop1, state3, dace.InterstateEdge()) + return sdfg + + def _make_triple_nested_for_loop() -> SDFG: sdfg = dace.SDFG('gemm') sdfg.using_experimental_blocks = True @@ -177,6 +198,19 @@ def test_loop_do_for(): assert np.allclose(a_validation, a_test) +def test_loop_do_for_inverted_condition(): + sdfg = _make_do_for_inverted_cond_loop() + + assert sdfg.is_valid() + + a_validation = np.zeros([10], dtype=np.float32) + a_test = np.zeros([10], dtype=np.float32) + sdfg(A=a_test) + for i in range(9): + a_validation[i] = i + assert np.allclose(a_validation, a_test) + + def test_loop_triple_nested_for(): sdfg = _make_triple_nested_for_loop() @@ -249,6 +283,21 @@ def test_loop_to_stree_do_for(): f'{tn.INDENTATION}while (i < 10)') +def test_loop_to_stree_do_for_inverted_cond(): + sdfg = _make_do_for_inverted_cond_loop() + + assert sdfg.is_valid() + + stree = s2t.as_schedule_tree(sdfg) + + assert stree.as_string() == (f'{tn.INDENTATION}i = 0\n' + + f'{tn.INDENTATION}while True:\n' + + f'{2 * tn.INDENTATION}A[i] = tasklet()\n' + + f'{2 * tn.INDENTATION}if (not (i < 8)):\n' + + f'{3 * tn.INDENTATION}break\n' + + f'{2 * tn.INDENTATION}i = (i + 1)\n') + + def test_loop_to_stree_triple_nested_for(): sdfg = _make_triple_nested_for_loop() @@ -267,9 +316,11 @@ def 
test_loop_to_stree_triple_nested_for():
     test_loop_regular_for()
     test_loop_regular_while()
     test_loop_do_while()
     test_loop_do_for()
+    test_loop_do_for_inverted_condition()
     test_loop_triple_nested_for()
 
     test_loop_to_stree_regular_for()
     test_loop_to_stree_regular_while()
     test_loop_to_stree_do_while()
     test_loop_to_stree_do_for()
+    test_loop_to_stree_do_for_inverted_cond()
     test_loop_to_stree_triple_nested_for()
diff --git a/tests/transformations/interstate/loop_lifting_test.py b/tests/transformations/interstate/loop_lifting_test.py
new file mode 100644
index 0000000000..20f244621c
--- /dev/null
+++ b/tests/transformations/interstate/loop_lifting_test.py
@@ -0,0 +1,217 @@
+# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved.
+""" Tests loop raising transformations. """
+
+import numpy as np
+import pytest
+import dace
+from dace.memlet import Memlet
+from dace.sdfg.sdfg import SDFG, InterstateEdge
+from dace.sdfg.state import LoopRegion
+from dace.transformation.interstate.loop_lifting import LoopLifting
+
+
+def test_lift_regular_for_loop():
+    sdfg = SDFG('regular_for')
+    N = dace.symbol('N')
+    sdfg.add_symbol('i', dace.int32)
+    sdfg.add_symbol('j', dace.int32)
+    sdfg.add_symbol('k', dace.int32)
+    sdfg.add_array('A', (N,), dace.int32)
+    start_state = sdfg.add_state('start', is_start_block=True)
+    init_state = sdfg.add_state('init')
+    guard_state = sdfg.add_state('guard')
+    main_state = sdfg.add_state('loop_state')
+    loop_exit = sdfg.add_state('exit')
+    final_state = sdfg.add_state('final')
+    sdfg.add_edge(start_state, init_state, InterstateEdge(assignments={'j': 0}))
+    sdfg.add_edge(init_state, guard_state, InterstateEdge(assignments={'i': 0, 'k': 0}))
+    sdfg.add_edge(guard_state, main_state, InterstateEdge(condition='i < N'))
+    sdfg.add_edge(main_state, guard_state, InterstateEdge(assignments={'i': 'i + 2', 'j': 'j + 1'}))
+    sdfg.add_edge(guard_state, loop_exit, InterstateEdge(condition='i >= N', assignments={'k': 2}))
+    sdfg.add_edge(loop_exit, final_state, InterstateEdge())
+    a_access = main_state.add_access('A')
+    w_tasklet = main_state.add_tasklet('t1', {}, {'out'}, 'out = 1')
+    main_state.add_edge(w_tasklet, 'out', a_access, None, Memlet('A[i]'))
+    a_access_2 = loop_exit.add_access('A')
+    w_tasklet_2 = loop_exit.add_tasklet('t1', {}, {'out'}, 'out = k')
+    loop_exit.add_edge(w_tasklet_2, 'out', a_access_2, None, Memlet('A[1]'))
+    a_access_3 = final_state.add_access('A')
+    w_tasklet_3 = final_state.add_tasklet('t1', {}, {'out'}, 'out = j')
+    final_state.add_edge(w_tasklet_3, 'out', a_access_3, None, Memlet('A[3]'))
+
+    N = 30
+    A = np.zeros((N,)).astype(np.int32)
+    A_valid = np.zeros((N,)).astype(np.int32)
+    sdfg(A=A_valid, N=N)
+    sdfg.apply_transformations_repeated([LoopLifting])
+
+    assert sdfg.using_experimental_blocks == True
+    assert any(isinstance(x, LoopRegion) for x in sdfg.nodes())
+
+    sdfg(A=A, N=N)
+
+    assert np.allclose(A_valid, A)
+
+
+@pytest.mark.parametrize('increment_before_condition', (True, False))
+def test_lift_loop_llvm_canonical(increment_before_condition):
+    addendum = '_incr_before_cond' if increment_before_condition else ''
+    sdfg = dace.SDFG('llvm_canonical' + addendum)
+    N = dace.symbol('N')
+    sdfg.add_symbol('i', dace.int32)
+    sdfg.add_symbol('j', dace.int32)
+    sdfg.add_symbol('k', dace.int32)
+    sdfg.add_array('A', (N,), dace.int32)
+
+    entry = sdfg.add_state('entry', is_start_block=True)
+    guard = sdfg.add_state('guard')
+    preheader = sdfg.add_state('preheader')
+    body = sdfg.add_state('body')
+    latch = sdfg.add_state('latch')
+    loopexit = 
sdfg.add_state('loopexit') + exitstate = sdfg.add_state('exitstate') + + sdfg.add_edge(entry, guard, InterstateEdge(assignments={'j': 0})) + sdfg.add_edge(guard, exitstate, InterstateEdge(condition='N <= 0')) + sdfg.add_edge(guard, preheader, InterstateEdge(condition='N > 0')) + sdfg.add_edge(preheader, body, InterstateEdge(assignments={'i': 0, 'k': 0})) + if increment_before_condition: + sdfg.add_edge(body, latch, InterstateEdge(assignments={'i': 'i + 2', 'j': 'j + 1'})) + sdfg.add_edge(latch, body, InterstateEdge(condition='i < N')) + sdfg.add_edge(latch, loopexit, InterstateEdge(condition='i >= N', assignments={'k': 2})) + else: + sdfg.add_edge(body, latch, InterstateEdge(assignments={'j': 'j + 1'})) + sdfg.add_edge(latch, body, InterstateEdge(condition='i < N - 2', assignments={'i': 'i + 2'})) + sdfg.add_edge(latch, loopexit, InterstateEdge(condition='i >= N - 2', assignments={'k': 2})) + sdfg.add_edge(loopexit, exitstate, InterstateEdge()) + + a_access = body.add_access('A') + w_tasklet = body.add_tasklet('t1', {}, {'out'}, 'out = 1') + body.add_edge(w_tasklet, 'out', a_access, None, Memlet('A[i]')) + a_access_2 = loopexit.add_access('A') + w_tasklet_2 = loopexit.add_tasklet('t1', {}, {'out'}, 'out = k') + loopexit.add_edge(w_tasklet_2, 'out', a_access_2, None, Memlet('A[1]')) + a_access_3 = exitstate.add_access('A') + w_tasklet_3 = exitstate.add_tasklet('t1', {}, {'out'}, 'out = j') + exitstate.add_edge(w_tasklet_3, 'out', a_access_3, None, Memlet('A[3]')) + + N = 30 + A = np.zeros((N,)).astype(np.int32) + A_valid = np.zeros((N,)).astype(np.int32) + sdfg(A=A_valid, N=N) + sdfg.apply_transformations_repeated([LoopLifting]) + + assert sdfg.using_experimental_blocks == True + assert any(isinstance(x, LoopRegion) for x in sdfg.nodes()) + + sdfg(A=A, N=N) + + assert np.allclose(A_valid, A) + + +def test_lift_loop_llvm_canonical_while(): + sdfg = dace.SDFG('llvm_canonical_while') + N = dace.symbol('N') + sdfg.add_symbol('j', dace.int32) + sdfg.add_symbol('k', dace.int32) + sdfg.add_array('A', (N,), dace.int32) + sdfg.add_scalar('i', dace.int32, transient=True) + + entry = sdfg.add_state('entry', is_start_block=True) + guard = sdfg.add_state('guard') + preheader = sdfg.add_state('preheader') + body = sdfg.add_state('body') + latch = sdfg.add_state('latch') + loopexit = sdfg.add_state('loopexit') + exitstate = sdfg.add_state('exitstate') + + sdfg.add_edge(entry, guard, InterstateEdge(assignments={'j': 0})) + sdfg.add_edge(guard, exitstate, InterstateEdge(condition='N <= 0')) + sdfg.add_edge(guard, preheader, InterstateEdge(condition='N > 0')) + sdfg.add_edge(preheader, body, InterstateEdge(assignments={'k': 0})) + sdfg.add_edge(body, latch, InterstateEdge(assignments={'j': 'j + 1'})) + sdfg.add_edge(latch, body, InterstateEdge(condition='i < N - 2')) + sdfg.add_edge(latch, loopexit, InterstateEdge(condition='i >= N - 2', assignments={'k': 2})) + sdfg.add_edge(loopexit, exitstate, InterstateEdge()) + + i_init_write = entry.add_access('i') + iw_init_tasklet = entry.add_tasklet('ti', {}, {'out'}, 'out = 0') + entry.add_edge(iw_init_tasklet, 'out', i_init_write, None, Memlet('i[0]')) + a_access = body.add_access('A') + w_tasklet = body.add_tasklet('t1', {}, {'out'}, 'out = 1') + body.add_edge(w_tasklet, 'out', a_access, None, Memlet('A[i]')) + i_read = body.add_access('i') + i_write = body.add_access('i') + iw_tasklet = body.add_tasklet('t2', {'in1'}, {'out'}, 'out = in1 + 2') + body.add_edge(i_read, None, iw_tasklet, 'in1', Memlet('i[0]')) + body.add_edge(iw_tasklet, 'out', i_write, None, 
Memlet('i[0]')) + a_access_2 = loopexit.add_access('A') + w_tasklet_2 = loopexit.add_tasklet('t1', {}, {'out'}, 'out = k') + loopexit.add_edge(w_tasklet_2, 'out', a_access_2, None, Memlet('A[1]')) + a_access_3 = exitstate.add_access('A') + w_tasklet_3 = exitstate.add_tasklet('t1', {}, {'out'}, 'out = j') + exitstate.add_edge(w_tasklet_3, 'out', a_access_3, None, Memlet('A[3]')) + + N = 30 + A = np.zeros((N,)).astype(np.int32) + A_valid = np.zeros((N,)).astype(np.int32) + sdfg(A=A_valid, N=N) + sdfg.apply_transformations_repeated([LoopLifting]) + + assert sdfg.using_experimental_blocks == True + assert any(isinstance(x, LoopRegion) for x in sdfg.nodes()) + + sdfg(A=A, N=N) + + assert np.allclose(A_valid, A) + + +def test_do_while(): + sdfg = SDFG('regular_for') + N = dace.symbol('N') + sdfg.add_symbol('i', dace.int32) + sdfg.add_symbol('j', dace.int32) + sdfg.add_symbol('k', dace.int32) + sdfg.add_array('A', (N,), dace.int32) + start_state = sdfg.add_state('start', is_start_block=True) + init_state = sdfg.add_state('init') + guard_state = sdfg.add_state('guard') + main_state = sdfg.add_state('loop_state') + loop_exit = sdfg.add_state('exit') + final_state = sdfg.add_state('final') + sdfg.add_edge(start_state, init_state, InterstateEdge(assignments={'j': 0})) + sdfg.add_edge(init_state, main_state, InterstateEdge(assignments={'i': 0, 'k': 0})) + sdfg.add_edge(main_state, guard_state, InterstateEdge(assignments={'i': 'i + 2', 'j': 'j + 1'})) + sdfg.add_edge(guard_state, main_state, InterstateEdge(condition='i < N')) + sdfg.add_edge(guard_state, loop_exit, InterstateEdge(condition='i >= N', assignments={'k': 2})) + sdfg.add_edge(loop_exit, final_state, InterstateEdge()) + a_access = main_state.add_access('A') + w_tasklet = main_state.add_tasklet('t1', {}, {'out'}, 'out = 1') + main_state.add_edge(w_tasklet, 'out', a_access, None, Memlet('A[i]')) + a_access_2 = loop_exit.add_access('A') + w_tasklet_2 = loop_exit.add_tasklet('t1', {}, {'out'}, 'out = k') + loop_exit.add_edge(w_tasklet_2, 'out', a_access_2, None, Memlet('A[1]')) + a_access_3 = final_state.add_access('A') + w_tasklet_3 = final_state.add_tasklet('t1', {}, {'out'}, 'out = j') + final_state.add_edge(w_tasklet_3, 'out', a_access_3, None, Memlet('A[3]')) + + N = 30 + A = np.zeros((N,)).astype(np.int32) + A_valid = np.zeros((N,)).astype(np.int32) + sdfg(A=A_valid, N=N) + sdfg.apply_transformations_repeated([LoopLifting]) + + assert sdfg.using_experimental_blocks == True + assert any(isinstance(x, LoopRegion) for x in sdfg.nodes()) + + sdfg(A=A, N=N) + + assert np.allclose(A_valid, A) + + +if __name__ == '__main__': + test_lift_regular_for_loop() + test_lift_loop_llvm_canonical(True) + test_lift_loop_llvm_canonical(False) + test_lift_loop_llvm_canonical_while() + test_do_while() diff --git a/tests/transformations/loop_detection_test.py b/tests/transformations/loop_detection_test.py index 5469f45762..323a27787a 100644 --- a/tests/transformations/loop_detection_test.py +++ b/tests/transformations/loop_detection_test.py @@ -27,7 +27,8 @@ def tester(a: dace.float64[20]): assert rng == (1, 19, 1) -def test_loop_rotated(): +@pytest.mark.parametrize('increment_before_condition', (True, False)) +def test_loop_rotated(increment_before_condition): sdfg = dace.SDFG('tester') sdfg.add_symbol('N', dace.int32) @@ -37,8 +38,12 @@ def test_loop_rotated(): exitstate = sdfg.add_state('exitstate') sdfg.add_edge(entry, body, dace.InterstateEdge(assignments=dict(i=0))) - sdfg.add_edge(body, latch, dace.InterstateEdge()) - sdfg.add_edge(latch, body, 
dace.InterstateEdge('i < N', assignments=dict(i='i + 2'))) + if increment_before_condition: + sdfg.add_edge(body, latch, dace.InterstateEdge(assignments=dict(i='i + 2'))) + sdfg.add_edge(latch, body, dace.InterstateEdge('i < N')) + else: + sdfg.add_edge(body, latch, dace.InterstateEdge()) + sdfg.add_edge(latch, body, dace.InterstateEdge('i < N', assignments=dict(i='i + 2'))) sdfg.add_edge(latch, exitstate, dace.InterstateEdge('i >= N')) xform = CountLoops() @@ -48,8 +53,9 @@ def test_loop_rotated(): assert rng == (0, dace.symbol('N') - 1, 2) -@pytest.mark.skip('Extra incrementation states should not be supported by loop detection') def test_loop_rotated_extra_increment(): + # Extra incrementation states (i.e., something more than a single edge between the latch and the body) should not + # be allowed and consequently not be detected as loops. sdfg = dace.SDFG('tester') sdfg.add_symbol('N', dace.int32) @@ -60,15 +66,13 @@ def test_loop_rotated_extra_increment(): exitstate = sdfg.add_state('exitstate') sdfg.add_edge(entry, body, dace.InterstateEdge(assignments=dict(i=0))) + sdfg.add_edge(body, latch, dace.InterstateEdge()) sdfg.add_edge(latch, increment, dace.InterstateEdge('i < N')) sdfg.add_edge(increment, body, dace.InterstateEdge(assignments=dict(i='i + 1'))) sdfg.add_edge(latch, exitstate, dace.InterstateEdge('i >= N')) xform = CountLoops() - assert sdfg.apply_transformations(xform) == 1 - itvar, rng, _ = xform.loop_information() - assert itvar == 'i' - assert rng == (0, dace.symbol('N') - 1, 1) + assert sdfg.apply_transformations(xform) == 0 def test_self_loop(): @@ -91,7 +95,8 @@ def test_self_loop(): assert rng == (2, dace.symbol('N') - 1, 3) -def test_loop_llvm_canonical(): +@pytest.mark.parametrize('increment_before_condition', (True, False)) +def test_loop_llvm_canonical(increment_before_condition): sdfg = dace.SDFG('tester') sdfg.add_symbol('N', dace.int32) @@ -106,8 +111,12 @@ def test_loop_llvm_canonical(): sdfg.add_edge(guard, exitstate, dace.InterstateEdge('N <= 0')) sdfg.add_edge(guard, preheader, dace.InterstateEdge('N > 0')) sdfg.add_edge(preheader, body, dace.InterstateEdge(assignments=dict(i=0))) - sdfg.add_edge(body, latch, dace.InterstateEdge()) - sdfg.add_edge(latch, body, dace.InterstateEdge('i < N', assignments=dict(i='i + 1'))) + if increment_before_condition: + sdfg.add_edge(body, latch, dace.InterstateEdge(assignments=dict(i='i + 1'))) + sdfg.add_edge(latch, body, dace.InterstateEdge('i < N')) + else: + sdfg.add_edge(body, latch, dace.InterstateEdge()) + sdfg.add_edge(latch, body, dace.InterstateEdge('i < N', assignments=dict(i='i + 1'))) sdfg.add_edge(latch, loopexit, dace.InterstateEdge('i >= N')) sdfg.add_edge(loopexit, exitstate, dace.InterstateEdge()) @@ -118,9 +127,10 @@ def test_loop_llvm_canonical(): assert rng == (0, dace.symbol('N') - 1, 1) -@pytest.mark.skip('Extra incrementation states should not be supported by loop detection') @pytest.mark.parametrize('with_bounds_check', (False, True)) def test_loop_llvm_canonical_with_extras(with_bounds_check): + # Extra incrementation states (i.e., something more than a single edge between the latch and the body) should not + # be allowed and consequently not be detected as loops. 
sdfg = dace.SDFG('tester')
     sdfg.add_symbol('N', dace.int32)
 
@@ -148,17 +158,16 @@ def test_loop_llvm_canonical_with_extras(with_bounds_check):
     sdfg.add_edge(loopexit, exitstate, dace.InterstateEdge())
 
     xform = CountLoops()
-    assert sdfg.apply_transformations(xform) == 1
-    itvar, rng, _ = xform.loop_information()
-    assert itvar == 'i'
-    assert rng == (0, dace.symbol('N') - 1, 1)
+    assert sdfg.apply_transformations(xform) == 0
 
 
 if __name__ == '__main__':
     test_pyloop()
-    test_loop_rotated()
-    # test_loop_rotated_extra_increment()
+    test_loop_rotated(True)
+    test_loop_rotated(False)
+    test_loop_rotated_extra_increment()
     test_self_loop()
-    test_loop_llvm_canonical()
-    # test_loop_llvm_canonical_with_extras(False)
-    # test_loop_llvm_canonical_with_extras(True)
+    test_loop_llvm_canonical(True)
+    test_loop_llvm_canonical(False)
+    test_loop_llvm_canonical_with_extras(False)
+    test_loop_llvm_canonical_with_extras(True)

From 653ec33634617b7738be8214acda18df8bd9a356 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20M=C3=BCller?= <147368808+philip-paul-mueller@users.noreply.github.com>
Date: Thu, 17 Oct 2024 17:03:03 +0200
Subject: [PATCH 67/76] Updated `InlineMultistateSDFG` (#1689)

The `can_be_applied()` function did not consider the symbol map when the
shapes of the arrays were compared. This commit fixes this behaviour by
first applying a replacement step before the comparison.

Furthermore, the commit removes all the commented-out code.
---
 .../interstate/multistate_inline.py           | 223 +-----------------
 1 file changed, 7 insertions(+), 216 deletions(-)

diff --git a/dace/transformation/interstate/multistate_inline.py b/dace/transformation/interstate/multistate_inline.py
index 42dccd8616..f637f479dc 100644
--- a/dace/transformation/interstate/multistate_inline.py
+++ b/dace/transformation/interstate/multistate_inline.py
@@ -10,7 +10,7 @@
 from dace.sdfg.graph import MultiConnectorEdge
 from dace.sdfg import InterstateEdge, SDFG, SDFGState
 from dace.sdfg import utils as sdutil, infer_types
-from dace.sdfg.replace import replace_datadesc_names
+from dace.sdfg.replace import replace_datadesc_names, replace_properties_dict
 from dace.transformation import transformation, helpers
 from dace.properties import make_properties
 from dace import data
@@ -103,7 +103,10 @@ def can_be_applied(self, state: SDFGState, expr_index, sdfg, permissive=False):
                 if isinstance(outer_desc, data.View):
                     return False
 
-                inner_desc = nested_sdfg.sdfg.arrays[edge.dst_conn]
+                # We cannot compare the shapes directly; we have to consider the symbol map
+                # for that. Clone the descriptor because the operation is in-place.
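+                # (For example, an inner shape of (N,) under a symbol mapping of {'N': 20} only
+                # matches an outer shape of (20,) once the mapping has been applied.)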
+ inner_desc = nested_sdfg.sdfg.arrays[edge.dst_conn].clone() + symbolic.safe_replace(nested_sdfg.symbol_mapping, lambda m: replace_properties_dict(inner_desc, m)) if (outer_desc.shape != inner_desc.shape or outer_desc.strides != inner_desc.strides): return False @@ -121,7 +124,8 @@ def can_be_applied(self, state: SDFGState, expr_index, sdfg, permissive=False): if isinstance(outer_desc, data.View): return False - inner_desc = nested_sdfg.sdfg.arrays[edge.src_conn] + inner_desc = nested_sdfg.sdfg.arrays[edge.src_conn].clone() + symbolic.safe_replace(nested_sdfg.symbol_mapping, lambda m: replace_properties_dict(inner_desc, m)) if (outer_desc.shape != inner_desc.shape or outer_desc.strides != inner_desc.strides): return False @@ -208,27 +212,6 @@ def apply(self, outer_state: SDFGState, sdfg: SDFG): ####################################################### # Collect and modify access nodes as necessary - # Access nodes that need to be reshaped - # reshapes: Set(str) = set() - # for aname, array in nsdfg.arrays.items(): - # if array.transient: - # continue - # edge = None - # if aname in inputs: - # edge = inputs[aname] - # if len(array.shape) > len(edge.data.subset): - # reshapes.add(aname) - # continue - # if aname in outputs: - # edge = outputs[aname] - # if len(array.shape) > len(edge.data.subset): - # reshapes.add(aname) - # continue - # if edge is not None and not InlineMultistateSDFG._check_strides( - # array.strides, sdfg.arrays[edge.data.data].strides, - # edge.data, nsdfg_node): - # reshapes.add(aname) - # Mapping from nested transient name to top-level name transients: Dict[str, str] = {} @@ -281,50 +264,6 @@ def apply(self, outer_state: SDFGState, sdfg: SDFG): symbolic.safe_replace(repldict, lambda m: replace_datadesc_names(nsdfg, m), value_as_string=True) - # Add views whenever reshapes are necessary - # for dname in reshapes: - # desc = nsdfg.arrays[dname] - # # To avoid potential confusion, rename protected __return keyword - # if dname.startswith('__return'): - # newname = f'{nsdfg.name}_ret{dname[8:]}' - # else: - # newname = dname - # newname, _ = sdfg.add_view(newname, - # desc.shape, - # desc.dtype, - # storage=desc.storage, - # strides=desc.strides, - # offset=desc.offset, - # debuginfo=desc.debuginfo, - # allow_conflicts=desc.allow_conflicts, - # total_size=desc.total_size, - # alignment=desc.alignment, - # may_alias=desc.may_alias, - # find_new_name=True) - # repldict[dname] = newname - - # Add extra access nodes for out/in view nodes - # inv_reshapes = {repldict[r]: r for r in reshapes} - # for nstate in nsdfg.nodes(): - # for node in nstate.nodes(): - # if isinstance(node, - # nodes.AccessNode) and node.data in inv_reshapes: - # if nstate.in_degree(node) > 0 and nstate.out_degree( - # node) > 0: - # # Such a node has to be in the output set - # edge = outputs[inv_reshapes[node.data]] - - # # Redirect outgoing edges through access node - # out_edges = list(nstate.out_edges(node)) - # anode = nstate.add_access(edge.data.data) - # vnode = nstate.add_access(node.data) - # nstate.add_nedge(node, anode, edge.data) - # nstate.add_nedge(anode, vnode, edge.data) - # for e in out_edges: - # nstate.remove_edge(e) - # nstate.add_edge(vnode, e.src_conn, e.dst, - # e.dst_conn, e.data) - # Make unique names for states statenames = set(s.label for s in sdfg.nodes()) for nstate in nsdfg.nodes(): @@ -364,46 +303,6 @@ def apply(self, outer_state: SDFGState, sdfg: SDFG): sdfg.start_state = sdfg.node_id(source) # TODO: Modify memlets by offsetting - # If both source and sink nodes are 
inputs/outputs, reconnect once - # edges_to_ignore = self._modify_access_to_access(new_incoming_edges, - # nsdfg, nstate, state, - # orig_data) - - # source_to_outer = {n: e.src for n, e in new_incoming_edges.items()} - # sink_to_outer = {n: e.dst for n, e in new_outgoing_edges.items()} - # # If a source/sink node is one of the inputs/outputs, reconnect it, - # # replacing memlets in outgoing/incoming paths - # modified_edges = set() - # modified_edges |= self._modify_memlet_path(new_incoming_edges, nstate, - # state, sink_to_outer, True, - # edges_to_ignore) - # modified_edges |= self._modify_memlet_path(new_outgoing_edges, nstate, - # state, source_to_outer, - # False, edges_to_ignore) - - # # Reshape: add connections to viewed data - # self._modify_reshape_data(reshapes, repldict, inputs, nstate, state, - # True) - # self._modify_reshape_data(reshapes, repldict, outputs, nstate, state, - # False) - - # Modify all other internal edges pertaining to input/output nodes - # for nstate in nsdfg.nodes(): - # for node in nstate.nodes(): - # if isinstance(node, nodes.AccessNode): - # if node.data in input_set or node.data in output_set: - # if node.data in input_set: - # outer_edge = inputs[input_set[node.data]] - # else: - # outer_edge = outputs[output_set[node.data]] - - # for edge in state.all_edges(node): - # if (edge not in modified_edges - # and edge.data.data == node.data): - # for e in state.memlet_tree(edge): - # if e.data.data == node.data: - # e._data = helpers.unsqueeze_memlet( - # e.data, outer_edge.data) # Replace nested SDFG parents with new SDFG for nstate in nsdfg.nodes(): @@ -420,111 +319,3 @@ def apply(self, outer_state: SDFGState, sdfg: SDFG): sdfg._cfg_list = sdfg.reset_cfg_list() return nsdfg.nodes() - - # def _modify_access_to_access( - # self, - # input_edges: Dict[nodes.Node, MultiConnectorEdge], - # nsdfg: SDFG, - # nstate: SDFGState, - # state: SDFGState, - # orig_data: Dict[Union[nodes.AccessNode, MultiConnectorEdge], str], - # ) -> Set[MultiConnectorEdge]: - # """ - # Deals with access->access edges where both sides are non-transient. - # """ - # result = set() - # for node, top_edge in input_edges.items(): - # for inner_edge in nstate.out_edges(node): - # if inner_edge.dst not in orig_data: - # continue - # inner_data = orig_data[inner_edge.dst] - # if (isinstance(inner_edge.dst, nodes.AccessNode) - # and not nsdfg.arrays[inner_data].transient): - # matching_edge: MultiConnectorEdge = next( - # state.out_edges_by_connector(top_edge.dst, inner_data)) - # # Create memlet by unsqueezing both w.r.t. src and dst - # # subsets - # in_memlet = helpers.unsqueeze_memlet( - # inner_edge.data, top_edge.data) - # out_memlet = helpers.unsqueeze_memlet( - # inner_edge.data, matching_edge.data) - # new_memlet = in_memlet - # new_memlet.other_subset = out_memlet.subset - - # # Connect with new edge - # state.add_edge(top_edge.src, top_edge.src_conn, - # matching_edge.dst, matching_edge.dst_conn, - # new_memlet) - # result.add(inner_edge) - - # return result - - # def _modify_memlet_path( - # self, - # new_edges: Dict[nodes.Node, MultiConnectorEdge], - # nstate: SDFGState, - # state: SDFGState, - # inner_to_outer: Dict[nodes.Node, MultiConnectorEdge], - # inputs: bool, - # edges_to_ignore: Set[MultiConnectorEdge], - # ) -> Set[MultiConnectorEdge]: - # """ Modifies memlet paths in an inlined SDFG. Returns set of modified - # edges. 
- # """ - # result = set() - # for node, top_edge in new_edges.items(): - # inner_edges = (nstate.out_edges(node) - # if inputs else nstate.in_edges(node)) - # for inner_edge in inner_edges: - # if inner_edge in edges_to_ignore: - # continue - # new_memlet = helpers.unsqueeze_memlet(inner_edge.data, - # top_edge.data) - # if inputs: - # if inner_edge.dst in inner_to_outer: - # dst = inner_to_outer[inner_edge.dst] - # else: - # dst = inner_edge.dst - - # new_edge = state.add_edge(top_edge.src, top_edge.src_conn, - # dst, inner_edge.dst_conn, - # new_memlet) - # mtree = state.memlet_tree(new_edge) - # else: - # if inner_edge.src in inner_to_outer: - # # don't add edges twice - # continue - - # new_edge = state.add_edge(inner_edge.src, - # inner_edge.src_conn, top_edge.dst, - # top_edge.dst_conn, new_memlet) - # mtree = state.memlet_tree(new_edge) - - # # Modify all memlets going forward/backward - # def traverse(mtree_node): - # result.add(mtree_node.edge) - # mtree_node.edge._data = helpers.unsqueeze_memlet( - # mtree_node.edge.data, top_edge.data) - # for child in mtree_node.children: - # traverse(child) - - # for child in mtree.children: - # traverse(child) - - # return result - - # def _modify_reshape_data(self, reshapes: Set[str], repldict: Dict[str, str], - # new_edges: Dict[str, MultiConnectorEdge], - # nstate: SDFGState, state: SDFGState, inputs: bool): - # anodes = nstate.source_nodes() if inputs else nstate.sink_nodes() - # reshp = {repldict[r]: r for r in reshapes} - # for node in anodes: - # if not isinstance(node, nodes.AccessNode): - # continue - # if node.data not in reshp: - # continue - # edge = new_edges[reshp[node.data]] - # if inputs: - # state.add_edge(edge.src, edge.src_conn, node, None, edge.data) - # else: - # state.add_edge(node, None, edge.dst, edge.dst_conn, edge.data) From 4fbeba4155c6e317cbad54b08c2d4a75fe5e6985 Mon Sep 17 00:00:00 2001 From: Pratyai Mazumder Date: Fri, 18 Oct 2024 12:14:06 +0200 Subject: [PATCH 68/76] Some very minor improvement in one error handling and one warning message. (#1686) 1. Do not throw error if `clear_instrumentation_reports()` does not have anything to clear. (The function is useful to avoid accumulating many, many obsolete profile data files over time) 2. Put some more information in a warning message. --- dace/sdfg/sdfg.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 77ad8b31b5..38a41236a6 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -949,7 +949,11 @@ def clear_instrumentation_reports(self): Clears the instrumentation report folder of this SDFG. """ path = os.path.join(self.build_folder, 'perf') - for fname in os.listdir(path): + try: + files = os.listdir(path) + except FileNotFoundError: + return + for fname in files: if not fname.startswith('report-'): continue os.unlink(os.path.join(path, fname)) @@ -2288,8 +2292,8 @@ def compile(self, output_file=None, validate=True) -> 'CompiledSDFG': sdfg.name = f'{self.name}_{index}' index += 1 if self.name != sdfg.name: - warnings.warn('SDFG "%s" is already loaded by another object, ' - 'recompiling under a different name.' 
% self.name)
+            warnings.warn(f"SDFG '{self.name}' is already loaded by another object, recompiling under a different "
+                          f"name '{sdfg.name}'.")
 
         try:
             # Fill in scope entry/exit connectors

From 975a0657ce2904ed5c12693c3cd9debbc89b7ab0 Mon Sep 17 00:00:00 2001
From: edopao
Date: Fri, 18 Oct 2024 12:47:37 +0200
Subject: [PATCH 69/76] Extend TrivialTaskletElimination for map scope (#1650)

Extend the transformation `TrivialTaskletElimination` for the case where
the input or output of the copy-tasklet is a map node.

The following SDFG:
[image of the SDFG before the transformation omitted]

is transformed to this SDFG:
[image of the SDFG after the transformation omitted]

---
 .../dataflow/trivial_tasklet_elimination.py   |  48 ++++---
 .../trivial_tasklet_elimination_test.py       | 129 ++++++++++++++++++
 2 files changed, 160 insertions(+), 17 deletions(-)
 create mode 100644 tests/transformations/trivial_tasklet_elimination_test.py

diff --git a/dace/transformation/dataflow/trivial_tasklet_elimination.py b/dace/transformation/dataflow/trivial_tasklet_elimination.py
index b4c23524e2..6a84959f7d 100644
--- a/dace/transformation/dataflow/trivial_tasklet_elimination.py
+++ b/dace/transformation/dataflow/trivial_tasklet_elimination.py
@@ -17,48 +17,62 @@ class TrivialTaskletElimination(transformation.SingleStateTransformation):
     """
 
     read = transformation.PatternNode(nodes.AccessNode)
+    read_map = transformation.PatternNode(nodes.MapEntry)
     tasklet = transformation.PatternNode(nodes.Tasklet)
     write = transformation.PatternNode(nodes.AccessNode)
+    write_map = transformation.PatternNode(nodes.MapExit)
 
     @classmethod
     def expressions(cls):
-        return [sdutil.node_path_graph(cls.read, cls.tasklet, cls.write)]
+        return [
+            sdutil.node_path_graph(cls.read, cls.tasklet, cls.write),
+            sdutil.node_path_graph(cls.read_map, cls.tasklet, cls.write),
+            sdutil.node_path_graph(cls.read, cls.tasklet, cls.write_map),
+        ]
 
     def can_be_applied(self, graph, expr_index, sdfg, permissive=False):
-        read = self.read
+        read = self.read_map if expr_index == 1 else self.read
         tasklet = self.tasklet
-        write = self.write
-        # Do not apply on Streams
-        if isinstance(sdfg.arrays[read.data], data.Stream):
-            return False
-        if isinstance(sdfg.arrays[write.data], data.Stream):
+        write = self.write_map if expr_index == 2 else self.write
+        if len(tasklet.in_connectors) != 1:
             return False
         if len(graph.in_edges(tasklet)) != 1:
             return False
-        if len(graph.out_edges(tasklet)) != 1:
-            return False
-        if graph.edges_between(tasklet, write)[0].data.wcr:
-            return False
-        if len(tasklet.in_connectors) != 1:
-            return False
         if len(tasklet.out_connectors) != 1:
             return False
+        if len(graph.out_edges(tasklet)) != 1:
+            return False
         in_conn = list(tasklet.in_connectors.keys())[0]
         out_conn = list(tasklet.out_connectors.keys())[0]
         if tasklet.code.as_string != f'{out_conn} = {in_conn}':
             return False
-
+        read_memlet = graph.edges_between(read, tasklet)[0].data
+        read_desc = sdfg.arrays[read_memlet.data]
+        write_memlet = graph.edges_between(tasklet, write)[0].data
+        if write_memlet.wcr:
+            return False
+        write_desc = sdfg.arrays[write_memlet.data]
+        # Do not apply on streams
+        if isinstance(read_desc, data.Stream):
+            return False
+        if isinstance(write_desc, data.Stream):
+            return False
+        # Keep copy-tasklet connected to map node if source and destination nodes
+        # have different data type (implicit type cast)
+        if expr_index != 0 and read_desc.dtype != write_desc.dtype:
+            return False
+
        return True
 
     def apply(self, graph, sdfg):
-        read = self.read
+        read = self.read_map if self.expr_index == 1 else self.read
        tasklet = self.tasklet
-        write = self.write
+        write = self.write_map if 
self.expr_index == 2 else self.write in_edge = graph.edges_between(read, tasklet)[0] out_edge = graph.edges_between(tasklet, write)[0] graph.remove_edge(in_edge) graph.remove_edge(out_edge) out_edge.data.other_subset = in_edge.data.subset - graph.add_nedge(read, write, out_edge.data) + graph.add_edge(read, in_edge.src_conn, write, out_edge.dst_conn, out_edge.data) graph.remove_node(tasklet) diff --git a/tests/transformations/trivial_tasklet_elimination_test.py b/tests/transformations/trivial_tasklet_elimination_test.py new file mode 100644 index 0000000000..8f97b51b7e --- /dev/null +++ b/tests/transformations/trivial_tasklet_elimination_test.py @@ -0,0 +1,129 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. +import dace +from dace.transformation.dataflow.trivial_tasklet_elimination import TrivialTaskletElimination + + +N = 10 + + +def test_trivial_tasklet(): + ty_ = dace.int32 + sdfg = dace.SDFG("trivial_tasklet") + sdfg.add_symbol("s", ty_) + sdfg.add_array("v", (N,), ty_) + st = sdfg.add_state() + + tmp1_name, _ = sdfg.add_scalar(sdfg.temp_data_name(), ty_, transient=True) + tmp1_node = st.add_access(tmp1_name) + init_tasklet = st.add_tasklet("init", {}, {"out"}, "out = s") + st.add_edge(init_tasklet, "out", tmp1_node, None, dace.Memlet(tmp1_node.data)) + + tmp2_name, _ = sdfg.add_scalar(sdfg.temp_data_name(), ty_, transient=True) + tmp2_node = st.add_access(tmp2_name) + copy_tasklet = st.add_tasklet("copy", {"inp"}, {"out"}, "out = inp") + st.add_edge(tmp1_node, None, copy_tasklet, "inp", dace.Memlet(tmp1_node.data)) + st.add_edge(copy_tasklet, "out", tmp2_node, None, dace.Memlet(tmp2_node.data)) + + bcast_tasklet, _, _ = st.add_mapped_tasklet( + "bcast", + dict(i=f"0:{N}"), + inputs={"inp": dace.Memlet(f"{tmp2_node.data}[0]")}, + input_nodes={tmp2_node.data: tmp2_node}, + code="out = inp", + outputs={"out": dace.Memlet("v[i]")}, + external_edges=True, + ) + + sdfg.validate() + tasklet_nodes = {x for x in st.nodes() if isinstance(x, dace.nodes.Tasklet)} + assert tasklet_nodes == {init_tasklet, copy_tasklet, bcast_tasklet} + + count = sdfg.apply_transformations_repeated(TrivialTaskletElimination) + assert count == 1 + + assert len(st.out_edges(tmp1_node)) == 1 + assert st.out_edges(tmp1_node)[0].dst == tmp2_node + + tasklet_nodes = {x for x in st.nodes() if isinstance(x, dace.nodes.Tasklet)} + assert tasklet_nodes == {init_tasklet, bcast_tasklet} + + +def test_trivial_tasklet_with_map(): + ty_ = dace.int32 + sdfg = dace.SDFG("trivial_tasklet_with_map") + sdfg.add_symbol("s", ty_) + sdfg.add_array("v", (N,), ty_) + st = sdfg.add_state() + + tmp1_name, _ = sdfg.add_scalar(sdfg.temp_data_name(), ty_, transient=True) + tmp1_node = st.add_access(tmp1_name) + init_tasklet = st.add_tasklet("init", {}, {"out"}, "out = s") + st.add_edge(init_tasklet, "out", tmp1_node, None, dace.Memlet(tmp1_node.data)) + + me, mx = st.add_map("bcast", dict(i=f"0:{N}")) + + copy_tasklet = st.add_tasklet("copy", {"inp"}, {"out"}, "out = inp") + st.add_memlet_path(tmp1_node, me, copy_tasklet, dst_conn="inp", memlet=dace.Memlet(f"{tmp1_node.data}[0]")) + tmp2_name, _ = sdfg.add_scalar(sdfg.temp_data_name(), ty_, transient=True) + tmp2_node = st.add_access(tmp2_name) + st.add_edge(copy_tasklet, "out", tmp2_node, None, dace.Memlet(tmp2_node.data)) + + bcast_tasklet = st.add_tasklet("bcast", {"inp"}, {"out"}, "out = inp") + st.add_edge(tmp2_node, None, bcast_tasklet, "inp", dace.Memlet(tmp2_node.data)) + st.add_memlet_path(bcast_tasklet, mx, st.add_access("v"), src_conn="out", 
memlet=dace.Memlet("v[i]"))
+
+    sdfg.validate()
+    tasklet_nodes = {x for x in st.nodes() if isinstance(x, dace.nodes.Tasklet)}
+    assert tasklet_nodes == {init_tasklet, copy_tasklet, bcast_tasklet}
+
+    count = sdfg.apply_transformations_repeated(TrivialTaskletElimination)
+    assert count == 2
+
+    tasklet_nodes = {x for x in st.nodes() if isinstance(x, dace.nodes.Tasklet)}
+    assert tasklet_nodes == {init_tasklet}
+
+    assert len(st.in_edges(tmp2_node)) == 1
+    assert st.in_edges(tmp2_node)[0].src == me
+
+    assert len(st.out_edges(tmp2_node)) == 1
+    assert st.out_edges(tmp2_node)[0].dst == mx
+
+
+def test_trivial_tasklet_with_implicit_cast():
+    ty32_ = dace.int32
+    ty64_ = dace.int64
+    sdfg = dace.SDFG("trivial_tasklet_with_implicit_cast")
+    sdfg.add_symbol("s", ty32_)
+    sdfg.add_array("v", (N,), ty32_)
+    st = sdfg.add_state()
+
+    tmp1_name, _ = sdfg.add_scalar(sdfg.temp_data_name(), ty32_, transient=True)
+    tmp1_node = st.add_access(tmp1_name)
+    init_tasklet = st.add_tasklet("init", {}, {"out"}, "out = s")
+    st.add_edge(init_tasklet, "out", tmp1_node, None, dace.Memlet(tmp1_node.data))
+
+    me, mx = st.add_map("bcast", dict(i=f"0:{N}"))
+
+    copy_tasklet = st.add_tasklet("copy", {"inp"}, {"out"}, "out = inp")
+    st.add_memlet_path(tmp1_node, me, copy_tasklet, dst_conn="inp", memlet=dace.Memlet(f"{tmp1_node.data}[0]"))
+    tmp2_name, _ = sdfg.add_scalar(sdfg.temp_data_name(), ty64_, transient=True)
+    tmp2_node = st.add_access(tmp2_name)
+    st.add_edge(copy_tasklet, "out", tmp2_node, None, dace.Memlet(tmp2_node.data))
+
+    bcast_tasklet = st.add_tasklet("bcast", {"inp"}, {"out"}, "out = inp")
+    st.add_edge(tmp2_node, None, bcast_tasklet, "inp", dace.Memlet(tmp2_node.data))
+    st.add_memlet_path(bcast_tasklet, mx, st.add_access("v"), src_conn="out", memlet=dace.Memlet("v[i]"))
+
+    sdfg.validate()
+    tasklet_nodes = {x for x in st.nodes() if isinstance(x, dace.nodes.Tasklet)}
+    assert tasklet_nodes == {init_tasklet, copy_tasklet, bcast_tasklet}
+
+    # not applied because of data type mismatch on read/write nodes
+    count = sdfg.apply_transformations_repeated(TrivialTaskletElimination)
+    assert count == 0
+
+
+if __name__ == '__main__':
+    test_trivial_tasklet()
+    test_trivial_tasklet_with_map()
+    test_trivial_tasklet_with_implicit_cast()

From 380554f709f0cffe6407dab9a9ee60655264aa9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20M=C3=BCller?= <147368808+philip-paul-mueller@users.noreply.github.com>
Date: Wed, 23 Oct 2024 17:17:50 +0200
Subject: [PATCH 70/76] Fix to Read and Write Sets (#1678)

During my work on the [new map fusion](https://github.com/spcl/dace/issues/1643) I discovered a bug in `SDFGState._read_and_write_set()`.
Originally I solved it there, but it was decided to move it into its own PR.

Let's look at the first, super silly example, which is not useful on its own. The main point here is that the `data` attribute of the Memlet does not refer to the source of the connection but to the destination.

![test_1](https://github.com/user-attachments/assets/740ee4fc-cfe5-4844-a999-e316cb8f9c16)

BTW: The Webviewer outputs something like `B[0] -> [0, 0]`; however, the parser of the Memlet constructor does not understand this. It must be written as `B[0] -> 0, 0`, i.e. the second set of brackets must be omitted; this should be changed!

From the above we would expect the following sets:
- Reads:
  - `A`: `[Range (0, 0)]`
  - `B`: Should not be listed in this set, because it is fully read and written; thus it is excluded.
- Writes:
  - `B`: `[Range (0)]`
  - `C`: `[Range (0, 0), Range (1, 1)]`

However, the current implementation gives us:
- Reads: `{'A': [Range (0)], 'B': [Range (1, 1)]}`
- Writes: `{'B': [Range (0)], 'C': [Range (1, 1), Range (0)]}`

The current behaviour is wrong because:
- `A` is a `2x2` array, thus the read set should also have two dimensions.
- `B` in the read set: it is a scalar, but the reported range has two dimensions; furthermore, it should not be present at all.
- `C`: the first member of the write set (`Range(1, 1)`) is correct, while the second (`Range(0)`) is horribly wrong.

The second example is even simpler.

![test_2](https://github.com/user-attachments/assets/da3d03af-6f10-411f-952e-ab057ed057c6)

From the SDFG we expect the following sets:
- Reads:
  - `A`: `[Range(0, 0)]`
- Writes:
  - `B`: `[Range(0)]`

It is important that in the above example `other_subset` is `None` and `data` is set to `A`, so it is not one of these "crazy" non-standard Memlets we have seen in the first test.
However, the current implementation gives us:
- Reads: `{'A': [Range (0, 0)]}`
- Writes: `{'B': [Range (0, 0)]}`

This clearly shows that whatever the implementation does is not correct.

---
 dace/sdfg/state.py                            | 103 +++++++++------
 tests/sdfg/state_test.py                      |  93 ++++++++++++++-
 .../move_loop_into_map_test.py                |  64 ++++++++++-
 .../transformations/prune_connectors_test.py  |  22 +---
 .../refine_nested_access_test.py              | 108 ++++++++++++++++++
 5 files changed, 328 insertions(+), 62 deletions(-)

diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py
index 2ae6109b31..09e7607d65 100644
--- a/dace/sdfg/state.py
+++ b/dace/sdfg/state.py
@@ -745,51 +745,82 @@ def update_if_not_none(dic, update):
 
         return defined_syms
 
+
     def _read_and_write_sets(self) -> Tuple[Dict[AnyStr, List[Subset]], Dict[AnyStr, List[Subset]]]:
         """
         Determines what data is read and written in this subgraph, returning
         dictionaries from data containers to all subsets that are read/written.
         """
+        from dace.sdfg import utils  # Avoid cyclic import
+
+        # Ensures that the `{src,dst}_subset` are properly set.
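+        # (`Memlet.try_initialize()` fills in the missing src/dst subsets from the
+        # edge's endpoints, so the look-ups below can rely on them.)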
+ # TODO: find where the problems are + for edge in self.edges(): + edge.data.try_initialize(self.sdfg, self, edge) + read_set = collections.defaultdict(list) write_set = collections.defaultdict(list) - from dace.sdfg import utils # Avoid cyclic import - subgraphs = utils.concurrent_subgraphs(self) - for sg in subgraphs: - rs = collections.defaultdict(list) - ws = collections.defaultdict(list) - # Traverse in topological order, so data that is written before it - # is read is not counted in the read set - for n in utils.dfs_topological_sort(sg, sources=sg.source_nodes()): - if isinstance(n, nd.AccessNode): - in_edges = sg.in_edges(n) - out_edges = sg.out_edges(n) - # Filter out memlets which go out but the same data is written to the AccessNode by another memlet - for out_edge in list(out_edges): - for in_edge in list(in_edges): - if (in_edge.data.data == out_edge.data.data - and in_edge.data.dst_subset.covers(out_edge.data.src_subset)): - out_edges.remove(out_edge) - break - - for e in in_edges: - # skip empty memlets - if e.data.is_empty(): - continue - # Store all subsets that have been written - ws[n.data].append(e.data.subset) - for e in out_edges: - # skip empty memlets - if e.data.is_empty(): - continue - rs[n.data].append(e.data.subset) - # Union all subgraphs, so an array that was excluded from the read - # set because it was written first is still included if it is read - # in another subgraph - for data, accesses in rs.items(): + + # NOTE: In a previous version a _single_ read (i.e. leaving Memlet) that was + # fully covered by a single write (i.e. an incoming Memlet) was removed from + # the read set and only the write survived. However, this was never fully + # implemented nor correctly implemented and caused problems. + # So this filtering was removed. + + for subgraph in utils.concurrent_subgraphs(self): + subgraph_read_set = collections.defaultdict(list) # read and write set of this subgraph. + subgraph_write_set = collections.defaultdict(list) + for n in utils.dfs_topological_sort(subgraph, sources=subgraph.source_nodes()): + if not isinstance(n, nd.AccessNode): + # Read and writes can only be done through access nodes, + # so ignore every other node. + continue + + # Get a list of all incoming (writes) and outgoing (reads) edges of the + # access node, ignore all empty memlets as they do not carry any data. + in_edges = [in_edge for in_edge in subgraph.in_edges(n) if not in_edge.data.is_empty()] + out_edges = [out_edge for out_edge in subgraph.out_edges(n) if not out_edge.data.is_empty()] + + # Extract the subsets that describes where we read and write the data + # and store them for the later filtering. + # NOTE: In certain cases the corresponding subset might be None, in this case + # we assume that the whole array is written, which is the default behaviour. + ac_desc = n.desc(self.sdfg) + ac_size = ac_desc.total_size + in_subsets = dict() + for in_edge in in_edges: + # Ensure that if the destination subset is not given, our assumption, that the + # whole array is written to, is valid, by testing if the memlet transfers the + # whole array. 
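+                    # (E.g. an incoming edge carrying Memlet("A[0:2, 0:2]") into an A of
+                    # shape (2, 2) has num_elements() == 4 == total_size, so falling back
+                    # to the full-array range below is sound.)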
+ assert (in_edge.data.dst_subset is not None) or (in_edge.data.num_elements() == ac_size) + in_subsets[in_edge] = ( + sbs.Range.from_array(ac_desc) + if in_edge.data.dst_subset is None + else in_edge.data.dst_subset + ) + out_subsets = dict() + for out_edge in out_edges: + assert (out_edge.data.src_subset is not None) or (out_edge.data.num_elements() == ac_size) + out_subsets[out_edge] = ( + sbs.Range.from_array(ac_desc) + if out_edge.data.src_subset is None + else out_edge.data.src_subset + ) + + # Update the read and write sets of the subgraph. + if in_edges: + subgraph_write_set[n.data].extend(in_subsets.values()) + if out_edges: + subgraph_read_set[n.data].extend(out_subsets[out_edge] for out_edge in out_edges) + + # Add the subgraph's read and write set to the final ones. + for data, accesses in subgraph_read_set.items(): read_set[data] += accesses - for data, accesses in ws.items(): + for data, accesses in subgraph_write_set.items(): write_set[data] += accesses - return read_set, write_set + + return copy.deepcopy((read_set, write_set)) + def read_and_write_sets(self) -> Tuple[Set[AnyStr], Set[AnyStr]]: """ diff --git a/tests/sdfg/state_test.py b/tests/sdfg/state_test.py index 7ba43ac4c0..4bde3788e0 100644 --- a/tests/sdfg/state_test.py +++ b/tests/sdfg/state_test.py @@ -1,5 +1,6 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. import dace +from dace import subsets as sbs from dace.transformation.helpers import find_sdfg_control_flow @@ -19,7 +20,9 @@ def test_read_write_set(): state.add_memlet_path(rw_b, task2, dst_conn='B', memlet=dace.Memlet('B[2]')) state.add_memlet_path(task2, write_c, src_conn='C', memlet=dace.Memlet('C[2]')) - assert 'B' not in state.read_and_write_sets()[0] + read_set, write_set = state.read_and_write_sets() + assert {'B', 'A'} == read_set + assert {'C', 'B'} == write_set def test_read_write_set_y_formation(): @@ -41,7 +44,10 @@ def test_read_write_set_y_formation(): state.add_memlet_path(rw_b, task2, dst_conn='B', memlet=dace.Memlet(data='B', subset='0')) state.add_memlet_path(task2, write_c, src_conn='C', memlet=dace.Memlet(data='C', subset='0')) - assert 'B' not in state.read_and_write_sets()[0] + read_set, write_set = state.read_and_write_sets() + assert {'B', 'A'} == read_set + assert {'C', 'B'} == write_set + def test_deepcopy_state(): N = dace.symbol('N') @@ -58,6 +64,87 @@ def double_loop(arr: dace.float32[N]): sdfg.validate() +def test_read_and_write_set_filter(): + sdfg = dace.SDFG('graph') + state = sdfg.add_state('state') + sdfg.add_array('A', [2, 2], dace.float64) + sdfg.add_scalar('B', dace.float64) + sdfg.add_array('C', [2, 2], dace.float64) + A, B, C = (state.add_access(name) for name in ('A', 'B', 'C')) + + state.add_nedge( + A, + B, + dace.Memlet("B[0] -> [0, 0]"), + ) + state.add_nedge( + B, + C, + dace.Memlet("C[1, 1] -> [0]"), + ) + state.add_nedge( + B, + C, + dace.Memlet("B[0] -> [0, 0]"), + ) + sdfg.validate() + + expected_reads = { + "A": [sbs.Range.from_string("0, 0")], + "B": [sbs.Range.from_string("0")], + } + expected_writes = { + "B": [sbs.Range.from_string("0")], + "C": [sbs.Range.from_string("0, 0"), sbs.Range.from_string("1, 1")], + } + read_set, write_set = state._read_and_write_sets() + + for expected_sets, computed_sets in [(expected_reads, read_set), (expected_writes, write_set)]: + assert expected_sets.keys() == computed_sets.keys(), f"Expected the set to contain '{expected_sets.keys()}' but got '{computed_sets.keys()}'." 
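+        # The order of the computed subsets is not specified, so compare them
+        # set-like: every expected subset must equal some computed subset.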
+        for access_data in expected_sets.keys():
+            for exp in expected_sets[access_data]:
+                found_match = False
+                for res in computed_sets[access_data]:
+                    if res == exp:
+                        found_match = True
+                        break
+                assert found_match, f"Could not find the subset '{exp}', only got '{computed_sets}'"
+
+
+def test_read_and_write_set_selection():
+    sdfg = dace.SDFG('graph')
+    state = sdfg.add_state('state')
+    sdfg.add_array('A', [2, 2], dace.float64)
+    sdfg.add_scalar('B', dace.float64)
+    A, B = (state.add_access(name) for name in ('A', 'B'))
+
+    state.add_nedge(
+        A,
+        B,
+        dace.Memlet("A[0, 0]"),
+    )
+    sdfg.validate()
+
+    expected_reads = {
+        "A": [sbs.Range.from_string("0, 0")],
+    }
+    expected_writes = {
+        "B": [sbs.Range.from_string("0")],
+    }
+    read_set, write_set = state._read_and_write_sets()
+
+    for expected_sets, computed_sets in [(expected_reads, read_set), (expected_writes, write_set)]:
+        assert expected_sets.keys() == computed_sets.keys(), f"Expected the set to contain '{expected_sets.keys()}' but got '{computed_sets.keys()}'."
+        for access_data in expected_sets.keys():
+            for exp in expected_sets[access_data]:
+                found_match = False
+                for res in computed_sets[access_data]:
+                    if res == exp:
+                        found_match = True
+                        break
+                assert found_match, f"Could not find the subset '{exp}', only got '{computed_sets}'"
+
+
 def test_add_mapped_tasklet():
     sdfg = dace.SDFG("test_add_mapped_tasklet")
     state = sdfg.add_state(is_start_block=True)
@@ -82,6 +169,8 @@ def test_add_mapped_tasklet():
 
 
 if __name__ == '__main__':
+    test_read_and_write_set_selection()
+    test_read_and_write_set_filter()
     test_read_write_set()
     test_read_write_set_y_formation()
     test_deepcopy_state()
diff --git a/tests/transformations/move_loop_into_map_test.py b/tests/transformations/move_loop_into_map_test.py
index dca775bb7a..ad51941cb0 100644
--- a/tests/transformations/move_loop_into_map_test.py
+++ b/tests/transformations/move_loop_into_map_test.py
@@ -2,6 +2,7 @@
 import dace
 from dace.transformation.interstate import MoveLoopIntoMap
 import unittest
+import copy
 import numpy as np
 
 I = dace.symbol("I")
@@ -147,7 +148,12 @@ def test_apply_multiple_times_1(self):
         self.assertTrue(np.allclose(val, ref))
 
     def test_more_than_a_map(self):
-        """ `out` is read and written indirectly by the MapExit, potentially leading to a RW dependency. """
+        """
+        `out` is read and written indirectly by the MapExit, potentially leading to a RW dependency.
+
+        Note that there is actually no dependency; however, because the transformation relies
+        on `SDFGState.read_and_write_sets()`, it cannot detect this and can thus not be applied.
+ """ sdfg = dace.SDFG('more_than_a_map') _, aarr = sdfg.add_array('A', (3, 3), dace.float64) _, barr = sdfg.add_array('B', (3, 3), dace.float64) @@ -167,11 +173,12 @@ def test_more_than_a_map(self): external_edges=True, input_nodes=dict(out=oread, B=bread), output_nodes=dict(tmp=twrite)) - body.add_nedge(aread, oread, dace.Memlet.from_array('A', aarr)) + body.add_nedge(aread, oread, dace.Memlet.from_array('A', oarr)) body.add_nedge(twrite, owrite, dace.Memlet.from_array('out', oarr)) sdfg.add_loop(None, body, None, '_', '0', '_ < 10', '_ + 1') - count = sdfg.apply_transformations(MoveLoopIntoMap) - self.assertFalse(count > 0) + + count = sdfg.apply_transformations(MoveLoopIntoMap, validate_all=True, validate=True) + self.assertTrue(count == 0) def test_more_than_a_map_1(self): """ @@ -269,6 +276,55 @@ def test_more_than_a_map_3(self): count = sdfg.apply_transformations(MoveLoopIntoMap) self.assertFalse(count > 0) + def test_more_than_a_map_4(self): + """ + The test is very similar to `test_more_than_a_map()`. But a memlet is different + which leads to a RW dependency, which blocks the transformation. + """ + sdfg = dace.SDFG('more_than_a_map') + _, aarr = sdfg.add_array('A', (3, 3), dace.float64) + _, barr = sdfg.add_array('B', (3, 3), dace.float64) + _, oarr = sdfg.add_array('out', (3, 3), dace.float64) + _, tarr = sdfg.add_array('tmp', (3, 3), dace.float64, transient=True) + body = sdfg.add_state('map_state') + aread = body.add_access('A') + oread = body.add_access('out') + bread = body.add_access('B') + twrite = body.add_access('tmp') + owrite = body.add_access('out') + body.add_mapped_tasklet('op', + dict(i='0:3', j='0:3'), + dict(__in1=dace.Memlet('out[i, j]'), __in2=dace.Memlet('B[i, j]')), + '__out = __in1 - __in2', + dict(__out=dace.Memlet('tmp[i, j]')), + external_edges=True, + input_nodes=dict(out=oread, B=bread), + output_nodes=dict(tmp=twrite)) + body.add_nedge(aread, oread, dace.Memlet('A[Mod(_, 3), 0:3] -> [Mod(_ + 1, 3), 0:3]', aarr)) + body.add_nedge(twrite, owrite, dace.Memlet.from_array('out', oarr)) + sdfg.add_loop(None, body, None, '_', '0', '_ < 10', '_ + 1') + + sdfg_args_ref = { + "A": np.array(np.random.rand(3, 3), dtype=np.float64), + "B": np.array(np.random.rand(3, 3), dtype=np.float64), + "out": np.array(np.random.rand(3, 3), dtype=np.float64), + } + sdfg_args_res = copy.deepcopy(sdfg_args_ref) + + # Perform the reference execution + sdfg(**sdfg_args_ref) + + # Apply the transformation and execute the SDFG again. + count = sdfg.apply_transformations(MoveLoopIntoMap, validate_all=True, validate=True) + sdfg(**sdfg_args_res) + + for name in sdfg_args_ref.keys(): + self.assertTrue( + np.allclose(sdfg_args_ref[name], sdfg_args_res[name]), + f"Miss match for {name}", + ) + self.assertFalse(count > 0) + if __name__ == '__main__': unittest.main() diff --git a/tests/transformations/prune_connectors_test.py b/tests/transformations/prune_connectors_test.py index 63bbe5843f..b7b287d77e 100644 --- a/tests/transformations/prune_connectors_test.py +++ b/tests/transformations/prune_connectors_test.py @@ -153,7 +153,6 @@ def _make_read_write_sdfg( Depending on `conforming_memlet` the memlet that copies `inner_A` into `inner_B` will either be associated to `inner_A` (`True`) or `inner_B` (`False`). - This choice has consequences on if the transformation can apply or not. 
Notes: This is most likely a bug, see [issue#1643](https://github.com/spcl/dace/issues/1643), @@ -332,16 +331,6 @@ def test_unused_retval_2(): assert np.allclose(a, 1) -def test_read_write_1(): - # Because the memlet is conforming, we can apply the transformation. - sdfg = _make_read_write_sdfg(True) - - assert first_mode == PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=osdfg, expr_index=0, permissive=False) - - - - - def test_prune_connectors_with_dependencies(): sdfg = dace.SDFG('tester') A, A_desc = sdfg.add_array('A', [4], dace.float64) @@ -420,18 +409,11 @@ def test_prune_connectors_with_dependencies(): assert np.allclose(np_d, np_d_) -def test_read_write_1(): - # Because the memlet is conforming, we can apply the transformation. +def test_read_write(): sdfg, nsdfg = _make_read_write_sdfg(True) + assert not PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False) - assert PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False) - sdfg.apply_transformations_repeated(PruneConnectors, validate=True, validate_all=True) - - -def test_read_write_2(): - # Because the memlet is not conforming, we can not apply the transformation. sdfg, nsdfg = _make_read_write_sdfg(False) - assert not PruneConnectors.can_be_applied_to(nsdfg=nsdfg, sdfg=sdfg, expr_index=0, permissive=False) diff --git a/tests/transformations/refine_nested_access_test.py b/tests/transformations/refine_nested_access_test.py index d9fb9a7392..81640665ed 100644 --- a/tests/transformations/refine_nested_access_test.py +++ b/tests/transformations/refine_nested_access_test.py @@ -156,7 +156,115 @@ def inner_sdfg(A: dace.int32[5], B: dace.int32[5, 5], idx_a: int, idx_b: int): assert np.allclose(ref, val) +def _make_rna_read_and_write_set_sdfg(diff_in_out: bool) -> dace.SDFG: + """Generates the SDFG for the `test_rna_read_and_write_sets_*()` tests. + + If `diff_in_out` is `False` then the output is also used as temporary storage + within the nested SDFG. Because of the definition of the read/write sets, + this usage of the temporary storage is not picked up and it is only considered + as write set. + + If `diff_in_out` is true, then a different storage container, which is classified + as output, is used as temporary storage. + + This test was added during [PR#1678](https://github.com/spcl/dace/pull/1678). + """ + + def _make_nested_sdfg(diff_in_out: bool) -> dace.SDFG: + sdfg = dace.SDFG("inner_sdfg") + state = sdfg.add_state(is_start_block=True) + sdfg.add_array("A", dtype=dace.float64, shape=(2,), transient=False) + sdfg.add_array("T1", dtype=dace.float64, shape=(2,), transient=False) + + A = state.add_access("A") + T1_output = state.add_access("T1") + if diff_in_out: + sdfg.add_array("T2", dtype=dace.float64, shape=(2,), transient=False) + T1_input = state.add_access("T2") + else: + T1_input = state.add_access("T1") + + tsklt = state.add_tasklet( + "comp", + inputs={"__in1": None, "__in2": None}, + outputs={"__out": None}, + code="__out = __in1 + __in2", + ) + + state.add_edge(A, None, tsklt, "__in1", dace.Memlet("A[1]")) + # An alternative would be to write to a different location here. + # Then, the data would be added to the access node. 
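+        # ("A[0] -> [0]" reads element A[0] and writes it to element 0 of the
+        #  destination; the part after "->" is the subset on the other side.)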
+ state.add_edge(A, None, T1_input, None, dace.Memlet("A[0] -> [0]")) + state.add_edge(T1_input, None, tsklt, "__in2", dace.Memlet(T1_input.data + "[0]")) + state.add_edge(tsklt, "__out", T1_output, None, dace.Memlet(T1_output.data + "[1]")) + return sdfg + + sdfg = dace.SDFG("Parent_SDFG") + state = sdfg.add_state(is_start_block=True) + + sdfg.add_array("A", dtype=dace.float64, shape=(2,), transient=False) + sdfg.add_array("T1", dtype=dace.float64, shape=(2,), transient=False) + sdfg.add_array("T2", dtype=dace.float64, shape=(2,), transient=False) + A = state.add_access("A") + T1 = state.add_access("T1") + + nested_sdfg = _make_nested_sdfg(diff_in_out) + + nsdfg = state.add_nested_sdfg( + nested_sdfg, + parent=sdfg, + inputs={"A"}, + outputs={"T2", "T1"} if diff_in_out else {"T1"}, + symbol_mapping={}, + ) + + state.add_edge(A, None, nsdfg, "A", dace.Memlet("A[0:2]")) + state.add_edge(nsdfg, "T1", T1, None, dace.Memlet("T1[0:2]")) + + if diff_in_out: + state.add_edge(nsdfg, "T2", state.add_access("T2"), None, dace.Memlet("T2[0:2]")) + sdfg.validate() + return sdfg + + +def test_rna_read_and_write_sets_doule_use(): + # The transformation does not apply because we access element `0` of both arrays that we + # pass inside the nested SDFG. + sdfg = _make_rna_read_and_write_set_sdfg(False) + nb_applied = sdfg.apply_transformations_repeated( + [RefineNestedAccess], + validate=True, + validate_all=True, + ) + assert nb_applied == 0 + + +def test_rna_read_and_write_sets_different_storage(): + + # There is a dedicated temporary storage used. + sdfg = _make_rna_read_and_write_set_sdfg(True) + + nb_applied = sdfg.apply_transformations_repeated( + [RefineNestedAccess], + validate=True, + validate_all=True, + ) + assert nb_applied > 0 + + args = { + "A": np.array(np.random.rand(2), dtype=np.float64, copy=True), + "T2": np.array(np.random.rand(2), dtype=np.float64, copy=True), + "T1": np.zeros(2, dtype=np.float64), + } + ref = args["A"][0] + args["A"][1] + sdfg(**args) + res = args["T1"][1] + assert np.allclose(res, ref), f"Expected '{ref}' but got '{res}'." + + if __name__ == '__main__': test_refine_dataflow() test_refine_interstate() test_free_symbols_only_by_indices() + test_rna_read_and_write_sets_different_storage() + test_rna_read_and_write_sets_doule_use() From 0217f26ff89ea86944a83539c7c47568bd7463c2 Mon Sep 17 00:00:00 2001 From: Pratyai Mazumder Date: Thu, 24 Oct 2024 07:11:37 +0200 Subject: [PATCH 71/76] Make `is_empty()` and `propagate_subset()` not unnecessarily rely on the `src` and `dst` (#1699) --- dace/memlet.py | 20 +++++++++----------- dace/sdfg/propagation.py | 13 +++++++++---- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/dace/memlet.py b/dace/memlet.py index 1e39b4179d..f78da3a6b7 100644 --- a/dace/memlet.py +++ b/dace/memlet.py @@ -230,7 +230,7 @@ def is_empty(self) -> bool: primarily used for connecting nodes to scopes without transferring data to them. 
""" - return (self.data is None and self.src_subset is None and self.dst_subset is None) + return (self.data is None and self.subset is None and self.other_subset is None) @property def num_accesses(self): @@ -561,20 +561,18 @@ def used_symbols(self, all_symbols: bool, edge=None) -> Set[str]: view_edge = True if not view_edge: - if self.src_subset: - result |= self.src_subset.free_symbols - - if self.dst_subset: - result |= self.dst_subset.free_symbols + if self.subset: + result |= self.subset.free_symbols + if self.other_subset: + result |= self.other_subset.free_symbols else: # View edges do not require the end of the range nor strides - if self.src_subset: - for rb, _, _ in self.src_subset.ndrange(): + if self.subset: + for rb, _, _ in self.subset.ndrange(): if symbolic.issymbolic(rb): result |= set(map(str, rb.free_symbols)) - - if self.dst_subset: - for rb, _, _ in self.dst_subset.ndrange(): + if self.other_subset: + for rb, _, _ in self.other_subset.ndrange(): if symbolic.issymbolic(rb): result |= set(map(str, rb.free_symbols)) diff --git a/dace/sdfg/propagation.py b/dace/sdfg/propagation.py index f62bb6eb58..a24db0c72b 100644 --- a/dace/sdfg/propagation.py +++ b/dace/sdfg/propagation.py @@ -1430,10 +1430,15 @@ def propagate_subset(memlets: List[Memlet], tmp_subset = None subset = None - if use_dst and md.dst_subset is not None: - subset = md.dst_subset - elif not use_dst and md.src_subset is not None: - subset = md.src_subset + src, dst = md.subset, md.other_subset + if md._is_data_src is not None: + # Ideally, this should always be the case. In practice, it is not always so. So, if the memlet is uninitialized + # for some reason, we just explicitly fallback to `subset` and `other_subset` to retain the prior behaviour. + src, dst = md.src_subset, md.dst_subset + if use_dst and dst is not None: + subset = dst + elif not use_dst and src is not None: + subset = src else: subset = md.subset From 4f5655390e17010d93bc64f35bacef762e4aef98 Mon Sep 17 00:00:00 2001 From: iBug Date: Thu, 24 Oct 2024 22:46:53 +0800 Subject: [PATCH 72/76] fix(codegen/prettycode): Use base_indentation as intended (#1697) --- dace/codegen/prettycode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/codegen/prettycode.py b/dace/codegen/prettycode.py index de143f5e86..0fc4ebe3f1 100644 --- a/dace/codegen/prettycode.py +++ b/dace/codegen/prettycode.py @@ -14,7 +14,7 @@ class CodeIOStream(StringIO): nodes. 
""" def __init__(self, base_indentation=0): super(CodeIOStream, self).__init__() - self._indent = 0 + self._indent = base_indentation self._spaces = int(Config.get('compiler', 'indentation_spaces')) self._lineinfo = Config.get_bool('compiler', 'codegen_lineinfo') From 2bf537a2c8e9764baeaf6fd0b978bbe0486dbfc3 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Thu, 24 Oct 2024 08:17:24 -0700 Subject: [PATCH 73/76] Rename `master` branch to `main` --- .github/workflows/fpga-ci.yml | 6 +++--- .github/workflows/general-ci.yml | 6 +++--- .github/workflows/gpu-ci.yml | 6 +++--- .github/workflows/heterogeneous-ci.yml | 6 +++--- .github/workflows/pyFV3-ci.yml | 6 +++--- CONTRIBUTING.md | 2 +- README.md | 18 +++++++++--------- dace/frontend/python/README.md | 2 +- doc/codegen/codegen.rst | 6 +++--- doc/extensions/extensions.rst | 8 ++++---- doc/frontend/daceprograms.rst | 4 ++-- doc/ide/cli.rst | 2 +- doc/optimization/gpu.rst | 4 ++-- doc/optimization/optimization.rst | 6 +++--- doc/optimization/profiling.rst | 4 ++-- doc/optimization/vscode.rst | 2 +- doc/sdfg/ir.rst | 2 +- doc/sdfg/transformations.rst | 2 +- doc/setup/integration.rst | 2 +- doc/setup/quickstart.rst | 4 ++-- tutorials/benchmarking.ipynb | 2 +- 21 files changed, 50 insertions(+), 50 deletions(-) diff --git a/.github/workflows/fpga-ci.yml b/.github/workflows/fpga-ci.yml index d03d044b30..29be0ec1f1 100644 --- a/.github/workflows/fpga-ci.yml +++ b/.github/workflows/fpga-ci.yml @@ -2,11 +2,11 @@ name: FPGA Tests on: push: - branches: [ master, ci-fix ] + branches: [ main, ci-fix ] pull_request: - branches: [ master, ci-fix ] + branches: [ main, ci-fix ] merge_group: - branches: [ master, ci-fix ] + branches: [ main, ci-fix ] jobs: test-fpga: diff --git a/.github/workflows/general-ci.yml b/.github/workflows/general-ci.yml index f7b44e6978..2dcffc6484 100644 --- a/.github/workflows/general-ci.yml +++ b/.github/workflows/general-ci.yml @@ -2,11 +2,11 @@ name: General Tests on: push: - branches: [ master, ci-fix ] + branches: [ main, ci-fix ] pull_request: - branches: [ master, ci-fix ] + branches: [ main, ci-fix ] merge_group: - branches: [ master, ci-fix ] + branches: [ main, ci-fix ] jobs: test: diff --git a/.github/workflows/gpu-ci.yml b/.github/workflows/gpu-ci.yml index ce7f9b628e..2a1ccb43ef 100644 --- a/.github/workflows/gpu-ci.yml +++ b/.github/workflows/gpu-ci.yml @@ -2,11 +2,11 @@ name: GPU Tests on: push: - branches: [ master, ci-fix ] + branches: [ main, ci-fix ] pull_request: - branches: [ master, ci-fix ] + branches: [ main, ci-fix ] merge_group: - branches: [ master, ci-fix ] + branches: [ main, ci-fix ] env: CUDACXX: /usr/local/cuda/bin/nvcc diff --git a/.github/workflows/heterogeneous-ci.yml b/.github/workflows/heterogeneous-ci.yml index 7c65e90718..5f7dbff77e 100644 --- a/.github/workflows/heterogeneous-ci.yml +++ b/.github/workflows/heterogeneous-ci.yml @@ -2,11 +2,11 @@ name: Heterogeneous Tests on: push: - branches: [ master, ci-fix ] + branches: [ main, ci-fix ] pull_request: - branches: [ master, ci-fix ] + branches: [ main, ci-fix ] merge_group: - branches: [ master, ci-fix ] + branches: [ main, ci-fix ] env: CUDA_HOME: /usr/local/cuda diff --git a/.github/workflows/pyFV3-ci.yml b/.github/workflows/pyFV3-ci.yml index 2b98327381..f58fdf85ac 100644 --- a/.github/workflows/pyFV3-ci.yml +++ b/.github/workflows/pyFV3-ci.yml @@ -2,11 +2,11 @@ name: NASA/NOAA pyFV3 repository build test on: push: - branches: [ master, ci-fix ] + branches: [ main, ci-fix ] pull_request: - branches: [ master, ci-fix ] + branches: [ main, 
ci-fix ] merge_group: - branches: [ master, ci-fix ] + branches: [ main, ci-fix ] defaults: run: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6bf69495b1..313b3f0f21 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -47,7 +47,7 @@ For automatic styling, we use the [yapf](https://github.com/google/yapf) file fo We use [pytest](https://www.pytest.org/) for our testing infrastructure. All tests under the `tests/` folder (and any subfolders within) are automatically read and run. The files must be under the right subfolder based on the component being tested (e.g., `tests/sdfg/` for IR-related tests), and must have the right -suffix: either `*_test.py` or `*_cudatest.py`. See [pytest.ini](https://github.com/spcl/dace/blob/master/pytest.ini) +suffix: either `*_test.py` or `*_cudatest.py`. See [pytest.ini](https://github.com/spcl/dace/blob/main/pytest.ini) for more information, and for the markers we use to specify software/hardware requirements. The structure of the test file must follow `pytest` standards (i.e., free functions called `test_*`), and diff --git a/README.md b/README.md index 41b059c953..ef4bdec1db 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![FPGA Tests](https://github.com/spcl/dace/actions/workflows/fpga-ci.yml/badge.svg)](https://github.com/spcl/dace/actions/workflows/fpga-ci.yml) [![Documentation Status](https://readthedocs.org/projects/spcldace/badge/?version=latest)](https://spcldace.readthedocs.io/en/latest/?badge=latest) [![PyPI version](https://badge.fury.io/py/dace.svg)](https://badge.fury.io/py/dace) -[![codecov](https://codecov.io/gh/spcl/dace/branch/master/graph/badge.svg)](https://codecov.io/gh/spcl/dace) +[![codecov](https://codecov.io/gh/spcl/dace/branch/main/graph/badge.svg)](https://codecov.io/gh/spcl/dace) ![D](dace.svg)aCe - Data-Centric Parallel Programming @@ -11,7 +11,7 @@ _Decoupling domain science from performance optimization._ -DaCe is a [fast](https://nbviewer.org/github/spcl/dace/blob/master/tutorials/benchmarking.ipynb) parallel programming +DaCe is a [fast](https://nbviewer.org/github/spcl/dace/blob/main/tutorials/benchmarking.ipynb) parallel programming framework that takes code in Python/NumPy and other programming languages, and maps it to high-performance **CPU, GPU, and FPGA** programs, which can be optimized to achieve state-of-the-art. Internally, DaCe uses the Stateful DataFlow multiGraph (SDFG) *data-centric intermediate @@ -61,13 +61,13 @@ be used in any C ABI compatible language (C/C++, FORTRAN, etc.). 
For more information on how to use DaCe, see the [samples](samples) or tutorials below: -* [Getting Started](https://nbviewer.jupyter.org/github/spcl/dace/blob/master/tutorials/getting_started.ipynb) -* [Benchmarks, Instrumentation, and Performance Comparison with Other Python Compilers](https://nbviewer.jupyter.org/github/spcl/dace/blob/master/tutorials/benchmarking.ipynb) -* [Explicit Dataflow in Python](https://nbviewer.jupyter.org/github/spcl/dace/blob/master/tutorials/explicit.ipynb) -* [NumPy API Reference](https://nbviewer.jupyter.org/github/spcl/dace/blob/master/tutorials/numpy_frontend.ipynb) -* [SDFG API](https://nbviewer.jupyter.org/github/spcl/dace/blob/master/tutorials/sdfg_api.ipynb) -* [Using and Creating Transformations](https://nbviewer.jupyter.org/github/spcl/dace/blob/master/tutorials/transformations.ipynb) -* [Extending the Code Generator](https://nbviewer.jupyter.org/github/spcl/dace/blob/master/tutorials/codegen.ipynb) +* [Getting Started](https://nbviewer.jupyter.org/github/spcl/dace/blob/main/tutorials/getting_started.ipynb) +* [Benchmarks, Instrumentation, and Performance Comparison with Other Python Compilers](https://nbviewer.jupyter.org/github/spcl/dace/blob/main/tutorials/benchmarking.ipynb) +* [Explicit Dataflow in Python](https://nbviewer.jupyter.org/github/spcl/dace/blob/main/tutorials/explicit.ipynb) +* [NumPy API Reference](https://nbviewer.jupyter.org/github/spcl/dace/blob/main/tutorials/numpy_frontend.ipynb) +* [SDFG API](https://nbviewer.jupyter.org/github/spcl/dace/blob/main/tutorials/sdfg_api.ipynb) +* [Using and Creating Transformations](https://nbviewer.jupyter.org/github/spcl/dace/blob/main/tutorials/transformations.ipynb) +* [Extending the Code Generator](https://nbviewer.jupyter.org/github/spcl/dace/blob/main/tutorials/codegen.ipynb) Publication ----------- diff --git a/dace/frontend/python/README.md b/dace/frontend/python/README.md index bd57e36519..aa176f687c 100644 --- a/dace/frontend/python/README.md +++ b/dace/frontend/python/README.md @@ -4,7 +4,7 @@ The Python-Frontend aims to assist users in creating SDFGs from Python code relatively quickly. You may read a list of supported Python features [here](python_supported_features.md). The frontend supports also operations among DaCe arrays, in a manner similar to NumPy. A short tutorial can be bound -[here](https://nbviewer.jupyter.org/github/spcl/dace/blob/master/tutorials/numpy_frontend.ipynb). +[here](https://nbviewer.jupyter.org/github/spcl/dace/blob/main/tutorials/numpy_frontend.ipynb). Please note that the Python-Frontend is still in an early version. For any issues and feature requests, you can create an issue in the main DaCe project. You can also address any questions you have to alziogas@inf.ethz.ch diff --git a/doc/codegen/codegen.rst b/doc/codegen/codegen.rst index a000022ee6..f3058c1440 100644 --- a/doc/codegen/codegen.rst +++ b/doc/codegen/codegen.rst @@ -32,8 +32,8 @@ There are many features that are enabled by generating code from SDFGs: .. note:: - You can also extend the code generator with new backends externally, see the `Customizing Code Generation tutorial `_ - and the `Tensor Core sample `_ for more information. + You can also extend the code generator with new backends externally, see the `Customizing Code Generation tutorial `_ + and the `Tensor Core sample `_ for more information. After the code is generated, ``compiler.py`` will invoke CMake on the build folder (e.g., ``.dacecache//build``) @@ -145,7 +145,7 @@ necessary headers. 
The runtime is used for: match Python interfaces. This is especially useful to generate matching code when calling functions such as ``range`` inside Tasklets. -The folder also contains other files and helper functions, refer to its contents `on GitHub `_ +The folder also contains other files and helper functions, refer to its contents `on GitHub `_ for more information. diff --git a/doc/extensions/extensions.rst b/doc/extensions/extensions.rst index 4644bef109..3f73a924bc 100644 --- a/doc/extensions/extensions.rst +++ b/doc/extensions/extensions.rst @@ -17,10 +17,10 @@ The three key mechanisms of extensibility are class inheritance, :ref:`replaceme For more examples of how to extend DaCe, see the following resources: - * Library nodes: `Einsum specialization library node `_ - * Transformations: `Using and Creating Transformations `_ - * Code generators: `Extending the Code Generator `_ - * Frontend extensions (enumerations and replacements): `Tensor Core code sample `_ + * Library nodes: `Einsum specialization library node `_ + * Transformations: `Using and Creating Transformations `_ + * Code generators: `Extending the Code Generator `_ + * Frontend extensions (enumerations and replacements): `Tensor Core code sample `_ .. .. toctree .. :maxdepth: 1 diff --git a/doc/frontend/daceprograms.rst b/doc/frontend/daceprograms.rst index c21ac34722..4229fe422d 100644 --- a/doc/frontend/daceprograms.rst +++ b/doc/frontend/daceprograms.rst @@ -9,7 +9,7 @@ This includes standard Python code (loops, functions, context managers, etc.), b and (most) functions. .. note:: - For more examples, see the `Getting Started `_ + For more examples, see the `Getting Started `_ Jupyter Notebook tutorial. Usage @@ -349,7 +349,7 @@ Explicit Dataflow Mode The DaCe Python frontend allows users to write SDFG tasklets and memlets directly in Python code. -For more example uses, see the `Explicit Dataflow `_ +For more example uses, see the `Explicit Dataflow `_ tutorial. Memlets diff --git a/doc/ide/cli.rst b/doc/ide/cli.rst index d73d32fdfc..1f63397841 100644 --- a/doc/ide/cli.rst +++ b/doc/ide/cli.rst @@ -123,4 +123,4 @@ nothing is given, the tool will time the entire execution of each program using +---------------------------+--------------+-----------------------------------------------------------+ For a more detailed guide on how to profile SDFGs and work with the resulting data, see :ref:`profiling` and -`this tutorial `_. +`this tutorial `_. diff --git a/doc/optimization/gpu.rst b/doc/optimization/gpu.rst index a08877de3b..f94d377b51 100644 --- a/doc/optimization/gpu.rst +++ b/doc/optimization/gpu.rst @@ -170,7 +170,7 @@ Optimizing GPU SDFGs When optimizing GPU SDFGs, there are a few things to keep in mind. Below is a non-exhaustive list of common GPU optimization practices and how DaCe helps achieve them. To see some of these optimizations in action, check out the ``optimize_for_gpu`` -function in the `Matrix Multiplication optimization example `_. +function in the `Matrix Multiplication optimization example `_. * **Minimize host<->GPU transfers**: It is important to keep as much data as possible on the GPU across the application. This is especially true for data that is accessed frequently, such as data that is used in a loop. @@ -234,7 +234,7 @@ function in the `Matrix Multiplication optimization example `_ + in your code. See the `Tensor Core code sample `_ to see how to make use of such units. 
* **Advanced GPU Map schedules**: DaCe provides two additional built-in map schedules: :class:`~dace.dtypes.ScheduleType.GPU_ThreadBlock_Dynamic` diff --git a/doc/optimization/optimization.rst b/doc/optimization/optimization.rst index f1eb84005b..592ab5e6fc 100644 --- a/doc/optimization/optimization.rst +++ b/doc/optimization/optimization.rst @@ -36,9 +36,9 @@ tunes the data layout of arrays. The following resources are available to help you optimize your SDFG: - * Using transformations: `Using and Creating Transformations `_ - * Creating optimized schedules that can match optimized libraries: `Matrix multiplication CPU and GPU optimization example `_ - * Auto-tuning and instrumentation: `Tuning data layouts sample `_ + * Using transformations: `Using and Creating Transformations `_ + * Creating optimized schedules that can match optimized libraries: `Matrix multiplication CPU and GPU optimization example `_ + * Auto-tuning and instrumentation: `Tuning data layouts sample `_ The following subsections provide more information on the different types of optimization methods: diff --git a/doc/optimization/profiling.rst b/doc/optimization/profiling.rst index 497dc81ae8..617b3a9cb9 100644 --- a/doc/optimization/profiling.rst +++ b/doc/optimization/profiling.rst @@ -5,7 +5,7 @@ Profiling and Instrumentation .. note:: - For more information and examples, see the `Benchmarking and Instrumentation `_ tutorial. + For more information and examples, see the `Benchmarking and Instrumentation `_ tutorial. Simple profiling ---------------- @@ -120,7 +120,7 @@ There are more instrumentation types available, such as fine-grained GPU kernel Instrumentation can also collect performance counters on CPUs and GPUs using `LIKWID `_. The :class:`~dace.dtypes.InstrumentationType.LIKWID_Counters` instrumentation type can be configured to collect a wide variety of performance counters on CPUs and GPUs. An example use can be found in the -`LIKWID instrumentation code sample `_. +`LIKWID instrumentation code sample `_. Instrumentation file format diff --git a/doc/optimization/vscode.rst b/doc/optimization/vscode.rst index 1b72effbcc..07f7797b4e 100644 --- a/doc/optimization/vscode.rst +++ b/doc/optimization/vscode.rst @@ -145,5 +145,5 @@ transformations |add-xform-by-folder-btn|. The latter recursively traverses the for any Python source code files and attempts to load each one as a transformation. For more information on how to use and author data-centric transformations, -see :ref:`transforming` and the `Using and Creating Transformations `_ +see :ref:`transforming` and the `Using and Creating Transformations `_ tutorial. diff --git a/doc/sdfg/ir.rst b/doc/sdfg/ir.rst index 61dc8d4858..1a7a8368cb 100644 --- a/doc/sdfg/ir.rst +++ b/doc/sdfg/ir.rst @@ -627,7 +627,7 @@ override default implementations for a library node type, or for an entire libra Internally, an expansion is a subclass of :class:`~dace.transformation.transformation.ExpandTransformation`. It is responsible for creating a new SDFG that implements the library node, and for connecting the inputs and outputs of the library node to the new SDFG. An example of such an expansion is Einstein summation specialization -(`see full file `_): +(`see full file `_): .. 
code-block:: python diff --git a/doc/sdfg/transformations.rst b/doc/sdfg/transformations.rst index 0a9791ca66..470d413271 100644 --- a/doc/sdfg/transformations.rst +++ b/doc/sdfg/transformations.rst @@ -23,7 +23,7 @@ All transformations extend the :class:`~dace.transformation.transformation.Trans Transformations can have properties and those can be used when applying them: for example, tile sizes in :class:`~dace.transformation.dataflow.tiling.MapTiling`. -For more information on how to use and author data-centric transformations, see the `Using and Creating Transformations `_ +For more information on how to use and author data-centric transformations, see the `Using and Creating Transformations `_ tutorial. diff --git a/doc/setup/integration.rst b/doc/setup/integration.rst index 3e1fc5fa70..78607feda9 100644 --- a/doc/setup/integration.rst +++ b/doc/setup/integration.rst @@ -79,7 +79,7 @@ you to call the SDFG's entry point function, perform basic type checking, and ar Python callback to function pointer, etc.). Since the compiled SDFG is a low-level interface, it is much faster to call than the Python interface. -`We show this behavior in the Benchmarking tutorial `_. +`We show this behavior in the Benchmarking tutorial `_. However, it requires caution as opposed to calling the ``@dace.program`` or the ``SDFG`` object because: * Each array return value is represented internally as a single array (not reallocated every call) and will be diff --git a/doc/setup/quickstart.rst b/doc/setup/quickstart.rst index 4a54de720c..70f24cbfb1 100644 --- a/doc/setup/quickstart.rst +++ b/doc/setup/quickstart.rst @@ -36,5 +36,5 @@ From here on out, you can optimize (:ref:`interactively `, :ref:`program your code. -For more examples of how to use DaCe, see the `samples `_ and -`tutorials `_ folders on GitHub. +For more examples of how to use DaCe, see the `samples `_ and +`tutorials `_ folders on GitHub. diff --git a/tutorials/benchmarking.ipynb b/tutorials/benchmarking.ipynb index f2330957a3..59302e8090 100644 --- a/tutorials/benchmarking.ipynb +++ b/tutorials/benchmarking.ipynb @@ -1260,7 +1260,7 @@ "source": [ "### Instrumentation API\n", "\n", - "The Instrumentation API allows more fine-grained control over measuring program metrics. It creates a JSON report in `.dacecache//perf`, which can be obtained with the API or viewed with any Chrome Tracing capable viewer. More usage information and how to use the API to tune programs can be found in the [program tuning sample](https://github.com/spcl/dace/blob/master/samples/optimization/tuning.py)." + "The Instrumentation API allows more fine-grained control over measuring program metrics. It creates a JSON report in `.dacecache//perf`, which can be obtained with the API or viewed with any Chrome Tracing capable viewer. More usage information and how to use the API to tune programs can be found in the [program tuning sample](https://github.com/spcl/dace/blob/main/samples/optimization/tuning.py)." 
   ]
  },
  {

From 057a6804ea2da60b053895e490cf230d0ef90225 Mon Sep 17 00:00:00 2001
From: Tal Ben-Nun
Date: Thu, 24 Oct 2024 21:13:20 -0700
Subject: [PATCH 74/76] Use codecov tokens (#1707)

---
 .github/workflows/fpga-ci.yml          | 3 +++
 .github/workflows/general-ci.yml       | 6 +++++-
 .github/workflows/gpu-ci.yml           | 1 +
 .github/workflows/heterogeneous-ci.yml | 1 +
 4 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/fpga-ci.yml b/.github/workflows/fpga-ci.yml
index 29be0ec1f1..ef8e5348da 100644
--- a/.github/workflows/fpga-ci.yml
+++ b/.github/workflows/fpga-ci.yml
@@ -8,6 +8,9 @@ on:
   merge_group:
     branches: [ main, ci-fix ]
 
+env:
+  CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+
 jobs:
   test-fpga:
     if: ${{ !contains(github.event.pull_request.labels.*.name, 'no-ci') }}
diff --git a/.github/workflows/general-ci.yml b/.github/workflows/general-ci.yml
index 2dcffc6484..faf0a727be 100644
--- a/.github/workflows/general-ci.yml
+++ b/.github/workflows/general-ci.yml
@@ -85,4 +85,8 @@ jobs:
         ./tests/polybench_test.sh
         ./tests/xform_test.sh
         coverage combine .; coverage report; coverage xml
-        ./codecov
+
+    - uses: codecov/codecov-action@v4
+      with:
+        token: ${{ secrets.CODECOV_TOKEN }}
+        verbose: true
diff --git a/.github/workflows/gpu-ci.yml b/.github/workflows/gpu-ci.yml
index 2a1ccb43ef..527e004478 100644
--- a/.github/workflows/gpu-ci.yml
+++ b/.github/workflows/gpu-ci.yml
@@ -11,6 +11,7 @@ on:
 env:
   CUDACXX: /usr/local/cuda/bin/nvcc
   MKLROOT: /opt/intel/oneapi/mkl/latest/
+  CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
 
 jobs:
diff --git a/.github/workflows/heterogeneous-ci.yml b/.github/workflows/heterogeneous-ci.yml
index 5f7dbff77e..99b566e21f 100644
--- a/.github/workflows/heterogeneous-ci.yml
+++ b/.github/workflows/heterogeneous-ci.yml
@@ -12,6 +12,7 @@ env:
   CUDA_HOME: /usr/local/cuda
   CUDACXX: nvcc
   MKLROOT: /opt/intel/oneapi/mkl/latest/
+  CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
 
 jobs:
   test-heterogeneous:

From 813a2f435cacf509d43be8e109498f7526d06d0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20M=C3=BCller?= <147368808+philip-paul-mueller@users.noreply.github.com>
Date: Fri, 25 Oct 2024 17:25:06 +0200
Subject: [PATCH 75/76] Modified `SDFGState.unordered_arglist()` (#1708)

This PR fixes how arguments are detected in scopes. Technically this only
affects GPU code generation, but that is a side effect of how the code is
generated: in GPU mode a `Map` is translated into one kernel, so a call
signature must be computed (CPU code generation is unaffected because no
function call is produced there). To compute this signature, the
`unordered_arglist()` function scans for the data the scope needs. However,
this scan was not implemented correctly.

Assume that the AccessNode for array `A` is outside the map, and that inside
the map a temporary scalar `tmp_in` is defined and initialized to
`tmp_in = A[__i0, __i1]`; see also this image:

![argliost_situation](https://github.com/user-attachments/assets/fdf54dea-4ef5-49be-8ce2-33b78ce5962d)

If the `data` property of the Memlet that connects the MapEntry with the
AccessNode for `tmp_in` references `A`, then the (old) function would find
that `A` is needed inside, although there is no AccessNode for `A` inside the
map. If, however, that Memlet refers to `tmp_in` (which is unusual, but
should be allowed), then the old version would not pick `A` up, which would
then lead to a code generation error.

This PR modifies the function so that such cases are handled. This is done by
following every edge that is adjacent to the MapEntry (from the inside) back
to where it actually originates.
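As a minimal sketch, the problematic pattern can be reproduced roughly as
follows (all names here are illustrative and not part of this change; the new
test below constructs the complete version):

```python
import dace

# Hypothetical reduction of the failing pattern described above.
sdfg = dace.SDFG("arglist_sketch")
state = sdfg.add_state()
sdfg.add_array("A", (10,), dace.float64)
sdfg.add_array("B", (10,), dace.float64)
sdfg.add_scalar("tmp_in", dace.float64, transient=True)

me, mx = state.add_map("map", dict(i="0:10"))
me.add_in_connector("IN_A")
me.add_out_connector("OUT_A")
mx.add_in_connector("IN_B")
mx.add_out_connector("OUT_B")

tmp_in = state.add_access("tmp_in")
tasklet = state.add_tasklet("work", {"_inp"}, {"_out"}, "_out = _inp + 1.0")

state.add_edge(state.add_access("A"), None, me, "IN_A",
               dace.Memlet.from_array("A", sdfg.arrays["A"]))
# The Memlet entering the scope names the *inner* transient ("tmp_in"), not
# the outer array ("A") it actually reads from; the old scan only inspected
# `edge.data.data` and therefore never registered `A` for the signature.
state.add_edge(me, "OUT_A", tmp_in, None,
               dace.Memlet(data="tmp_in", subset="0", other_subset="i"))
state.add_edge(tmp_in, None, tasklet, "_inp", dace.Memlet("tmp_in[0]"))
state.add_edge(tasklet, "_out", mx, "IN_B", dace.Memlet("B[i]"))
state.add_edge(mx, "OUT_B", state.add_access("B"), None, dace.Memlet("B[0:10]"))
```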
---
 dace/sdfg/state.py                      |  60 ++++++--
 tests/codegen/argumet_signature_test.py | 197 ++++++++++++++++++++++++
 2 files changed, 247 insertions(+), 10 deletions(-)
 create mode 100644 tests/codegen/argumet_signature_test.py

diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py
index 09e7607d65..b982dfd718 100644
--- a/dace/sdfg/state.py
+++ b/dace/sdfg/state.py
@@ -849,6 +849,8 @@ def unordered_arglist(self,
         for node in self.nodes():
             if isinstance(node, nd.AccessNode):
                 descs[node.data] = node.desc(sdfg)
+                # NOTE: In case of multiple nodes of the same data, this will
+                # override previously found nodes.
                 descs_with_nodes[node.data] = node
                 if isinstance(node.desc(sdfg), dt.Scalar):
                     scalars_with_nodes.add(node.data)
@@ -865,19 +867,57 @@ def unordered_arglist(self,
             else:
                 data_args[node.data] = desc
 
-        # Add data arguments from memlets, if do not appear in any of the nodes
-        # (i.e., originate externally)
+        # Add data arguments from memlets, if they do not appear in any of the nodes (i.e., originate externally)
+        # TODO: Investigate whether scanning the adjacent edges of the input and output connectors is better.
         for edge in self.edges():
-            if edge.data.data is not None and edge.data.data not in descs:
-                desc = sdfg.arrays[edge.data.data]
-                if isinstance(desc, dt.Scalar):
-                    # Ignore code->code edges.
-                    if (isinstance(edge.src, nd.CodeNode) and isinstance(edge.dst, nd.CodeNode)):
-                        continue
+            if edge.data.is_empty():
+                continue
+
+            elif edge.data.data not in descs:
+                # The edge reads data from the outside, and the Memlet directly indicates what is read.
+                if (isinstance(edge.src, nd.CodeNode) and isinstance(edge.dst, nd.CodeNode)):
+                    continue  # Ignore code->code edges.
+                additional_descs = {edge.data.data: sdfg.arrays[edge.data.data]}
+
+            elif isinstance(edge.dst, (nd.AccessNode, nd.CodeNode)) and isinstance(edge.src, nd.EntryNode):
+                # Special case of the above: an AccessNode reads data from the outside, but
+                # the Memlet references the data on the inside. Thus we have to follow the data
+                # to where it actually originates.
+                # NOTE: We have to use a memlet path, because we have to go "against the flow".
+                # Furthermore, in a valid SDFG the data will only come from one source anyway.
+                top_source_edge = self.graph.memlet_path(edge)[0]
+                if not isinstance(top_source_edge.src, nd.AccessNode):
+                    continue
+                additional_descs = (
+                    {top_source_edge.src.data: top_source_edge.src.desc(sdfg)}
+                    if top_source_edge.src.data not in descs
+                    else {}
+                )
+
+            elif isinstance(edge.dst, nd.ExitNode) and isinstance(edge.src, (nd.AccessNode, nd.CodeNode)):
+                # Same case as above, but for outgoing Memlets.
+                # NOTE: We have to use a memlet tree here, because the data could potentially
+                # go to multiple destinations. We have to do it this way because calling
+                # `memlet_tree()` here would just give us the edge back.
+                additional_descs = {}
+                connector_to_look = "OUT_" + edge.dst_conn[3:]
+                for oedge in self.graph.out_edges_by_connector(edge.dst, connector_to_look):
+                    if (
+                        (not oedge.data.is_empty()) and (oedge.data.data not in descs)
+                        and (oedge.data.data not in additional_descs)
+                    ):
+                        additional_descs[oedge.data.data] = sdfg.arrays[oedge.data.data]
+
+            else:
+                # All other cases are ignored.
+                continue
 
-                    scalar_args[edge.data.data] = desc
+            # Now processing the list of newly found data.
+            for aname, additional_desc in additional_descs.items():
+                if isinstance(additional_desc, dt.Scalar):
+                    scalar_args[aname] = additional_desc
                 else:
-                    data_args[edge.data.data] = desc
+                    data_args[aname] = additional_desc
 
         # Loop over locally-used data descriptors
         for name, desc in descs.items():
diff --git a/tests/codegen/argumet_signature_test.py b/tests/codegen/argumet_signature_test.py
new file mode 100644
index 0000000000..376724439f
--- /dev/null
+++ b/tests/codegen/argumet_signature_test.py
@@ -0,0 +1,197 @@
+import dace
+import copy
+
+def test_argument_signature_test():
+    """Tests if the argument signature is computed correctly.
+
+    The test focuses on whether data dependencies are picked up when they are
+    only referenced indirectly. This effect is only directly visible for GPU.
+    The test also runs on GPU, but will only compile for GPU.
+    """
+
+    def make_sdfg() -> dace.SDFG:
+        sdfg = dace.SDFG("Repr")
+        state = sdfg.add_state(is_start_block=True)
+        N = dace.symbol(sdfg.add_symbol("N", dace.int32))
+        for name in "BC":
+            sdfg.add_array(
+                name=name,
+                dtype=dace.float64,
+                shape=(N, N),
+                strides=(N, 1),
+                transient=False,
+            )
+
+        # `A` uses a stride that is not used by any of the other arrays.
+        # However, the stride is used if we want to index array `A`.
+        second_stride_A = dace.symbol(sdfg.add_symbol("second_stride_A", dace.int32))
+        sdfg.add_array(
+            name="A",
+            dtype=dace.float64,
+            shape=(N,),
+            strides=(second_stride_A,),
+            transient=False,
+
+        )
+
+        # Also array `D` uses a stride that is not used by any other array.
+        second_stride_D = dace.symbol(sdfg.add_symbol("second_stride_D", dace.int32))
+        sdfg.add_array(
+            name="D",
+            dtype=dace.float64,
+            shape=(N, N),
+            strides=(second_stride_D, 1),
+            transient=False,
+
+        )
+
+        # The simplest way to generate a mapped Tasklet; we will modify it later.
+        state.add_mapped_tasklet(
+            "computation",
+            map_ranges={"__i0": "0:N", "__i1": "0:N"},
+            inputs={
+                "__in0": dace.Memlet("A[__i1]"),
+                "__in1": dace.Memlet("B[__i0, __i1]"),
+            },
+            code="__out = __in0 + __in1",
+            outputs={"__out": dace.Memlet("C[__i0, __i1]")},
+            external_edges=True,
+        )
+
+        # Instead of going from the MapEntry to the Tasklet we will go through
+        # a temporary AccessNode that is only used inside the map scope.
+        # Thus there is no direct reference to `A` inside the map scope that would
+        # need `second_stride_A`.
+        sdfg.add_scalar("tmp_in", transient=True, dtype=dace.float64)
+        tmp_in = state.add_access("tmp_in")
+        for e in state.edges():
+            if e.dst_conn == "__in0":
+                iedge = e
+                break
+        state.add_edge(
+            iedge.src,
+            iedge.src_conn,
+            tmp_in,
+            None,
+            # The important thing is that the Memlet that connects the MapEntry with the
+            # AccessNode does not refer to the memory outside (its source) but to the
+            # transient inside (its destination).
+            dace.Memlet(data="tmp_in", subset="0", other_subset="__i1"),  # This does not work!
+            #dace.Memlet(data="A", subset="__i1", other_subset="0"),  # This would work!
+        )
+        state.add_edge(
+            tmp_in,
+            None,
+            iedge.dst,
+            iedge.dst_conn,
+            dace.Memlet(f"{tmp_in.data}[0]"),
+        )
+        state.remove_edge(iedge)
+
+        # Here we do something similar to what we did for `A`, but this time for the output.
+        # The output of the Tasklet is stored inside a temporary scalar.
+        # From that scalar we then go to `C`; here the Memlet on the inside still
+        # refers to `C`, so it is referenced directly.
+        # We also add a second output that goes to `D`, but the inner Memlet does
+        # not refer to `D` but to the temporary. Thus there is no direct mention of
+        # `D` inside the map scope.
+        sdfg.add_scalar("tmp_out", transient=True, dtype=dace.float64)
+        tmp_out = state.add_access("tmp_out")
+        for e in state.edges():
+            if e.src_conn == "__out":
+                oedge = e
+                assert oedge.data.data == "C"
+                break
+
+        state.add_edge(
+            oedge.src,
+            oedge.src_conn,
+            tmp_out,
+            None,
+            dace.Memlet(data="tmp_out", subset="0"),
+        )
+        state.add_edge(
+            tmp_out,
+            None,
+            oedge.dst,
+            oedge.dst_conn,
+            dace.Memlet(data="C", subset="__i0, __i1"),
+        )
+
+        # Now we create a new output that uses `tmp_out` but goes into `D`.
+        # The memlet on the inside will not use `D` but `tmp_out`.
+        state.add_edge(
+            tmp_out,
+            None,
+            oedge.dst,
+            "IN_D",
+            dace.Memlet(data=tmp_out.data, subset="0", other_subset="__i1, __i0"),
+        )
+        state.add_edge(
+            oedge.dst,
+            "OUT_D",
+            state.add_access("D"),
+            None,
+            dace.Memlet(data="D", subset="__i0, __i1", other_subset="0"),
+        )
+        oedge.dst.add_in_connector("IN_D", force=True)
+        oedge.dst.add_out_connector("OUT_D", force=True)
+        state.remove_edge(oedge)
+
+        # Without this, the test does not work properly.
+        # It is related to [Issue #1703](https://github.com/spcl/dace/issues/1703).
+        sdfg.validate()
+        for edge in state.edges():
+            edge.data.try_initialize(edge=edge, sdfg=sdfg, state=state)
+
+        for array in sdfg.arrays.values():
+            if isinstance(array, dace.data.Array):
+                array.storage = dace.StorageType.GPU_Global
+            else:
+                array.storage = dace.StorageType.Register
+        sdfg.apply_gpu_transformations(simplify=False)
+        sdfg.validate()
+
+        return sdfg
+
+    # Build the SDFG
+    sdfg = make_sdfg()
+
+    map_entry = None
+    for state in sdfg.states():
+        for node in state.nodes():
+            if isinstance(node, dace.nodes.MapEntry):
+                map_entry = node
+                break
+        if map_entry is not None:
+            break
+
+    # Now get the argument list of the map.
+    res_arglist = {k: v for k, v in state.scope_subgraph(map_entry).arglist().items()}
+
+    ref_arglist = {
+        'A': dace.data.Array,
+        'B': dace.data.Array,
+        'C': dace.data.Array,
+        'D': dace.data.Array,
+        'N': dace.data.Scalar,
+        'second_stride_A': dace.data.Scalar,
+        'second_stride_D': dace.data.Scalar,
+    }
+
+    assert len(ref_arglist) == len(res_arglist), f"Expected {len(ref_arglist)} arguments but got {len(res_arglist)}"
+    for aname in ref_arglist.keys():
+        atype_ref = ref_arglist[aname]
+        atype_res = res_arglist[aname]
+        assert isinstance(atype_res, atype_ref), f"Expected '{aname}' to have type {atype_ref}, but it had {type(atype_res)}."
+
+    # If we have CuPy, we will also compile the SDFG.
+    try:
+        import cupy as cp
+    except ImportError:
+        return
+
+    csdfg = sdfg.compile()
+
+if __name__ == "__main__":
+    test_argument_signature_test()

From 2070d393993e2db9d49e278a1052d6d9972cbb6d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 28 Oct 2024 17:53:01 -0700
Subject: [PATCH 76/76] Bump urllib3 from 2.0.7 to 2.2.2 (#1600)

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 7332dc0419..3cc37cc468 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,6 +16,6 @@ ply==3.11
 PyYAML==6.0.1
 six==1.16.0
 sympy==1.9
-urllib3==2.0.7
+urllib3==2.2.2
 websockets==11.0.3
 zipp==3.15.0