Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

simplify() can prduce incorrect transformation #1694

Open
pratyai opened this issue Oct 18, 2024 · 1 comment
Open

simplify() can prduce incorrect transformation #1694

pratyai opened this issue Oct 18, 2024 · 1 comment

Comments

@pratyai
Copy link
Collaborator

pratyai commented Oct 18, 2024

Describe the bug

Original Graph

Consider the graph produced by the following script:

def make_sdfg():
    g = SDFG('prog')
    g.add_array('A', (5, 5), dace.float32)
    g.add_array('b', (1,), dace.float32, transient=True)
    g.add_array('c', (5, 5), dace.float32, transient=True)
    g.add_array('d', (1,), dace.float32, transient=True)
    g.add_array('e', (5, 5), dace.float32, transient=True)

    st0 = g.add_state('st0', is_start_block=True)
    st = st0
    # Map 1
    A = st.add_access('A')
    en, ex = st.add_map('m0', {'i': '0:5', 'j': '0:5'})
    st.add_memlet_path(A, en, dst_conn='IN_A', memlet=Memlet(expr='A[i, j]'))
    b = st.add_access('b')
    st.add_memlet_path(en, b, src_conn='OUT_A', memlet=Memlet(expr='A[i, j] -> b[0]'))
    c = st.add_access('c')
    st.add_memlet_path(b, c, memlet=Memlet(expr='b[0] -> c[i, j]'))
    st.add_memlet_path(c, ex, dst_conn='IN_A', memlet=Memlet(expr='c[i, j] -> A[i, j]'))
    A = st.add_access('A')
    st.add_memlet_path(ex, A, src_conn='OUT_A', memlet=Memlet(expr='A[i, j]'))
    # Map 2
    en, ex = st.add_map('m1', {'i': '0:5', 'j': '0:5'})
    st.add_memlet_path(A, en, dst_conn='IN_A', memlet=Memlet(expr='A[i, j]'))
    d = st.add_access('d')
    st.add_memlet_path(en, d, src_conn='OUT_A', memlet=Memlet(expr='A[i, j] -> d[0]'))
    e = st.add_access('e')
    st.add_memlet_path(d, e, memlet=Memlet(expr='d[0] -> e[i, j]'))
    st.add_memlet_path(e, ex, dst_conn='IN_A', memlet=Memlet(expr='e[i, j] -> A[i, j]'))
    A = st.add_access('A')
    st.add_memlet_path(ex, A, src_conn='OUT_A', memlet=Memlet(expr='A[i, j]'))
    st0.fill_scope_connectors()

    g.validate()
    g.compile()
    return g

Which produces this graph (orig.sdfg): https://polybox.ethz.ch/index.php/s/1mnftP8zNIX4rtA
And compiles into:

void __program_prog_internal(prog_state_t*__state, float * __restrict__ A)
{

    {

        {
            #pragma omp parallel for
            for (auto i = 0; i < 5; i += 1) {
                for (auto j = 0; j < 5; j += 1) {
                    float b[1]  DACE_ALIGN(64);
                    float c[25]  DACE_ALIGN(64);

                    dace::CopyND<float, 1, false, 1>::template ConstDst<1>::Copy(
                    A + ((5 * i) + j), b, 1);

                    dace::CopyND<float, 1, false, 1>::template ConstDst<1>::Copy(
                    b, c + ((5 * i) + j), 1);

                    dace::CopyND<float, 1, false, 1>::template ConstDst<1>::Copy(
                    c + ((5 * i) + j), A + ((5 * i) + j), 1);
                }
            }
        }
        {
            #pragma omp parallel for
            for (auto i = 0; i < 5; i += 1) {
                for (auto j = 0; j < 5; j += 1) {
                    float d[1]  DACE_ALIGN(64);
                    float e[25]  DACE_ALIGN(64);

                    dace::CopyND<float, 1, false, 1>::template ConstDst<1>::Copy(
                    A + ((5 * i) + j), d, 1);

                    dace::CopyND<float, 1, false, 1>::template ConstDst<1>::Copy(
                    d, e + ((5 * i) + j), 1);

                    dace::CopyND<float, 1, false, 1>::template ConstDst<1>::Copy(
                    e + ((5 * i) + j), A + ((5 * i) + j), 1);
                }
            }
        }

    }
}

Simplified Graph

Now consider a simplification produced by the following script:

g = make_sdfg()
g.simplify()

Which produces this graph (simple.sdfg): https://polybox.ethz.ch/index.php/s/7gYiAxHijVnKTes
And compiles into:

void __program_prog_internal(prog_state_t*__state, float * __restrict__ A)
{

    {

        {
            #pragma omp parallel for
            for (auto i = 0; i < 5; i += 1) {
                for (auto j = 0; j < 5; j += 1) {
                    float *c;
                    c = new float DACE_ALIGN(64)[25];

                    dace::CopyND<float, 1, false, 1>::template ConstDst<1>::Copy(
                    A, c + ((5 * i) + j), 1);

                    dace::CopyND<float, 1, false, 1>::template ConstDst<1>::Copy(
                    c + ((5 * i) + j), A + ((5 * i) + j), 1);
                    delete[] c;
                }
            }
        }
        {
            #pragma omp parallel for
            for (auto i = 0; i < 5; i += 1) {
                for (auto j = 0; j < 5; j += 1) {
                    float *e;
                    e = new float DACE_ALIGN(64)[25];

                    dace::CopyND<float, 1, false, 1>::template ConstDst<1>::Copy(
                    A, e + ((5 * i) + j), 1);

                    dace::CopyND<float, 1, false, 1>::template ConstDst<1>::Copy(
                    e + ((5 * i) + j), A + ((5 * i) + j), 1);
                    delete[] e;
                }
            }
        }

    }
}

Problem

Observe that the first copies of each loop between the two versions, e.g.:

// Orig
                    dace::CopyND<float, 1, false, 1>::template ConstDst<1>::Copy(
                    A + ((5 * i) + j), b, 1);

vs.

// Simplified
                    dace::CopyND<float, 1, false, 1>::template ConstDst<1>::Copy(
                    A, c + ((5 * i) + j), 1);

I claim that this transformation is wrong, since only the A[0][0] element is going to be ever copied from. The correct version should be the following:

// Corrected Simplified
                    dace::CopyND<float, 1, false, 1>::template ConstDst<1>::Copy(
                    A + ((5 * i) + j), c + ((5 * i) + j), 1);
@pratyai
Copy link
Collaborator Author

pratyai commented Oct 18, 2024

I believe this is due to incorrectly setting the other_subset to None, since None is interpreted as subset but offset to 0 (as Tal mentioned on chat the other day).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant