-
Notifications
You must be signed in to change notification settings - Fork 86
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Co-authored-by: Philip James-Roxby <phil.jamesroxby@gmail.com> Co-authored-by: pjr <pjr@xilinx.com> Co-authored-by: Jack Lo <36210336+jackl-xilinx@users.noreply.github.com> Co-authored-by: Kristof Denolf <kristof.denolf@amd.com> Co-authored-by: Joseph Melber <jgmelber@gmail.com> Co-authored-by: Andra Bisca <andra.bisca@gmail.com> Co-authored-by: AndraBisca <andrab@amd.com>
- Loading branch information
1 parent
1e4da96
commit 6354de5
Showing
19 changed files
with
686 additions
and
657 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
//===- scale.cc -------------------------------------------------*- C++ -*-===// | ||
// | ||
// This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// Copyright (C) 2023, Advanced Micro Devices, Inc. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#define __AIENGINE__ 2 | ||
#define NOCPP | ||
#define __AIEARCH__ 20 | ||
|
||
#include <stdint.h> | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <type_traits> | ||
|
||
#include <aie_api/aie.hpp> | ||
|
||
// Scalar reference implementation of an element-wise product.
//
// Template parameters:
//   T_in  - element type of both input buffers.
//   T_out - element type of the output buffer; each product is converted to
//           it on assignment.
//   N     - number of elements to process (nothing is done when N <= 0).
//
// Writes c[i] = a[i] * b[i] for every i in [0, N). The inputs are only read.
template <typename T_in, typename T_out, const int N>
void eltwise_mul(T_in *a, T_in *b, T_out *c) {
  T_in *lhs = a;
  T_in *rhs = b;
  T_out *dst = c;
  // Walk the three buffers in lock-step, one element per trip.
  for (int remaining = N; remaining > 0; --remaining) {
    *dst = *lhs * *rhs;
    ++lhs;
    ++rhs;
    ++dst;
  }
}
|
||
template <typename T_in, typename T_out, const int N> | ||
void eltwise_vmul(T_in *a, T_in *b, T_out *c) { | ||
|
||
constexpr int vec_factor = 16; | ||
event0(); | ||
T_in *__restrict pA1 = a; | ||
T_in *__restrict pB1 = b; | ||
T_out *__restrict pC1 = c; | ||
const int F = N / vec_factor; | ||
for (int i = 0; i < F; i++) | ||
chess_prepare_for_pipelining chess_loop_range(16, ) { | ||
aie::vector<T_in, vec_factor> A0 = aie::load_v<vec_factor>(pA1); | ||
pA1 += vec_factor; | ||
aie::vector<T_in, vec_factor> B0 = aie::load_v<vec_factor>(pB1); | ||
pB1 += vec_factor; | ||
aie::vector<T_out, vec_factor> cout = aie::mul(A0, B0); | ||
aie::store_v(pC1, cout); | ||
pC1 += vec_factor; | ||
} | ||
event1(); | ||
} | ||
|
||
extern "C" { | ||
|
||
void eltwise_mul_bf16_scalar(bfloat16 *a_in, bfloat16 *b_in, bfloat16 *c_out) { | ||
eltwise_mul<bfloat16, bfloat16, 1024>(a_in, b_in, c_out); | ||
} | ||
|
||
void eltwise_mul_bf16_vector(bfloat16 *a_in, bfloat16 *b_in, bfloat16 *c_out) { | ||
eltwise_vmul<bfloat16, bfloat16, 1024>(a_in, b_in, c_out); | ||
} | ||
|
||
} // extern "C" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
<!---//===- README.md --------------------------*- Markdown -*-===// | ||
// | ||
// This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// Copyright (C) 2022, Advanced Micro Devices, Inc. | ||
// | ||
//===----------------------------------------------------------------------===//--> | ||
|
||
# <ins>Passthrough DMAs</ins> | ||
|
||
This reference design can be run on a RyzenAI NPU. | ||
|
||
In the [design](./aie2.py), data is brought from external memory to `ComputeTile2` and back, without modification by the tile, using an implicit copy via the compute tile's Data Movement Accelerator (DMA). The data is read from and written to external memory through the Shim tile (`col`, 0). | ||
|
||
The implicit copy is performed using the `object_fifo_link` operation, which specifies how input data arriving via `of_in` should be forwarded via `of_out` by specifically leveraging the compute tile's DMA. This operation and its functionality are described in more depth in [Section-2b](../../../programming_guide/section-2/section-2b/README.md/#object-fifo-link) of the programming guide. | ||
|
||
|
||
To compile and run the design for NPU: | ||
``` | ||
make | ||
make run | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.