diff --git a/AMD-license-agreement-for-non-commercial-models.md b/AMD-license-agreement-for-non-commercial-models.md index b598532..8a384de 100644 --- a/AMD-license-agreement-for-non-commercial-models.md +++ b/AMD-license-agreement-for-non-commercial-models.md @@ -3,7 +3,7 @@ LICENSE AGREEMENT FOR NON-COMMERCIAL MODELS Trained Models: -bincop-cnv, cnv-w1a1, cnv-w1a2, cnv-w2a2, kwsmlp-w3a3, mobilenetv1-w4a4, resnet50-w1a2, tfc-w1a1, tfc-w1a2, tfc-w2a2, unsw_nb15-mlp-w2a2, vgg10-radioml-w4a4 +bincop-cnv, cnv-w1a1, cnv-w1a2, cnv-w2a2, kwsmlp-w3a3, mobilenetv1-w4a4, resnet50-w1a2, tfc-w1a1, tfc-w1a2, tfc-w2a2, unsw_nb15-mlp-w2a2, vgg10-radioml-w4a4, cnv_1w1a_gtsrb.onnx This License Agreement for Non-Commercial Models (“Agreement”) is a legal agreement between you (either an individual or an entity) and Advanced Micro Devices, Inc. on behalf of itself and its subsidiaries and affiliates (collectively “AMD”). DO NOT USE THE TRAINED MODELS IDENTIFIED ABOVE UNTIL YOU HAVE CAREFULLY READ THIS AGREEMENT. BY USING, INSTALLING, MODIFYING, COPYING, TRAINING, BENCHMARKING, OR DISTRIBUTING THE TRAINED MODELS, YOU AGREE TO AND ACCEPT ALL TERMS AND CONDITIONS OF THIS AGREEMENT. If you do not accept these terms, do not use the Trained Models. 
diff --git a/AUTHORS.rst b/AUTHORS.rst index ba06e5d..f7616fa 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -2,9 +2,17 @@ Contributors ============ -* Yaman Umuroglu (@maltanar) (maintainer) +* Mirza Mrahorovic (@mmrahorovic) (maintainer) * Jakoba Petri-Koenig (@auphelia) +* Yaman Umuroglu (@maltanar) * Lucian Petrica (@quetric) * Tobias Alonso (@Tobi-Alonso) * Hendrik Borras (@HenniOVP) * Felix Paul Jentzsch (@felixpj) +* Aziz Bahri (@azizb-xlnx) +* John Monks (@jmonks-amd) +* Fionn O'Donohoe (@fionnodonohoe-xlnx) +* Radoslav Pitoňák (@rpitonak) +* Matthias Gehre (@mgehre-amd) +* NaelF (@NaelF) +* Tim Paine (@timkpaine) diff --git a/LICENSE b/LICENSE index 66e9909..788d4c6 100644 --- a/LICENSE +++ b/LICENSE @@ -1,20 +1,22 @@ BSD 3-Clause License -Copyright (c) 2023, Xilinx +Copyright (C) 2020-2022, Xilinx, Inc. +Copyright (C) 2022-2024, Advanced Micro Devices, Inc. +All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. 
+* Neither the name of FINN nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE diff --git a/build/README.md b/build/README.md index e284deb..7cdac19 100644 --- a/build/README.md +++ b/build/README.md @@ -16,7 +16,7 @@ to do this again in the future when the `finn-examples` repo gets updated and re Docker community edition `docker-ce`. 3. Set up the environment variables to point to your Vivado/Vitis installation, depending on your target platform(s): - * For Zynq platforms you'll need to set `VIVADO_PATH`, e.g. `VIVADO_PATH=/opt/xilinx/Vivado/2019.1/` + * For Zynq platforms you'll need to set `VIVADO_PATH`, e.g. `VIVADO_PATH=/opt/xilinx/Vivado/2022.2/` * For Alveo platforms you'll need to set `VITIS_PATH`, `PLATFORM_REPO_PATHS` and `XILINX_XRT` ## Build bitfiles diff --git a/build/bnn-pynq/README.md b/build/bnn-pynq/README.md index 0c2b65a..3ccb705 100644 --- a/build/bnn-pynq/README.md +++ b/build/bnn-pynq/README.md @@ -42,7 +42,7 @@ cd $FINN_EXAMPLES/build/finn The BNN-PYNQ networks are part of the [Brevitas examples](https://github.com/Xilinx/brevitas/tree/master/src/brevitas_examples/bnn_pynq). You can find the details on quantization, accuracy, layers used in the Brevitas repo, as well as the training scripts if you'd like to retrain them yourself. -Subsequently, those trained networks are [exported to ONNX](https://github.com/Xilinx/finn/blob/master/notebooks/basics/1_brevitas_network_import.ipynb). In addition, the particular versions +Subsequently, those trained networks are [exported to ONNX](https://github.com/Xilinx/finn/blob/main/notebooks/basics/1_brevitas_network_import_via_QONNX.ipynb). 
In addition, the particular versions used here have two additions, as described in the "Adding Pre- and Postprocessing" section of [this notebook](https://github.com/Xilinx/finn/blob/master/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb): * A divide-by-255 node is added at the input, and the input is marked as 8-bit (to directly accept 8-bit images as input) diff --git a/build/bnn-pynq/build.py b/build/bnn-pynq/build.py index ea509bf..867410c 100644 --- a/build/bnn-pynq/build.py +++ b/build/bnn-pynq/build.py @@ -1,3 +1,31 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + import finn.builder.build_dataflow as build import finn.builder.build_dataflow_config as build_cfg from finn.util.basic import alveo_default_platform @@ -54,12 +82,13 @@ def platform_to_shell(platform): cfg = build_cfg.DataflowBuildConfig( output_dir="output_%s_%s" % (model_name, release_platform_name), folding_config_file="folding_config/%s_folding_config.json" % model_name, - synth_clk_period_ns=10.0, + synth_clk_period_ns=5.0, board=platform_name, shell_flow_type=shell_flow_type, vitis_platform=vitis_platform, generate_outputs=[build_cfg.DataflowOutputType.BITFILE], save_intermediate_models=True, + default_swg_exception=True, ) model_file = "models/%s.onnx" % model_name # launch FINN compiler to build diff --git a/build/bnn-pynq/folding_config/cnv-w1a1_folding_config.json b/build/bnn-pynq/folding_config/cnv-w1a1_folding_config.json index 03b462d..0bb17e9 100644 --- a/build/bnn-pynq/folding_config/cnv-w1a1_folding_config.json +++ b/build/bnn-pynq/folding_config/cnv-w1a1_folding_config.json @@ -1,79 +1,87 @@ { "Defaults": {}, - "Thresholding_Batch_0": { - "PE": 1, - "ram_style": "distributed" + "Thresholding_rtl_0": { + "PE": 1 }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 3, "ram_style": "distributed" }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 16, "SIMD": 3, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_1": { + 
"ConvolutionInputGenerator_rtl_1": { "SIMD": 32, "ram_style": "distributed" }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 32, "SIMD": 32, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 32, "ram_style": "distributed" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 16, "SIMD": 32, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_3": { + "ConvolutionInputGenerator_rtl_3": { "SIMD": 32, "ram_style": "distributed" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 16, "SIMD": 32, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 32, "ram_style": "distributed" }, - "MatrixVectorActivation_4": { + "MVAU_hls_4": { "PE": 4, "SIMD": 32, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_5": { + "ConvolutionInputGenerator_rtl_5": { "SIMD": 32, "ram_style": "distributed" }, - "MatrixVectorActivation_5": { + "MVAU_hls_5": { "PE": 1, "SIMD": 32, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_6": { + "MVAU_hls_6": { "PE": 1, "SIMD": 4, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_7": { + "MVAU_hls_7": { "PE": 1, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_8": { + "MVAU_hls_8": { "PE": 5, "SIMD": 1, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/bnn-pynq/folding_config/cnv-w1a2_folding_config.json b/build/bnn-pynq/folding_config/cnv-w1a2_folding_config.json index c83c614..954de5b 100644 --- a/build/bnn-pynq/folding_config/cnv-w1a2_folding_config.json +++ 
b/build/bnn-pynq/folding_config/cnv-w1a2_folding_config.json @@ -1,79 +1,87 @@ { "Defaults": {}, - "Thresholding_Batch_0": { - "PE": 1, - "ram_style": "distributed" + "Thresholding_rtl_0": { + "PE": 1 }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 3, "ram_style": "distributed" }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 8, "SIMD": 3, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_1": { + "ConvolutionInputGenerator_rtl_1": { "SIMD": 16, "ram_style": "distributed" }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 16, "SIMD": 16, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 16, "ram_style": "distributed" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 8, "SIMD": 16, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_3": { + "ConvolutionInputGenerator_rtl_3": { "SIMD": 16, "ram_style": "distributed" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 8, "SIMD": 16, - "ram_style": "block" + "ram_style": "block", + "resType": "lut" }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 8, "ram_style": "distributed" }, - "MatrixVectorActivation_4": { + "MVAU_hls_4": { "PE": 4, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_5": { + "ConvolutionInputGenerator_rtl_5": { "SIMD": 8, "ram_style": "distributed" }, - "MatrixVectorActivation_5": { + "MVAU_hls_5": { "PE": 1, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_6": { + "MVAU_hls_6": { "PE": 1, "SIMD": 2, - "ram_style": "distributed" + "ram_style": "distributed", + "resType": "lut" }, - "MatrixVectorActivation_7": { + "MVAU_hls_7": { "PE": 2, "SIMD": 2, - "ram_style": "block" + "ram_style": "block", + 
"resType": "lut" }, - "MatrixVectorActivation_8": { + "MVAU_hls_8": { "PE": 5, "SIMD": 1, - "ram_style": "distributed" + "ram_style": "distributed", + "resType": "lut" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/bnn-pynq/folding_config/cnv-w2a2_folding_config.json b/build/bnn-pynq/folding_config/cnv-w2a2_folding_config.json index c83c614..954de5b 100644 --- a/build/bnn-pynq/folding_config/cnv-w2a2_folding_config.json +++ b/build/bnn-pynq/folding_config/cnv-w2a2_folding_config.json @@ -1,79 +1,87 @@ { "Defaults": {}, - "Thresholding_Batch_0": { - "PE": 1, - "ram_style": "distributed" + "Thresholding_rtl_0": { + "PE": 1 }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 3, "ram_style": "distributed" }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 8, "SIMD": 3, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_1": { + "ConvolutionInputGenerator_rtl_1": { "SIMD": 16, "ram_style": "distributed" }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 16, "SIMD": 16, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 16, "ram_style": "distributed" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 8, "SIMD": 16, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_3": { + "ConvolutionInputGenerator_rtl_3": { "SIMD": 16, "ram_style": "distributed" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 8, "SIMD": 16, - "ram_style": "block" + "ram_style": "block", + "resType": "lut" }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 8, "ram_style": "distributed" }, - "MatrixVectorActivation_4": { + "MVAU_hls_4": { "PE": 4, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_5": { + 
"ConvolutionInputGenerator_rtl_5": { "SIMD": 8, "ram_style": "distributed" }, - "MatrixVectorActivation_5": { + "MVAU_hls_5": { "PE": 1, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_6": { + "MVAU_hls_6": { "PE": 1, "SIMD": 2, - "ram_style": "distributed" + "ram_style": "distributed", + "resType": "lut" }, - "MatrixVectorActivation_7": { + "MVAU_hls_7": { "PE": 2, "SIMD": 2, - "ram_style": "block" + "ram_style": "block", + "resType": "lut" }, - "MatrixVectorActivation_8": { + "MVAU_hls_8": { "PE": 5, "SIMD": 1, - "ram_style": "distributed" + "ram_style": "distributed", + "resType": "lut" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/bnn-pynq/folding_config/tfc-w1a1_folding_config.json b/build/bnn-pynq/folding_config/tfc-w1a1_folding_config.json index 95167f1..bbaad88 100644 --- a/build/bnn-pynq/folding_config/tfc-w1a1_folding_config.json +++ b/build/bnn-pynq/folding_config/tfc-w1a1_folding_config.json @@ -1,30 +1,33 @@ { "Defaults": {}, - "Thresholding_Batch_0": { - "PE": 49, - "ram_style": "distributed" + "Thresholding_rtl_0": { + "PE": 49 }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 16, "SIMD": 49, - "ram_style": "block" + "ram_style": "block", + "resType": "lut" }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 8, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 8, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 10, "SIMD": 8, - "ram_style": "distributed" + "ram_style": "distributed", + "resType": "lut" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/bnn-pynq/folding_config/tfc-w1a2_folding_config.json b/build/bnn-pynq/folding_config/tfc-w1a2_folding_config.json index 95167f1..bbaad88 100644 --- 
a/build/bnn-pynq/folding_config/tfc-w1a2_folding_config.json +++ b/build/bnn-pynq/folding_config/tfc-w1a2_folding_config.json @@ -1,30 +1,33 @@ { "Defaults": {}, - "Thresholding_Batch_0": { - "PE": 49, - "ram_style": "distributed" + "Thresholding_rtl_0": { + "PE": 49 }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 16, "SIMD": 49, - "ram_style": "block" + "ram_style": "block", + "resType": "lut" }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 8, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 8, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 10, "SIMD": 8, - "ram_style": "distributed" + "ram_style": "distributed", + "resType": "lut" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/bnn-pynq/folding_config/tfc-w2a2_folding_config.json b/build/bnn-pynq/folding_config/tfc-w2a2_folding_config.json index 95167f1..bbaad88 100644 --- a/build/bnn-pynq/folding_config/tfc-w2a2_folding_config.json +++ b/build/bnn-pynq/folding_config/tfc-w2a2_folding_config.json @@ -1,30 +1,33 @@ { "Defaults": {}, - "Thresholding_Batch_0": { - "PE": 49, - "ram_style": "distributed" + "Thresholding_rtl_0": { + "PE": 49 }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 16, "SIMD": 49, - "ram_style": "block" + "ram_style": "block", + "resType": "lut" }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 8, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 8, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 10, "SIMD": 8, - "ram_style": "distributed" + "ram_style": "distributed", + "resType": "lut" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git 
a/build/get-finn.sh b/build/get-finn.sh index 6dbe581..de796a1 100755 --- a/build/get-finn.sh +++ b/build/get-finn.sh @@ -1,6 +1,6 @@ #!/bin/bash # Copyright (C) 2020-2022, Xilinx -# Copyright (C) 2023, Advanced Micro Devices, Inc. +# Copyright (C) 2022-2024, Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -31,7 +31,7 @@ # URL for git repo to be cloned REPO_URL=https://github.com/Xilinx/finn # commit hash for repo -REPO_COMMIT=02ce6954c93963c8407cd5c20761fccf92e1c70d +REPO_COMMIT=39fb8859fec0e47276ffadcafe43092d1b10af7e # directory (under the same folder as this script) to clone to REPO_DIR=finn diff --git a/build/gtsrb/README.md b/build/gtsrb/README.md new file mode 100644 index 0000000..f5179a1 --- /dev/null +++ b/build/gtsrb/README.md @@ -0,0 +1,26 @@ +# Brevitas GTSRB example + +This is the binarized CNV topology from the paper [FINN: A Framework for Fast, Scalable Binarized Neural Network Inference](https://arxiv.org/abs/1612.07119) which is trained +on the [German Traffic Sign Recognition Benchmark (GTSRB)](https://benchmark.ini.rub.de/gtsrb_news.html) dataset. + +## Build bitfiles for GTSRB + +0. Ensure you have performed the *Setup* steps in the top-level README for setting up the FINN requirements and environment variables. + +1. Run the `download-model.sh` script under the `models` directory to download the pretrained QONNX model. You should have `gtsrb/models/cnv_1w1a_gtsrb.onnx` as a result. + +2. Launch the build as follows: +```SHELL +# update this according to where you cloned this repo: +FINN_EXAMPLES=/path/to/finn-examples +# cd into finn submodule +cd $FINN_EXAMPLES/build/finn +# launch the build on the gtsrb folder +./run-docker.sh build_custom $FINN_EXAMPLES/build/gtsrb +``` + +3. The generated outputs will be under `gtsrb/output__`. 
You can find a description of the generated files [here](https://finn-dev.readthedocs.io/en/latest/command_line.html#simple-dataflow-build-mode). + +## Where did the ONNX model files come from? + +The model is part of the QONNX model zoo and gets directly downloaded from [here](https://github.com/fastmachinelearning/qonnx_model_zoo/tree/feature/gtsrb_cnv/models/GTSRB/Brevitas_CNV1W1A). diff --git a/build/gtsrb/build.py b/build/gtsrb/build.py new file mode 100644 index 0000000..a46d766 --- /dev/null +++ b/build/gtsrb/build.py @@ -0,0 +1,124 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import finn.builder.build_dataflow as build +import finn.builder.build_dataflow_config as build_cfg +from finn.builder.build_dataflow_config import default_build_dataflow_steps +from qonnx.core.datatype import DataType +import os +import shutil +import numpy as np +from onnx import helper as oh + +models = [ + "cnv_1w1a_gtsrb", +] + +# which platforms to build the networks for +zynq_platforms = ["Pynq-Z1"] +platforms_to_build = zynq_platforms + + +def custom_step_add_preproc(model, cfg): + # GTSRB data with raw uint8 pixels is divided by 255 prior to training + # reflect this in the inference graph so we can perform inference directly + # on raw uint8 data + in_name = model.graph.input[0].name + new_in_name = model.make_new_valueinfo_name() + new_param_name = model.make_new_valueinfo_name() + div_param = np.asarray(255.0, dtype=np.float32) + new_div = oh.make_node( + "Div", + [in_name, new_param_name], + [new_in_name], + name="PreprocDiv", + ) + model.set_initializer(new_param_name, div_param) + model.graph.node.insert(0, new_div) + model.graph.node[1].input[0] = new_in_name + # set input dtype to uint8 + model.set_tensor_datatype(in_name, DataType["UINT8"]) + return model + + +custom_build_steps = [custom_step_add_preproc] + default_build_dataflow_steps + + +# determine which shell flow to use for a given platform +def platform_to_shell(platform): + if platform in zynq_platforms: + return build_cfg.ShellFlowType.VIVADO_ZYNQ + else: + 
raise Exception("Unknown platform, can't determine ShellFlowType") + + +# create a release dir, used for finn-examples release packaging +os.makedirs("release", exist_ok=True) + +for platform_name in platforms_to_build: + shell_flow_type = platform_to_shell(platform_name) + vitis_platform = None + # for Zynq, use the board name as the release name + # e.g. ZCU104 + release_platform_name = platform_name + platform_dir = "release/%s" % release_platform_name + os.makedirs(platform_dir, exist_ok=True) + for model_name in models: + # set up the build configuration for this model + cfg = build_cfg.DataflowBuildConfig( + output_dir="output_%s_%s" % (model_name, release_platform_name), + target_fps=3000, + synth_clk_period_ns=10.0, + board=platform_name, + steps=custom_build_steps, + folding_config_file="folding_config/cnv_gtsrb_folding_config.json", + shell_flow_type=shell_flow_type, + vitis_platform=vitis_platform, + generate_outputs=[ + build_cfg.DataflowOutputType.ESTIMATE_REPORTS, + build_cfg.DataflowOutputType.STITCHED_IP, + build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE, + build_cfg.DataflowOutputType.BITFILE, + ], + save_intermediate_models=True, + ) + model_file = "models/%s.onnx" % model_name + # launch FINN compiler to build + build.build_dataflow_cfg(model_file, cfg) + # copy bitfiles into release dir if found + bitfile_gen_dir = cfg.output_dir + "/bitfile" + files_to_check_and_copy = [ + "finn-accel.bit", + "finn-accel.hwh", + "finn-accel.xclbin", + ] + for f in files_to_check_and_copy: + src_file = bitfile_gen_dir + "/" + f + dst_file = platform_dir + "/" + f.replace("finn-accel", model_name) + if os.path.isfile(src_file): + shutil.copy(src_file, dst_file) diff --git a/build/gtsrb/folding_config/cnv_gtsrb_folding_config.json b/build/gtsrb/folding_config/cnv_gtsrb_folding_config.json new file mode 100644 index 0000000..d96d432 --- /dev/null +++ b/build/gtsrb/folding_config/cnv_gtsrb_folding_config.json @@ -0,0 +1,78 @@ +{ + "Defaults": {}, + 
"Thresholding_rtl_0": { + "PE": 1 + }, + "ConvolutionInputGenerator_rtl_0": { + "SIMD": 3, + "ram_style": "distributed" + }, + "MVAU_hls_0": { + "PE": 16, + "SIMD": 3, + "ram_style": "auto" + }, + "ConvolutionInputGenerator_rtl_1": { + "SIMD": 32, + "ram_style": "distributed" + }, + "MVAU_hls_1": { + "PE": 32, + "SIMD": 32, + "ram_style": "auto" + }, + "ConvolutionInputGenerator_rtl_2": { + "SIMD": 32, + "ram_style": "distributed" + }, + "MVAU_hls_2": { + "PE": 16, + "SIMD": 32, + "ram_style": "auto" + }, + "ConvolutionInputGenerator_rtl_3": { + "SIMD": 32, + "ram_style": "distributed" + }, + "MVAU_hls_3": { + "PE": 16, + "SIMD": 32, + "ram_style": "auto" + }, + "ConvolutionInputGenerator_rtl_4": { + "SIMD": 32, + "ram_style": "distributed" + }, + "MVAU_hls_4": { + "PE": 4, + "SIMD": 32, + "ram_style": "auto" + }, + "ConvolutionInputGenerator_rtl_5": { + "SIMD": 32, + "ram_style": "distributed" + }, + "MVAU_hls_5": { + "PE": 1, + "SIMD": 32, + "ram_style": "auto" + }, + "MVAU_hls_6": { + "PE": 1, + "SIMD": 4, + "ram_style": "auto" + }, + "MVAU_hls_7": { + "PE": 1, + "SIMD": 8, + "ram_style": "auto" + }, + "MVAU_hls_8": { + "PE": 4, + "SIMD": 1, + "ram_style": "auto" + }, + "LabelSelect_hls_0": { + "PE": 1 + } +} diff --git a/build/gtsrb/models/download-model.sh b/build/gtsrb/models/download-model.sh new file mode 100755 index 0000000..b4b07c6 --- /dev/null +++ b/build/gtsrb/models/download-model.sh @@ -0,0 +1,2 @@ +#!/bin/bash +wget https://github.com/fastmachinelearning/qonnx_model_zoo/raw/feature/gtsrb_cnv/models/GTSRB/Brevitas_CNV1W1A/cnv_1w1a_gtsrb.onnx diff --git a/build/kws/README.md b/build/kws/README.md index c683701..40cb024 100644 --- a/build/kws/README.md +++ b/build/kws/README.md @@ -1,8 +1,8 @@ -# The KWS example +# The keyword spotting (KWS) example The KWS example includes an MLP for the Google SpeechCommandsV2 dataset. 
-## Build bitfiles for BNN-PYNQ examples +## Build bitfiles for KWS examples The build is currently configured for the PYNQ-Z1 board and a throughput of 200k FPS at a clock frequency of 100 MHz. @@ -18,7 +18,7 @@ cd $FINN_EXAMPLES/build/finn bash run-docker.sh build_custom $FINN_EXAMPLES/build/kws ``` -3. The generated outputs will be under `kws/_output__`. +3. The generated outputs will be under `kws/output__`. You can find a description of the generated files [here](https://finn-dev.readthedocs.io/en/latest/command_line.html#simple-dataflow-build-mode). The folder will additionally include the quantized inputs for verification (`all_validation_KWS_data_inputs_len_10102.npy`) and the expected outputs (`all_validation_KWS_data_outputs_len_10102.npy`). When running the network on hardware the validation should achieve an accuracy of 89.78 % with 9070 of the 10102 samples being classified correctly. diff --git a/build/kws/build.py b/build/kws/build.py index 99a908e..3a967e3 100644 --- a/build/kws/build.py +++ b/build/kws/build.py @@ -93,7 +93,7 @@ def step_preprocess(model: ModelWrapper, cfg: DataflowBuildConfig): steps=build_steps, generate_outputs=build_outputs, output_dir=last_output_dir, - target_fps=200000, + folding_config_file="folding_config/%s_kws_folding_config.json" % platform_name, synth_clk_period_ns=10.0, board=platform_name, shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ, diff --git a/build/kws/folding_config/Pynq-Z1_kws_folding_config.json b/build/kws/folding_config/Pynq-Z1_kws_folding_config.json new file mode 100644 index 0000000..932e745 --- /dev/null +++ b/build/kws/folding_config/Pynq-Z1_kws_folding_config.json @@ -0,0 +1,38 @@ +{ + "Defaults": {}, + "MVAU_hls_0": { + "PE": 32, + "SIMD": 10, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "internal_decoupled", + "runtime_writeable_weights": 0 + }, + "MVAU_hls_1": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "internal_decoupled", + 
"runtime_writeable_weights": 0 + }, + "MVAU_hls_2": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "internal_decoupled", + "runtime_writeable_weights": 0 + }, + "MVAU_hls_3": { + "PE": 1, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "internal_decoupled", + "runtime_writeable_weights": 0 + }, + "LabelSelect_hls_0": { + "PE": 1 + } +} diff --git a/build/mobilenet-v1/README.md b/build/mobilenet-v1/README.md index f31ad57..3746108 100644 --- a/build/mobilenet-v1/README.md +++ b/build/mobilenet-v1/README.md @@ -36,13 +36,13 @@ cd $FINN_EXAMPLES/build/finn ./run-docker.sh build_custom $FINN_EXAMPLES/build/mobilenet-v1 ``` -5. The generated outputs will be under `mobilenet-v1/output__`. You can find a description of the generated files [here](https://finn-dev.readthedocs.io/en/latest/command_line.html#simple-dataflow-build-mode). +3. The generated outputs will be under `mobilenet-v1/output__`. You can find a description of the generated files [here](https://finn-dev.readthedocs.io/en/latest/command_line.html#simple-dataflow-build-mode). ## Where did the ONNX model files come from? The 4-bit quantized MobileNet-v1 is part of the [Brevitas examples](https://github.com/Xilinx/brevitas/tree/master/src/brevitas_examples/imagenet_classification). -Subsequently, the trained networks is [exported to ONNX](https://github.com/Xilinx/finn/blob/master/notebooks/basics/1_brevitas_network_import.ipynb). In addition, the particular version used here has two additions for pre- and postprocessing: +Subsequently, the trained networks is [exported to ONNX](https://github.com/Xilinx/finn/blob/main/notebooks/basics/1_brevitas_network_import_via_QONNX.ipynb). 
In addition, the particular version used here has two additions for pre- and postprocessing: * A divide-by-255 node is added at the input, and the input is marked as 8-bit (to directly accept 8-bit images as input) * Normalization is added at the input with `mean = [0.485, 0.456, 0.406]` and `std = 0.226`. Note that the `std` is global and not per-channel to facilitate its removal via the [streamlining transform](https://arxiv.org/pdf/1709.04060). diff --git a/build/mobilenet-v1/build.py b/build/mobilenet-v1/build.py index 7ec022d..711a603 100644 --- a/build/mobilenet-v1/build.py +++ b/build/mobilenet-v1/build.py @@ -1,4 +1,5 @@ -# Copyright (c) 2020, Xilinx +# Copyright (C) 2020-2022, Xilinx, Inc. +# Copyright (C) 2024, Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -35,8 +36,8 @@ # custom steps for mobilenetv1 from custom_steps import ( step_mobilenet_streamline, - step_mobilenet_convert_to_hls_layers, - step_mobilenet_convert_to_hls_layers_separate_th, + step_mobilenet_convert_to_hw_layers, + step_mobilenet_convert_to_hw_layers_separate_th, step_mobilenet_lower_convs, step_mobilenet_slr_floorplan, ) @@ -44,10 +45,8 @@ model_name = "mobilenetv1-w4a4" # which platforms to build the networks for -# zynq_platforms = ["ZCU102", "ZCU104"] -zynq_platforms = ["ZCU102"] -# alveo_platforms = ["U50", "U200", "U250", "U280"] -alveo_platforms = ["U250"] +zynq_platforms = ["ZCU104", "ZCU102"] +alveo_platforms = ["U250"] # "U50", "U200", "U280" platforms_to_build = zynq_platforms + alveo_platforms @@ -75,13 +74,14 @@ def select_build_steps(platform): return [ step_mobilenet_streamline, step_mobilenet_lower_convs, - step_mobilenet_convert_to_hls_layers_separate_th, + step_mobilenet_convert_to_hw_layers_separate_th, "step_create_dataflow_partition", + "step_specialize_layers", "step_apply_folding_config", "step_minimize_bit_width", "step_generate_estimate_reports", - "step_hls_codegen", - 
"step_hls_ipgen", + "step_hw_codegen", + "step_hw_ipgen", "step_set_fifo_depths", "step_create_stitched_ip", "step_synthesize_bitfile", @@ -92,13 +92,14 @@ def select_build_steps(platform): return [ step_mobilenet_streamline, step_mobilenet_lower_convs, - step_mobilenet_convert_to_hls_layers, + step_mobilenet_convert_to_hw_layers, "step_create_dataflow_partition", + "step_specialize_layers", "step_apply_folding_config", "step_minimize_bit_width", "step_generate_estimate_reports", - "step_hls_codegen", - "step_hls_ipgen", + "step_hw_codegen", + "step_hw_ipgen", "step_set_fifo_depths", step_mobilenet_slr_floorplan, "step_synthesize_bitfile", @@ -123,6 +124,12 @@ def select_build_steps(platform): # for Zynq, use the board name as the release name # e.g. ZCU104 release_platform_name = platform_name + # for ZCU104 we provide a specialize layer json + specialize_layer_file = ( + "specialize_layers_config/ZCU104_specialize_layers_config.json" + if platform_name == "ZCU104" + else None + ) platform_dir = "release/%s" % release_platform_name os.makedirs(platform_dir, exist_ok=True) @@ -130,6 +137,7 @@ def select_build_steps(platform): steps=select_build_steps(platform_name), output_dir="output_%s_%s" % (model_name, release_platform_name), folding_config_file="folding_config/%s_folding_config.json" % platform_name, + specialize_layers_config_file=specialize_layer_file, synth_clk_period_ns=select_clk_period(platform_name), board=platform_name, shell_flow_type=shell_flow_type, diff --git a/build/mobilenet-v1/custom_steps.py b/build/mobilenet-v1/custom_steps.py index cb66421..6cd54af 100644 --- a/build/mobilenet-v1/custom_steps.py +++ b/build/mobilenet-v1/custom_steps.py @@ -44,7 +44,7 @@ GiveUniqueNodeNames, ApplyConfig, ) -import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls +import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw from qonnx.transformation.infer_shapes import InferShapes from qonnx.transformation.change_datalayout import 
ChangeDataLayoutQuantAvgPool2d from qonnx.transformation.infer_datatypes import InferDataTypes @@ -87,14 +87,13 @@ def step_mobilenet_lower_convs(model: ModelWrapper, cfg: DataflowBuildConfig): return model -def step_mobilenet_convert_to_hls_layers(model: ModelWrapper, cfg: DataflowBuildConfig): - mem_mode = cfg.default_mem_mode.value - model = model.transform(to_hls.InferPool_Batch()) - model = model.transform(to_hls.InferConvInpGen()) - model = model.transform(to_hls.InferVectorVectorActivation()) - model = model.transform(to_hls.InferQuantizedMatrixVectorActivation(mem_mode)) - model = model.transform(to_hls.InferChannelwiseLinearLayer()) - model = model.transform(to_hls.InferLabelSelectLayer()) +def step_mobilenet_convert_to_hw_layers(model: ModelWrapper, cfg: DataflowBuildConfig): + model = model.transform(to_hw.InferPool()) + model = model.transform(to_hw.InferConvInpGen()) + model = model.transform(to_hw.InferVectorVectorActivation()) + model = model.transform(to_hw.InferQuantizedMatrixVectorActivation()) + model = model.transform(to_hw.InferChannelwiseLinearLayer()) + model = model.transform(to_hw.InferLabelSelectLayer()) model = model.transform(InferShapes()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) @@ -104,7 +103,7 @@ def step_mobilenet_convert_to_hls_layers(model: ModelWrapper, cfg: DataflowBuild def step_mobilenet_slr_floorplan(model: ModelWrapper, cfg: DataflowBuildConfig): if cfg.shell_flow_type == ShellFlowType.VITIS_ALVEO: try: - from finn.analysis.partitioning import partition + from finnexperimental.analysis.partitioning import partition # apply partitioning of the model, restricting the first and last layers # to SLR0 @@ -125,15 +124,14 @@ def step_mobilenet_slr_floorplan(model: ModelWrapper, cfg: DataflowBuildConfig): return model -def step_mobilenet_convert_to_hls_layers_separate_th(model: ModelWrapper, cfg: DataflowBuildConfig): - mem_mode = cfg.default_mem_mode.value - model = 
model.transform(to_hls.InferPool_Batch()) - model = model.transform(to_hls.InferConvInpGen()) - model = model.transform(to_hls.InferThresholdingLayer()) - model = model.transform(to_hls.InferVectorVectorActivation()) - model = model.transform(to_hls.InferQuantizedMatrixVectorActivation(mem_mode)) - model = model.transform(to_hls.InferChannelwiseLinearLayer()) - model = model.transform(to_hls.InferLabelSelectLayer()) +def step_mobilenet_convert_to_hw_layers_separate_th(model: ModelWrapper, cfg: DataflowBuildConfig): + model = model.transform(to_hw.InferPool()) + model = model.transform(to_hw.InferConvInpGen()) + model = model.transform(to_hw.InferThresholdingLayer()) + model = model.transform(to_hw.InferVectorVectorActivation()) + model = model.transform(to_hw.InferQuantizedMatrixVectorActivation()) + model = model.transform(to_hw.InferChannelwiseLinearLayer()) + model = model.transform(to_hw.InferLabelSelectLayer()) model = model.transform(InferShapes()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) diff --git a/build/mobilenet-v1/folding_config/U200_folding_config.json b/build/mobilenet-v1/folding_config/U200_folding_config.json index a7b129a..fe3c3d2 100644 --- a/build/mobilenet-v1/folding_config/U200_folding_config.json +++ b/build/mobilenet-v1/folding_config/U200_folding_config.json @@ -1,499 +1,439 @@ { "Defaults": {}, - "StreamingFIFO_0": { + "StreamingFIFO_rtl_0": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 3, "ram_style": "distributed" }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 32, "SIMD": 3, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "dsp" }, - "FMPadding_Batch_0": { + "FMPadding_rtl_0": { "SIMD": 32 }, - "StreamingFIFO_3": { + "StreamingFIFO_rtl_3": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - 
"ConvolutionInputGenerator_1": { + "ConvolutionInputGenerator_rtl_1": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_0": { + "VVAU_hls_0": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_0": { - "impl_style": "hls" - }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_1": { - "impl_style": "hls" - }, - "FMPadding_Batch_1": { + "FMPadding_rtl_1": { "SIMD": 32 }, - "StreamingFIFO_9": { + "StreamingFIFO_rtl_9": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_1": { + "VVAU_hls_1": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_2": { - "impl_style": "hls" - }, - "StreamingFIFO_12": { + "StreamingFIFO_rtl_12": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_3": { - "impl_style": "hls" - }, - "FMPadding_Batch_2": { + "FMPadding_rtl_2": { "SIMD": 64 }, - "StreamingFIFO_15": { + "StreamingFIFO_rtl_15": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "ConvolutionInputGenerator_3": { + "ConvolutionInputGenerator_rtl_3": { "SIMD": 64, "ram_style": "distributed" }, - "VectorVectorActivation_2": { + "VVAU_hls_2": { "PE": 64, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_4": { - "impl_style": "hls" - }, - "StreamingFIFO_18": { + "StreamingFIFO_rtl_18": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": 
"internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_5": { - "impl_style": "hls" - }, - "StreamingFIFO_20": { + "StreamingFIFO_rtl_20": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_3": { + "FMPadding_rtl_3": { "SIMD": 16 }, - "StreamingFIFO_21": { + "StreamingFIFO_rtl_21": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_3": { + "VVAU_hls_3": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_23": { + "StreamingFIFO_rtl_23": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_4": { + "MVAU_hls_4": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_6": { - "impl_style": "hls" - }, - "FMPadding_Batch_4": { + "FMPadding_rtl_4": { "SIMD": 32 }, - "StreamingFIFO_26": { + "StreamingFIFO_rtl_26": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_5": { + "ConvolutionInputGenerator_rtl_5": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_4": { + "VVAU_hls_4": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_7": { - "impl_style": "hls" - }, - "StreamingFIFO_29": { + "StreamingFIFO_rtl_29": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_5": { + "MVAU_hls_5": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_8": { - "impl_style": "hls" - }, - "StreamingFIFO_31": { + "StreamingFIFO_rtl_31": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_5": { + "FMPadding_rtl_5": { "SIMD": 8 }, - "StreamingFIFO_32": { + "StreamingFIFO_rtl_32": { "ram_style": "ultra", 
"depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_6": { + "ConvolutionInputGenerator_rtl_6": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_5": { + "VVAU_hls_5": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_9": { - "impl_style": "hls" - }, - "StreamingFIFO_35": { + "StreamingFIFO_rtl_35": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_6": { + "MVAU_hls_6": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "FMPadding_Batch_6": { + "FMPadding_rtl_6": { "SIMD": 16 }, - "StreamingFIFO_37": { + "StreamingFIFO_rtl_37": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_7": { + "ConvolutionInputGenerator_rtl_7": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_6": { + "VVAU_hls_6": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_39": { + "StreamingFIFO_rtl_39": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_7": { + "MVAU_hls_7": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_10": { - "impl_style": "hls" - }, - "StreamingFIFO_41": { + "StreamingFIFO_rtl_41": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_7": { + "FMPadding_rtl_7": { "SIMD": 16 }, - "StreamingFIFO_42": { + "StreamingFIFO_rtl_42": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_8": { + "ConvolutionInputGenerator_rtl_8": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_7": { + "VVAU_hls_7": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_44": { + "StreamingFIFO_rtl_44": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_8": { + "MVAU_hls_8": { "PE": 32, "SIMD": 16, 
"ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_11": { - "impl_style": "hls" - }, - "StreamingFIFO_46": { + "StreamingFIFO_rtl_46": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_8": { + "FMPadding_rtl_8": { "SIMD": 16 }, - "StreamingFIFO_47": { + "StreamingFIFO_rtl_47": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_9": { + "ConvolutionInputGenerator_rtl_9": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_8": { + "VVAU_hls_8": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_49": { + "StreamingFIFO_rtl_49": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_9": { + "MVAU_hls_9": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_12": { - "impl_style": "hls" - }, - "StreamingFIFO_51": { + "StreamingFIFO_rtl_51": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_9": { + "FMPadding_rtl_9": { "SIMD": 16 }, - "StreamingFIFO_52": { + "StreamingFIFO_rtl_52": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_10": { + "ConvolutionInputGenerator_rtl_10": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_9": { + "VVAU_hls_9": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_54": { + "StreamingFIFO_rtl_54": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_10": { + "MVAU_hls_10": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_13": { - "impl_style": "hls" - }, - "StreamingFIFO_56": { + "StreamingFIFO_rtl_56": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - 
"FMPadding_Batch_10": { + "FMPadding_rtl_10": { "SIMD": 16 }, - "StreamingFIFO_57": { + "StreamingFIFO_rtl_57": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_11": { + "ConvolutionInputGenerator_rtl_11": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_10": { + "VVAU_hls_10": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_59": { + "StreamingFIFO_rtl_59": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_11": { + "MVAU_hls_11": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_14": { - "impl_style": "hls" - }, - "StreamingFIFO_61": { + "StreamingFIFO_rtl_61": { "ram_style": "auto", "depth": 128, "impl_style": "rtl" }, - "FMPadding_Batch_11": { + "FMPadding_rtl_11": { "SIMD": 4 }, - "StreamingFIFO_62": { + "StreamingFIFO_rtl_62": { "ram_style": "ultra", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_12": { + "ConvolutionInputGenerator_rtl_12": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_11": { + "VVAU_hls_11": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_15": { - "impl_style": "hls" - }, - "StreamingFIFO_65": { + "StreamingFIFO_rtl_65": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_12": { + "MVAU_hls_12": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_16": { - "impl_style": "hls" - }, - "StreamingFIFO_67": { + "StreamingFIFO_rtl_67": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_12": { + "FMPadding_rtl_12": { "SIMD": 8 }, - "StreamingFIFO_68": { + "StreamingFIFO_rtl_68": { "ram_style": "ultra", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_13": { + 
"ConvolutionInputGenerator_rtl_13": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_12": { + "VVAU_hls_12": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_17": { - "impl_style": "hls" - }, - "StreamingFIFO_71": { + "StreamingFIFO_rtl_71": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_13": { + "MVAU_hls_13": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_18": { - "impl_style": "hls" - }, - "ConvolutionInputGenerator_14": { + "ConvolutionInputGenerator_rtl_14": { "SIMD": 4, "ram_style": "distributed" }, - "Pool_Batch_0": { + "Pool_hls_0": { "PE": 4 }, - "MatrixVectorActivation_14": { + "MVAU_hls_14": { "PE": 4, "SIMD": 4, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_19": { - "impl_style": "hls" - }, - "ChannelwiseOp_Batch_0": { + "ChannelwiseOp_hls_0": { "PE": 1, "ram_style": "distributed" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/mobilenet-v1/folding_config/U250_folding_config.json b/build/mobilenet-v1/folding_config/U250_folding_config.json index a7b129a..fe3c3d2 100644 --- a/build/mobilenet-v1/folding_config/U250_folding_config.json +++ b/build/mobilenet-v1/folding_config/U250_folding_config.json @@ -1,499 +1,439 @@ { "Defaults": {}, - "StreamingFIFO_0": { + "StreamingFIFO_rtl_0": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 3, "ram_style": "distributed" }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 32, "SIMD": 3, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "dsp" }, - "FMPadding_Batch_0": { + "FMPadding_rtl_0": { "SIMD": 32 }, - "StreamingFIFO_3": { + 
"StreamingFIFO_rtl_3": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "ConvolutionInputGenerator_1": { + "ConvolutionInputGenerator_rtl_1": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_0": { + "VVAU_hls_0": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_0": { - "impl_style": "hls" - }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_1": { - "impl_style": "hls" - }, - "FMPadding_Batch_1": { + "FMPadding_rtl_1": { "SIMD": 32 }, - "StreamingFIFO_9": { + "StreamingFIFO_rtl_9": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_1": { + "VVAU_hls_1": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_2": { - "impl_style": "hls" - }, - "StreamingFIFO_12": { + "StreamingFIFO_rtl_12": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_3": { - "impl_style": "hls" - }, - "FMPadding_Batch_2": { + "FMPadding_rtl_2": { "SIMD": 64 }, - "StreamingFIFO_15": { + "StreamingFIFO_rtl_15": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "ConvolutionInputGenerator_3": { + "ConvolutionInputGenerator_rtl_3": { "SIMD": 64, "ram_style": "distributed" }, - "VectorVectorActivation_2": { + "VVAU_hls_2": { "PE": 64, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_4": { - "impl_style": "hls" - }, - "StreamingFIFO_18": { + "StreamingFIFO_rtl_18": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { 
"PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_5": { - "impl_style": "hls" - }, - "StreamingFIFO_20": { + "StreamingFIFO_rtl_20": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_3": { + "FMPadding_rtl_3": { "SIMD": 16 }, - "StreamingFIFO_21": { + "StreamingFIFO_rtl_21": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_3": { + "VVAU_hls_3": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_23": { + "StreamingFIFO_rtl_23": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_4": { + "MVAU_hls_4": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_6": { - "impl_style": "hls" - }, - "FMPadding_Batch_4": { + "FMPadding_rtl_4": { "SIMD": 32 }, - "StreamingFIFO_26": { + "StreamingFIFO_rtl_26": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_5": { + "ConvolutionInputGenerator_rtl_5": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_4": { + "VVAU_hls_4": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_7": { - "impl_style": "hls" - }, - "StreamingFIFO_29": { + "StreamingFIFO_rtl_29": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_5": { + "MVAU_hls_5": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_8": { - "impl_style": "hls" - }, - "StreamingFIFO_31": { + "StreamingFIFO_rtl_31": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_5": { + "FMPadding_rtl_5": { 
"SIMD": 8 }, - "StreamingFIFO_32": { + "StreamingFIFO_rtl_32": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_6": { + "ConvolutionInputGenerator_rtl_6": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_5": { + "VVAU_hls_5": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_9": { - "impl_style": "hls" - }, - "StreamingFIFO_35": { + "StreamingFIFO_rtl_35": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_6": { + "MVAU_hls_6": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "FMPadding_Batch_6": { + "FMPadding_rtl_6": { "SIMD": 16 }, - "StreamingFIFO_37": { + "StreamingFIFO_rtl_37": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_7": { + "ConvolutionInputGenerator_rtl_7": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_6": { + "VVAU_hls_6": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_39": { + "StreamingFIFO_rtl_39": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_7": { + "MVAU_hls_7": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_10": { - "impl_style": "hls" - }, - "StreamingFIFO_41": { + "StreamingFIFO_rtl_41": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_7": { + "FMPadding_rtl_7": { "SIMD": 16 }, - "StreamingFIFO_42": { + "StreamingFIFO_rtl_42": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_8": { + "ConvolutionInputGenerator_rtl_8": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_7": { + "VVAU_hls_7": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_44": { + "StreamingFIFO_rtl_44": { "ram_style": "ultra", "depth": 1024, "impl_style": 
"vivado" }, - "MatrixVectorActivation_8": { + "MVAU_hls_8": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_11": { - "impl_style": "hls" - }, - "StreamingFIFO_46": { + "StreamingFIFO_rtl_46": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_8": { + "FMPadding_rtl_8": { "SIMD": 16 }, - "StreamingFIFO_47": { + "StreamingFIFO_rtl_47": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_9": { + "ConvolutionInputGenerator_rtl_9": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_8": { + "VVAU_hls_8": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_49": { + "StreamingFIFO_rtl_49": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_9": { + "MVAU_hls_9": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_12": { - "impl_style": "hls" - }, - "StreamingFIFO_51": { + "StreamingFIFO_rtl_51": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_9": { + "FMPadding_rtl_9": { "SIMD": 16 }, - "StreamingFIFO_52": { + "StreamingFIFO_rtl_52": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_10": { + "ConvolutionInputGenerator_rtl_10": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_9": { + "VVAU_hls_9": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_54": { + "StreamingFIFO_rtl_54": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_10": { + "MVAU_hls_10": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_13": { - "impl_style": "hls" - }, - "StreamingFIFO_56": { + 
"StreamingFIFO_rtl_56": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_10": { + "FMPadding_rtl_10": { "SIMD": 16 }, - "StreamingFIFO_57": { + "StreamingFIFO_rtl_57": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_11": { + "ConvolutionInputGenerator_rtl_11": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_10": { + "VVAU_hls_10": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_59": { + "StreamingFIFO_rtl_59": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_11": { + "MVAU_hls_11": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_14": { - "impl_style": "hls" - }, - "StreamingFIFO_61": { + "StreamingFIFO_rtl_61": { "ram_style": "auto", "depth": 128, "impl_style": "rtl" }, - "FMPadding_Batch_11": { + "FMPadding_rtl_11": { "SIMD": 4 }, - "StreamingFIFO_62": { + "StreamingFIFO_rtl_62": { "ram_style": "ultra", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_12": { + "ConvolutionInputGenerator_rtl_12": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_11": { + "VVAU_hls_11": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_15": { - "impl_style": "hls" - }, - "StreamingFIFO_65": { + "StreamingFIFO_rtl_65": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_12": { + "MVAU_hls_12": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_16": { - "impl_style": "hls" - }, - "StreamingFIFO_67": { + "StreamingFIFO_rtl_67": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_12": { + "FMPadding_rtl_12": { "SIMD": 8 }, - "StreamingFIFO_68": { + "StreamingFIFO_rtl_68": { "ram_style": "ultra", "depth": 
4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_13": { + "ConvolutionInputGenerator_rtl_13": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_12": { + "VVAU_hls_12": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_17": { - "impl_style": "hls" - }, - "StreamingFIFO_71": { + "StreamingFIFO_rtl_71": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_13": { + "MVAU_hls_13": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_18": { - "impl_style": "hls" - }, - "ConvolutionInputGenerator_14": { + "ConvolutionInputGenerator_rtl_14": { "SIMD": 4, "ram_style": "distributed" }, - "Pool_Batch_0": { + "Pool_hls_0": { "PE": 4 }, - "MatrixVectorActivation_14": { + "MVAU_hls_14": { "PE": 4, "SIMD": 4, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_19": { - "impl_style": "hls" - }, - "ChannelwiseOp_Batch_0": { + "ChannelwiseOp_hls_0": { "PE": 1, "ram_style": "distributed" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/mobilenet-v1/folding_config/U280_folding_config.json b/build/mobilenet-v1/folding_config/U280_folding_config.json index a7b129a..fe3c3d2 100644 --- a/build/mobilenet-v1/folding_config/U280_folding_config.json +++ b/build/mobilenet-v1/folding_config/U280_folding_config.json @@ -1,499 +1,439 @@ { "Defaults": {}, - "StreamingFIFO_0": { + "StreamingFIFO_rtl_0": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 3, "ram_style": "distributed" }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 32, "SIMD": 3, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "dsp" }, - "FMPadding_Batch_0": { + 
"FMPadding_rtl_0": { "SIMD": 32 }, - "StreamingFIFO_3": { + "StreamingFIFO_rtl_3": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "ConvolutionInputGenerator_1": { + "ConvolutionInputGenerator_rtl_1": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_0": { + "VVAU_hls_0": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_0": { - "impl_style": "hls" - }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_1": { - "impl_style": "hls" - }, - "FMPadding_Batch_1": { + "FMPadding_rtl_1": { "SIMD": 32 }, - "StreamingFIFO_9": { + "StreamingFIFO_rtl_9": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_1": { + "VVAU_hls_1": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_2": { - "impl_style": "hls" - }, - "StreamingFIFO_12": { + "StreamingFIFO_rtl_12": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_3": { - "impl_style": "hls" - }, - "FMPadding_Batch_2": { + "FMPadding_rtl_2": { "SIMD": 64 }, - "StreamingFIFO_15": { + "StreamingFIFO_rtl_15": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "ConvolutionInputGenerator_3": { + "ConvolutionInputGenerator_rtl_3": { "SIMD": 64, "ram_style": "distributed" }, - "VectorVectorActivation_2": { + "VVAU_hls_2": { "PE": 64, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_4": { - "impl_style": "hls" - }, - "StreamingFIFO_18": { + "StreamingFIFO_rtl_18": { "ram_style": "ultra", "depth": 1024, "impl_style": 
"vivado" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_5": { - "impl_style": "hls" - }, - "StreamingFIFO_20": { + "StreamingFIFO_rtl_20": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_3": { + "FMPadding_rtl_3": { "SIMD": 16 }, - "StreamingFIFO_21": { + "StreamingFIFO_rtl_21": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_3": { + "VVAU_hls_3": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_23": { + "StreamingFIFO_rtl_23": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_4": { + "MVAU_hls_4": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_6": { - "impl_style": "hls" - }, - "FMPadding_Batch_4": { + "FMPadding_rtl_4": { "SIMD": 32 }, - "StreamingFIFO_26": { + "StreamingFIFO_rtl_26": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_5": { + "ConvolutionInputGenerator_rtl_5": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_4": { + "VVAU_hls_4": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_7": { - "impl_style": "hls" - }, - "StreamingFIFO_29": { + "StreamingFIFO_rtl_29": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_5": { + "MVAU_hls_5": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_8": { - "impl_style": "hls" - }, - "StreamingFIFO_31": { + "StreamingFIFO_rtl_31": { "ram_style": "auto", "depth": 32, 
"impl_style": "rtl" }, - "FMPadding_Batch_5": { + "FMPadding_rtl_5": { "SIMD": 8 }, - "StreamingFIFO_32": { + "StreamingFIFO_rtl_32": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_6": { + "ConvolutionInputGenerator_rtl_6": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_5": { + "VVAU_hls_5": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_9": { - "impl_style": "hls" - }, - "StreamingFIFO_35": { + "StreamingFIFO_rtl_35": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_6": { + "MVAU_hls_6": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "FMPadding_Batch_6": { + "FMPadding_rtl_6": { "SIMD": 16 }, - "StreamingFIFO_37": { + "StreamingFIFO_rtl_37": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_7": { + "ConvolutionInputGenerator_rtl_7": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_6": { + "VVAU_hls_6": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_39": { + "StreamingFIFO_rtl_39": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_7": { + "MVAU_hls_7": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_10": { - "impl_style": "hls" - }, - "StreamingFIFO_41": { + "StreamingFIFO_rtl_41": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_7": { + "FMPadding_rtl_7": { "SIMD": 16 }, - "StreamingFIFO_42": { + "StreamingFIFO_rtl_42": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_8": { + "ConvolutionInputGenerator_rtl_8": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_7": { + "VVAU_hls_7": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_44": { + 
"StreamingFIFO_rtl_44": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_8": { + "MVAU_hls_8": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_11": { - "impl_style": "hls" - }, - "StreamingFIFO_46": { + "StreamingFIFO_rtl_46": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_8": { + "FMPadding_rtl_8": { "SIMD": 16 }, - "StreamingFIFO_47": { + "StreamingFIFO_rtl_47": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_9": { + "ConvolutionInputGenerator_rtl_9": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_8": { + "VVAU_hls_8": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_49": { + "StreamingFIFO_rtl_49": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_9": { + "MVAU_hls_9": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_12": { - "impl_style": "hls" - }, - "StreamingFIFO_51": { + "StreamingFIFO_rtl_51": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_9": { + "FMPadding_rtl_9": { "SIMD": 16 }, - "StreamingFIFO_52": { + "StreamingFIFO_rtl_52": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_10": { + "ConvolutionInputGenerator_rtl_10": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_9": { + "VVAU_hls_9": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_54": { + "StreamingFIFO_rtl_54": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_10": { + "MVAU_hls_10": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - 
"StreamingDataWidthConverter_Batch_13": { - "impl_style": "hls" - }, - "StreamingFIFO_56": { + "StreamingFIFO_rtl_56": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_10": { + "FMPadding_rtl_10": { "SIMD": 16 }, - "StreamingFIFO_57": { + "StreamingFIFO_rtl_57": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_11": { + "ConvolutionInputGenerator_rtl_11": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_10": { + "VVAU_hls_10": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_59": { + "StreamingFIFO_rtl_59": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_11": { + "MVAU_hls_11": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_14": { - "impl_style": "hls" - }, - "StreamingFIFO_61": { + "StreamingFIFO_rtl_61": { "ram_style": "auto", "depth": 128, "impl_style": "rtl" }, - "FMPadding_Batch_11": { + "FMPadding_rtl_11": { "SIMD": 4 }, - "StreamingFIFO_62": { + "StreamingFIFO_rtl_62": { "ram_style": "ultra", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_12": { + "ConvolutionInputGenerator_rtl_12": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_11": { + "VVAU_hls_11": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_15": { - "impl_style": "hls" - }, - "StreamingFIFO_65": { + "StreamingFIFO_rtl_65": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_12": { + "MVAU_hls_12": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_16": { - "impl_style": "hls" - }, - "StreamingFIFO_67": { + "StreamingFIFO_rtl_67": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_12": { + "FMPadding_rtl_12": { 
"SIMD": 8 }, - "StreamingFIFO_68": { + "StreamingFIFO_rtl_68": { "ram_style": "ultra", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_13": { + "ConvolutionInputGenerator_rtl_13": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_12": { + "VVAU_hls_12": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_17": { - "impl_style": "hls" - }, - "StreamingFIFO_71": { + "StreamingFIFO_rtl_71": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_13": { + "MVAU_hls_13": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_18": { - "impl_style": "hls" - }, - "ConvolutionInputGenerator_14": { + "ConvolutionInputGenerator_rtl_14": { "SIMD": 4, "ram_style": "distributed" }, - "Pool_Batch_0": { + "Pool_hls_0": { "PE": 4 }, - "MatrixVectorActivation_14": { + "MVAU_hls_14": { "PE": 4, "SIMD": 4, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_19": { - "impl_style": "hls" - }, - "ChannelwiseOp_Batch_0": { + "ChannelwiseOp_hls_0": { "PE": 1, "ram_style": "distributed" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/mobilenet-v1/folding_config/U50_folding_config.json b/build/mobilenet-v1/folding_config/U50_folding_config.json index a7b129a..fe3c3d2 100644 --- a/build/mobilenet-v1/folding_config/U50_folding_config.json +++ b/build/mobilenet-v1/folding_config/U50_folding_config.json @@ -1,499 +1,439 @@ { "Defaults": {}, - "StreamingFIFO_0": { + "StreamingFIFO_rtl_0": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 3, "ram_style": "distributed" }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 32, "SIMD": 3, "ram_style": "block", - "mem_mode": 
"decoupled", + "mem_mode": "internal_decoupled", "resType": "dsp" }, - "FMPadding_Batch_0": { + "FMPadding_rtl_0": { "SIMD": 32 }, - "StreamingFIFO_3": { + "StreamingFIFO_rtl_3": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "ConvolutionInputGenerator_1": { + "ConvolutionInputGenerator_rtl_1": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_0": { + "VVAU_hls_0": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_0": { - "impl_style": "hls" - }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_1": { - "impl_style": "hls" - }, - "FMPadding_Batch_1": { + "FMPadding_rtl_1": { "SIMD": 32 }, - "StreamingFIFO_9": { + "StreamingFIFO_rtl_9": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_1": { + "VVAU_hls_1": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_2": { - "impl_style": "hls" - }, - "StreamingFIFO_12": { + "StreamingFIFO_rtl_12": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_3": { - "impl_style": "hls" - }, - "FMPadding_Batch_2": { + "FMPadding_rtl_2": { "SIMD": 64 }, - "StreamingFIFO_15": { + "StreamingFIFO_rtl_15": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "ConvolutionInputGenerator_3": { + "ConvolutionInputGenerator_rtl_3": { "SIMD": 64, "ram_style": "distributed" }, - "VectorVectorActivation_2": { + "VVAU_hls_2": { "PE": 64, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_4": { - "impl_style": "hls" - }, - 
"StreamingFIFO_18": { + "StreamingFIFO_rtl_18": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_5": { - "impl_style": "hls" - }, - "StreamingFIFO_20": { + "StreamingFIFO_rtl_20": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_3": { + "FMPadding_rtl_3": { "SIMD": 16 }, - "StreamingFIFO_21": { + "StreamingFIFO_rtl_21": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_3": { + "VVAU_hls_3": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_23": { + "StreamingFIFO_rtl_23": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_4": { + "MVAU_hls_4": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_6": { - "impl_style": "hls" - }, - "FMPadding_Batch_4": { + "FMPadding_rtl_4": { "SIMD": 32 }, - "StreamingFIFO_26": { + "StreamingFIFO_rtl_26": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_5": { + "ConvolutionInputGenerator_rtl_5": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_4": { + "VVAU_hls_4": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_7": { - "impl_style": "hls" - }, - "StreamingFIFO_29": { + "StreamingFIFO_rtl_29": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_5": { + "MVAU_hls_5": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_8": { - "impl_style": "hls" - 
}, - "StreamingFIFO_31": { + "StreamingFIFO_rtl_31": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_5": { + "FMPadding_rtl_5": { "SIMD": 8 }, - "StreamingFIFO_32": { + "StreamingFIFO_rtl_32": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_6": { + "ConvolutionInputGenerator_rtl_6": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_5": { + "VVAU_hls_5": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_9": { - "impl_style": "hls" - }, - "StreamingFIFO_35": { + "StreamingFIFO_rtl_35": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_6": { + "MVAU_hls_6": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "FMPadding_Batch_6": { + "FMPadding_rtl_6": { "SIMD": 16 }, - "StreamingFIFO_37": { + "StreamingFIFO_rtl_37": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_7": { + "ConvolutionInputGenerator_rtl_7": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_6": { + "VVAU_hls_6": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_39": { + "StreamingFIFO_rtl_39": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_7": { + "MVAU_hls_7": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_10": { - "impl_style": "hls" - }, - "StreamingFIFO_41": { + "StreamingFIFO_rtl_41": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_7": { + "FMPadding_rtl_7": { "SIMD": 16 }, - "StreamingFIFO_42": { + "StreamingFIFO_rtl_42": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_8": { + "ConvolutionInputGenerator_rtl_8": { "SIMD": 16, "ram_style": "distributed" }, - 
"VectorVectorActivation_7": { + "VVAU_hls_7": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_44": { + "StreamingFIFO_rtl_44": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_8": { + "MVAU_hls_8": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_11": { - "impl_style": "hls" - }, - "StreamingFIFO_46": { + "StreamingFIFO_rtl_46": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_8": { + "FMPadding_rtl_8": { "SIMD": 16 }, - "StreamingFIFO_47": { + "StreamingFIFO_rtl_47": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_9": { + "ConvolutionInputGenerator_rtl_9": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_8": { + "VVAU_hls_8": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_49": { + "StreamingFIFO_rtl_49": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_9": { + "MVAU_hls_9": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_12": { - "impl_style": "hls" - }, - "StreamingFIFO_51": { + "StreamingFIFO_rtl_51": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_9": { + "FMPadding_rtl_9": { "SIMD": 16 }, - "StreamingFIFO_52": { + "StreamingFIFO_rtl_52": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_10": { + "ConvolutionInputGenerator_rtl_10": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_9": { + "VVAU_hls_9": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_54": { + "StreamingFIFO_rtl_54": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_10": { + "MVAU_hls_10": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": 
"decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_13": { - "impl_style": "hls" - }, - "StreamingFIFO_56": { + "StreamingFIFO_rtl_56": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_10": { + "FMPadding_rtl_10": { "SIMD": 16 }, - "StreamingFIFO_57": { + "StreamingFIFO_rtl_57": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_11": { + "ConvolutionInputGenerator_rtl_11": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_10": { + "VVAU_hls_10": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_59": { + "StreamingFIFO_rtl_59": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_11": { + "MVAU_hls_11": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_14": { - "impl_style": "hls" - }, - "StreamingFIFO_61": { + "StreamingFIFO_rtl_61": { "ram_style": "auto", "depth": 128, "impl_style": "rtl" }, - "FMPadding_Batch_11": { + "FMPadding_rtl_11": { "SIMD": 4 }, - "StreamingFIFO_62": { + "StreamingFIFO_rtl_62": { "ram_style": "ultra", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_12": { + "ConvolutionInputGenerator_rtl_12": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_11": { + "VVAU_hls_11": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_15": { - "impl_style": "hls" - }, - "StreamingFIFO_65": { + "StreamingFIFO_rtl_65": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_12": { + "MVAU_hls_12": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_16": { - "impl_style": "hls" - }, - "StreamingFIFO_67": { + "StreamingFIFO_rtl_67": { "ram_style": "auto", "depth": 32, 
"impl_style": "rtl" }, - "FMPadding_Batch_12": { + "FMPadding_rtl_12": { "SIMD": 8 }, - "StreamingFIFO_68": { + "StreamingFIFO_rtl_68": { "ram_style": "ultra", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_13": { + "ConvolutionInputGenerator_rtl_13": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_12": { + "VVAU_hls_12": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_17": { - "impl_style": "hls" - }, - "StreamingFIFO_71": { + "StreamingFIFO_rtl_71": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_13": { + "MVAU_hls_13": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_18": { - "impl_style": "hls" - }, - "ConvolutionInputGenerator_14": { + "ConvolutionInputGenerator_rtl_14": { "SIMD": 4, "ram_style": "distributed" }, - "Pool_Batch_0": { + "Pool_hls_0": { "PE": 4 }, - "MatrixVectorActivation_14": { + "MVAU_hls_14": { "PE": 4, "SIMD": 4, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_19": { - "impl_style": "hls" - }, - "ChannelwiseOp_Batch_0": { + "ChannelwiseOp_hls_0": { "PE": 1, "ram_style": "distributed" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/mobilenet-v1/folding_config/ZCU102_folding_config.json b/build/mobilenet-v1/folding_config/ZCU102_folding_config.json index 8862cf2..bbeea73 100755 --- a/build/mobilenet-v1/folding_config/ZCU102_folding_config.json +++ b/build/mobilenet-v1/folding_config/ZCU102_folding_config.json @@ -1,816 +1,570 @@ { "Defaults": {}, - "StreamingFIFO_0": { + "StreamingFIFO_rtl_0": { "ram_style": "block", "depth": 1024, "impl_style": "vivado" }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 1, "ram_style": "distributed" }, - 
"StreamingDataWidthConverter_Batch_0": { - "impl_style": "hls" - }, - "MatrixVectorActivation_0": { + "MVAU_rtl_0": { "PE": 16, "SIMD": 3, "ram_style": "auto", "resType": "dsp", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingFIFO_3": { + "StreamingFIFO_rtl_3": { "ram_style": "auto", "depth": 64, "impl_style": "rtl" }, - "StreamingDataWidthConverter_Batch_1": { - "impl_style": "hls" - }, - "Thresholding_Batch_0": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_2": { - "impl_style": "hls" + "Thresholding_rtl_0": { + "PE": 1 }, - "StreamingFIFO_6": { + "StreamingFIFO_rtl_6": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "FMPadding_Batch_0": { + "FMPadding_rtl_0": { "SIMD": 2 }, - "StreamingDataWidthConverter_Batch_3": { - "impl_style": "hls" - }, - "StreamingFIFO_8": { + "StreamingFIFO_rtl_8": { "ram_style": "block", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_1": { + "ConvolutionInputGenerator_rtl_1": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_0": { + "VVAU_hls_0": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_10": { + "StreamingFIFO_rtl_10": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "StreamingDataWidthConverter_Batch_4": { - "impl_style": "hls" - }, - "Thresholding_Batch_1": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_5": { - "impl_style": "hls" + "Thresholding_rtl_1": { + "PE": 1 }, - "MatrixVectorActivation_1": { + "MVAU_rtl_1": { "PE": 8, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_6": { - "impl_style": "hls" - }, - "Thresholding_Batch_2": { - "PE": 2, - "ram_style": "distributed", - "mem_mode": "const", + "resType": 
"dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_7": { - "impl_style": "hls" + "Thresholding_rtl_2": { + "PE": 2 }, - "StreamingFIFO_17": { + "StreamingFIFO_rtl_17": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_1": { + "FMPadding_rtl_1": { "SIMD": 4 }, - "StreamingDataWidthConverter_Batch_8": { - "impl_style": "hls" - }, - "StreamingFIFO_19": { + "StreamingFIFO_rtl_19": { "ram_style": "block", "depth": 1024, "impl_style": "vivado" }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_1": { + "VVAU_hls_1": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_9": { - "impl_style": "hls" - }, - "Thresholding_Batch_3": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_10": { - "impl_style": "hls" + "Thresholding_rtl_3": { + "PE": 1 }, - "StreamingFIFO_24": { + "StreamingFIFO_rtl_24": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "MatrixVectorActivation_2": { + "MVAU_rtl_2": { "PE": 16, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_11": { - "impl_style": "hls" + "Thresholding_rtl_4": { + "PE": 2 }, - "Thresholding_Batch_4": { - "PE": 2, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingFIFO_27": { + "StreamingFIFO_rtl_27": { "ram_style": "auto", "depth": 128, "impl_style": "rtl" }, - "FMPadding_Batch_2": { + "FMPadding_rtl_2": { "SIMD": 2 }, - "StreamingDataWidthConverter_Batch_12": { - "impl_style": "hls" - }, - "StreamingFIFO_29": { + "StreamingFIFO_rtl_29": { "ram_style": "block", "depth": 1024, "impl_style": "vivado" }, - "ConvolutionInputGenerator_3": { 
+ "ConvolutionInputGenerator_rtl_3": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_2": { + "VVAU_hls_2": { "PE": 16, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_13": { - "impl_style": "hls" + "Thresholding_rtl_5": { + "PE": 2 }, - "Thresholding_Batch_5": { - "PE": 2, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_14": { - "impl_style": "hls" - }, - "StreamingFIFO_34": { + "StreamingFIFO_rtl_34": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "MatrixVectorActivation_3": { + "MVAU_rtl_3": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_15": { - "impl_style": "hls" + "Thresholding_rtl_6": { + "PE": 2 }, - "Thresholding_Batch_6": { - "PE": 2, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingFIFO_37": { + "StreamingFIFO_rtl_37": { "ram_style": "auto", "depth": 128, "impl_style": "rtl" }, - "FMPadding_Batch_3": { + "FMPadding_rtl_3": { "SIMD": 2 }, - "StreamingDataWidthConverter_Batch_16": { - "impl_style": "hls" - }, - "StreamingFIFO_39": { + "StreamingFIFO_rtl_39": { "ram_style": "block", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_3": { + "VVAU_hls_3": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_17": { - "impl_style": "hls" - }, - "Thresholding_Batch_7": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_18": { - "impl_style": "hls" + "Thresholding_rtl_7": { + "PE": 1 }, - "StreamingFIFO_44": { + "StreamingFIFO_rtl_44": { "ram_style": "auto", "depth": 32, "impl_style": 
"rtl" }, - "MatrixVectorActivation_4": { + "MVAU_rtl_4": { "PE": 16, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_19": { - "impl_style": "hls" - }, - "Thresholding_Batch_8": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_8": { + "PE": 1 }, - "StreamingFIFO_47": { + "StreamingFIFO_rtl_47": { "ram_style": "block", "depth": 512, "impl_style": "vivado" }, - "FMPadding_Batch_4": { + "FMPadding_rtl_4": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_20": { - "impl_style": "hls" - }, - "StreamingFIFO_49": { + "StreamingFIFO_rtl_49": { "ram_style": "block", "depth": 1024, "impl_style": "vivado" }, - "ConvolutionInputGenerator_5": { + "ConvolutionInputGenerator_rtl_5": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_4": { + "VVAU_hls_4": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_21": { - "impl_style": "hls" - }, - "Thresholding_Batch_9": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_22": { - "impl_style": "hls" + "Thresholding_rtl_9": { + "PE": 1 }, - "StreamingFIFO_54": { + "StreamingFIFO_rtl_54": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "MatrixVectorActivation_5": { + "MVAU_rtl_5": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_23": { - "impl_style": "hls" - }, - "Thresholding_Batch_10": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_10": { + "PE": 1 }, - "StreamingFIFO_57": { + "StreamingFIFO_rtl_57": { "ram_style": "block", "depth": 512, 
"impl_style": "vivado" }, - "FMPadding_Batch_5": { + "FMPadding_rtl_5": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_24": { - "impl_style": "hls" - }, - "StreamingFIFO_59": { + "StreamingFIFO_rtl_59": { "ram_style": "block", "depth": 8192, "impl_style": "vivado" }, - "ConvolutionInputGenerator_6": { + "ConvolutionInputGenerator_rtl_6": { "SIMD": 2, "ram_style": "distributed" }, - "VectorVectorActivation_5": { + "VVAU_hls_5": { "PE": 2, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_25": { - "impl_style": "hls" - }, - "Thresholding_Batch_11": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_26": { - "impl_style": "hls" + "Thresholding_rtl_11": { + "PE": 1 }, - "StreamingFIFO_64": { + "StreamingFIFO_rtl_64": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "MatrixVectorActivation_6": { + "MVAU_rtl_6": { "PE": 16, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_27": { - "impl_style": "hls" - }, - "Thresholding_Batch_12": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_12": { + "PE": 1 }, - "StreamingFIFO_67": { + "StreamingFIFO_rtl_67": { "ram_style": "block", "depth": 512, "impl_style": "vivado" }, - "FMPadding_Batch_6": { + "FMPadding_rtl_6": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_28": { - "impl_style": "hls" - }, - "StreamingFIFO_69": { + "StreamingFIFO_rtl_69": { "ram_style": "block", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_7": { + "ConvolutionInputGenerator_rtl_7": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_6": { + "VVAU_hls_6": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_29": { - "impl_style": "hls" - }, - 
"Thresholding_Batch_13": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_30": { - "impl_style": "hls" + "Thresholding_rtl_13": { + "PE": 1 }, - "StreamingFIFO_74": { + "StreamingFIFO_rtl_74": { "ram_style": "auto", "depth": 64, "impl_style": "rtl" }, - "MatrixVectorActivation_7": { + "MVAU_rtl_7": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_31": { - "impl_style": "hls" - }, - "Thresholding_Batch_14": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_14": { + "PE": 1 }, - "StreamingFIFO_77": { + "StreamingFIFO_rtl_77": { "ram_style": "block", "depth": 512, "impl_style": "vivado" }, - "FMPadding_Batch_7": { + "FMPadding_rtl_7": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_32": { - "impl_style": "hls" - }, - "StreamingFIFO_79": { + "StreamingFIFO_rtl_79": { "ram_style": "block", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_8": { + "ConvolutionInputGenerator_rtl_8": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_7": { + "VVAU_hls_7": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_33": { - "impl_style": "hls" - }, - "Thresholding_Batch_15": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_34": { - "impl_style": "hls" + "Thresholding_rtl_15": { + "PE": 1 }, - "StreamingFIFO_84": { + "StreamingFIFO_rtl_84": { "ram_style": "auto", "depth": 64, "impl_style": "rtl" }, - "MatrixVectorActivation_8": { + "MVAU_rtl_8": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", 
"runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_35": { - "impl_style": "hls" - }, - "Thresholding_Batch_16": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_16": { + "PE": 1 }, - "StreamingFIFO_87": { + "StreamingFIFO_rtl_87": { "ram_style": "block", "depth": 512, "impl_style": "vivado" }, - "FMPadding_Batch_8": { + "FMPadding_rtl_8": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_36": { - "impl_style": "hls" - }, - "StreamingFIFO_89": { + "StreamingFIFO_rtl_89": { "ram_style": "block", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_9": { + "ConvolutionInputGenerator_rtl_9": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_8": { + "VVAU_hls_8": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_37": { - "impl_style": "hls" - }, - "Thresholding_Batch_17": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_38": { - "impl_style": "hls" + "Thresholding_rtl_17": { + "PE": 1 }, - "StreamingFIFO_94": { + "StreamingFIFO_rtl_94": { "ram_style": "auto", "depth": 64, "impl_style": "rtl" }, - "MatrixVectorActivation_9": { + "MVAU_rtl_9": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_39": { - "impl_style": "hls" - }, - "Thresholding_Batch_18": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_18": { + "PE": 1 }, - "StreamingFIFO_97": { + "StreamingFIFO_rtl_97": { "ram_style": "block", "depth": 512, "impl_style": "vivado" }, - "FMPadding_Batch_9": { + "FMPadding_rtl_9": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_40": { - "impl_style": "hls" - }, - "StreamingFIFO_99": { + 
"StreamingFIFO_rtl_99": { "ram_style": "block", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_10": { + "ConvolutionInputGenerator_rtl_10": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_9": { + "VVAU_hls_9": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_41": { - "impl_style": "hls" - }, - "Thresholding_Batch_19": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_42": { - "impl_style": "hls" + "Thresholding_rtl_19": { + "PE": 1 }, - "StreamingFIFO_104": { + "StreamingFIFO_rtl_104": { "ram_style": "auto", "depth": 64, "impl_style": "rtl" }, - "MatrixVectorActivation_10": { + "MVAU_rtl_10": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_43": { - "impl_style": "hls" - }, - "Thresholding_Batch_20": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_20": { + "PE": 1 }, - "StreamingFIFO_107": { + "StreamingFIFO_rtl_107": { "ram_style": "block", "depth": 512, "impl_style": "vivado" }, - "FMPadding_Batch_10": { + "FMPadding_rtl_10": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_44": { - "impl_style": "hls" - }, - "StreamingFIFO_109": { + "StreamingFIFO_rtl_109": { "ram_style": "block", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_11": { + "ConvolutionInputGenerator_rtl_11": { "SIMD": 4, "ram_style": "block" }, - "VectorVectorActivation_10": { + "VVAU_hls_10": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_45": { - "impl_style": "hls" - }, - "Thresholding_Batch_21": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_46": { - "impl_style": "hls" 
+ "Thresholding_rtl_21": { + "PE": 1 }, - "StreamingFIFO_114": { + "StreamingFIFO_rtl_114": { "ram_style": "auto", "depth": 64, "impl_style": "rtl" }, - "MatrixVectorActivation_11": { + "MVAU_rtl_11": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_47": { - "impl_style": "hls" - }, - "Thresholding_Batch_22": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_22": { + "PE": 1 }, - "StreamingFIFO_117": { + "StreamingFIFO_rtl_117": { "ram_style": "block", "depth": 512, "impl_style": "vivado" }, - "FMPadding_Batch_11": { + "FMPadding_rtl_11": { "SIMD": 1 }, - "StreamingFIFO_118": { + "StreamingFIFO_rtl_118": { "ram_style": "block", "depth": 16384, "impl_style": "vivado" }, - "ConvolutionInputGenerator_12": { + "ConvolutionInputGenerator_rtl_12": { "SIMD": 1, "ram_style": "block" }, - "VectorVectorActivation_11": { + "VVAU_hls_11": { "PE": 1, "resType": "lut" }, - "Thresholding_Batch_23": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_48": { - "impl_style": "hls" + "Thresholding_rtl_23": { + "PE": 1 }, - "StreamingFIFO_122": { + "StreamingFIFO_rtl_122": { "ram_style": "auto", "depth": 64, "impl_style": "rtl" }, - "MatrixVectorActivation_12": { + "MVAU_rtl_12": { "PE": 16, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_49": { - "impl_style": "hls" - }, - "Thresholding_Batch_24": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_24": { + "PE": 1 }, - "StreamingFIFO_125": { + "StreamingFIFO_rtl_125": { "ram_style": "block", 
"depth": 1024, "impl_style": "vivado" }, - "FMPadding_Batch_12": { + "FMPadding_rtl_12": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_50": { - "impl_style": "hls" - }, - "StreamingFIFO_127": { + "StreamingFIFO_rtl_127": { "ram_style": "block", "depth": 16384, "impl_style": "vivado" }, - "ConvolutionInputGenerator_13": { + "ConvolutionInputGenerator_rtl_13": { "SIMD": 2, "ram_style": "block" }, - "VectorVectorActivation_12": { + "VVAU_hls_12": { "PE": 2, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_51": { - "impl_style": "hls" - }, - "Thresholding_Batch_25": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_52": { - "impl_style": "hls" + "Thresholding_rtl_25": { + "PE": 1 }, - "StreamingFIFO_132": { + "StreamingFIFO_rtl_132": { "ram_style": "auto", "depth": 128, "impl_style": "rtl" }, - "MatrixVectorActivation_13": { + "MVAU_rtl_13": { "PE": 32, "SIMD": 8, "ram_style": "block", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_53": { - "impl_style": "hls" - }, - "Thresholding_Batch_26": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_26": { + "PE": 1 }, - "ConvolutionInputGenerator_14": { + "ConvolutionInputGenerator_rtl_14": { "SIMD": 1, "ram_style": "block" }, - "Pool_Batch_0": { + "Pool_hls_0": { "PE": 1 }, - "StreamingDataWidthConverter_Batch_54": { - "impl_style": "hls" - }, - "MatrixVectorActivation_14": { + "MVAU_rtl_14": { "PE": 1, "SIMD": 16, "ram_style": "block", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "ChannelwiseOp_Batch_0": { + "ChannelwiseOp_hls_0": { "PE": 1, "ram_style": "distributed" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } 
diff --git a/build/mobilenet-v1/folding_config/ZCU104_folding_config.json b/build/mobilenet-v1/folding_config/ZCU104_folding_config.json index 40a687a..e300886 100755 --- a/build/mobilenet-v1/folding_config/ZCU104_folding_config.json +++ b/build/mobilenet-v1/folding_config/ZCU104_folding_config.json @@ -1,816 +1,610 @@ { "Defaults": {}, - "StreamingFIFO_0": { + "StreamingFIFO_rtl_0": { "ram_style": "ultra", - "depth": 1024, - "impl_style": "vivado" + "depth": 1024 }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 1, "ram_style": "distributed" }, - "StreamingDataWidthConverter_Batch_0": { - "impl_style": "hls" - }, - "MatrixVectorActivation_0": { + "MVAU_rtl_0": { "PE": 16, "SIMD": 3, "ram_style": "auto", "resType": "dsp", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingFIFO_3": { + "StreamingFIFO_rtl_3": { "ram_style": "auto", - "depth": 64, - "impl_style": "rtl" - }, - "StreamingDataWidthConverter_Batch_1": { - "impl_style": "hls" + "depth": 64 }, - "Thresholding_Batch_0": { + "Thresholding_hls_0": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_2": { - "impl_style": "hls" - }, - "StreamingFIFO_6": { + "StreamingFIFO_rtl_6": { "ram_style": "auto", - "depth": 256, - "impl_style": "rtl" + "depth": 256 }, - "FMPadding_Batch_0": { + "FMPadding_rtl_0": { "SIMD": 2 }, - "StreamingDataWidthConverter_Batch_3": { - "impl_style": "hls" - }, - "StreamingFIFO_8": { + "StreamingFIFO_rtl_8": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "ConvolutionInputGenerator_1": { + "ConvolutionInputGenerator_rtl_1": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_0": { + "VVAU_hls_0": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_10": { + "StreamingFIFO_rtl_10": { "ram_style": "auto", - "depth": 256, - 
"impl_style": "rtl" - }, - "StreamingDataWidthConverter_Batch_4": { - "impl_style": "hls" + "depth": 256 }, - "Thresholding_Batch_1": { + "Thresholding_hls_1": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_5": { - "impl_style": "hls" - }, - "MatrixVectorActivation_1": { + "MVAU_rtl_1": { "PE": 8, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_6": { - "impl_style": "hls" - }, - "Thresholding_Batch_2": { + "Thresholding_hls_2": { "PE": 2, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_7": { - "impl_style": "hls" - }, - "StreamingFIFO_17": { + "StreamingFIFO_rtl_17": { "ram_style": "auto", - "depth": 32, - "impl_style": "rtl" + "depth": 32 }, - "FMPadding_Batch_1": { + "FMPadding_rtl_1": { "SIMD": 4 }, - "StreamingDataWidthConverter_Batch_8": { - "impl_style": "hls" - }, - "StreamingFIFO_19": { + "StreamingFIFO_rtl_19": { "ram_style": "ultra", - "depth": 1024, - "impl_style": "vivado" + "depth": 1024 }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_1": { + "VVAU_hls_1": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_9": { - "impl_style": "hls" - }, - "Thresholding_Batch_3": { + "Thresholding_hls_3": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_10": { - "impl_style": "hls" - }, - "StreamingFIFO_24": { + "StreamingFIFO_rtl_24": { "ram_style": "auto", - "depth": 32, - "impl_style": "rtl" + "depth": 32 }, - "MatrixVectorActivation_2": { + "MVAU_rtl_2": { 
"PE": 16, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_11": { - "impl_style": "hls" - }, - "Thresholding_Batch_4": { + "Thresholding_hls_4": { "PE": 2, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_27": { + "StreamingFIFO_rtl_27": { "ram_style": "auto", - "depth": 128, - "impl_style": "rtl" + "depth": 128 }, - "FMPadding_Batch_2": { + "FMPadding_rtl_2": { "SIMD": 2 }, - "StreamingDataWidthConverter_Batch_12": { - "impl_style": "hls" - }, - "StreamingFIFO_29": { + "StreamingFIFO_rtl_29": { "ram_style": "ultra", - "depth": 1024, - "impl_style": "vivado" + "depth": 1024 }, - "ConvolutionInputGenerator_3": { + "ConvolutionInputGenerator_rtl_3": { "SIMD": 16, "ram_style": "block" }, - "VectorVectorActivation_2": { + "VVAU_hls_2": { "PE": 16, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_13": { - "impl_style": "hls" - }, - "Thresholding_Batch_5": { + "Thresholding_hls_5": { "PE": 2, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_14": { - "impl_style": "hls" - }, - "StreamingFIFO_34": { + "StreamingFIFO_rtl_34": { "ram_style": "auto", - "depth": 32, - "impl_style": "rtl" + "depth": 32 }, - "MatrixVectorActivation_3": { + "MVAU_rtl_3": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_15": { - "impl_style": "hls" - }, - "Thresholding_Batch_6": { + "Thresholding_hls_6": { "PE": 2, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_37": { + 
"StreamingFIFO_rtl_37": { "ram_style": "auto", - "depth": 128, - "impl_style": "rtl" + "depth": 128 }, - "FMPadding_Batch_3": { + "FMPadding_rtl_3": { "SIMD": 2 }, - "StreamingDataWidthConverter_Batch_16": { - "impl_style": "hls" - }, - "StreamingFIFO_39": { + "StreamingFIFO_rtl_39": { "ram_style": "ultra", - "depth": 4096, - "impl_style": "vivado" + "depth": 4096 }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 4, "ram_style": "block" }, - "VectorVectorActivation_3": { + "VVAU_hls_3": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_17": { - "impl_style": "hls" - }, - "Thresholding_Batch_7": { + "Thresholding_hls_7": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_18": { - "impl_style": "hls" - }, - "StreamingFIFO_44": { + "StreamingFIFO_rtl_44": { "ram_style": "auto", - "depth": 32, - "impl_style": "rtl" + "depth": 32 }, - "MatrixVectorActivation_4": { + "MVAU_rtl_4": { "PE": 16, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_19": { - "impl_style": "hls" - }, - "Thresholding_Batch_8": { + "Thresholding_hls_8": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_47": { + "StreamingFIFO_rtl_47": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "FMPadding_Batch_4": { + "FMPadding_rtl_4": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_20": { - "impl_style": "hls" - }, - "StreamingFIFO_49": { + "StreamingFIFO_rtl_49": { "ram_style": "ultra", - "depth": 1024, - "impl_style": "vivado" + "depth": 1024 }, - "ConvolutionInputGenerator_5": { + "ConvolutionInputGenerator_rtl_5": { "SIMD": 8, "ram_style": "block" 
}, - "VectorVectorActivation_4": { + "VVAU_hls_4": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_21": { - "impl_style": "hls" - }, - "Thresholding_Batch_9": { + "Thresholding_hls_9": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_22": { - "impl_style": "hls" - }, - "StreamingFIFO_54": { + "StreamingFIFO_rtl_54": { "ram_style": "auto", - "depth": 32, - "impl_style": "rtl" + "depth": 32 }, - "MatrixVectorActivation_5": { + "MVAU_rtl_5": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_23": { - "impl_style": "hls" - }, - "Thresholding_Batch_10": { + "Thresholding_hls_10": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_57": { + "StreamingFIFO_rtl_57": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "FMPadding_Batch_5": { + "FMPadding_rtl_5": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_24": { - "impl_style": "hls" - }, - "StreamingFIFO_59": { + "StreamingFIFO_rtl_59": { "ram_style": "ultra", - "depth": 8192, - "impl_style": "vivado" + "depth": 8192 }, - "ConvolutionInputGenerator_6": { + "ConvolutionInputGenerator_rtl_6": { "SIMD": 2, "ram_style": "block" }, - "VectorVectorActivation_5": { + "VVAU_hls_5": { "PE": 2, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_25": { - "impl_style": "hls" - }, - "Thresholding_Batch_11": { + "Thresholding_hls_11": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_26": { - "impl_style": "hls" - }, - "StreamingFIFO_64": { + "StreamingFIFO_rtl_64": { "ram_style": 
"auto", - "depth": 32, - "impl_style": "rtl" + "depth": 32 }, - "MatrixVectorActivation_6": { + "MVAU_rtl_6": { "PE": 16, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_27": { - "impl_style": "hls" - }, - "Thresholding_Batch_12": { + "Thresholding_hls_12": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_67": { + "StreamingFIFO_rtl_67": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "FMPadding_Batch_6": { + "FMPadding_rtl_6": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_28": { - "impl_style": "hls" - }, - "StreamingFIFO_69": { + "StreamingFIFO_rtl_69": { "ram_style": "ultra", - "depth": 4096, - "impl_style": "vivado" + "depth": 4096 }, - "ConvolutionInputGenerator_7": { + "ConvolutionInputGenerator_rtl_7": { "SIMD": 4, "ram_style": "block" }, - "VectorVectorActivation_6": { + "VVAU_hls_6": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_29": { - "impl_style": "hls" - }, - "Thresholding_Batch_13": { + "Thresholding_hls_13": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_30": { - "impl_style": "hls" - }, - "StreamingFIFO_74": { + "StreamingFIFO_rtl_74": { "ram_style": "auto", - "depth": 64, - "impl_style": "rtl" + "depth": 64 }, - "MatrixVectorActivation_7": { + "MVAU_rtl_7": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_31": { - "impl_style": "hls" - }, - "Thresholding_Batch_14": { + "Thresholding_hls_14": { "PE": 1, "ram_style": "distributed", - "mem_mode": 
"const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_77": { + "StreamingFIFO_rtl_77": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "FMPadding_Batch_7": { + "FMPadding_rtl_7": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_32": { - "impl_style": "hls" - }, - "StreamingFIFO_79": { + "StreamingFIFO_rtl_79": { "ram_style": "ultra", - "depth": 4096, - "impl_style": "vivado" + "depth": 4096 }, - "ConvolutionInputGenerator_8": { + "ConvolutionInputGenerator_rtl_8": { "SIMD": 4, "ram_style": "block" }, - "VectorVectorActivation_7": { + "VVAU_hls_7": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_33": { - "impl_style": "hls" - }, - "Thresholding_Batch_15": { + "Thresholding_hls_15": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_34": { - "impl_style": "hls" - }, - "StreamingFIFO_84": { + "StreamingFIFO_rtl_84": { "ram_style": "auto", - "depth": 64, - "impl_style": "rtl" + "depth": 64 }, - "MatrixVectorActivation_8": { + "MVAU_rtl_8": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_35": { - "impl_style": "hls" - }, - "Thresholding_Batch_16": { + "Thresholding_hls_16": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_87": { + "StreamingFIFO_rtl_87": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "FMPadding_Batch_8": { + "FMPadding_rtl_8": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_36": { - "impl_style": "hls" - }, - "StreamingFIFO_89": { + "StreamingFIFO_rtl_89": { "ram_style": "ultra", - "depth": 4096, - "impl_style": "vivado" + "depth": 
4096 }, - "ConvolutionInputGenerator_9": { + "ConvolutionInputGenerator_rtl_9": { "SIMD": 4, "ram_style": "block" }, - "VectorVectorActivation_8": { + "VVAU_hls_8": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_37": { - "impl_style": "hls" - }, - "Thresholding_Batch_17": { + "Thresholding_hls_17": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_38": { - "impl_style": "hls" - }, - "StreamingFIFO_94": { + "StreamingFIFO_rtl_94": { "ram_style": "auto", - "depth": 64, - "impl_style": "rtl" + "depth": 64 }, - "MatrixVectorActivation_9": { + "MVAU_rtl_9": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_39": { - "impl_style": "hls" - }, - "Thresholding_Batch_18": { + "Thresholding_hls_18": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_97": { + "StreamingFIFO_rtl_97": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "FMPadding_Batch_9": { + "FMPadding_rtl_9": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_40": { - "impl_style": "hls" - }, - "StreamingFIFO_99": { + "StreamingFIFO_rtl_99": { "ram_style": "ultra", - "depth": 4096, - "impl_style": "vivado" + "depth": 4096 }, - "ConvolutionInputGenerator_10": { + "ConvolutionInputGenerator_rtl_10": { "SIMD": 4, "ram_style": "block" }, - "VectorVectorActivation_9": { + "VVAU_hls_9": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_41": { - "impl_style": "hls" - }, - "Thresholding_Batch_19": { + "Thresholding_hls_19": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - 
"StreamingDataWidthConverter_Batch_42": { - "impl_style": "hls" - }, - "StreamingFIFO_104": { + "StreamingFIFO_rtl_104": { "ram_style": "auto", - "depth": 64, - "impl_style": "rtl" + "depth": 64 }, - "MatrixVectorActivation_10": { + "MVAU_rtl_10": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_43": { - "impl_style": "hls" - }, - "Thresholding_Batch_20": { + "Thresholding_hls_20": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_107": { + "StreamingFIFO_rtl_107": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "FMPadding_Batch_10": { + "FMPadding_rtl_10": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_44": { - "impl_style": "hls" - }, - "StreamingFIFO_109": { + "StreamingFIFO_rtl_109": { "ram_style": "ultra", - "depth": 4096, - "impl_style": "vivado" + "depth": 4096 }, - "ConvolutionInputGenerator_11": { + "ConvolutionInputGenerator_rtl_11": { "SIMD": 4, "ram_style": "block" }, - "VectorVectorActivation_10": { + "VVAU_hls_10": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_45": { - "impl_style": "hls" - }, - "Thresholding_Batch_21": { + "Thresholding_hls_21": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_46": { - "impl_style": "hls" - }, - "StreamingFIFO_114": { + "StreamingFIFO_rtl_114": { "ram_style": "auto", - "depth": 64, - "impl_style": "rtl" + "depth": 64 }, - "MatrixVectorActivation_11": { + "MVAU_rtl_11": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - 
"StreamingDataWidthConverter_Batch_47": { - "impl_style": "hls" - }, - "Thresholding_Batch_22": { + "Thresholding_hls_22": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_117": { + "StreamingFIFO_rtl_117": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "FMPadding_Batch_11": { + "FMPadding_rtl_11": { "SIMD": 1 }, - "StreamingFIFO_118": { + "StreamingFIFO_rtl_118": { "ram_style": "ultra", - "depth": 16384, - "impl_style": "vivado" + "depth": 16384 }, - "ConvolutionInputGenerator_12": { + "ConvolutionInputGenerator_rtl_12": { "SIMD": 1, "ram_style": "block" }, - "VectorVectorActivation_11": { + "VVAU_hls_11": { "PE": 1, "resType": "lut" }, - "Thresholding_Batch_23": { + "Thresholding_hls_23": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_48": { - "impl_style": "hls" - }, - "StreamingFIFO_122": { + "StreamingFIFO_rtl_122": { "ram_style": "auto", - "depth": 64, - "impl_style": "rtl" + "depth": 64 }, - "MatrixVectorActivation_12": { + "MVAU_rtl_12": { "PE": 16, "SIMD": 8, "ram_style": "ultra", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 1 }, - "StreamingDataWidthConverter_Batch_49": { - "impl_style": "hls" - }, - "Thresholding_Batch_24": { + "Thresholding_hls_24": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_125": { + "StreamingFIFO_rtl_125": { "ram_style": "ultra", - "depth": 1024, - "impl_style": "vivado" + "depth": 1024 }, - "FMPadding_Batch_12": { + "FMPadding_rtl_12": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_50": { - "impl_style": "hls" - }, - "StreamingFIFO_127": { + "StreamingFIFO_rtl_127": { "ram_style": 
"ultra", - "depth": 16384, - "impl_style": "vivado" + "depth": 16384 }, - "ConvolutionInputGenerator_13": { + "ConvolutionInputGenerator_rtl_13": { "SIMD": 2, "ram_style": "block" }, - "VectorVectorActivation_12": { + "VVAU_hls_12": { "PE": 2, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_51": { - "impl_style": "hls" - }, - "Thresholding_Batch_25": { + "Thresholding_hls_25": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_52": { - "impl_style": "hls" - }, - "StreamingFIFO_132": { + "StreamingFIFO_rtl_132": { "ram_style": "auto", - "depth": 128, - "impl_style": "rtl" + "depth": 128 }, - "MatrixVectorActivation_13": { + "MVAU_rtl_13": { "PE": 32, "SIMD": 8, "ram_style": "ultra", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 1 }, - "StreamingDataWidthConverter_Batch_53": { - "impl_style": "hls" - }, - "Thresholding_Batch_26": { + "Thresholding_hls_26": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "ConvolutionInputGenerator_14": { + "ConvolutionInputGenerator_rtl_14": { "SIMD": 1, "ram_style": "block" }, - "Pool_Batch_0": { + "Pool_hls_0": { "PE": 1 }, - "StreamingDataWidthConverter_Batch_54": { - "impl_style": "hls" - }, - "MatrixVectorActivation_14": { + "MVAU_rtl_14": { "PE": 1, "SIMD": 16, "ram_style": "ultra", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 1 }, - "ChannelwiseOp_Batch_0": { + "ChannelwiseOp_hls_0": { "PE": 1, "ram_style": "distributed" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/mobilenet-v1/specialize_layers_config/ZCU104_specialize_layers_config.json 
b/build/mobilenet-v1/specialize_layers_config/ZCU104_specialize_layers_config.json new file mode 100644 index 0000000..f766d4a --- /dev/null +++ b/build/mobilenet-v1/specialize_layers_config/ZCU104_specialize_layers_config.json @@ -0,0 +1,261 @@ +{ + "Defaults": {}, + "ConvolutionInputGenerator_0": { + "preferred_impl_style": "rtl" + }, + "MVAU_0": { + "preferred_impl_style": "rtl" + }, + "Thresholding_0": { + "preferred_impl_style": "hls" + }, + "FMPadding_0": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_1": { + "preferred_impl_style": "rtl" + }, + "VVAU_0": { + "preferred_impl_style": "hls" + }, + "Thresholding_1": { + "preferred_impl_style": "hls" + }, + "MVAU_1": { + "preferred_impl_style": "rtl" + }, + "Thresholding_2": { + "preferred_impl_style": "hls" + }, + "FMPadding_1": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_2": { + "preferred_impl_style": "rtl" + }, + "VVAU_1": { + "preferred_impl_style": "hls" + }, + "Thresholding_3": { + "preferred_impl_style": "hls" + }, + "MVAU_2": { + "preferred_impl_style": "rtl" + }, + "Thresholding_4": { + "preferred_impl_style": "hls" + }, + "FMPadding_2": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_3": { + "preferred_impl_style": "rtl" + }, + "VVAU_2": { + "preferred_impl_style": "hls" + }, + "Thresholding_5": { + "preferred_impl_style": "hls" + }, + "MVAU_3": { + "preferred_impl_style": "rtl" + }, + "Thresholding_6": { + "preferred_impl_style": "hls" + }, + "FMPadding_3": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_4": { + "preferred_impl_style": "rtl" + }, + "VVAU_3": { + "preferred_impl_style": "hls" + }, + "Thresholding_7": { + "preferred_impl_style": "hls" + }, + "MVAU_4": { + "preferred_impl_style": "rtl" + }, + "Thresholding_8": { + "preferred_impl_style": "hls" + }, + "FMPadding_4": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_5": { + "preferred_impl_style": "rtl" + }, + "VVAU_4": { + 
"preferred_impl_style": "hls" + }, + "Thresholding_9": { + "preferred_impl_style": "hls" + }, + "MVAU_5": { + "preferred_impl_style": "rtl" + }, + "Thresholding_10": { + "preferred_impl_style": "hls" + }, + "FMPadding_5": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_6": { + "preferred_impl_style": "rtl" + }, + "VVAU_5": { + "preferred_impl_style": "hls" + }, + "Thresholding_11": { + "preferred_impl_style": "hls" + }, + "MVAU_6": { + "preferred_impl_style": "rtl" + }, + "Thresholding_12": { + "preferred_impl_style": "hls" + }, + "FMPadding_6": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_7": { + "preferred_impl_style": "rtl" + }, + "VVAU_6": { + "preferred_impl_style": "hls" + }, + "Thresholding_13": { + "preferred_impl_style": "hls" + }, + "MVAU_7": { + "preferred_impl_style": "rtl" + }, + "Thresholding_14": { + "preferred_impl_style": "hls" + }, + "FMPadding_7": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_8": { + "preferred_impl_style": "rtl" + }, + "VVAU_7": { + "preferred_impl_style": "hls" + }, + "Thresholding_15": { + "preferred_impl_style": "hls" + }, + "MVAU_8": { + "preferred_impl_style": "rtl" + }, + "Thresholding_16": { + "preferred_impl_style": "hls" + }, + "FMPadding_8": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_9": { + "preferred_impl_style": "rtl" + }, + "VVAU_8": { + "preferred_impl_style": "hls" + }, + "Thresholding_17": { + "preferred_impl_style": "hls" + }, + "MVAU_9": { + "preferred_impl_style": "rtl" + }, + "Thresholding_18": { + "preferred_impl_style": "hls" + }, + "FMPadding_9": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_10": { + "preferred_impl_style": "rtl" + }, + "VVAU_9": { + "preferred_impl_style": "hls" + }, + "Thresholding_19": { + "preferred_impl_style": "hls" + }, + "MVAU_10": { + "preferred_impl_style": "rtl" + }, + "Thresholding_20": { + "preferred_impl_style": "hls" + }, + "FMPadding_10": { + 
"preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_11": { + "preferred_impl_style": "rtl" + }, + "VVAU_10": { + "preferred_impl_style": "hls" + }, + "Thresholding_21": { + "preferred_impl_style": "hls" + }, + "MVAU_11": { + "preferred_impl_style": "rtl" + }, + "Thresholding_22": { + "preferred_impl_style": "hls" + }, + "FMPadding_11": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_12": { + "preferred_impl_style": "rtl" + }, + "VVAU_11": { + "preferred_impl_style": "hls" + }, + "Thresholding_23": { + "preferred_impl_style": "hls" + }, + "MVAU_12": { + "preferred_impl_style": "rtl" + }, + "Thresholding_24": { + "preferred_impl_style": "hls" + }, + "FMPadding_12": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_13": { + "preferred_impl_style": "rtl" + }, + "VVAU_12": { + "preferred_impl_style": "hls" + }, + "Thresholding_25": { + "preferred_impl_style": "hls" + }, + "MVAU_13": { + "preferred_impl_style": "rtl" + }, + "Thresholding_26": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_14": { + "preferred_impl_style": "rtl" + }, + "Pool_0": { + "preferred_impl_style": "hls" + }, + "MVAU_14": { + "preferred_impl_style": "rtl" + }, + "ChannelwiseOp_0": { + "preferred_impl_style": "hls" + }, + "LabelSelect_0": { + "preferred_impl_style": "hls" + } + } diff --git a/build/resnet50/README.md b/build/resnet50/README.md index 3eab06f..8300f27 100644 --- a/build/resnet50/README.md +++ b/build/resnet50/README.md @@ -28,6 +28,6 @@ cd $FINN_EXAMPLES/build/finn ./run-docker.sh build_custom $FINN_EXAMPLES/build/resnet50 ``` -5. The generated outputs will be under `resnet50/output__`. You can find a description of the generated files [here](https://finn-dev.readthedocs.io/en/latest/command_line.html#simple-dataflow-build-mode). +3. The generated outputs will be under `resnet50/output__`. 
You can find a description of the generated files [here](https://finn-dev.readthedocs.io/en/latest/command_line.html#simple-dataflow-build-mode). diff --git a/build/resnet50/build.py b/build/resnet50/build.py index 5ed6b2c..6f0dfc8 100644 --- a/build/resnet50/build.py +++ b/build/resnet50/build.py @@ -38,8 +38,7 @@ from custom_steps import ( step_resnet50_tidy, step_resnet50_streamline, - step_resnet50_convert_to_hls, - step_resnet50_set_fifo_depths, + step_resnet50_convert_to_hw, step_resnet50_slr_floorplan, ) @@ -52,14 +51,15 @@ resnet50_build_steps = [ step_resnet50_tidy, step_resnet50_streamline, - step_resnet50_convert_to_hls, + step_resnet50_convert_to_hw, "step_create_dataflow_partition", + "step_specialize_layers", "step_apply_folding_config", "step_minimize_bit_width", "step_generate_estimate_reports", - "step_hls_codegen", - "step_hls_ipgen", - step_resnet50_set_fifo_depths, + "step_hw_codegen", + "step_hw_ipgen", + "step_set_fifo_depths", step_resnet50_slr_floorplan, "step_synthesize_bitfile", "step_make_pynq_driver", @@ -101,13 +101,7 @@ def platform_to_shell(platform): platform_dir = "release/%s" % release_platform_name os.makedirs(platform_dir, exist_ok=True) - # try: - # from finnexperimental.transformation.fpgadataflow.infer_doublepacked_dsp import InferDoublePackedConv # noqa: E501 - # folding_config_file="folding_config/U250_folding_config.json" - # print("DoublePackedConv detected") - # except: - # warn(" FINN Experimental not available. Using non-packed folded down convolution. 
This is 16 times slower per MHz ") # noqa: E501 - folding_config_file = "folding_config/U250_folding_config_no_doublepack_pe_folded_16.json" + folding_config_file = "folding_config/U250_folding_config.json" cfg = build_cfg.DataflowBuildConfig( steps=resnet50_build_steps, @@ -115,6 +109,8 @@ def platform_to_shell(platform): synth_clk_period_ns=synth_clk_period_ns, board=board, shell_flow_type=build_cfg.ShellFlowType.VITIS_ALVEO, + split_large_fifos=True, + specialize_layers_config_file="specialize_layers_config.json", vitis_platform=vitis_platform, # throughput parameters (auto-folding) mvau_wwidth_max=24, diff --git a/build/resnet50/custom_steps.py b/build/resnet50/custom_steps.py index 6bc6008..5e59514 100644 --- a/build/resnet50/custom_steps.py +++ b/build/resnet50/custom_steps.py @@ -1,4 +1,5 @@ -# Copyright (c) 2020, Xilinx +# Copyright (C) 2020-2022, Xilinx, Inc. +# Copyright (C) 2022-2024, Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -87,7 +88,7 @@ from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_data_layouts import InferDataLayouts from qonnx.transformation.insert_topk import InsertTopK -import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls +import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul from finn.builder.build_dataflow_config import ( @@ -95,23 +96,8 @@ ShellFlowType, ) -from finn.transformation.fpgadataflow.prepare_ip import PrepareIP -from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP -from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( - ReplaceVerilogRelPaths, -) - from finn.transformation.move_reshape import RemoveCNVtoFCFlatten -from qonnx.util.config import extract_model_config_to_json -from finn.transformation.fpgadataflow.set_fifo_depths import ( - InsertAndSetFIFODepths, - 
RemoveShallowFIFOs, - SplitLargeFIFOs, -) -from finn.transformation.fpgadataflow.insert_dwc import InsertDWC -from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO - def step_resnet50_tidy(model: ModelWrapper, cfg: DataflowBuildConfig): model = model.transform(GiveUniqueParameterTensors()) @@ -188,38 +174,28 @@ def step_resnet50_streamline(model: ModelWrapper, cfg: DataflowBuildConfig): return model -def step_resnet50_convert_to_hls(model: ModelWrapper, cfg: DataflowBuildConfig): +def step_resnet50_convert_to_hw(model: ModelWrapper, cfg: DataflowBuildConfig): model.set_tensor_datatype(model.graph.input[0].name, DataType["UINT8"]) model = model.transform(InferDataLayouts()) - - # try: - # from finnexperimental.transformation.fpgadataflow.infer_doublepacked_dsp import ( - # InferDoublePackedConv, - # ) - - # model = model.transform(InferDoublePackedConv([1])) - # except Exception: - # print(" FINN Experimental not available. Using non-packed convolution ") - model = model.transform(DoubleToSingleFloat()) model = model.transform(InferDataTypes()) model = model.transform(SortGraph()) - to_hls_transformations = [ - to_hls.InferAddStreamsLayer, + to_hw_transformations = [ + to_hw.InferAddStreamsLayer, LowerConvsToMatMul, - to_hls.InferChannelwiseLinearLayer, - to_hls.InferPool_Batch, + to_hw.InferChannelwiseLinearLayer, + to_hw.InferPool, AbsorbTransposeIntoMultiThreshold, RoundAndClipThresholds, - to_hls.InferQuantizedMatrixVectorActivation, - to_hls.InferThresholdingLayer, + to_hw.InferQuantizedMatrixVectorActivation, + to_hw.InferThresholdingLayer, AbsorbConsecutiveTransposes, - to_hls.InferConvInpGen, - to_hls.InferDuplicateStreamsLayer, - to_hls.InferLabelSelectLayer, + to_hw.InferConvInpGen, + to_hw.InferDuplicateStreamsLayer, + to_hw.InferLabelSelectLayer, ] - for trn in to_hls_transformations: + for trn in to_hw_transformations: model = model.transform(trn()) model = model.transform(InferDataLayouts()) model = model.transform(GiveUniqueNodeNames()) 
@@ -233,64 +209,6 @@ def step_resnet50_convert_to_hls(model: ModelWrapper, cfg: DataflowBuildConfig): return model -def step_resnet50_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): - """ - Depending on the auto_fifo_depths setting, do one of the following: - * if auto_fifo_depths=True: Run the `InsertAndSetFIFODepths` transformation - to attempt to determine the FIFO sizes that provide full throughput. Involves - running stitched-IP rtlsim and may take a long time. - * if auto_fifo_depths=False: Assume the folding config file contains FIFO - sizes as well. Runs the `InsertFIFO` transformation, then - `ApplyConfig(cfg.folding_config_file)`, and finally `RemoveShallowFIFOs`. - Coherency with config file node naming is ensured by calling - `GiveUniqueNodeNames`. - """ - - if cfg.auto_fifo_depths: - model = model.transform( - InsertAndSetFIFODepths( - cfg._resolve_fpga_part(), - cfg._resolve_hls_clk_period(), - vivado_ram_style=cfg.large_fifo_mem_style.value, - ) - ) - else: - # assume folding cfg json contains FIFO sizes too - # insert DWCs, FIFOs and run ApplyConfig once more - model = model.transform(InsertDWC()) - # need to make sure all FIFOs are created so that their depth can be - # set by ApplyConfig, so create_shallow_fifos=True - model = model.transform(InsertFIFO(create_shallow_fifos=True)) - model = model.transform(GiveUniqueNodeNames()) - model = model.transform(GiveReadableTensorNames()) - if cfg.folding_config_file is not None: - model = model.transform(ApplyConfig(cfg.folding_config_file)) - # split large FIFOs into multiple FIFOs - model = model.transform(SplitLargeFIFOs()) - # remove any shallow FIFOs - model = model.transform(RemoveShallowFIFOs()) - - # extract the final configuration and save it as json - hw_attrs = [ - "PE", - "SIMD", - "ram_style", - "depth", - "impl_style", - "resType", - "mem_mode", - "runtime_writeable_weights", - ] - extract_model_config_to_json(model, cfg.output_dir + "/final_hw_config.json", hw_attrs) - - # 
after FIFOs are ready to go, call PrepareIP and HLSSynthIP again - # this will only run for the new nodes (e.g. FIFOs and DWCs) - model = model.transform(PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())) - model = model.transform(HLSSynthIP()) - model = model.transform(ReplaceVerilogRelPaths()) - return model - - def step_resnet50_slr_floorplan(model: ModelWrapper, cfg: DataflowBuildConfig): if cfg.shell_flow_type == ShellFlowType.VITIS_ALVEO: try: diff --git a/build/resnet50/folding_config/U250_folding_config.json b/build/resnet50/folding_config/U250_folding_config.json index da4f7da..e25bfd8 100644 --- a/build/resnet50/folding_config/U250_folding_config.json +++ b/build/resnet50/folding_config/U250_folding_config.json @@ -1,616 +1,631 @@ { "Defaults": { - "outFIFODepths":[[32],"all"], - "inFIFODepths":[[32],"all"], - "mem_mode":["decoupled",["MatrixVectorActivation"]] - }, - "ConvDoublePacked_Batch_0": { + "outFIFODepths": [ + [32], + "all" + ], + "inFIFODepths": [ + [32], + "all" + ], + "mem_mode": [ + "internal_decoupled", + [ + "MVAU" + ] + ] + }, + "FMPadding_hls_0": { + "SIMD": 3 + }, + "ConvolutionInputGenerator_hls_0": { + "SIMD": 3 + }, + "MVAU_hls_0": { "SIMD": 3, - "PE": 64, - "MMV": 16 + "PE": 64 }, - "FMPadding_Batch_0": { - "SIMD": 64 + "FMPadding_hls_1": { + "SIMD": 4 }, - "ConvolutionInputGenerator_0": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_1": { + "SIMD": 4 }, - "Pool_Batch_0": { - "PE": 64 + "Pool_hls_0": { + "PE": 4 }, - "DuplicateStreams_Batch_0": { - "PE": 32, + "DuplicateStreams_hls_0": { + "PE": 2, "outFIFODepths": [32, 32] }, - "MatrixVectorActivation_1": { - "PE": 32, + "MVAU_hls_2": { + "PE": 2, "SIMD": 32 }, - "MatrixVectorActivation_0": { - "PE": 8, - "SIMD": 32 + "MVAU_hls_1": { + "PE": 1, + "SIMD": 16 }, - "FMPadding_Batch_1": { - "SIMD": 64 + "FMPadding_hls_2": { + "SIMD": 4 }, - "ConvolutionInputGenerator_1": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_2": { + "SIMD": 4 }, - "MatrixVectorActivation_2": 
{ - "PE": 32, + "MVAU_hls_3": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_3": { - "PE": 32, + "MVAU_hls_4": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_0": { - "PE": 32, + "AddStreams_hls_0": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_0": { - "PE": 32 + "Thresholding_hls_0": { + "PE": 2 }, - "DuplicateStreams_Batch_1": { - "PE": 32, + "DuplicateStreams_hls_1": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_1": { - "PE": 32 + "Thresholding_hls_1": { + "PE": 2 }, - "Thresholding_Batch_2": { - "PE": 32 + "Thresholding_hls_2": { + "PE": 2 }, - "MatrixVectorActivation_4": { - "PE": 32, + "MVAU_hls_5": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_2": { - "SIMD": 64 + "FMPadding_hls_3": { + "SIMD": 4 }, - "ConvolutionInputGenerator_2": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_3": { + "SIMD": 4 }, - "MatrixVectorActivation_5": { - "PE": 32, + "MVAU_hls_6": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_6": { - "PE": 32, + "MVAU_hls_7": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_1": { - "PE": 32, + "AddStreams_hls_1": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_3": { - "PE": 32 + "Thresholding_hls_3": { + "PE": 2 }, - "DuplicateStreams_Batch_2": { - "PE": 32, + "DuplicateStreams_hls_2": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_4": { - "PE": 32 + "Thresholding_hls_4": { + "PE": 2 }, - "Thresholding_Batch_5": { - "PE": 32 + "Thresholding_hls_5": { + "PE": 2 }, - "MatrixVectorActivation_7": { - "PE": 32, + "MVAU_hls_8": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_3": { - "SIMD": 64 + "FMPadding_hls_4": { + "SIMD": 4 }, - "ConvolutionInputGenerator_3": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_4": { + "SIMD": 4 }, - "MatrixVectorActivation_8": { - "PE": 32, + "MVAU_hls_9": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_9": { - "PE": 32, + "MVAU_hls_10": { + "PE": 2, "SIMD": 64 }, - "AddStreams_Batch_2": { - "PE":32, + "AddStreams_hls_2": { + "PE": 2, 
"inFIFODepths": [32, 32] }, - "Thresholding_Batch_6": { - "PE": 32 + "Thresholding_hls_6": { + "PE": 2 }, - "Thresholding_Batch_7": { - "PE": 32 + "Thresholding_hls_7": { + "PE": 2 }, - "DuplicateStreams_Batch_3": { - "PE": 32, + "DuplicateStreams_hls_3": { + "PE": 2, "outFIFODepths": [32, 32] }, - "DownSampler_0": { - "SIMD": 64 + "DownSampler_hls_0": { + "SIMD": 4 }, - "MatrixVectorActivation_10": { - "PE": 32, + "MVAU_hls_11": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_11": { - "PE": 32, + "MVAU_hls_12": { + "PE": 2, "SIMD": 64 }, - "FMPadding_Batch_4": { - "SIMD": 64 + "FMPadding_hls_5": { + "SIMD": 4 }, - "ConvolutionInputGenerator_4": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_5": { + "SIMD": 4 }, - "MatrixVectorActivation_12": { - "PE": 32, + "MVAU_hls_13": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_13": { - "PE": 32, + "MVAU_hls_14": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_3": { - "PE":32, + "AddStreams_hls_3": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_8": { - "PE": 32 + "Thresholding_hls_8": { + "PE": 2 }, - "DuplicateStreams_Batch_4": { - "PE": 32, + "DuplicateStreams_hls_4": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_9": { - "PE": 32 + "Thresholding_hls_9": { + "PE": 2 }, - "Thresholding_Batch_10": { - "PE": 32 + "Thresholding_hls_10": { + "PE": 2 }, - "MatrixVectorActivation_14": { - "PE": 32, + "MVAU_hls_15": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_5": { - "SIMD": 64 + "FMPadding_hls_6": { + "SIMD": 4 }, - "ConvolutionInputGenerator_5": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_6": { + "SIMD": 4 }, - "MatrixVectorActivation_15": { - "PE": 32, + "MVAU_hls_16": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_16": { - "PE": 32, + "MVAU_hls_17": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_4": { - "PE":32, + "AddStreams_hls_4": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_11": { - "PE": 32 + "Thresholding_hls_11": { + "PE": 2 }, - 
"DuplicateStreams_Batch_5": { - "PE": 32, + "DuplicateStreams_hls_5": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_12": { - "PE": 32 + "Thresholding_hls_12": { + "PE": 2 }, - "Thresholding_Batch_13": { - "PE": 32 + "Thresholding_hls_13": { + "PE": 2 }, - "MatrixVectorActivation_17": { - "PE": 32, + "MVAU_hls_18": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_6": { - "SIMD": 64 + "FMPadding_hls_7": { + "SIMD": 4 }, - "ConvolutionInputGenerator_6": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_7": { + "SIMD": 4 }, - "MatrixVectorActivation_18": { - "PE": 32, + "MVAU_hls_19": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_19": { - "PE": 32, + "MVAU_hls_20": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_5": { - "PE":32, + "AddStreams_hls_5": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_14": { - "PE": 32 + "Thresholding_hls_14": { + "PE": 2 }, - "DuplicateStreams_Batch_6": { - "PE": 32, + "DuplicateStreams_hls_6": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_15": { - "PE": 32 + "Thresholding_hls_15": { + "PE": 2 }, - "Thresholding_Batch_16": { - "PE": 32 + "Thresholding_hls_16": { + "PE": 2 }, - "MatrixVectorActivation_20": { - "PE": 32, + "MVAU_hls_21": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_7": { - "SIMD": 64 + "FMPadding_hls_8": { + "SIMD": 4 }, - "ConvolutionInputGenerator_7": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_8": { + "SIMD": 4 }, - "MatrixVectorActivation_21": { - "PE": 32, + "MVAU_hls_22": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_22": { - "PE": 32, + "MVAU_hls_23": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_6": { - "PE":32, + "AddStreams_hls_6": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_17": { - "PE": 32 + "Thresholding_hls_17": { + "PE": 2 }, - "Thresholding_Batch_18": { - "PE": 32 + "Thresholding_hls_18": { + "PE": 2 }, - "DuplicateStreams_Batch_7": { - "PE": 32, + "DuplicateStreams_hls_7": { + "PE": 2, "outFIFODepths": [32, 32] }, - 
"DownSampler_1": { - "SIMD": 64 + "DownSampler_hls_1": { + "SIMD": 4 }, - "MatrixVectorActivation_23": { - "PE": 32, + "MVAU_hls_24": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_24": { - "PE": 32, + "MVAU_hls_25": { + "PE": 2, "SIMD": 64 }, - "FMPadding_Batch_8": { - "SIMD": 64 + "FMPadding_hls_9": { + "SIMD": 4 }, - "ConvolutionInputGenerator_8": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_9": { + "SIMD": 4 }, - "MatrixVectorActivation_25": { - "PE": 32, + "MVAU_hls_26": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_26": { - "PE": 32, + "MVAU_hls_27": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_7": { - "PE":32, + "AddStreams_hls_7": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_19": { - "PE": 32 + "Thresholding_hls_19": { + "PE": 2 }, - "DuplicateStreams_Batch_8": { - "PE": 32, + "DuplicateStreams_hls_8": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_20": { - "PE": 32 + "Thresholding_hls_20": { + "PE": 2 }, - "Thresholding_Batch_21": { - "PE": 32 + "Thresholding_hls_21": { + "PE": 2 }, - "MatrixVectorActivation_27": { - "PE": 32, + "MVAU_hls_28": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_9": { - "SIMD": 64 + "FMPadding_hls_10": { + "SIMD": 4 }, - "ConvolutionInputGenerator_9": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_10": { + "SIMD": 4 }, - "MatrixVectorActivation_28": { - "PE": 32, + "MVAU_hls_29": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_29": { - "PE": 32, + "MVAU_hls_30": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_8": { - "PE":32, + "AddStreams_hls_8": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_22": { - "PE": 32 + "Thresholding_hls_22": { + "PE": 2 }, - "DuplicateStreams_Batch_9": { - "PE": 32, + "DuplicateStreams_hls_9": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_23": { - "PE": 32 + "Thresholding_hls_23": { + "PE": 2 }, - "Thresholding_Batch_24": { - "PE": 32 + "Thresholding_hls_24": { + "PE": 2 }, - "MatrixVectorActivation_30": { - 
"PE": 32, + "MVAU_hls_31": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_10": { - "SIMD": 64 + "FMPadding_hls_11": { + "SIMD": 4 }, - "ConvolutionInputGenerator_10": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_11": { + "SIMD": 4 }, - "MatrixVectorActivation_31": { - "PE": 32, + "MVAU_hls_32": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_32": { - "PE": 32, + "MVAU_hls_33": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_9": { - "PE":32, + "AddStreams_hls_9": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_25": { - "PE": 32 + "Thresholding_hls_25": { + "PE": 2 }, - "DuplicateStreams_Batch_10": { - "PE": 32, + "DuplicateStreams_hls_10": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_26": { - "PE": 32 + "Thresholding_hls_26": { + "PE": 2 }, - "Thresholding_Batch_27": { - "PE": 32 + "Thresholding_hls_27": { + "PE": 2 }, - "MatrixVectorActivation_33": { - "PE": 32, + "MVAU_hls_34": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_11": { - "SIMD": 64 + "FMPadding_hls_12": { + "SIMD": 4 }, - "ConvolutionInputGenerator_11": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_12": { + "SIMD": 4 }, - "MatrixVectorActivation_34": { - "PE": 32, + "MVAU_hls_35": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_35": { - "PE": 32, + "MVAU_hls_36": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_10": { - "PE":32, + "AddStreams_hls_10": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_28": { - "PE": 32 + "Thresholding_hls_28": { + "PE": 2 }, - "DuplicateStreams_Batch_11": { - "PE": 32, + "DuplicateStreams_hls_11": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_29": { - "PE": 32 + "Thresholding_hls_29": { + "PE": 2 }, - "Thresholding_Batch_30": { - "PE": 32 + "Thresholding_hls_30": { + "PE": 2 }, - "MatrixVectorActivation_36": { - "PE": 32, + "MVAU_hls_37": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_12": { - "SIMD": 64 + "FMPadding_hls_13": { + "SIMD": 4 }, - "ConvolutionInputGenerator_12": { - "SIMD": 64 + 
"ConvolutionInputGenerator_hls_13": { + "SIMD": 4 }, - "MatrixVectorActivation_37": { - "PE": 32, + "MVAU_hls_38": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_38": { - "PE": 32, + "MVAU_hls_39": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_11": { - "PE":32, + "AddStreams_hls_11": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_31": { - "PE": 32 + "Thresholding_hls_31": { + "PE": 2 }, - "DuplicateStreams_Batch_12": { - "PE": 32, + "DuplicateStreams_hls_12": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_32": { - "PE": 32 + "Thresholding_hls_32": { + "PE": 2 }, - "Thresholding_Batch_33": { - "PE": 32 + "Thresholding_hls_33": { + "PE": 2 }, - "MatrixVectorActivation_39": { - "PE": 32, + "MVAU_hls_40": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_13": { - "SIMD": 64 + "FMPadding_hls_14": { + "SIMD": 4 }, - "ConvolutionInputGenerator_13": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_14": { + "SIMD": 4 }, - "MatrixVectorActivation_40": { - "PE": 32, + "MVAU_hls_41": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_41": { - "PE": 32, + "MVAU_hls_42": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_12": { - "PE":32, + "AddStreams_hls_12": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_34": { - "PE": 32 + "Thresholding_hls_34": { + "PE": 2 }, - "Thresholding_Batch_35": { - "PE": 32 + "Thresholding_hls_35": { + "PE": 2 }, - "DuplicateStreams_Batch_13": { - "PE": 32, + "DuplicateStreams_hls_13": { + "PE": 2, "outFIFODepths": [32, 32] }, - "DownSampler_2": { - "SIMD": 64 + "DownSampler_hls_2": { + "SIMD": 4 }, - "MatrixVectorActivation_42": { - "PE": 32, + "MVAU_hls_43": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_43": { - "PE": 32, + "MVAU_hls_44": { + "PE": 2, "SIMD": 64 }, - "FMPadding_Batch_14": { - "SIMD": 64 + "FMPadding_hls_15": { + "SIMD": 4 }, - "ConvolutionInputGenerator_14": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_15": { + "SIMD": 4 }, - "MatrixVectorActivation_44": { - "PE": 32, + 
"MVAU_hls_45": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_45": { - "PE": 32, + "MVAU_hls_46": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_13": { - "PE":32, + "AddStreams_hls_13": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_36": { - "PE": 32 + "Thresholding_hls_36": { + "PE": 2 }, - "DuplicateStreams_Batch_14": { - "PE": 32, + "DuplicateStreams_hls_14": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_37": { - "PE": 32 + "Thresholding_hls_37": { + "PE": 2 }, - "Thresholding_Batch_38": { - "PE": 32 + "Thresholding_hls_38": { + "PE": 2 }, - "MatrixVectorActivation_46": { - "PE": 32, + "MVAU_hls_47": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_15": { - "SIMD": 64 + "FMPadding_hls_16": { + "SIMD": 4 }, - "ConvolutionInputGenerator_15": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_16": { + "SIMD": 4 }, - "MatrixVectorActivation_47": { - "PE": 32, + "MVAU_hls_48": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_48": { - "PE": 32, + "MVAU_hls_49": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_14": { - "PE":32, + "AddStreams_hls_14": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_39": { - "PE": 32 + "Thresholding_hls_39": { + "PE": 2 }, - "DuplicateStreams_Batch_15": { - "PE": 32, + "DuplicateStreams_hls_15": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_40": { - "PE": 32 + "Thresholding_hls_40": { + "PE": 2 }, - "Thresholding_Batch_41": { - "PE": 32 + "Thresholding_hls_41": { + "PE": 2 }, - "MatrixVectorActivation_49": { - "PE": 32, + "MVAU_hls_50": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_16": { - "SIMD": 64 + "FMPadding_hls_17": { + "SIMD": 4 }, - "ConvolutionInputGenerator_16": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_17": { + "SIMD": 4 }, - "MatrixVectorActivation_50": { - "PE": 32, + "MVAU_hls_51": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_51": { - "PE": 32, + "MVAU_hls_52": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_15": { - "PE":32, + 
"AddStreams_hls_15": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_42": { - "PE": 32 + "Thresholding_hls_42": { + "PE": 2 }, - - "ConvolutionInputGenerator_17": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_18": { + "SIMD": 4 }, - "Pool_Batch_1": { - "PE": 64 + "Pool_hls_1": { + "PE": 4 }, - "MatrixVectorActivation_52": { + "MVAU_hls_53": { "PE": 1, - "SIMD": 64, - "mem_mode" : "external" + "SIMD": 4, + "mem_mode": "external" }, - "LabelSelect_Batch_0": { - "outputDataType":"UINT16", + "LabelSelect_hls_0": { + "outputDataType": "UINT16", "PE": 1 }, - "ChannelwiseOp_Batch_0": { - "PE": 32 + "ChannelwiseOp_hls_0": { + "PE": 2 }, - "ChannelwiseOp_Batch_1": { - "PE": 32 + "ChannelwiseOp_hls_1": { + "PE": 2 }, - "ChannelwiseOp_Batch_2": { + "ChannelwiseOp_hls_2": { "PE": 1 } } diff --git a/build/resnet50/folding_config/U250_folding_config_no_doublepack_pe_folded_16.json b/build/resnet50/folding_config/U250_folding_config_no_doublepack_pe_folded_16.json deleted file mode 100644 index 09aa2dc..0000000 --- a/build/resnet50/folding_config/U250_folding_config_no_doublepack_pe_folded_16.json +++ /dev/null @@ -1,631 +0,0 @@ -{ - "Defaults": { - "outFIFODepths": [ - [32], - "all" - ], - "inFIFODepths": [ - [32], - "all" - ], - "mem_mode": [ - "decoupled", - [ - "MatrixVectorActivation" - ] - ] - }, - "FMPadding_Batch_0": { - "SIMD": 3 - }, - "ConvolutionInputGenerator_0": { - "SIMD": 3 - }, - "MatrixVectorActivation_0": { - "SIMD": 3, - "PE": 64 - }, - "FMPadding_Batch_1": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_1": { - "SIMD": 4 - }, - "Pool_Batch_0": { - "PE": 4 - }, - "DuplicateStreams_Batch_0": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "MatrixVectorActivation_2": { - "PE": 2, - "SIMD": 32 - }, - "MatrixVectorActivation_1": { - "PE": 1, - "SIMD": 16 - }, - "FMPadding_Batch_2": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_2": { - "SIMD": 4 - }, - "MatrixVectorActivation_3": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_4": { - 
"PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_0": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_0": { - "PE": 2 - }, - "DuplicateStreams_Batch_1": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_1": { - "PE": 2 - }, - "Thresholding_Batch_2": { - "PE": 2 - }, - "MatrixVectorActivation_5": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_3": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_3": { - "SIMD": 4 - }, - "MatrixVectorActivation_6": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_7": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_1": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_3": { - "PE": 2 - }, - "DuplicateStreams_Batch_2": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_4": { - "PE": 2 - }, - "Thresholding_Batch_5": { - "PE": 2 - }, - "MatrixVectorActivation_8": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_4": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_4": { - "SIMD": 4 - }, - "MatrixVectorActivation_9": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_10": { - "PE": 2, - "SIMD": 64 - }, - "AddStreams_Batch_2": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_6": { - "PE": 2 - }, - "Thresholding_Batch_7": { - "PE": 2 - }, - "DuplicateStreams_Batch_3": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "DownSampler_0": { - "SIMD": 4 - }, - "MatrixVectorActivation_11": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_12": { - "PE": 2, - "SIMD": 64 - }, - "FMPadding_Batch_5": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_5": { - "SIMD": 4 - }, - "MatrixVectorActivation_13": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_14": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_3": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_8": { - "PE": 2 - }, - "DuplicateStreams_Batch_4": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_9": { - "PE": 2 - }, - "Thresholding_Batch_10": 
{ - "PE": 2 - }, - "MatrixVectorActivation_15": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_6": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_6": { - "SIMD": 4 - }, - "MatrixVectorActivation_16": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_17": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_4": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_11": { - "PE": 2 - }, - "DuplicateStreams_Batch_5": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_12": { - "PE": 2 - }, - "Thresholding_Batch_13": { - "PE": 2 - }, - "MatrixVectorActivation_18": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_7": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_7": { - "SIMD": 4 - }, - "MatrixVectorActivation_19": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_20": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_5": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_14": { - "PE": 2 - }, - "DuplicateStreams_Batch_6": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_15": { - "PE": 2 - }, - "Thresholding_Batch_16": { - "PE": 2 - }, - "MatrixVectorActivation_21": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_8": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_8": { - "SIMD": 4 - }, - "MatrixVectorActivation_22": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_23": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_6": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_17": { - "PE": 2 - }, - "Thresholding_Batch_18": { - "PE": 2 - }, - "DuplicateStreams_Batch_7": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "DownSampler_1": { - "SIMD": 4 - }, - "MatrixVectorActivation_24": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_25": { - "PE": 2, - "SIMD": 64 - }, - "FMPadding_Batch_9": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_9": { - "SIMD": 4 - }, - "MatrixVectorActivation_26": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_27": { - 
"PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_7": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_19": { - "PE": 2 - }, - "DuplicateStreams_Batch_8": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_20": { - "PE": 2 - }, - "Thresholding_Batch_21": { - "PE": 2 - }, - "MatrixVectorActivation_28": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_10": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_10": { - "SIMD": 4 - }, - "MatrixVectorActivation_29": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_30": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_8": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_22": { - "PE": 2 - }, - "DuplicateStreams_Batch_9": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_23": { - "PE": 2 - }, - "Thresholding_Batch_24": { - "PE": 2 - }, - "MatrixVectorActivation_31": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_11": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_11": { - "SIMD": 4 - }, - "MatrixVectorActivation_32": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_33": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_9": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_25": { - "PE": 2 - }, - "DuplicateStreams_Batch_10": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_26": { - "PE": 2 - }, - "Thresholding_Batch_27": { - "PE": 2 - }, - "MatrixVectorActivation_34": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_12": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_12": { - "SIMD": 4 - }, - "MatrixVectorActivation_35": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_36": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_10": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_28": { - "PE": 2 - }, - "DuplicateStreams_Batch_11": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_29": { - "PE": 2 - }, - "Thresholding_Batch_30": { - "PE": 2 - }, - 
"MatrixVectorActivation_37": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_13": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_13": { - "SIMD": 4 - }, - "MatrixVectorActivation_38": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_39": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_11": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_31": { - "PE": 2 - }, - "DuplicateStreams_Batch_12": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_32": { - "PE": 2 - }, - "Thresholding_Batch_33": { - "PE": 2 - }, - "MatrixVectorActivation_40": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_14": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_14": { - "SIMD": 4 - }, - "MatrixVectorActivation_41": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_42": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_12": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_34": { - "PE": 2 - }, - "Thresholding_Batch_35": { - "PE": 2 - }, - "DuplicateStreams_Batch_13": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "DownSampler_2": { - "SIMD": 4 - }, - "MatrixVectorActivation_43": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_44": { - "PE": 2, - "SIMD": 64 - }, - "FMPadding_Batch_15": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_15": { - "SIMD": 4 - }, - "MatrixVectorActivation_45": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_46": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_13": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_36": { - "PE": 2 - }, - "DuplicateStreams_Batch_14": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_37": { - "PE": 2 - }, - "Thresholding_Batch_38": { - "PE": 2 - }, - "MatrixVectorActivation_47": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_16": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_16": { - "SIMD": 4 - }, - "MatrixVectorActivation_48": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_49": { - "PE": 
2, - "SIMD": 32 - }, - "AddStreams_Batch_14": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_39": { - "PE": 2 - }, - "DuplicateStreams_Batch_15": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_40": { - "PE": 2 - }, - "Thresholding_Batch_41": { - "PE": 2 - }, - "MatrixVectorActivation_50": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_17": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_17": { - "SIMD": 4 - }, - "MatrixVectorActivation_51": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_52": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_15": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_42": { - "PE": 2 - }, - "ConvolutionInputGenerator_18": { - "SIMD": 4 - }, - "Pool_Batch_1": { - "PE": 4 - }, - "MatrixVectorActivation_53": { - "PE": 1, - "SIMD": 4, - "mem_mode": "external" - }, - "LabelSelect_Batch_0": { - "outputDataType": "UINT16", - "PE": 1 - }, - "ChannelwiseOp_Batch_0": { - "PE": 2 - }, - "ChannelwiseOp_Batch_1": { - "PE": 2 - }, - "ChannelwiseOp_Batch_2": { - "PE": 1 - } -} diff --git a/build/resnet50/specialize_layers_config.json b/build/resnet50/specialize_layers_config.json new file mode 100644 index 0000000..2fb4afe --- /dev/null +++ b/build/resnet50/specialize_layers_config.json @@ -0,0 +1,528 @@ +{ + "Defaults": {}, + "FMPadding_0": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_0": { + "preferred_impl_style": "hls" + }, + "MVAU_0": { + "preferred_impl_style": "hls" + }, + "FMPadding_1": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_1": { + "preferred_impl_style": "hls" + }, + "Pool_0": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_0": { + "preferred_impl_style": "hls" + }, + "MVAU_1": { + "preferred_impl_style": "hls" + }, + "MVAU_2": { + "preferred_impl_style": "hls" + }, + "FMPadding_2": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_2": { + "preferred_impl_style": "hls" + }, + "MVAU_3": { + 
"preferred_impl_style": "hls" + }, + "MVAU_4": { + "preferred_impl_style": "hls" + }, + "AddStreams_0": { + "preferred_impl_style": "hls" + }, + "Thresholding_0": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_1": { + "preferred_impl_style": "hls" + }, + "Thresholding_1": { + "preferred_impl_style": "hls" + }, + "Thresholding_2": { + "preferred_impl_style": "hls" + }, + "MVAU_5": { + "preferred_impl_style": "hls" + }, + "FMPadding_3": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_3": { + "preferred_impl_style": "hls" + }, + "MVAU_6": { + "preferred_impl_style": "hls" + }, + "MVAU_7": { + "preferred_impl_style": "hls" + }, + "AddStreams_1": { + "preferred_impl_style": "hls" + }, + "Thresholding_3": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_2": { + "preferred_impl_style": "hls" + }, + "Thresholding_4": { + "preferred_impl_style": "hls" + }, + "Thresholding_5": { + "preferred_impl_style": "hls" + }, + "MVAU_8": { + "preferred_impl_style": "hls" + }, + "FMPadding_4": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_4": { + "preferred_impl_style": "hls" + }, + "MVAU_9": { + "preferred_impl_style": "hls" + }, + "MVAU_10": { + "preferred_impl_style": "hls" + }, + "AddStreams_2": { + "preferred_impl_style": "hls" + }, + "Thresholding_6": { + "preferred_impl_style": "hls" + }, + "Thresholding_7": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_3": { + "preferred_impl_style": "hls" + }, + "MVAU_11": { + "preferred_impl_style": "hls" + }, + "DownSampler_0": { + "preferred_impl_style": "hls" + }, + "MVAU_12": { + "preferred_impl_style": "hls" + }, + "FMPadding_5": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_5": { + "preferred_impl_style": "hls" + }, + "MVAU_13": { + "preferred_impl_style": "hls" + }, + "MVAU_14": { + "preferred_impl_style": "hls" + }, + "AddStreams_3": { + "preferred_impl_style": "hls" + }, + "Thresholding_8": { + "preferred_impl_style": "hls" + }, + 
"DuplicateStreams_4": { + "preferred_impl_style": "hls" + }, + "Thresholding_9": { + "preferred_impl_style": "hls" + }, + "Thresholding_10": { + "preferred_impl_style": "hls" + }, + "MVAU_15": { + "preferred_impl_style": "hls" + }, + "FMPadding_6": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_6": { + "preferred_impl_style": "hls" + }, + "MVAU_16": { + "preferred_impl_style": "hls" + }, + "MVAU_17": { + "preferred_impl_style": "hls" + }, + "AddStreams_4": { + "preferred_impl_style": "hls" + }, + "Thresholding_11": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_5": { + "preferred_impl_style": "hls" + }, + "Thresholding_12": { + "preferred_impl_style": "hls" + }, + "Thresholding_13": { + "preferred_impl_style": "hls" + }, + "MVAU_18": { + "preferred_impl_style": "hls" + }, + "FMPadding_7": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_7": { + "preferred_impl_style": "hls" + }, + "MVAU_19": { + "preferred_impl_style": "hls" + }, + "MVAU_20": { + "preferred_impl_style": "hls" + }, + "AddStreams_5": { + "preferred_impl_style": "hls" + }, + "Thresholding_14": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_6": { + "preferred_impl_style": "hls" + }, + "Thresholding_15": { + "preferred_impl_style": "hls" + }, + "Thresholding_16": { + "preferred_impl_style": "hls" + }, + "MVAU_21": { + "preferred_impl_style": "hls" + }, + "FMPadding_8": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_8": { + "preferred_impl_style": "hls" + }, + "MVAU_22": { + "preferred_impl_style": "hls" + }, + "MVAU_23": { + "preferred_impl_style": "hls" + }, + "AddStreams_6": { + "preferred_impl_style": "hls" + }, + "Thresholding_17": { + "preferred_impl_style": "hls" + }, + "Thresholding_18": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_7": { + "preferred_impl_style": "hls" + }, + "MVAU_24": { + "preferred_impl_style": "hls" + }, + "DownSampler_1": { + "preferred_impl_style": "hls" + }, + "MVAU_25": { + 
"preferred_impl_style": "hls" + }, + "FMPadding_9": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_9": { + "preferred_impl_style": "hls" + }, + "MVAU_26": { + "preferred_impl_style": "hls" + }, + "MVAU_27": { + "preferred_impl_style": "hls" + }, + "AddStreams_7": { + "preferred_impl_style": "hls" + }, + "Thresholding_19": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_8": { + "preferred_impl_style": "hls" + }, + "Thresholding_20": { + "preferred_impl_style": "hls" + }, + "Thresholding_21": { + "preferred_impl_style": "hls" + }, + "MVAU_28": { + "preferred_impl_style": "hls" + }, + "FMPadding_10": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_10": { + "preferred_impl_style": "hls" + }, + "MVAU_29": { + "preferred_impl_style": "hls" + }, + "MVAU_30": { + "preferred_impl_style": "hls" + }, + "AddStreams_8": { + "preferred_impl_style": "hls" + }, + "Thresholding_22": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_9": { + "preferred_impl_style": "hls" + }, + "Thresholding_23": { + "preferred_impl_style": "hls" + }, + "Thresholding_24": { + "preferred_impl_style": "hls" + }, + "MVAU_31": { + "preferred_impl_style": "hls" + }, + "FMPadding_11": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_11": { + "preferred_impl_style": "hls" + }, + "MVAU_32": { + "preferred_impl_style": "hls" + }, + "MVAU_33": { + "preferred_impl_style": "hls" + }, + "AddStreams_9": { + "preferred_impl_style": "hls" + }, + "Thresholding_25": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_10": { + "preferred_impl_style": "hls" + }, + "Thresholding_26": { + "preferred_impl_style": "hls" + }, + "Thresholding_27": { + "preferred_impl_style": "hls" + }, + "MVAU_34": { + "preferred_impl_style": "hls" + }, + "FMPadding_12": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_12": { + "preferred_impl_style": "hls" + }, + "MVAU_35": { + "preferred_impl_style": "hls" + }, + "MVAU_36": { + 
"preferred_impl_style": "hls" + }, + "AddStreams_10": { + "preferred_impl_style": "hls" + }, + "Thresholding_28": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_11": { + "preferred_impl_style": "hls" + }, + "Thresholding_29": { + "preferred_impl_style": "hls" + }, + "Thresholding_30": { + "preferred_impl_style": "hls" + }, + "MVAU_37": { + "preferred_impl_style": "hls" + }, + "FMPadding_13": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_13": { + "preferred_impl_style": "hls" + }, + "MVAU_38": { + "preferred_impl_style": "hls" + }, + "MVAU_39": { + "preferred_impl_style": "hls" + }, + "AddStreams_11": { + "preferred_impl_style": "hls" + }, + "Thresholding_31": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_12": { + "preferred_impl_style": "hls" + }, + "Thresholding_32": { + "preferred_impl_style": "hls" + }, + "Thresholding_33": { + "preferred_impl_style": "hls" + }, + "MVAU_40": { + "preferred_impl_style": "hls" + }, + "FMPadding_14": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_14": { + "preferred_impl_style": "hls" + }, + "MVAU_41": { + "preferred_impl_style": "hls" + }, + "MVAU_42": { + "preferred_impl_style": "hls" + }, + "AddStreams_12": { + "preferred_impl_style": "hls" + }, + "Thresholding_34": { + "preferred_impl_style": "hls" + }, + "ChannelwiseOp_0": { + "preferred_impl_style": "hls" + }, + "Thresholding_35": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_13": { + "preferred_impl_style": "hls" + }, + "MVAU_43": { + "preferred_impl_style": "hls" + }, + "DownSampler_2": { + "preferred_impl_style": "hls" + }, + "MVAU_44": { + "preferred_impl_style": "hls" + }, + "FMPadding_15": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_15": { + "preferred_impl_style": "hls" + }, + "MVAU_45": { + "preferred_impl_style": "hls" + }, + "MVAU_46": { + "preferred_impl_style": "hls" + }, + "AddStreams_13": { + "preferred_impl_style": "hls" + }, + "Thresholding_36": { + 
"preferred_impl_style": "hls" + }, + "DuplicateStreams_14": { + "preferred_impl_style": "hls" + }, + "Thresholding_37": { + "preferred_impl_style": "hls" + }, + "Thresholding_38": { + "preferred_impl_style": "hls" + }, + "MVAU_47": { + "preferred_impl_style": "hls" + }, + "FMPadding_16": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_16": { + "preferred_impl_style": "hls" + }, + "MVAU_48": { + "preferred_impl_style": "hls" + }, + "MVAU_49": { + "preferred_impl_style": "hls" + }, + "AddStreams_14": { + "preferred_impl_style": "hls" + }, + "Thresholding_39": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_15": { + "preferred_impl_style": "hls" + }, + "Thresholding_40": { + "preferred_impl_style": "hls" + }, + "Thresholding_41": { + "preferred_impl_style": "hls" + }, + "MVAU_50": { + "preferred_impl_style": "hls" + }, + "FMPadding_17": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_17": { + "preferred_impl_style": "hls" + }, + "MVAU_51": { + "preferred_impl_style": "hls" + }, + "MVAU_52": { + "preferred_impl_style": "hls" + }, + "AddStreams_15": { + "preferred_impl_style": "hls" + }, + "Thresholding_42": { + "preferred_impl_style": "hls" + }, + "ChannelwiseOp_1": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_18": { + "preferred_impl_style": "hls" + }, + "Pool_1": { + "preferred_impl_style": "hls" + }, + "MVAU_53": { + "preferred_impl_style": "hls" + }, + "ChannelwiseOp_2": { + "preferred_impl_style": "hls" + }, + "LabelSelect_0": { + "preferred_impl_style": "hls" + } +} diff --git a/build/vgg10-radioml/README.md b/build/vgg10-radioml/README.md index 17a4524..18df19d 100755 --- a/build/vgg10-radioml/README.md +++ b/build/vgg10-radioml/README.md @@ -12,7 +12,7 @@ Due to the 1-dimensional topology in VGG10 we use a specialized build script tha 0. Ensure you have performed the *Setup* steps in the top-level README for setting up the FINN requirements and environment variables. -1. 
Run the `download_vgg10.sh` script under the `models` directory to download the pretrained VGG10 ONNX model. You should have e.g. `vgg10-radioml/models/radioml_w4a4_small_tidy.onnx` as a result. +1. Run the `download_vgg10.sh` script under the `models` directory to download the pretrained VGG10 ONNX model. You should have `vgg10-radioml/models/radioml_w4a4_small_tidy.onnx` as a result. 2. Launch the build as follows: ```SHELL @@ -24,7 +24,7 @@ cd $FINN_EXAMPLES/build/finn ./run-docker.sh build_custom $FINN_EXAMPLES/build/vgg10 ``` -5. The generated outputs will be under `vgg10-radioml/output__`. You can find a description of the generated files [here](https://finn-dev.readthedocs.io/en/latest/command_line.html#simple-dataflow-build-mode). +3. The generated outputs will be under `vgg10-radioml/output__`. You can find a description of the generated files [here](https://finn-dev.readthedocs.io/en/latest/command_line.html#simple-dataflow-build-mode). ## Where did the ONNX model files come from? 
diff --git a/build/vgg10-radioml/build.py b/build/vgg10-radioml/build.py index 4641020..0567763 100755 --- a/build/vgg10-radioml/build.py +++ b/build/vgg10-radioml/build.py @@ -64,15 +64,16 @@ def select_build_steps(platform): "step_tidy_up", step_pre_streamline, "step_streamline", - "step_convert_to_hls", + "step_convert_to_hw", step_convert_final_layers, "step_create_dataflow_partition", + "step_specialize_layers", "step_target_fps_parallelization", "step_apply_folding_config", "step_minimize_bit_width", "step_generate_estimate_reports", - "step_hls_codegen", - "step_hls_ipgen", + "step_hw_codegen", + "step_hw_ipgen", "step_set_fifo_depths", "step_create_stitched_ip", "step_measure_rtlsim_performance", @@ -109,14 +110,13 @@ def select_build_steps(platform): shell_flow_type=shell_flow_type, vitis_platform=vitis_platform, folding_config_file="folding_config/%s_folding_config.json" % platform_name, - auto_fifo_depths=True, - standalone_thresholds=False, + split_large_fifos=True, + standalone_thresholds=True, # enable extra performance optimizations (physopt) vitis_opt_strategy=build_cfg.VitisOptStrategyCfg.PERFORMANCE_BEST, generate_outputs=[ build_cfg.DataflowOutputType.ESTIMATE_REPORTS, build_cfg.DataflowOutputType.STITCHED_IP, - # build_cfg.DataflowOutputType.OOC_SYNTH, # build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE, build_cfg.DataflowOutputType.BITFILE, build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE, diff --git a/build/vgg10-radioml/custom_steps.py b/build/vgg10-radioml/custom_steps.py index 8be2ef4..509efbc 100755 --- a/build/vgg10-radioml/custom_steps.py +++ b/build/vgg10-radioml/custom_steps.py @@ -29,7 +29,7 @@ from finn.builder.build_dataflow_config import DataflowBuildConfig from qonnx.transformation.change_3d_tensors_to_4d import Change3DTo4DTensors from qonnx.transformation.general import GiveUniqueNodeNames -import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls +import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw 
import finn.transformation.streamline.absorb as absorb @@ -40,7 +40,7 @@ def step_pre_streamline(model: ModelWrapper, cfg: DataflowBuildConfig): def step_convert_final_layers(model: ModelWrapper, cfg: DataflowBuildConfig): - model = model.transform(to_hls.InferChannelwiseLinearLayer()) - model = model.transform(to_hls.InferLabelSelectLayer()) + model = model.transform(to_hw.InferChannelwiseLinearLayer()) + model = model.transform(to_hw.InferLabelSelectLayer()) model = model.transform(GiveUniqueNodeNames()) return model diff --git a/build/vgg10-radioml/folding_config/ZCU104_folding_config.json b/build/vgg10-radioml/folding_config/ZCU104_folding_config.json index 14f908a..44fced1 100755 --- a/build/vgg10-radioml/folding_config/ZCU104_folding_config.json +++ b/build/vgg10-radioml/folding_config/ZCU104_folding_config.json @@ -1,136 +1,221 @@ { "Defaults": {}, - "FMPadding_Batch_0": { + "FMPadding_rtl_0": { "SIMD": 2 }, - "ConvolutionInputGenerator1D_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 2, - "ram_style": "auto" + "parallel_window": 0, + "ram_style": "distributed" }, - "MatrixVectorActivation_0": { - "PE": 32, + "MVAU_rtl_0": { + "PE": 16, "SIMD": 6, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingMaxPool_Batch_0": { + "Thresholding_rtl_0": { + "PE": 16, + "runtime_writeable_weights": 0, + "depth_trigger_uram": 0, + "depth_trigger_bram": 0 + }, + "StreamingMaxPool_hls_0": { + "PE": 16 }, - "FMPadding_Batch_1": { + "FMPadding_rtl_1": { "SIMD": 16 }, - "ConvolutionInputGenerator1D_1": { - "SIMD": 32, - "ram_style": "auto" + "ConvolutionInputGenerator_rtl_1": { + "SIMD": 16, + "parallel_window": 0, + "ram_style": "distributed" }, - "MatrixVectorActivation_1": { + "MVAU_rtl_1": { "PE": 16, - "SIMD": 96, + "SIMD": 48, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - 
"StreamingMaxPool_Batch_1": { + "Thresholding_rtl_1": { + "PE": 8, + "runtime_writeable_weights": 0, + "depth_trigger_uram": 0, + "depth_trigger_bram": 0 + }, + "StreamingMaxPool_hls_1": { + "PE": 8 }, - "FMPadding_Batch_2": { + "FMPadding_rtl_2": { "SIMD": 8 }, - "ConvolutionInputGenerator1D_2": { - "SIMD": 32, - "ram_style": "auto" + "ConvolutionInputGenerator_rtl_2": { + "SIMD": 8, + "parallel_window": 0, + "ram_style": "distributed" }, - "MatrixVectorActivation_2": { + "MVAU_rtl_2": { "PE": 8, - "SIMD": 96, + "SIMD": 48, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingMaxPool_Batch_2": { + "Thresholding_rtl_2": { + "PE": 4, + "runtime_writeable_weights": 0, + "depth_trigger_uram": 0, + "depth_trigger_bram": 0 }, - "FMPadding_Batch_3": { - "SIMD": 8 + "StreamingMaxPool_hls_2": { + "PE": 4 }, - "ConvolutionInputGenerator1D_3": { - "SIMD": 32, - "ram_style": "auto" + "FMPadding_rtl_3": { + "SIMD": 4 }, - "MatrixVectorActivation_3": { - "PE": 4, - "SIMD": 96, + "ConvolutionInputGenerator_rtl_3": { + "SIMD": 4, + "parallel_window": 0, + "ram_style": "distributed" + }, + "MVAU_rtl_3": { + "PE": 8, + "SIMD": 24, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingMaxPool_Batch_3": { + "Thresholding_rtl_3": { + "PE": 2, + "runtime_writeable_weights": 0, + "depth_trigger_uram": 0, + "depth_trigger_bram": 0 }, - "FMPadding_Batch_4": { - "SIMD": 4 + "StreamingMaxPool_hls_3": { + "PE": 2 }, - "ConvolutionInputGenerator1D_4": { - "SIMD": 32, - "ram_style": "auto" + "FMPadding_rtl_4": { + "SIMD": 2 }, - "MatrixVectorActivation_4": { - "PE": 2, - "SIMD": 96, + "ConvolutionInputGenerator_rtl_4": { + "SIMD": 2, + "parallel_window": 0, + "ram_style": "distributed" + }, + "MVAU_rtl_4": { + "PE": 4, + "SIMD": 24, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": 
"internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingMaxPool_Batch_4": { + "Thresholding_rtl_4": { + "PE": 1, + "runtime_writeable_weights": 0, + "depth_trigger_uram": 0, + "depth_trigger_bram": 0 }, - "FMPadding_Batch_5": { - "SIMD": 2 + "StreamingMaxPool_hls_4": { + "PE": 1 }, - "ConvolutionInputGenerator1D_5": { - "SIMD": 32, - "ram_style": "auto" + "FMPadding_rtl_5": { + "SIMD": 1 }, - "MatrixVectorActivation_5": { - "PE": 1, - "SIMD": 96, + "ConvolutionInputGenerator_rtl_5": { + "SIMD": 1, + "parallel_window": 0, + "ram_style": "distributed" + }, + "MVAU_rtl_5": { + "PE": 4, + "SIMD": 12, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingMaxPool_Batch_5": { + "Thresholding_rtl_5": { + "PE": 1, + "runtime_writeable_weights": 0, + "depth_trigger_uram": 0, + "depth_trigger_bram": 0 + }, + "StreamingMaxPool_hls_5": { + "PE": 1 }, - "FMPadding_Batch_6": { + "FMPadding_rtl_6": { "SIMD": 1 }, - "ConvolutionInputGenerator1D_6": { - "SIMD": 32, - "ram_style": "auto" + "ConvolutionInputGenerator_rtl_6": { + "SIMD": 1, + "parallel_window": 0, + "ram_style": "distributed" }, - "MatrixVectorActivation_6": { - "PE": 1, - "SIMD": 96, + "MVAU_rtl_6": { + "PE": 4, + "SIMD": 6, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingMaxPool_Batch_6": { + "Thresholding_rtl_6": { + "PE": 1, + "runtime_writeable_weights": 0, + "depth_trigger_uram": 0, + "depth_trigger_bram": 0 }, - "MatrixVectorActivation_7": { - "PE": 2, - "SIMD": 32, + "StreamingMaxPool_hls_6": { + "PE": 1 + }, + "MVAU_rtl_7": { + "PE": 4, + "SIMD": 4, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "MatrixVectorActivation_8": { + "Thresholding_rtl_7": { "PE": 1, - "SIMD": 32, + "runtime_writeable_weights": 0, + 
"depth_trigger_uram": 0, + "depth_trigger_bram": 0 + }, + "MVAU_rtl_8": { + "PE": 4, + "SIMD": 2, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "MatrixVectorActivation_9": { + "Thresholding_rtl_8": { "PE": 1, - "SIMD": 8, + "runtime_writeable_weights": 0, + "depth_trigger_uram": 0, + "depth_trigger_bram": 0 + }, + "MVAU_rtl_9": { + "PE": 4, + "SIMD": 1, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 + }, + "ChannelwiseOp_hls_0": { + "PE": 1, + "ram_style": "distributed" + }, + "LabelSelect_hls_0": { + "PE": 1 } } diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 60ac21d..1219b35 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -23,7 +23,8 @@ pipeline { "kws", "mobilenet-v1", "resnet50", - "vgg10-radioml"] + "vgg10-radioml", + "gtsrb"] createParallelBuilds(buildList) createReleaseArea(buildList) } diff --git a/finn_examples/models.py b/finn_examples/models.py index 8076124..bcacabf 100644 --- a/finn_examples/models.py +++ b/finn_examples/models.py @@ -67,6 +67,17 @@ "num_outputs": 1, } +_gtsrb_cnv_io_shape_dict = { + "idt": DataType["UINT8"], + "odt": DataType["INT16"], + "ishape_normal": (1, 32, 32, 3), + "oshape_normal": (1, 44), + "ishape_folded": (1, 1, 32, 32, 3, 1), + "oshape_folded": (1, 11, 4), + "ishape_packed": (1, 1, 32, 32, 3, 1), + "oshape_packed": (1, 11, 8), +} + _bincop_cnv_io_shape_dict = { "idt": [DataType["UINT8"]], "odt": [DataType["UINT8"]], @@ -179,24 +190,29 @@ def get_edge_or_pcie(): raise OSError("Platform is not supported.") -def find_bitfile(model_name, target_platform): - bitfile_exts = {"edge": "bit", "pcie": "xclbin"} - bitfile_ext = bitfile_exts[get_edge_or_pcie()] - bitfile_name = "%s.%s" % (model_name, bitfile_ext) - bitfile_candidates = [ - pk.resource_filename("finn_examples", "bitfiles/%s/%s" % (target_platform, bitfile_name)), - pk.resource_filename( - 
"finn_examples", - "bitfiles/bitfiles.zip.d/%s/%s" % (target_platform, bitfile_name), - ), - ] - for candidate in bitfile_candidates: - if os.path.isfile(candidate): - return candidate - raise Exception( - "Bitfile for model = %s target platform = %s not found. Looked in: %s" - % (model_name, target_platform, str(bitfile_candidates)) - ) +def find_bitfile(model_name, target_platform, bitfile_path): + if bitfile_path is not None: + return bitfile_path + else: + bitfile_exts = {"edge": "bit", "pcie": "xclbin"} + bitfile_ext = bitfile_exts[get_edge_or_pcie()] + bitfile_name = "%s.%s" % (model_name, bitfile_ext) + bitfile_candidates = [ + pk.resource_filename( + "finn_examples", "bitfiles/%s/%s" % (target_platform, bitfile_name) + ), + pk.resource_filename( + "finn_examples", + "bitfiles/bitfiles.zip.d/%s/%s" % (target_platform, bitfile_name), + ), + ] + for candidate in bitfile_candidates: + if os.path.isfile(candidate): + return candidate + raise Exception( + "Bitfile for model = %s target platform = %s not found. 
Looked in: %s" + % (model_name, target_platform, str(bitfile_candidates)) + ) def find_runtime_weights(model_name, target_platform): @@ -255,75 +271,75 @@ def resolve_target_platform(target_platform): return check_platform_is_valid(platform) -def kws_mlp(target_platform=None): +def kws_mlp(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "kwsmlp-w3a3" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) return FINNExampleOverlay(filename, driver_mode, _gscv2_mlp_io_shape_dict) -def tfc_w1a1_mnist(target_platform=None): +def tfc_w1a1_mnist(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "tfc-w1a1" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) return FINNExampleOverlay(filename, driver_mode, _mnist_fc_io_shape_dict) -def tfc_w1a2_mnist(target_platform=None): +def tfc_w1a2_mnist(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "tfc-w1a2" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) return FINNExampleOverlay(filename, driver_mode, _mnist_fc_io_shape_dict) -def tfc_w2a2_mnist(target_platform=None): +def tfc_w2a2_mnist(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "tfc-w2a2" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) return FINNExampleOverlay(filename, driver_mode, _mnist_fc_io_shape_dict) -def cnv_w1a1_cifar10(target_platform=None): +def cnv_w1a1_cifar10(target_platform=None, 
bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "cnv-w1a1" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) return FINNExampleOverlay(filename, driver_mode, _cifar10_cnv_io_shape_dict) -def cnv_w1a2_cifar10(target_platform=None): +def cnv_w1a2_cifar10(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "cnv-w1a2" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) return FINNExampleOverlay(filename, driver_mode, _cifar10_cnv_io_shape_dict) -def cnv_w2a2_cifar10(target_platform=None): +def cnv_w2a2_cifar10(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "cnv-w2a2" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) return FINNExampleOverlay(filename, driver_mode, _cifar10_cnv_io_shape_dict) -def bincop_cnv(target_platform=None): +def bincop_cnv(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "bincop-cnv" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) return FINNExampleOverlay(filename, driver_mode, _bincop_cnv_io_shape_dict) -def mobilenetv1_w4a4_imagenet(target_platform=None): +def mobilenetv1_w4a4_imagenet(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "mobilenetv1-w4a4" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) if target_platform in 
["ZCU104"]: runtime_weight_dir = find_runtime_weights(model_name, target_platform) else: @@ -339,11 +355,11 @@ def mobilenetv1_w4a4_imagenet(target_platform=None): ) -def resnet50_w1a2_imagenet(target_platform=None): +def resnet50_w1a2_imagenet(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "resnet50-w1a2" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) runtime_weight_dir = find_runtime_weights(model_name, target_platform) return FINNExampleOverlay( filename, @@ -353,11 +369,11 @@ def resnet50_w1a2_imagenet(target_platform=None): ) -def vgg10_w4a4_radioml(target_platform=None): +def vgg10_w4a4_radioml(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "radioml_w4a4_small_tidy" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) fclk_mhz = 250.0 return FINNExampleOverlay( filename, @@ -367,12 +383,20 @@ def vgg10_w4a4_radioml(target_platform=None): ) -def mlp_w2a2_unsw_nb15(target_platform=None): +def mlp_w2a2_unsw_nb15(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "unsw_nb15-mlp-w2a2" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) fclk_mhz = 100.0 return FINNExampleOverlay( filename, driver_mode, _unsw_nb15_mlp_io_shape_dict, fclk_mhz=fclk_mhz ) + + +def cnv_w1a1_gtsrb(target_platform=None, bitfile_path=None): + target_platform = resolve_target_platform(target_platform) + driver_mode = get_driver_mode() + model_name = "cnv-gtsrb-w1a1" + filename = find_bitfile(model_name, target_platform, bitfile_path) + return FINNExampleOverlay(filename, driver_mode, 
_gtsrb_cnv_io_shape_dict) diff --git a/finn_examples/notebooks/2_imagenet_with_cnns.ipynb b/finn_examples/notebooks/2_imagenet_with_cnns.ipynb index a30607e..3ed5f3d 100755 --- a/finn_examples/notebooks/2_imagenet_with_cnns.ipynb +++ b/finn_examples/notebooks/2_imagenet_with_cnns.ipynb @@ -21,46 +21,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/javascript": [ - "\n", - "try {\n", - "require(['notebook/js/codecell'], function(codecell) {\n", - " codecell.CodeCell.options_default.highlight_modes[\n", - " 'magic_text/x-csrc'] = {'reg':[/^%%microblaze/]};\n", - " Jupyter.notebook.events.one('kernel_ready.Kernel', function(){\n", - " Jupyter.notebook.get_cells().map(function(cell){\n", - " if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ;\n", - " });\n", - "});\n", - "} catch (e) {};\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/javascript": [ - "\n", - "try {\n", - "require(['notebook/js/codecell'], function(codecell) {\n", - " codecell.CodeCell.options_default.highlight_modes[\n", - " 'magic_text/x-csrc'] = {'reg':[/^%%pybind11/]};\n", - " Jupyter.notebook.events.one('kernel_ready.Kernel', function(){\n", - " Jupyter.notebook.get_cells().map(function(cell){\n", - " if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ;\n", - " });\n", - "});\n", - "} catch (e) {};\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "#mobilenetv1_w4a4 is available on U250 and ZCU104\n", "accel = models.mobilenetv1_w4a4_imagenet()\n", diff --git a/finn_examples/notebooks/4_keyword_spotting.ipynb b/finn_examples/notebooks/4_keyword_spotting.ipynb index 914b372..7f903b0 100644 --- a/finn_examples/notebooks/4_keyword_spotting.ipynb +++ b/finn_examples/notebooks/4_keyword_spotting.ipynb @@ -18,7 +18,7 @@ "\n", "\n", "\n", - "A more in-depth explenation of MFCC features can be found on wikipedia: 
https://en.wikipedia.org/wiki/Mel-frequency_cepstrum\n", + "A more in-depth explanation of MFCC features can be found on wikipedia: https://en.wikipedia.org/wiki/Mel-frequency_cepstrum\n", "\n", "For this concrete case we used the python library [python_speech_features](https://github.com/jameslyons/python_speech_features) to produce these features.\n", "\n", @@ -194,7 +194,7 @@ "### Using the built-in performance benchmark\n", "\n", "To measure the performance of indivudual components of the PYNQ stack and the FINN accelerator on the FPGA,\n", - "FINN comes with a buit-in benchmark. This benchmark computes the throughput of the FINN accelerator as seen on the FPGA." + "FINN comes with a built-in benchmark. This benchmark computes the throughput of the FINN accelerator as seen on the FPGA." ] }, { diff --git a/finn_examples/notebooks/7_traffic_sign_recognition_gtsrb.ipynb b/finn_examples/notebooks/7_traffic_sign_recognition_gtsrb.ipynb new file mode 100644 index 0000000..8d938d5 --- /dev/null +++ b/finn_examples/notebooks/7_traffic_sign_recognition_gtsrb.ipynb @@ -0,0 +1,280 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Initialize the accelerator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from finn_examples import models\n", + "accel = models.cnv_w1a1_gtsrb()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Expected input shape and datatype: %s %s\" % (str(accel.ishape_normal), str(accel.idt)))\n", + "print(\"Expected output shape and datatype: %s %s\" % (str(accel.oshape_normal), str(accel.odt)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load the GTSRB dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from os import path\n", + "import urllib.request\n", + "import numpy as np\n",
+ "dataset_local = \"/tmp/traffic-signs-data.zip\"\n", + "if not path.isfile(dataset_local):\n", + " dataset_url = \"https://d17h27t6h515a5.cloudfront.net/topher/2017/February/5898cd6f_traffic-signs-data/traffic-signs-data.zip\"\n", + " urllib.request.urlretrieve(dataset_url, dataset_local)\n", + " ! unzip {dataset_local} -d /tmp\n", + "\n", + "dataset_dict = np.load(\"/tmp/test.p\", allow_pickle=True)\n", + "testx = dataset_dict[\"features\"]\n", + "testy = dataset_dict[\"labels\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gtsrb_classes = [\n", + " '20 Km/h', \n", + " '30 Km/h', \n", + " '50 Km/h', \n", + " '60 Km/h', \n", + " '70 Km/h', \n", + " '80 Km/h', \n", + " 'End 80 Km/h', \n", + " '100 Km/h', \n", + " '120 Km/h', \n", + " 'No overtaking', \n", + " 'No overtaking for large trucks', \n", + " 'Priority crossroad', \n", + " 'Priority road', \n", + " 'Give way', \n", + " 'Stop', \n", + " 'No vehicles', \n", + " 'Prohibited for vehicles with a permitted gross weight over 3.5t including their trailers, and for tractors except passenger cars and buses', \n", + " 'No entry for vehicular traffic', \n", + " 'Danger Ahead', \n", + " 'Bend to left', \n", + " 'Bend to right', \n", + " 'Double bend (first to left)', \n", + " 'Uneven road', \n", + " 'Road slippery when wet or dirty', \n", + " 'Road narrows (right)', \n", + " 'Road works', \n", + " 'Traffic signals', \n", + " 'Pedestrians in road ahead', \n", + " 'Children crossing ahead', \n", + " 'Bicycles prohibited', \n", + " 'Risk of snow or ice', \n", + " 'Wild animals', \n", + " 'End of all speed and overtaking restrictions', \n", + " 'Turn right ahead', \n", + " 'Turn left ahead', \n", + " 'Ahead only', \n", + " 'Ahead or right only', \n", + " 'Ahead or left only', \n", + " 'Pass by on right', \n", + " 'Pass by on left', \n", + " 'Roundabout', \n", + " 'End of no-overtaking zone', \n", + " 'End of no-overtaking zone for vehicles with a permitted gross weight 
over 3.5t including their trailers, and for tractors except passenger cars and buses', \n", + " 'Not a roadsign'\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Dataset shape is \" + str(testx.shape))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Classify a single image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_single_x = testx[0]\n", + "test_single_y = testy[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "from matplotlib import pyplot as plt\n", + "\n", + "plt.imshow(test_single_x)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Expected class is:\\n%s\" % (gtsrb_classes[test_single_y]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "accel_y = accel.execute(test_single_x.reshape(accel.ishape_normal))\n", + "print(\"Accelerator result is:\\n%s\" % (gtsrb_classes[np.argmax(accel_y)]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Validate accuracy on GTSRB test set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "batch_size = 421\n", + "total = testx.shape[0]\n", + "accel.batch_size = batch_size\n", + "n_batches = int(total / batch_size)\n", + "\n", + "batch_imgs = testx.reshape(n_batches, batch_size, -1)\n", + "batch_labels = testy.reshape(n_batches, batch_size)\n", + "obuf_normal = np.empty_like(accel.obuf_packed_device)\n", + "print(\"Ready to run validation, test images tensor has shape %s\" % str(batch_imgs.shape))\n", + "print(\"Accelerator buffer shapes are %s for input, %s for output\" % (str(accel.ishape_packed), 
str(accel.oshape_packed)) )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ok = 0\n", + "nok = 0\n", + "for i in range(n_batches):\n", + " ibuf_normal = batch_imgs[i].reshape(accel.ibuf_packed_device.shape)\n", + " exp = batch_labels[i]\n", + " # to avoid the slower software implementation during data unpacking,\n", + " # we make manual calls to buffer copies and execute_on_buffers\n", + " # all this could have been replaced with accel.execute() otherwise\n", + " accel.copy_input_data_to_device(ibuf_normal)\n", + " accel.execute_on_buffers()\n", + " obuf_normal = np.empty_like(accel.obuf_packed_device)\n", + " accel.copy_output_data_from_device(obuf_normal)\n", + " # this line provides fast unpacking using numpy primitives\n", + " # instead of using FINN's unpack functions\n", + " quick_out = obuf_normal.view(np.uint16).reshape(accel.batch_size, 44)\n", + " obuf_argmax = np.argmax(quick_out, axis=-1)\n", + " ok_batch = (obuf_argmax == exp).sum()\n", + " nok_batch = (batch_size-ok_batch)\n", + " ok += ok_batch\n", + " nok += nok_batch\n", + " \n", + " print(\"batch %d / %d : total OK %d NOK %d\" % (i+1, n_batches, ok, nok))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "acc = 100.0 * ok / (total)\n", + "print(\"Final accuracy: {}%\".format(acc))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Run built-in benchmarks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "accel.batch_size = 100\n", + "accel.throughput_test()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}