diff --git a/AMD-license-agreement-for-non-commercial-models.md b/AMD-license-agreement-for-non-commercial-models.md index b598532..8a384de 100644 --- a/AMD-license-agreement-for-non-commercial-models.md +++ b/AMD-license-agreement-for-non-commercial-models.md @@ -3,7 +3,7 @@ LICENSE AGREEMENT FOR NON-COMMERCIAL MODELS Trained Models: -bincop-cnv, cnv-w1a1, cnv-w1a2, cnv-w2a2, kwsmlp-w3a3, mobilenetv1-w4a4, resnet50-w1a2, tfc-w1a1, tfc-w1a2, tfc-w2a2, unsw_nb15-mlp-w2a2, vgg10-radioml-w4a4 +bincop-cnv, cnv-w1a1, cnv-w1a2, cnv-w2a2, kwsmlp-w3a3, mobilenetv1-w4a4, resnet50-w1a2, tfc-w1a1, tfc-w1a2, tfc-w2a2, unsw_nb15-mlp-w2a2, vgg10-radioml-w4a4, cnv_1w1a_gtsrb.onnx This License Agreement for Non-Commercial Models (“Agreement”) is a legal agreement between you (either an individual or an entity) and Advanced Micro Devices, Inc. on behalf of itself and its subsidiaries and affiliates (collectively “AMD”). DO NOT USE THE TRAINED MODELS IDENTIFIED ABOVE UNTIL YOU HAVE CAREFULLY READ THIS AGREEMENT. BY USING, INSTALLING, MODIFYING, COPYING, TRAINING, BENCHMARKING, OR DISTRIBUTING THE TRAINED MODELS, YOU AGREE TO AND ACCEPT ALL TERMS AND CONDITIONS OF THIS AGREEMENT. If you do not accept these terms, do not use the Trained Models. 
diff --git a/AUTHORS.rst b/AUTHORS.rst index ba06e5d..f7616fa 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -2,9 +2,17 @@ Contributors ============ -* Yaman Umuroglu (@maltanar) (maintainer) +* Mirza Mrahorovic (@mmrahorovic) (maintainer) * Jakoba Petri-Koenig (@auphelia) +* Yaman Umuroglu (@maltanar) * Lucian Petrica (@quetric) * Tobias Alonso (@Tobi-Alonso) * Hendrik Borras (@HenniOVP) * Felix Paul Jentzsch (@felixpj) +* Aziz Bahri (@azizb-xlnx) +* John Monks (@jmonks-amd) +* Fionn O'Donohoe (@fionnodonohoe-xlnx) +* Radoslav Pitoňák (@rpitonak) +* Matthias Gehre (@mgehre-amd) +* NaelF (@NaelF) +* Tim Paine (@timkpaine) diff --git a/LICENSE b/LICENSE index 66e9909..788d4c6 100644 --- a/LICENSE +++ b/LICENSE @@ -1,20 +1,22 @@ BSD 3-Clause License -Copyright (c) 2023, Xilinx +Copyright (C) 2020-2022, Xilinx, Inc. +Copyright (C) 2022-2024, Advanced Micro Devices, Inc. +All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. -3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. 
+* Neither the name of FINN nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE diff --git a/build/README.md b/build/README.md index e284deb..7cdac19 100644 --- a/build/README.md +++ b/build/README.md @@ -16,7 +16,7 @@ to do this again in the future when the `finn-examples` repo gets updated and re Docker community edition `docker-ce`. 3. Set up the environment variables to point to your Vivado/Vitis installation, depending on your target platform(s): - * For Zynq platforms you'll need to set `VIVADO_PATH`, e.g. `VIVADO_PATH=/opt/xilinx/Vivado/2019.1/` + * For Zynq platforms you'll need to set `VIVADO_PATH`, e.g. `VIVADO_PATH=/opt/xilinx/Vivado/2022.2/` * For Alveo platforms you'll need to set `VITIS_PATH`, `PLATFORM_REPO_PATHS` and `XILINX_XRT` ## Build bitfiles diff --git a/build/bnn-pynq/README.md b/build/bnn-pynq/README.md index 0c2b65a..3ccb705 100644 --- a/build/bnn-pynq/README.md +++ b/build/bnn-pynq/README.md @@ -42,7 +42,7 @@ cd $FINN_EXAMPLES/build/finn The BNN-PYNQ networks are part of the [Brevitas examples](https://github.com/Xilinx/brevitas/tree/master/src/brevitas_examples/bnn_pynq). You can find the details on quantization, accuracy, layers used in the Brevitas repo, as well as the training scripts if you'd like to retrain them yourself. -Subsequently, those trained networks are [exported to ONNX](https://github.com/Xilinx/finn/blob/master/notebooks/basics/1_brevitas_network_import.ipynb). In addition, the particular versions +Subsequently, those trained networks are [exported to ONNX](https://github.com/Xilinx/finn/blob/main/notebooks/basics/1_brevitas_network_import_via_QONNX.ipynb). 
In addition, the particular versions used here have two additions, as described in the "Adding Pre- and Postprocessing" section of [this notebook](https://github.com/Xilinx/finn/blob/master/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb): * A divide-by-255 node is added at the input, and the input is marked as 8-bit (to directly accept 8-bit images as input) diff --git a/build/bnn-pynq/build.py b/build/bnn-pynq/build.py index ea509bf..867410c 100644 --- a/build/bnn-pynq/build.py +++ b/build/bnn-pynq/build.py @@ -1,3 +1,31 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + import finn.builder.build_dataflow as build import finn.builder.build_dataflow_config as build_cfg from finn.util.basic import alveo_default_platform @@ -54,12 +82,13 @@ def platform_to_shell(platform): cfg = build_cfg.DataflowBuildConfig( output_dir="output_%s_%s" % (model_name, release_platform_name), folding_config_file="folding_config/%s_folding_config.json" % model_name, - synth_clk_period_ns=10.0, + synth_clk_period_ns=5.0, board=platform_name, shell_flow_type=shell_flow_type, vitis_platform=vitis_platform, generate_outputs=[build_cfg.DataflowOutputType.BITFILE], save_intermediate_models=True, + default_swg_exception=True, ) model_file = "models/%s.onnx" % model_name # launch FINN compiler to build diff --git a/build/bnn-pynq/folding_config/cnv-w1a1_folding_config.json b/build/bnn-pynq/folding_config/cnv-w1a1_folding_config.json index 03b462d..0bb17e9 100644 --- a/build/bnn-pynq/folding_config/cnv-w1a1_folding_config.json +++ b/build/bnn-pynq/folding_config/cnv-w1a1_folding_config.json @@ -1,79 +1,87 @@ { "Defaults": {}, - "Thresholding_Batch_0": { - "PE": 1, - "ram_style": "distributed" + "Thresholding_rtl_0": { + "PE": 1 }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 3, "ram_style": "distributed" }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 16, "SIMD": 3, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_1": { + 
"ConvolutionInputGenerator_rtl_1": { "SIMD": 32, "ram_style": "distributed" }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 32, "SIMD": 32, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 32, "ram_style": "distributed" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 16, "SIMD": 32, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_3": { + "ConvolutionInputGenerator_rtl_3": { "SIMD": 32, "ram_style": "distributed" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 16, "SIMD": 32, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 32, "ram_style": "distributed" }, - "MatrixVectorActivation_4": { + "MVAU_hls_4": { "PE": 4, "SIMD": 32, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_5": { + "ConvolutionInputGenerator_rtl_5": { "SIMD": 32, "ram_style": "distributed" }, - "MatrixVectorActivation_5": { + "MVAU_hls_5": { "PE": 1, "SIMD": 32, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_6": { + "MVAU_hls_6": { "PE": 1, "SIMD": 4, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_7": { + "MVAU_hls_7": { "PE": 1, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_8": { + "MVAU_hls_8": { "PE": 5, "SIMD": 1, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/bnn-pynq/folding_config/cnv-w1a2_folding_config.json b/build/bnn-pynq/folding_config/cnv-w1a2_folding_config.json index c83c614..954de5b 100644 --- a/build/bnn-pynq/folding_config/cnv-w1a2_folding_config.json +++ 
b/build/bnn-pynq/folding_config/cnv-w1a2_folding_config.json @@ -1,79 +1,87 @@ { "Defaults": {}, - "Thresholding_Batch_0": { - "PE": 1, - "ram_style": "distributed" + "Thresholding_rtl_0": { + "PE": 1 }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 3, "ram_style": "distributed" }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 8, "SIMD": 3, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_1": { + "ConvolutionInputGenerator_rtl_1": { "SIMD": 16, "ram_style": "distributed" }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 16, "SIMD": 16, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 16, "ram_style": "distributed" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 8, "SIMD": 16, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_3": { + "ConvolutionInputGenerator_rtl_3": { "SIMD": 16, "ram_style": "distributed" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 8, "SIMD": 16, - "ram_style": "block" + "ram_style": "block", + "resType": "lut" }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 8, "ram_style": "distributed" }, - "MatrixVectorActivation_4": { + "MVAU_hls_4": { "PE": 4, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_5": { + "ConvolutionInputGenerator_rtl_5": { "SIMD": 8, "ram_style": "distributed" }, - "MatrixVectorActivation_5": { + "MVAU_hls_5": { "PE": 1, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_6": { + "MVAU_hls_6": { "PE": 1, "SIMD": 2, - "ram_style": "distributed" + "ram_style": "distributed", + "resType": "lut" }, - "MatrixVectorActivation_7": { + "MVAU_hls_7": { "PE": 2, "SIMD": 2, - "ram_style": "block" + "ram_style": "block", + 
"resType": "lut" }, - "MatrixVectorActivation_8": { + "MVAU_hls_8": { "PE": 5, "SIMD": 1, - "ram_style": "distributed" + "ram_style": "distributed", + "resType": "lut" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/bnn-pynq/folding_config/cnv-w2a2_folding_config.json b/build/bnn-pynq/folding_config/cnv-w2a2_folding_config.json index c83c614..954de5b 100644 --- a/build/bnn-pynq/folding_config/cnv-w2a2_folding_config.json +++ b/build/bnn-pynq/folding_config/cnv-w2a2_folding_config.json @@ -1,79 +1,87 @@ { "Defaults": {}, - "Thresholding_Batch_0": { - "PE": 1, - "ram_style": "distributed" + "Thresholding_rtl_0": { + "PE": 1 }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 3, "ram_style": "distributed" }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 8, "SIMD": 3, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_1": { + "ConvolutionInputGenerator_rtl_1": { "SIMD": 16, "ram_style": "distributed" }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 16, "SIMD": 16, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 16, "ram_style": "distributed" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 8, "SIMD": 16, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_3": { + "ConvolutionInputGenerator_rtl_3": { "SIMD": 16, "ram_style": "distributed" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 8, "SIMD": 16, - "ram_style": "block" + "ram_style": "block", + "resType": "lut" }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 8, "ram_style": "distributed" }, - "MatrixVectorActivation_4": { + "MVAU_hls_4": { "PE": 4, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "ConvolutionInputGenerator_5": { + 
"ConvolutionInputGenerator_rtl_5": { "SIMD": 8, "ram_style": "distributed" }, - "MatrixVectorActivation_5": { + "MVAU_hls_5": { "PE": 1, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_6": { + "MVAU_hls_6": { "PE": 1, "SIMD": 2, - "ram_style": "distributed" + "ram_style": "distributed", + "resType": "lut" }, - "MatrixVectorActivation_7": { + "MVAU_hls_7": { "PE": 2, "SIMD": 2, - "ram_style": "block" + "ram_style": "block", + "resType": "lut" }, - "MatrixVectorActivation_8": { + "MVAU_hls_8": { "PE": 5, "SIMD": 1, - "ram_style": "distributed" + "ram_style": "distributed", + "resType": "lut" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/bnn-pynq/folding_config/tfc-w1a1_folding_config.json b/build/bnn-pynq/folding_config/tfc-w1a1_folding_config.json index 95167f1..bbaad88 100644 --- a/build/bnn-pynq/folding_config/tfc-w1a1_folding_config.json +++ b/build/bnn-pynq/folding_config/tfc-w1a1_folding_config.json @@ -1,30 +1,33 @@ { "Defaults": {}, - "Thresholding_Batch_0": { - "PE": 49, - "ram_style": "distributed" + "Thresholding_rtl_0": { + "PE": 49 }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 16, "SIMD": 49, - "ram_style": "block" + "ram_style": "block", + "resType": "lut" }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 8, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 8, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 10, "SIMD": 8, - "ram_style": "distributed" + "ram_style": "distributed", + "resType": "lut" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/bnn-pynq/folding_config/tfc-w1a2_folding_config.json b/build/bnn-pynq/folding_config/tfc-w1a2_folding_config.json index 95167f1..bbaad88 100644 --- 
a/build/bnn-pynq/folding_config/tfc-w1a2_folding_config.json +++ b/build/bnn-pynq/folding_config/tfc-w1a2_folding_config.json @@ -1,30 +1,33 @@ { "Defaults": {}, - "Thresholding_Batch_0": { - "PE": 49, - "ram_style": "distributed" + "Thresholding_rtl_0": { + "PE": 49 }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 16, "SIMD": 49, - "ram_style": "block" + "ram_style": "block", + "resType": "lut" }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 8, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 8, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 10, "SIMD": 8, - "ram_style": "distributed" + "ram_style": "distributed", + "resType": "lut" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/bnn-pynq/folding_config/tfc-w2a2_folding_config.json b/build/bnn-pynq/folding_config/tfc-w2a2_folding_config.json index 95167f1..bbaad88 100644 --- a/build/bnn-pynq/folding_config/tfc-w2a2_folding_config.json +++ b/build/bnn-pynq/folding_config/tfc-w2a2_folding_config.json @@ -1,30 +1,33 @@ { "Defaults": {}, - "Thresholding_Batch_0": { - "PE": 49, - "ram_style": "distributed" + "Thresholding_rtl_0": { + "PE": 49 }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 16, "SIMD": 49, - "ram_style": "block" + "ram_style": "block", + "resType": "lut" }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 8, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 8, "SIMD": 8, - "ram_style": "auto" + "ram_style": "auto", + "resType": "lut" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 10, "SIMD": 8, - "ram_style": "distributed" + "ram_style": "distributed", + "resType": "lut" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git 
a/build/get-finn.sh b/build/get-finn.sh index 6dbe581..de796a1 100755 --- a/build/get-finn.sh +++ b/build/get-finn.sh @@ -1,6 +1,6 @@ #!/bin/bash # Copyright (C) 2020-2022, Xilinx -# Copyright (C) 2023, Advanced Micro Devices, Inc. +# Copyright (C) 2022-2024, Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -31,7 +31,7 @@ # URL for git repo to be cloned REPO_URL=https://github.com/Xilinx/finn # commit hash for repo -REPO_COMMIT=02ce6954c93963c8407cd5c20761fccf92e1c70d +REPO_COMMIT=39fb8859fec0e47276ffadcafe43092d1b10af7e # directory (under the same folder as this script) to clone to REPO_DIR=finn diff --git a/build/gtsrb/README.md b/build/gtsrb/README.md new file mode 100644 index 0000000..f5179a1 --- /dev/null +++ b/build/gtsrb/README.md @@ -0,0 +1,26 @@ +# Brevitas GTSRB example + +This is the binarized CNV topology from the paper [FINN: A Framework for Fast, Scalable Binarized Neural Network Inference](https://arxiv.org/abs/1612.07119) which is trained +on the [German Traffic Sign Recognition Benchmark (GTSRB)](https://benchmark.ini.rub.de/gtsrb_news.html) dataset. + +## Build bitfiles for GTSRB + +0. Ensure you have performed the *Setup* steps in the top-level README for setting up the FINN requirements and environment variables. + +1. Run the `download-model.sh` script under the `models` directory to download the pretrained QONNX model. You should have `gtsrb/models/cnv_1w1a_gtsrb.onnx` as a result. + +2. Launch the build as follows: +```SHELL +# update this according to where you cloned this repo: +FINN_EXAMPLES=/path/to/finn-examples +# cd into finn submodule +cd $FINN_EXAMPLES/build/finn +# launch the build on the gtsrb folder +./run-docker.sh build_custom $FINN_EXAMPLES/build/gtsrb +``` + +3. The generated outputs will be under `gtsrb/output__`. 
You can find a description of the generated files [here](https://finn-dev.readthedocs.io/en/latest/command_line.html#simple-dataflow-build-mode). + +## Where did the ONNX model files come from? + +The model is part of the QONNX model zoo and gets directly downloaded from [here](https://github.com/fastmachinelearning/qonnx_model_zoo/tree/feature/gtsrb_cnv/models/GTSRB/Brevitas_CNV1W1A). diff --git a/build/gtsrb/build.py b/build/gtsrb/build.py new file mode 100644 index 0000000..a46d766 --- /dev/null +++ b/build/gtsrb/build.py @@ -0,0 +1,124 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import finn.builder.build_dataflow as build +import finn.builder.build_dataflow_config as build_cfg +from finn.builder.build_dataflow_config import default_build_dataflow_steps +from qonnx.core.datatype import DataType +import os +import shutil +import numpy as np +from onnx import helper as oh + +models = [ + "cnv_1w1a_gtsrb", +] + +# which platforms to build the networks for +zynq_platforms = ["Pynq-Z1"] +platforms_to_build = zynq_platforms + + +def custom_step_add_preproc(model, cfg): + # GTSRB data with raw uint8 pixels is divided by 255 prior to training + # reflect this in the inference graph so we can perform inference directly + # on raw uint8 data + in_name = model.graph.input[0].name + new_in_name = model.make_new_valueinfo_name() + new_param_name = model.make_new_valueinfo_name() + div_param = np.asarray(255.0, dtype=np.float32) + new_div = oh.make_node( + "Div", + [in_name, new_param_name], + [new_in_name], + name="PreprocDiv", + ) + model.set_initializer(new_param_name, div_param) + model.graph.node.insert(0, new_div) + model.graph.node[1].input[0] = new_in_name + # set input dtype to uint8 + model.set_tensor_datatype(in_name, DataType["UINT8"]) + return model + + +custom_build_steps = [custom_step_add_preproc] + default_build_dataflow_steps + + +# determine which shell flow to use for a given platform +def platform_to_shell(platform): + if platform in zynq_platforms: + return build_cfg.ShellFlowType.VIVADO_ZYNQ + else: + 
raise Exception("Unknown platform, can't determine ShellFlowType") + + +# create a release dir, used for finn-examples release packaging +os.makedirs("release", exist_ok=True) + +for platform_name in platforms_to_build: + shell_flow_type = platform_to_shell(platform_name) + vitis_platform = None + # for Zynq, use the board name as the release name + # e.g. ZCU104 + release_platform_name = platform_name + platform_dir = "release/%s" % release_platform_name + os.makedirs(platform_dir, exist_ok=True) + for model_name in models: + # set up the build configuration for this model + cfg = build_cfg.DataflowBuildConfig( + output_dir="output_%s_%s" % (model_name, release_platform_name), + target_fps=3000, + synth_clk_period_ns=10.0, + board=platform_name, + steps=custom_build_steps, + folding_config_file="folding_config/cnv_gtsrb_folding_config.json", + shell_flow_type=shell_flow_type, + vitis_platform=vitis_platform, + generate_outputs=[ + build_cfg.DataflowOutputType.ESTIMATE_REPORTS, + build_cfg.DataflowOutputType.STITCHED_IP, + build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE, + build_cfg.DataflowOutputType.BITFILE, + ], + save_intermediate_models=True, + ) + model_file = "models/%s.onnx" % model_name + # launch FINN compiler to build + build.build_dataflow_cfg(model_file, cfg) + # copy bitfiles into release dir if found + bitfile_gen_dir = cfg.output_dir + "/bitfile" + files_to_check_and_copy = [ + "finn-accel.bit", + "finn-accel.hwh", + "finn-accel.xclbin", + ] + for f in files_to_check_and_copy: + src_file = bitfile_gen_dir + "/" + f + dst_file = platform_dir + "/" + f.replace("finn-accel", model_name) + if os.path.isfile(src_file): + shutil.copy(src_file, dst_file) diff --git a/build/gtsrb/folding_config/cnv_gtsrb_folding_config.json b/build/gtsrb/folding_config/cnv_gtsrb_folding_config.json new file mode 100644 index 0000000..d96d432 --- /dev/null +++ b/build/gtsrb/folding_config/cnv_gtsrb_folding_config.json @@ -0,0 +1,78 @@ +{ + "Defaults": {}, + 
"Thresholding_rtl_0": { + "PE": 1 + }, + "ConvolutionInputGenerator_rtl_0": { + "SIMD": 3, + "ram_style": "distributed" + }, + "MVAU_hls_0": { + "PE": 16, + "SIMD": 3, + "ram_style": "auto" + }, + "ConvolutionInputGenerator_rtl_1": { + "SIMD": 32, + "ram_style": "distributed" + }, + "MVAU_hls_1": { + "PE": 32, + "SIMD": 32, + "ram_style": "auto" + }, + "ConvolutionInputGenerator_rtl_2": { + "SIMD": 32, + "ram_style": "distributed" + }, + "MVAU_hls_2": { + "PE": 16, + "SIMD": 32, + "ram_style": "auto" + }, + "ConvolutionInputGenerator_rtl_3": { + "SIMD": 32, + "ram_style": "distributed" + }, + "MVAU_hls_3": { + "PE": 16, + "SIMD": 32, + "ram_style": "auto" + }, + "ConvolutionInputGenerator_rtl_4": { + "SIMD": 32, + "ram_style": "distributed" + }, + "MVAU_hls_4": { + "PE": 4, + "SIMD": 32, + "ram_style": "auto" + }, + "ConvolutionInputGenerator_rtl_5": { + "SIMD": 32, + "ram_style": "distributed" + }, + "MVAU_hls_5": { + "PE": 1, + "SIMD": 32, + "ram_style": "auto" + }, + "MVAU_hls_6": { + "PE": 1, + "SIMD": 4, + "ram_style": "auto" + }, + "MVAU_hls_7": { + "PE": 1, + "SIMD": 8, + "ram_style": "auto" + }, + "MVAU_hls_8": { + "PE": 4, + "SIMD": 1, + "ram_style": "auto" + }, + "LabelSelect_hls_0": { + "PE": 1 + } +} diff --git a/build/gtsrb/models/download-model.sh b/build/gtsrb/models/download-model.sh new file mode 100755 index 0000000..b4b07c6 --- /dev/null +++ b/build/gtsrb/models/download-model.sh @@ -0,0 +1,2 @@ +#!/bin/bash +wget https://github.com/fastmachinelearning/qonnx_model_zoo/raw/feature/gtsrb_cnv/models/GTSRB/Brevitas_CNV1W1A/cnv_1w1a_gtsrb.onnx diff --git a/build/kws/README.md b/build/kws/README.md index c683701..40cb024 100644 --- a/build/kws/README.md +++ b/build/kws/README.md @@ -1,8 +1,8 @@ -# The KWS example +# The keyword spotting (KWS) example The KWS example includes an MLP for the Google SpeechCommandsV2 dataset. 
-## Build bitfiles for BNN-PYNQ examples +## Build bitfiles for KWS examples The build is currently configured for the PYNQ-Z1 board and a throughput of 200k FPS at a clock frequency of 100 MHz. @@ -18,7 +18,7 @@ cd $FINN_EXAMPLES/build/finn bash run-docker.sh build_custom $FINN_EXAMPLES/build/kws ``` -3. The generated outputs will be under `kws/_output__`. +3. The generated outputs will be under `kws/output__`. You can find a description of the generated files [here](https://finn-dev.readthedocs.io/en/latest/command_line.html#simple-dataflow-build-mode). The folder will additionally include the quantized inputs for verification (`all_validation_KWS_data_inputs_len_10102.npy`) and the expected outputs (`all_validation_KWS_data_outputs_len_10102.npy`). When running the network on hardware the validation should achieve an accuracy of 89.78 % with 9070 of the 10102 samples being classified correctly. diff --git a/build/kws/build.py b/build/kws/build.py index 99a908e..3a967e3 100644 --- a/build/kws/build.py +++ b/build/kws/build.py @@ -93,7 +93,7 @@ def step_preprocess(model: ModelWrapper, cfg: DataflowBuildConfig): steps=build_steps, generate_outputs=build_outputs, output_dir=last_output_dir, - target_fps=200000, + folding_config_file="folding_config/%s_kws_folding_config.json" % platform_name, synth_clk_period_ns=10.0, board=platform_name, shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ, diff --git a/build/kws/folding_config/Pynq-Z1_kws_folding_config.json b/build/kws/folding_config/Pynq-Z1_kws_folding_config.json new file mode 100644 index 0000000..932e745 --- /dev/null +++ b/build/kws/folding_config/Pynq-Z1_kws_folding_config.json @@ -0,0 +1,38 @@ +{ + "Defaults": {}, + "MVAU_hls_0": { + "PE": 32, + "SIMD": 10, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "internal_decoupled", + "runtime_writeable_weights": 0 + }, + "MVAU_hls_1": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "internal_decoupled", + 
"runtime_writeable_weights": 0 + }, + "MVAU_hls_2": { + "PE": 32, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "internal_decoupled", + "runtime_writeable_weights": 0 + }, + "MVAU_hls_3": { + "PE": 1, + "SIMD": 8, + "ram_style": "auto", + "resType": "lut", + "mem_mode": "internal_decoupled", + "runtime_writeable_weights": 0 + }, + "LabelSelect_hls_0": { + "PE": 1 + } +} diff --git a/build/mobilenet-v1/README.md b/build/mobilenet-v1/README.md index f31ad57..3746108 100644 --- a/build/mobilenet-v1/README.md +++ b/build/mobilenet-v1/README.md @@ -36,13 +36,13 @@ cd $FINN_EXAMPLES/build/finn ./run-docker.sh build_custom $FINN_EXAMPLES/build/mobilenet-v1 ``` -5. The generated outputs will be under `mobilenet-v1/output__`. You can find a description of the generated files [here](https://finn-dev.readthedocs.io/en/latest/command_line.html#simple-dataflow-build-mode). +3. The generated outputs will be under `mobilenet-v1/output__`. You can find a description of the generated files [here](https://finn-dev.readthedocs.io/en/latest/command_line.html#simple-dataflow-build-mode). ## Where did the ONNX model files come from? The 4-bit quantized MobileNet-v1 is part of the [Brevitas examples](https://github.com/Xilinx/brevitas/tree/master/src/brevitas_examples/imagenet_classification). -Subsequently, the trained networks is [exported to ONNX](https://github.com/Xilinx/finn/blob/master/notebooks/basics/1_brevitas_network_import.ipynb). In addition, the particular version used here has two additions for pre- and postprocessing: +Subsequently, the trained networks is [exported to ONNX](https://github.com/Xilinx/finn/blob/main/notebooks/basics/1_brevitas_network_import_via_QONNX.ipynb). 
In addition, the particular version used here has two additions for pre- and postprocessing: * A divide-by-255 node is added at the input, and the input is marked as 8-bit (to directly accept 8-bit images as input) * Normalization is added at the input with `mean = [0.485, 0.456, 0.406]` and `std = 0.226`. Note that the `std` is global and not per-channel to facilitate its removal via the [streamlining transform](https://arxiv.org/pdf/1709.04060). diff --git a/build/mobilenet-v1/build.py b/build/mobilenet-v1/build.py index 7ec022d..711a603 100644 --- a/build/mobilenet-v1/build.py +++ b/build/mobilenet-v1/build.py @@ -1,4 +1,5 @@ -# Copyright (c) 2020, Xilinx +# Copyright (C) 2020-2022, Xilinx, Inc. +# Copyright (C) 2024, Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -35,8 +36,8 @@ # custom steps for mobilenetv1 from custom_steps import ( step_mobilenet_streamline, - step_mobilenet_convert_to_hls_layers, - step_mobilenet_convert_to_hls_layers_separate_th, + step_mobilenet_convert_to_hw_layers, + step_mobilenet_convert_to_hw_layers_separate_th, step_mobilenet_lower_convs, step_mobilenet_slr_floorplan, ) @@ -44,10 +45,8 @@ model_name = "mobilenetv1-w4a4" # which platforms to build the networks for -# zynq_platforms = ["ZCU102", "ZCU104"] -zynq_platforms = ["ZCU102"] -# alveo_platforms = ["U50", "U200", "U250", "U280"] -alveo_platforms = ["U250"] +zynq_platforms = ["ZCU104", "ZCU102"] +alveo_platforms = ["U250"] # "U50", "U200", "U280" platforms_to_build = zynq_platforms + alveo_platforms @@ -75,13 +74,14 @@ def select_build_steps(platform): return [ step_mobilenet_streamline, step_mobilenet_lower_convs, - step_mobilenet_convert_to_hls_layers_separate_th, + step_mobilenet_convert_to_hw_layers_separate_th, "step_create_dataflow_partition", + "step_specialize_layers", "step_apply_folding_config", "step_minimize_bit_width", "step_generate_estimate_reports", - "step_hls_codegen", - 
"step_hls_ipgen", + "step_hw_codegen", + "step_hw_ipgen", "step_set_fifo_depths", "step_create_stitched_ip", "step_synthesize_bitfile", @@ -92,13 +92,14 @@ def select_build_steps(platform): return [ step_mobilenet_streamline, step_mobilenet_lower_convs, - step_mobilenet_convert_to_hls_layers, + step_mobilenet_convert_to_hw_layers, "step_create_dataflow_partition", + "step_specialize_layers", "step_apply_folding_config", "step_minimize_bit_width", "step_generate_estimate_reports", - "step_hls_codegen", - "step_hls_ipgen", + "step_hw_codegen", + "step_hw_ipgen", "step_set_fifo_depths", step_mobilenet_slr_floorplan, "step_synthesize_bitfile", @@ -123,6 +124,12 @@ def select_build_steps(platform): # for Zynq, use the board name as the release name # e.g. ZCU104 release_platform_name = platform_name + # for ZCU104 we provide a specialize layer json + specialize_layer_file = ( + "specialize_layers_config/ZCU104_specialize_layers_config.json" + if platform_name == "ZCU104" + else None + ) platform_dir = "release/%s" % release_platform_name os.makedirs(platform_dir, exist_ok=True) @@ -130,6 +137,7 @@ def select_build_steps(platform): steps=select_build_steps(platform_name), output_dir="output_%s_%s" % (model_name, release_platform_name), folding_config_file="folding_config/%s_folding_config.json" % platform_name, + specialize_layers_config_file=specialize_layer_file, synth_clk_period_ns=select_clk_period(platform_name), board=platform_name, shell_flow_type=shell_flow_type, diff --git a/build/mobilenet-v1/custom_steps.py b/build/mobilenet-v1/custom_steps.py index cb66421..6cd54af 100644 --- a/build/mobilenet-v1/custom_steps.py +++ b/build/mobilenet-v1/custom_steps.py @@ -44,7 +44,7 @@ GiveUniqueNodeNames, ApplyConfig, ) -import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls +import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw from qonnx.transformation.infer_shapes import InferShapes from qonnx.transformation.change_datalayout import 
ChangeDataLayoutQuantAvgPool2d from qonnx.transformation.infer_datatypes import InferDataTypes @@ -87,14 +87,13 @@ def step_mobilenet_lower_convs(model: ModelWrapper, cfg: DataflowBuildConfig): return model -def step_mobilenet_convert_to_hls_layers(model: ModelWrapper, cfg: DataflowBuildConfig): - mem_mode = cfg.default_mem_mode.value - model = model.transform(to_hls.InferPool_Batch()) - model = model.transform(to_hls.InferConvInpGen()) - model = model.transform(to_hls.InferVectorVectorActivation()) - model = model.transform(to_hls.InferQuantizedMatrixVectorActivation(mem_mode)) - model = model.transform(to_hls.InferChannelwiseLinearLayer()) - model = model.transform(to_hls.InferLabelSelectLayer()) +def step_mobilenet_convert_to_hw_layers(model: ModelWrapper, cfg: DataflowBuildConfig): + model = model.transform(to_hw.InferPool()) + model = model.transform(to_hw.InferConvInpGen()) + model = model.transform(to_hw.InferVectorVectorActivation()) + model = model.transform(to_hw.InferQuantizedMatrixVectorActivation()) + model = model.transform(to_hw.InferChannelwiseLinearLayer()) + model = model.transform(to_hw.InferLabelSelectLayer()) model = model.transform(InferShapes()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) @@ -104,7 +103,7 @@ def step_mobilenet_convert_to_hls_layers(model: ModelWrapper, cfg: DataflowBuild def step_mobilenet_slr_floorplan(model: ModelWrapper, cfg: DataflowBuildConfig): if cfg.shell_flow_type == ShellFlowType.VITIS_ALVEO: try: - from finn.analysis.partitioning import partition + from finnexperimental.analysis.partitioning import partition # apply partitioning of the model, restricting the first and last layers # to SLR0 @@ -125,15 +124,14 @@ def step_mobilenet_slr_floorplan(model: ModelWrapper, cfg: DataflowBuildConfig): return model -def step_mobilenet_convert_to_hls_layers_separate_th(model: ModelWrapper, cfg: DataflowBuildConfig): - mem_mode = cfg.default_mem_mode.value - model = 
model.transform(to_hls.InferPool_Batch()) - model = model.transform(to_hls.InferConvInpGen()) - model = model.transform(to_hls.InferThresholdingLayer()) - model = model.transform(to_hls.InferVectorVectorActivation()) - model = model.transform(to_hls.InferQuantizedMatrixVectorActivation(mem_mode)) - model = model.transform(to_hls.InferChannelwiseLinearLayer()) - model = model.transform(to_hls.InferLabelSelectLayer()) +def step_mobilenet_convert_to_hw_layers_separate_th(model: ModelWrapper, cfg: DataflowBuildConfig): + model = model.transform(to_hw.InferPool()) + model = model.transform(to_hw.InferConvInpGen()) + model = model.transform(to_hw.InferThresholdingLayer()) + model = model.transform(to_hw.InferVectorVectorActivation()) + model = model.transform(to_hw.InferQuantizedMatrixVectorActivation()) + model = model.transform(to_hw.InferChannelwiseLinearLayer()) + model = model.transform(to_hw.InferLabelSelectLayer()) model = model.transform(InferShapes()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) diff --git a/build/mobilenet-v1/folding_config/U200_folding_config.json b/build/mobilenet-v1/folding_config/U200_folding_config.json index a7b129a..fe3c3d2 100644 --- a/build/mobilenet-v1/folding_config/U200_folding_config.json +++ b/build/mobilenet-v1/folding_config/U200_folding_config.json @@ -1,499 +1,439 @@ { "Defaults": {}, - "StreamingFIFO_0": { + "StreamingFIFO_rtl_0": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 3, "ram_style": "distributed" }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 32, "SIMD": 3, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "dsp" }, - "FMPadding_Batch_0": { + "FMPadding_rtl_0": { "SIMD": 32 }, - "StreamingFIFO_3": { + "StreamingFIFO_rtl_3": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - 
"ConvolutionInputGenerator_1": { + "ConvolutionInputGenerator_rtl_1": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_0": { + "VVAU_hls_0": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_0": { - "impl_style": "hls" - }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_1": { - "impl_style": "hls" - }, - "FMPadding_Batch_1": { + "FMPadding_rtl_1": { "SIMD": 32 }, - "StreamingFIFO_9": { + "StreamingFIFO_rtl_9": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_1": { + "VVAU_hls_1": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_2": { - "impl_style": "hls" - }, - "StreamingFIFO_12": { + "StreamingFIFO_rtl_12": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_3": { - "impl_style": "hls" - }, - "FMPadding_Batch_2": { + "FMPadding_rtl_2": { "SIMD": 64 }, - "StreamingFIFO_15": { + "StreamingFIFO_rtl_15": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "ConvolutionInputGenerator_3": { + "ConvolutionInputGenerator_rtl_3": { "SIMD": 64, "ram_style": "distributed" }, - "VectorVectorActivation_2": { + "VVAU_hls_2": { "PE": 64, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_4": { - "impl_style": "hls" - }, - "StreamingFIFO_18": { + "StreamingFIFO_rtl_18": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": 
"internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_5": { - "impl_style": "hls" - }, - "StreamingFIFO_20": { + "StreamingFIFO_rtl_20": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_3": { + "FMPadding_rtl_3": { "SIMD": 16 }, - "StreamingFIFO_21": { + "StreamingFIFO_rtl_21": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_3": { + "VVAU_hls_3": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_23": { + "StreamingFIFO_rtl_23": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_4": { + "MVAU_hls_4": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_6": { - "impl_style": "hls" - }, - "FMPadding_Batch_4": { + "FMPadding_rtl_4": { "SIMD": 32 }, - "StreamingFIFO_26": { + "StreamingFIFO_rtl_26": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_5": { + "ConvolutionInputGenerator_rtl_5": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_4": { + "VVAU_hls_4": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_7": { - "impl_style": "hls" - }, - "StreamingFIFO_29": { + "StreamingFIFO_rtl_29": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_5": { + "MVAU_hls_5": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_8": { - "impl_style": "hls" - }, - "StreamingFIFO_31": { + "StreamingFIFO_rtl_31": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_5": { + "FMPadding_rtl_5": { "SIMD": 8 }, - "StreamingFIFO_32": { + "StreamingFIFO_rtl_32": { "ram_style": "ultra", 
"depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_6": { + "ConvolutionInputGenerator_rtl_6": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_5": { + "VVAU_hls_5": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_9": { - "impl_style": "hls" - }, - "StreamingFIFO_35": { + "StreamingFIFO_rtl_35": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_6": { + "MVAU_hls_6": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "FMPadding_Batch_6": { + "FMPadding_rtl_6": { "SIMD": 16 }, - "StreamingFIFO_37": { + "StreamingFIFO_rtl_37": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_7": { + "ConvolutionInputGenerator_rtl_7": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_6": { + "VVAU_hls_6": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_39": { + "StreamingFIFO_rtl_39": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_7": { + "MVAU_hls_7": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_10": { - "impl_style": "hls" - }, - "StreamingFIFO_41": { + "StreamingFIFO_rtl_41": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_7": { + "FMPadding_rtl_7": { "SIMD": 16 }, - "StreamingFIFO_42": { + "StreamingFIFO_rtl_42": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_8": { + "ConvolutionInputGenerator_rtl_8": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_7": { + "VVAU_hls_7": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_44": { + "StreamingFIFO_rtl_44": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_8": { + "MVAU_hls_8": { "PE": 32, "SIMD": 16, 
"ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_11": { - "impl_style": "hls" - }, - "StreamingFIFO_46": { + "StreamingFIFO_rtl_46": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_8": { + "FMPadding_rtl_8": { "SIMD": 16 }, - "StreamingFIFO_47": { + "StreamingFIFO_rtl_47": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_9": { + "ConvolutionInputGenerator_rtl_9": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_8": { + "VVAU_hls_8": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_49": { + "StreamingFIFO_rtl_49": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_9": { + "MVAU_hls_9": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_12": { - "impl_style": "hls" - }, - "StreamingFIFO_51": { + "StreamingFIFO_rtl_51": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_9": { + "FMPadding_rtl_9": { "SIMD": 16 }, - "StreamingFIFO_52": { + "StreamingFIFO_rtl_52": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_10": { + "ConvolutionInputGenerator_rtl_10": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_9": { + "VVAU_hls_9": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_54": { + "StreamingFIFO_rtl_54": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_10": { + "MVAU_hls_10": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_13": { - "impl_style": "hls" - }, - "StreamingFIFO_56": { + "StreamingFIFO_rtl_56": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - 
"FMPadding_Batch_10": { + "FMPadding_rtl_10": { "SIMD": 16 }, - "StreamingFIFO_57": { + "StreamingFIFO_rtl_57": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_11": { + "ConvolutionInputGenerator_rtl_11": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_10": { + "VVAU_hls_10": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_59": { + "StreamingFIFO_rtl_59": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_11": { + "MVAU_hls_11": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_14": { - "impl_style": "hls" - }, - "StreamingFIFO_61": { + "StreamingFIFO_rtl_61": { "ram_style": "auto", "depth": 128, "impl_style": "rtl" }, - "FMPadding_Batch_11": { + "FMPadding_rtl_11": { "SIMD": 4 }, - "StreamingFIFO_62": { + "StreamingFIFO_rtl_62": { "ram_style": "ultra", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_12": { + "ConvolutionInputGenerator_rtl_12": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_11": { + "VVAU_hls_11": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_15": { - "impl_style": "hls" - }, - "StreamingFIFO_65": { + "StreamingFIFO_rtl_65": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_12": { + "MVAU_hls_12": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_16": { - "impl_style": "hls" - }, - "StreamingFIFO_67": { + "StreamingFIFO_rtl_67": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_12": { + "FMPadding_rtl_12": { "SIMD": 8 }, - "StreamingFIFO_68": { + "StreamingFIFO_rtl_68": { "ram_style": "ultra", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_13": { + 
"ConvolutionInputGenerator_rtl_13": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_12": { + "VVAU_hls_12": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_17": { - "impl_style": "hls" - }, - "StreamingFIFO_71": { + "StreamingFIFO_rtl_71": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_13": { + "MVAU_hls_13": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_18": { - "impl_style": "hls" - }, - "ConvolutionInputGenerator_14": { + "ConvolutionInputGenerator_rtl_14": { "SIMD": 4, "ram_style": "distributed" }, - "Pool_Batch_0": { + "Pool_hls_0": { "PE": 4 }, - "MatrixVectorActivation_14": { + "MVAU_hls_14": { "PE": 4, "SIMD": 4, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_19": { - "impl_style": "hls" - }, - "ChannelwiseOp_Batch_0": { + "ChannelwiseOp_hls_0": { "PE": 1, "ram_style": "distributed" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/mobilenet-v1/folding_config/U250_folding_config.json b/build/mobilenet-v1/folding_config/U250_folding_config.json index a7b129a..fe3c3d2 100644 --- a/build/mobilenet-v1/folding_config/U250_folding_config.json +++ b/build/mobilenet-v1/folding_config/U250_folding_config.json @@ -1,499 +1,439 @@ { "Defaults": {}, - "StreamingFIFO_0": { + "StreamingFIFO_rtl_0": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 3, "ram_style": "distributed" }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 32, "SIMD": 3, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "dsp" }, - "FMPadding_Batch_0": { + "FMPadding_rtl_0": { "SIMD": 32 }, - "StreamingFIFO_3": { + 
"StreamingFIFO_rtl_3": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "ConvolutionInputGenerator_1": { + "ConvolutionInputGenerator_rtl_1": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_0": { + "VVAU_hls_0": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_0": { - "impl_style": "hls" - }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_1": { - "impl_style": "hls" - }, - "FMPadding_Batch_1": { + "FMPadding_rtl_1": { "SIMD": 32 }, - "StreamingFIFO_9": { + "StreamingFIFO_rtl_9": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_1": { + "VVAU_hls_1": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_2": { - "impl_style": "hls" - }, - "StreamingFIFO_12": { + "StreamingFIFO_rtl_12": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_3": { - "impl_style": "hls" - }, - "FMPadding_Batch_2": { + "FMPadding_rtl_2": { "SIMD": 64 }, - "StreamingFIFO_15": { + "StreamingFIFO_rtl_15": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "ConvolutionInputGenerator_3": { + "ConvolutionInputGenerator_rtl_3": { "SIMD": 64, "ram_style": "distributed" }, - "VectorVectorActivation_2": { + "VVAU_hls_2": { "PE": 64, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_4": { - "impl_style": "hls" - }, - "StreamingFIFO_18": { + "StreamingFIFO_rtl_18": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { 
"PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_5": { - "impl_style": "hls" - }, - "StreamingFIFO_20": { + "StreamingFIFO_rtl_20": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_3": { + "FMPadding_rtl_3": { "SIMD": 16 }, - "StreamingFIFO_21": { + "StreamingFIFO_rtl_21": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_3": { + "VVAU_hls_3": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_23": { + "StreamingFIFO_rtl_23": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_4": { + "MVAU_hls_4": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_6": { - "impl_style": "hls" - }, - "FMPadding_Batch_4": { + "FMPadding_rtl_4": { "SIMD": 32 }, - "StreamingFIFO_26": { + "StreamingFIFO_rtl_26": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_5": { + "ConvolutionInputGenerator_rtl_5": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_4": { + "VVAU_hls_4": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_7": { - "impl_style": "hls" - }, - "StreamingFIFO_29": { + "StreamingFIFO_rtl_29": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_5": { + "MVAU_hls_5": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_8": { - "impl_style": "hls" - }, - "StreamingFIFO_31": { + "StreamingFIFO_rtl_31": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_5": { + "FMPadding_rtl_5": { 
"SIMD": 8 }, - "StreamingFIFO_32": { + "StreamingFIFO_rtl_32": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_6": { + "ConvolutionInputGenerator_rtl_6": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_5": { + "VVAU_hls_5": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_9": { - "impl_style": "hls" - }, - "StreamingFIFO_35": { + "StreamingFIFO_rtl_35": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_6": { + "MVAU_hls_6": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "FMPadding_Batch_6": { + "FMPadding_rtl_6": { "SIMD": 16 }, - "StreamingFIFO_37": { + "StreamingFIFO_rtl_37": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_7": { + "ConvolutionInputGenerator_rtl_7": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_6": { + "VVAU_hls_6": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_39": { + "StreamingFIFO_rtl_39": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_7": { + "MVAU_hls_7": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_10": { - "impl_style": "hls" - }, - "StreamingFIFO_41": { + "StreamingFIFO_rtl_41": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_7": { + "FMPadding_rtl_7": { "SIMD": 16 }, - "StreamingFIFO_42": { + "StreamingFIFO_rtl_42": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_8": { + "ConvolutionInputGenerator_rtl_8": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_7": { + "VVAU_hls_7": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_44": { + "StreamingFIFO_rtl_44": { "ram_style": "ultra", "depth": 1024, "impl_style": 
"vivado" }, - "MatrixVectorActivation_8": { + "MVAU_hls_8": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_11": { - "impl_style": "hls" - }, - "StreamingFIFO_46": { + "StreamingFIFO_rtl_46": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_8": { + "FMPadding_rtl_8": { "SIMD": 16 }, - "StreamingFIFO_47": { + "StreamingFIFO_rtl_47": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_9": { + "ConvolutionInputGenerator_rtl_9": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_8": { + "VVAU_hls_8": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_49": { + "StreamingFIFO_rtl_49": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_9": { + "MVAU_hls_9": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_12": { - "impl_style": "hls" - }, - "StreamingFIFO_51": { + "StreamingFIFO_rtl_51": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_9": { + "FMPadding_rtl_9": { "SIMD": 16 }, - "StreamingFIFO_52": { + "StreamingFIFO_rtl_52": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_10": { + "ConvolutionInputGenerator_rtl_10": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_9": { + "VVAU_hls_9": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_54": { + "StreamingFIFO_rtl_54": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_10": { + "MVAU_hls_10": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_13": { - "impl_style": "hls" - }, - "StreamingFIFO_56": { + 
"StreamingFIFO_rtl_56": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_10": { + "FMPadding_rtl_10": { "SIMD": 16 }, - "StreamingFIFO_57": { + "StreamingFIFO_rtl_57": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_11": { + "ConvolutionInputGenerator_rtl_11": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_10": { + "VVAU_hls_10": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_59": { + "StreamingFIFO_rtl_59": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_11": { + "MVAU_hls_11": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_14": { - "impl_style": "hls" - }, - "StreamingFIFO_61": { + "StreamingFIFO_rtl_61": { "ram_style": "auto", "depth": 128, "impl_style": "rtl" }, - "FMPadding_Batch_11": { + "FMPadding_rtl_11": { "SIMD": 4 }, - "StreamingFIFO_62": { + "StreamingFIFO_rtl_62": { "ram_style": "ultra", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_12": { + "ConvolutionInputGenerator_rtl_12": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_11": { + "VVAU_hls_11": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_15": { - "impl_style": "hls" - }, - "StreamingFIFO_65": { + "StreamingFIFO_rtl_65": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_12": { + "MVAU_hls_12": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_16": { - "impl_style": "hls" - }, - "StreamingFIFO_67": { + "StreamingFIFO_rtl_67": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_12": { + "FMPadding_rtl_12": { "SIMD": 8 }, - "StreamingFIFO_68": { + "StreamingFIFO_rtl_68": { "ram_style": "ultra", "depth": 
4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_13": { + "ConvolutionInputGenerator_rtl_13": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_12": { + "VVAU_hls_12": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_17": { - "impl_style": "hls" - }, - "StreamingFIFO_71": { + "StreamingFIFO_rtl_71": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_13": { + "MVAU_hls_13": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_18": { - "impl_style": "hls" - }, - "ConvolutionInputGenerator_14": { + "ConvolutionInputGenerator_rtl_14": { "SIMD": 4, "ram_style": "distributed" }, - "Pool_Batch_0": { + "Pool_hls_0": { "PE": 4 }, - "MatrixVectorActivation_14": { + "MVAU_hls_14": { "PE": 4, "SIMD": 4, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_19": { - "impl_style": "hls" - }, - "ChannelwiseOp_Batch_0": { + "ChannelwiseOp_hls_0": { "PE": 1, "ram_style": "distributed" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/mobilenet-v1/folding_config/U280_folding_config.json b/build/mobilenet-v1/folding_config/U280_folding_config.json index a7b129a..fe3c3d2 100644 --- a/build/mobilenet-v1/folding_config/U280_folding_config.json +++ b/build/mobilenet-v1/folding_config/U280_folding_config.json @@ -1,499 +1,439 @@ { "Defaults": {}, - "StreamingFIFO_0": { + "StreamingFIFO_rtl_0": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 3, "ram_style": "distributed" }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 32, "SIMD": 3, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "dsp" }, - "FMPadding_Batch_0": { + 
"FMPadding_rtl_0": { "SIMD": 32 }, - "StreamingFIFO_3": { + "StreamingFIFO_rtl_3": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "ConvolutionInputGenerator_1": { + "ConvolutionInputGenerator_rtl_1": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_0": { + "VVAU_hls_0": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_0": { - "impl_style": "hls" - }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_1": { - "impl_style": "hls" - }, - "FMPadding_Batch_1": { + "FMPadding_rtl_1": { "SIMD": 32 }, - "StreamingFIFO_9": { + "StreamingFIFO_rtl_9": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_1": { + "VVAU_hls_1": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_2": { - "impl_style": "hls" - }, - "StreamingFIFO_12": { + "StreamingFIFO_rtl_12": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_3": { - "impl_style": "hls" - }, - "FMPadding_Batch_2": { + "FMPadding_rtl_2": { "SIMD": 64 }, - "StreamingFIFO_15": { + "StreamingFIFO_rtl_15": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "ConvolutionInputGenerator_3": { + "ConvolutionInputGenerator_rtl_3": { "SIMD": 64, "ram_style": "distributed" }, - "VectorVectorActivation_2": { + "VVAU_hls_2": { "PE": 64, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_4": { - "impl_style": "hls" - }, - "StreamingFIFO_18": { + "StreamingFIFO_rtl_18": { "ram_style": "ultra", "depth": 1024, "impl_style": 
"vivado" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_5": { - "impl_style": "hls" - }, - "StreamingFIFO_20": { + "StreamingFIFO_rtl_20": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_3": { + "FMPadding_rtl_3": { "SIMD": 16 }, - "StreamingFIFO_21": { + "StreamingFIFO_rtl_21": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_3": { + "VVAU_hls_3": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_23": { + "StreamingFIFO_rtl_23": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_4": { + "MVAU_hls_4": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_6": { - "impl_style": "hls" - }, - "FMPadding_Batch_4": { + "FMPadding_rtl_4": { "SIMD": 32 }, - "StreamingFIFO_26": { + "StreamingFIFO_rtl_26": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_5": { + "ConvolutionInputGenerator_rtl_5": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_4": { + "VVAU_hls_4": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_7": { - "impl_style": "hls" - }, - "StreamingFIFO_29": { + "StreamingFIFO_rtl_29": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_5": { + "MVAU_hls_5": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_8": { - "impl_style": "hls" - }, - "StreamingFIFO_31": { + "StreamingFIFO_rtl_31": { "ram_style": "auto", "depth": 32, 
"impl_style": "rtl" }, - "FMPadding_Batch_5": { + "FMPadding_rtl_5": { "SIMD": 8 }, - "StreamingFIFO_32": { + "StreamingFIFO_rtl_32": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_6": { + "ConvolutionInputGenerator_rtl_6": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_5": { + "VVAU_hls_5": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_9": { - "impl_style": "hls" - }, - "StreamingFIFO_35": { + "StreamingFIFO_rtl_35": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_6": { + "MVAU_hls_6": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "FMPadding_Batch_6": { + "FMPadding_rtl_6": { "SIMD": 16 }, - "StreamingFIFO_37": { + "StreamingFIFO_rtl_37": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_7": { + "ConvolutionInputGenerator_rtl_7": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_6": { + "VVAU_hls_6": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_39": { + "StreamingFIFO_rtl_39": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_7": { + "MVAU_hls_7": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_10": { - "impl_style": "hls" - }, - "StreamingFIFO_41": { + "StreamingFIFO_rtl_41": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_7": { + "FMPadding_rtl_7": { "SIMD": 16 }, - "StreamingFIFO_42": { + "StreamingFIFO_rtl_42": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_8": { + "ConvolutionInputGenerator_rtl_8": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_7": { + "VVAU_hls_7": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_44": { + 
"StreamingFIFO_rtl_44": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_8": { + "MVAU_hls_8": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_11": { - "impl_style": "hls" - }, - "StreamingFIFO_46": { + "StreamingFIFO_rtl_46": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_8": { + "FMPadding_rtl_8": { "SIMD": 16 }, - "StreamingFIFO_47": { + "StreamingFIFO_rtl_47": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_9": { + "ConvolutionInputGenerator_rtl_9": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_8": { + "VVAU_hls_8": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_49": { + "StreamingFIFO_rtl_49": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_9": { + "MVAU_hls_9": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_12": { - "impl_style": "hls" - }, - "StreamingFIFO_51": { + "StreamingFIFO_rtl_51": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_9": { + "FMPadding_rtl_9": { "SIMD": 16 }, - "StreamingFIFO_52": { + "StreamingFIFO_rtl_52": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_10": { + "ConvolutionInputGenerator_rtl_10": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_9": { + "VVAU_hls_9": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_54": { + "StreamingFIFO_rtl_54": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_10": { + "MVAU_hls_10": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - 
"StreamingDataWidthConverter_Batch_13": { - "impl_style": "hls" - }, - "StreamingFIFO_56": { + "StreamingFIFO_rtl_56": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_10": { + "FMPadding_rtl_10": { "SIMD": 16 }, - "StreamingFIFO_57": { + "StreamingFIFO_rtl_57": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_11": { + "ConvolutionInputGenerator_rtl_11": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_10": { + "VVAU_hls_10": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_59": { + "StreamingFIFO_rtl_59": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_11": { + "MVAU_hls_11": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_14": { - "impl_style": "hls" - }, - "StreamingFIFO_61": { + "StreamingFIFO_rtl_61": { "ram_style": "auto", "depth": 128, "impl_style": "rtl" }, - "FMPadding_Batch_11": { + "FMPadding_rtl_11": { "SIMD": 4 }, - "StreamingFIFO_62": { + "StreamingFIFO_rtl_62": { "ram_style": "ultra", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_12": { + "ConvolutionInputGenerator_rtl_12": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_11": { + "VVAU_hls_11": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_15": { - "impl_style": "hls" - }, - "StreamingFIFO_65": { + "StreamingFIFO_rtl_65": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_12": { + "MVAU_hls_12": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_16": { - "impl_style": "hls" - }, - "StreamingFIFO_67": { + "StreamingFIFO_rtl_67": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_12": { + "FMPadding_rtl_12": { 
"SIMD": 8 }, - "StreamingFIFO_68": { + "StreamingFIFO_rtl_68": { "ram_style": "ultra", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_13": { + "ConvolutionInputGenerator_rtl_13": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_12": { + "VVAU_hls_12": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_17": { - "impl_style": "hls" - }, - "StreamingFIFO_71": { + "StreamingFIFO_rtl_71": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_13": { + "MVAU_hls_13": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_18": { - "impl_style": "hls" - }, - "ConvolutionInputGenerator_14": { + "ConvolutionInputGenerator_rtl_14": { "SIMD": 4, "ram_style": "distributed" }, - "Pool_Batch_0": { + "Pool_hls_0": { "PE": 4 }, - "MatrixVectorActivation_14": { + "MVAU_hls_14": { "PE": 4, "SIMD": 4, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_19": { - "impl_style": "hls" - }, - "ChannelwiseOp_Batch_0": { + "ChannelwiseOp_hls_0": { "PE": 1, "ram_style": "distributed" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/mobilenet-v1/folding_config/U50_folding_config.json b/build/mobilenet-v1/folding_config/U50_folding_config.json index a7b129a..fe3c3d2 100644 --- a/build/mobilenet-v1/folding_config/U50_folding_config.json +++ b/build/mobilenet-v1/folding_config/U50_folding_config.json @@ -1,499 +1,439 @@ { "Defaults": {}, - "StreamingFIFO_0": { + "StreamingFIFO_rtl_0": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 3, "ram_style": "distributed" }, - "MatrixVectorActivation_0": { + "MVAU_hls_0": { "PE": 32, "SIMD": 3, "ram_style": "block", - "mem_mode": 
"decoupled", + "mem_mode": "internal_decoupled", "resType": "dsp" }, - "FMPadding_Batch_0": { + "FMPadding_rtl_0": { "SIMD": 32 }, - "StreamingFIFO_3": { + "StreamingFIFO_rtl_3": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "ConvolutionInputGenerator_1": { + "ConvolutionInputGenerator_rtl_1": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_0": { + "VVAU_hls_0": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_0": { - "impl_style": "hls" - }, - "MatrixVectorActivation_1": { + "MVAU_hls_1": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_1": { - "impl_style": "hls" - }, - "FMPadding_Batch_1": { + "FMPadding_rtl_1": { "SIMD": 32 }, - "StreamingFIFO_9": { + "StreamingFIFO_rtl_9": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_1": { + "VVAU_hls_1": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_2": { - "impl_style": "hls" - }, - "StreamingFIFO_12": { + "StreamingFIFO_rtl_12": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_2": { + "MVAU_hls_2": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_3": { - "impl_style": "hls" - }, - "FMPadding_Batch_2": { + "FMPadding_rtl_2": { "SIMD": 64 }, - "StreamingFIFO_15": { + "StreamingFIFO_rtl_15": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "ConvolutionInputGenerator_3": { + "ConvolutionInputGenerator_rtl_3": { "SIMD": 64, "ram_style": "distributed" }, - "VectorVectorActivation_2": { + "VVAU_hls_2": { "PE": 64, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_4": { - "impl_style": "hls" - }, - 
"StreamingFIFO_18": { + "StreamingFIFO_rtl_18": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_3": { + "MVAU_hls_3": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_5": { - "impl_style": "hls" - }, - "StreamingFIFO_20": { + "StreamingFIFO_rtl_20": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_3": { + "FMPadding_rtl_3": { "SIMD": 16 }, - "StreamingFIFO_21": { + "StreamingFIFO_rtl_21": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_3": { + "VVAU_hls_3": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_23": { + "StreamingFIFO_rtl_23": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_4": { + "MVAU_hls_4": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_6": { - "impl_style": "hls" - }, - "FMPadding_Batch_4": { + "FMPadding_rtl_4": { "SIMD": 32 }, - "StreamingFIFO_26": { + "StreamingFIFO_rtl_26": { "ram_style": "ultra", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_5": { + "ConvolutionInputGenerator_rtl_5": { "SIMD": 32, "ram_style": "distributed" }, - "VectorVectorActivation_4": { + "VVAU_hls_4": { "PE": 32, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_7": { - "impl_style": "hls" - }, - "StreamingFIFO_29": { + "StreamingFIFO_rtl_29": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_5": { + "MVAU_hls_5": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_8": { - "impl_style": "hls" - 
}, - "StreamingFIFO_31": { + "StreamingFIFO_rtl_31": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_5": { + "FMPadding_rtl_5": { "SIMD": 8 }, - "StreamingFIFO_32": { + "StreamingFIFO_rtl_32": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_6": { + "ConvolutionInputGenerator_rtl_6": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_5": { + "VVAU_hls_5": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_9": { - "impl_style": "hls" - }, - "StreamingFIFO_35": { + "StreamingFIFO_rtl_35": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_6": { + "MVAU_hls_6": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "FMPadding_Batch_6": { + "FMPadding_rtl_6": { "SIMD": 16 }, - "StreamingFIFO_37": { + "StreamingFIFO_rtl_37": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_7": { + "ConvolutionInputGenerator_rtl_7": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_6": { + "VVAU_hls_6": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_39": { + "StreamingFIFO_rtl_39": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_7": { + "MVAU_hls_7": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_10": { - "impl_style": "hls" - }, - "StreamingFIFO_41": { + "StreamingFIFO_rtl_41": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_7": { + "FMPadding_rtl_7": { "SIMD": 16 }, - "StreamingFIFO_42": { + "StreamingFIFO_rtl_42": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_8": { + "ConvolutionInputGenerator_rtl_8": { "SIMD": 16, "ram_style": "distributed" }, - 
"VectorVectorActivation_7": { + "VVAU_hls_7": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_44": { + "StreamingFIFO_rtl_44": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_8": { + "MVAU_hls_8": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_11": { - "impl_style": "hls" - }, - "StreamingFIFO_46": { + "StreamingFIFO_rtl_46": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_8": { + "FMPadding_rtl_8": { "SIMD": 16 }, - "StreamingFIFO_47": { + "StreamingFIFO_rtl_47": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_9": { + "ConvolutionInputGenerator_rtl_9": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_8": { + "VVAU_hls_8": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_49": { + "StreamingFIFO_rtl_49": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_9": { + "MVAU_hls_9": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_12": { - "impl_style": "hls" - }, - "StreamingFIFO_51": { + "StreamingFIFO_rtl_51": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_9": { + "FMPadding_rtl_9": { "SIMD": 16 }, - "StreamingFIFO_52": { + "StreamingFIFO_rtl_52": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_10": { + "ConvolutionInputGenerator_rtl_10": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_9": { + "VVAU_hls_9": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_54": { + "StreamingFIFO_rtl_54": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_10": { + "MVAU_hls_10": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": 
"decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_13": { - "impl_style": "hls" - }, - "StreamingFIFO_56": { + "StreamingFIFO_rtl_56": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_10": { + "FMPadding_rtl_10": { "SIMD": 16 }, - "StreamingFIFO_57": { + "StreamingFIFO_rtl_57": { "ram_style": "ultra", "depth": 2048, "impl_style": "vivado" }, - "ConvolutionInputGenerator_11": { + "ConvolutionInputGenerator_rtl_11": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_10": { + "VVAU_hls_10": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_59": { + "StreamingFIFO_rtl_59": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_11": { + "MVAU_hls_11": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_14": { - "impl_style": "hls" - }, - "StreamingFIFO_61": { + "StreamingFIFO_rtl_61": { "ram_style": "auto", "depth": 128, "impl_style": "rtl" }, - "FMPadding_Batch_11": { + "FMPadding_rtl_11": { "SIMD": 4 }, - "StreamingFIFO_62": { + "StreamingFIFO_rtl_62": { "ram_style": "ultra", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_12": { + "ConvolutionInputGenerator_rtl_12": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_11": { + "VVAU_hls_11": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_15": { - "impl_style": "hls" - }, - "StreamingFIFO_65": { + "StreamingFIFO_rtl_65": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "MatrixVectorActivation_12": { + "MVAU_hls_12": { "PE": 16, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_16": { - "impl_style": "hls" - }, - "StreamingFIFO_67": { + "StreamingFIFO_rtl_67": { "ram_style": "auto", "depth": 32, 
"impl_style": "rtl" }, - "FMPadding_Batch_12": { + "FMPadding_rtl_12": { "SIMD": 8 }, - "StreamingFIFO_68": { + "StreamingFIFO_rtl_68": { "ram_style": "ultra", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_13": { + "ConvolutionInputGenerator_rtl_13": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_12": { + "VVAU_hls_12": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_17": { - "impl_style": "hls" - }, - "StreamingFIFO_71": { + "StreamingFIFO_rtl_71": { "ram_style": "ultra", "depth": 1024, "impl_style": "vivado" }, - "MatrixVectorActivation_13": { + "MVAU_hls_13": { "PE": 32, "SIMD": 16, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_18": { - "impl_style": "hls" - }, - "ConvolutionInputGenerator_14": { + "ConvolutionInputGenerator_rtl_14": { "SIMD": 4, "ram_style": "distributed" }, - "Pool_Batch_0": { + "Pool_hls_0": { "PE": 4 }, - "MatrixVectorActivation_14": { + "MVAU_hls_14": { "PE": 4, "SIMD": 4, "ram_style": "block", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "resType": "lut" }, - "StreamingDataWidthConverter_Batch_19": { - "impl_style": "hls" - }, - "ChannelwiseOp_Batch_0": { + "ChannelwiseOp_hls_0": { "PE": 1, "ram_style": "distributed" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/mobilenet-v1/folding_config/ZCU102_folding_config.json b/build/mobilenet-v1/folding_config/ZCU102_folding_config.json index 8862cf2..bbeea73 100755 --- a/build/mobilenet-v1/folding_config/ZCU102_folding_config.json +++ b/build/mobilenet-v1/folding_config/ZCU102_folding_config.json @@ -1,816 +1,570 @@ { "Defaults": {}, - "StreamingFIFO_0": { + "StreamingFIFO_rtl_0": { "ram_style": "block", "depth": 1024, "impl_style": "vivado" }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 1, "ram_style": "distributed" }, - 
"StreamingDataWidthConverter_Batch_0": { - "impl_style": "hls" - }, - "MatrixVectorActivation_0": { + "MVAU_rtl_0": { "PE": 16, "SIMD": 3, "ram_style": "auto", "resType": "dsp", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingFIFO_3": { + "StreamingFIFO_rtl_3": { "ram_style": "auto", "depth": 64, "impl_style": "rtl" }, - "StreamingDataWidthConverter_Batch_1": { - "impl_style": "hls" - }, - "Thresholding_Batch_0": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_2": { - "impl_style": "hls" + "Thresholding_rtl_0": { + "PE": 1 }, - "StreamingFIFO_6": { + "StreamingFIFO_rtl_6": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "FMPadding_Batch_0": { + "FMPadding_rtl_0": { "SIMD": 2 }, - "StreamingDataWidthConverter_Batch_3": { - "impl_style": "hls" - }, - "StreamingFIFO_8": { + "StreamingFIFO_rtl_8": { "ram_style": "block", "depth": 512, "impl_style": "vivado" }, - "ConvolutionInputGenerator_1": { + "ConvolutionInputGenerator_rtl_1": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_0": { + "VVAU_hls_0": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_10": { + "StreamingFIFO_rtl_10": { "ram_style": "auto", "depth": 256, "impl_style": "rtl" }, - "StreamingDataWidthConverter_Batch_4": { - "impl_style": "hls" - }, - "Thresholding_Batch_1": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_5": { - "impl_style": "hls" + "Thresholding_rtl_1": { + "PE": 1 }, - "MatrixVectorActivation_1": { + "MVAU_rtl_1": { "PE": 8, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_6": { - "impl_style": "hls" - }, - "Thresholding_Batch_2": { - "PE": 2, - "ram_style": "distributed", - "mem_mode": "const", + "resType": 
"dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_7": { - "impl_style": "hls" + "Thresholding_rtl_2": { + "PE": 2 }, - "StreamingFIFO_17": { + "StreamingFIFO_rtl_17": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "FMPadding_Batch_1": { + "FMPadding_rtl_1": { "SIMD": 4 }, - "StreamingDataWidthConverter_Batch_8": { - "impl_style": "hls" - }, - "StreamingFIFO_19": { + "StreamingFIFO_rtl_19": { "ram_style": "block", "depth": 1024, "impl_style": "vivado" }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_1": { + "VVAU_hls_1": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_9": { - "impl_style": "hls" - }, - "Thresholding_Batch_3": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_10": { - "impl_style": "hls" + "Thresholding_rtl_3": { + "PE": 1 }, - "StreamingFIFO_24": { + "StreamingFIFO_rtl_24": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "MatrixVectorActivation_2": { + "MVAU_rtl_2": { "PE": 16, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_11": { - "impl_style": "hls" + "Thresholding_rtl_4": { + "PE": 2 }, - "Thresholding_Batch_4": { - "PE": 2, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingFIFO_27": { + "StreamingFIFO_rtl_27": { "ram_style": "auto", "depth": 128, "impl_style": "rtl" }, - "FMPadding_Batch_2": { + "FMPadding_rtl_2": { "SIMD": 2 }, - "StreamingDataWidthConverter_Batch_12": { - "impl_style": "hls" - }, - "StreamingFIFO_29": { + "StreamingFIFO_rtl_29": { "ram_style": "block", "depth": 1024, "impl_style": "vivado" }, - "ConvolutionInputGenerator_3": { 
+ "ConvolutionInputGenerator_rtl_3": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_2": { + "VVAU_hls_2": { "PE": 16, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_13": { - "impl_style": "hls" + "Thresholding_rtl_5": { + "PE": 2 }, - "Thresholding_Batch_5": { - "PE": 2, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_14": { - "impl_style": "hls" - }, - "StreamingFIFO_34": { + "StreamingFIFO_rtl_34": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "MatrixVectorActivation_3": { + "MVAU_rtl_3": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_15": { - "impl_style": "hls" + "Thresholding_rtl_6": { + "PE": 2 }, - "Thresholding_Batch_6": { - "PE": 2, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingFIFO_37": { + "StreamingFIFO_rtl_37": { "ram_style": "auto", "depth": 128, "impl_style": "rtl" }, - "FMPadding_Batch_3": { + "FMPadding_rtl_3": { "SIMD": 2 }, - "StreamingDataWidthConverter_Batch_16": { - "impl_style": "hls" - }, - "StreamingFIFO_39": { + "StreamingFIFO_rtl_39": { "ram_style": "block", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_3": { + "VVAU_hls_3": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_17": { - "impl_style": "hls" - }, - "Thresholding_Batch_7": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_18": { - "impl_style": "hls" + "Thresholding_rtl_7": { + "PE": 1 }, - "StreamingFIFO_44": { + "StreamingFIFO_rtl_44": { "ram_style": "auto", "depth": 32, "impl_style": 
"rtl" }, - "MatrixVectorActivation_4": { + "MVAU_rtl_4": { "PE": 16, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_19": { - "impl_style": "hls" - }, - "Thresholding_Batch_8": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_8": { + "PE": 1 }, - "StreamingFIFO_47": { + "StreamingFIFO_rtl_47": { "ram_style": "block", "depth": 512, "impl_style": "vivado" }, - "FMPadding_Batch_4": { + "FMPadding_rtl_4": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_20": { - "impl_style": "hls" - }, - "StreamingFIFO_49": { + "StreamingFIFO_rtl_49": { "ram_style": "block", "depth": 1024, "impl_style": "vivado" }, - "ConvolutionInputGenerator_5": { + "ConvolutionInputGenerator_rtl_5": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_4": { + "VVAU_hls_4": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_21": { - "impl_style": "hls" - }, - "Thresholding_Batch_9": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_22": { - "impl_style": "hls" + "Thresholding_rtl_9": { + "PE": 1 }, - "StreamingFIFO_54": { + "StreamingFIFO_rtl_54": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "MatrixVectorActivation_5": { + "MVAU_rtl_5": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_23": { - "impl_style": "hls" - }, - "Thresholding_Batch_10": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_10": { + "PE": 1 }, - "StreamingFIFO_57": { + "StreamingFIFO_rtl_57": { "ram_style": "block", "depth": 512, 
"impl_style": "vivado" }, - "FMPadding_Batch_5": { + "FMPadding_rtl_5": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_24": { - "impl_style": "hls" - }, - "StreamingFIFO_59": { + "StreamingFIFO_rtl_59": { "ram_style": "block", "depth": 8192, "impl_style": "vivado" }, - "ConvolutionInputGenerator_6": { + "ConvolutionInputGenerator_rtl_6": { "SIMD": 2, "ram_style": "distributed" }, - "VectorVectorActivation_5": { + "VVAU_hls_5": { "PE": 2, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_25": { - "impl_style": "hls" - }, - "Thresholding_Batch_11": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_26": { - "impl_style": "hls" + "Thresholding_rtl_11": { + "PE": 1 }, - "StreamingFIFO_64": { + "StreamingFIFO_rtl_64": { "ram_style": "auto", "depth": 32, "impl_style": "rtl" }, - "MatrixVectorActivation_6": { + "MVAU_rtl_6": { "PE": 16, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_27": { - "impl_style": "hls" - }, - "Thresholding_Batch_12": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_12": { + "PE": 1 }, - "StreamingFIFO_67": { + "StreamingFIFO_rtl_67": { "ram_style": "block", "depth": 512, "impl_style": "vivado" }, - "FMPadding_Batch_6": { + "FMPadding_rtl_6": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_28": { - "impl_style": "hls" - }, - "StreamingFIFO_69": { + "StreamingFIFO_rtl_69": { "ram_style": "block", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_7": { + "ConvolutionInputGenerator_rtl_7": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_6": { + "VVAU_hls_6": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_29": { - "impl_style": "hls" - }, - 
"Thresholding_Batch_13": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_30": { - "impl_style": "hls" + "Thresholding_rtl_13": { + "PE": 1 }, - "StreamingFIFO_74": { + "StreamingFIFO_rtl_74": { "ram_style": "auto", "depth": 64, "impl_style": "rtl" }, - "MatrixVectorActivation_7": { + "MVAU_rtl_7": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_31": { - "impl_style": "hls" - }, - "Thresholding_Batch_14": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_14": { + "PE": 1 }, - "StreamingFIFO_77": { + "StreamingFIFO_rtl_77": { "ram_style": "block", "depth": 512, "impl_style": "vivado" }, - "FMPadding_Batch_7": { + "FMPadding_rtl_7": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_32": { - "impl_style": "hls" - }, - "StreamingFIFO_79": { + "StreamingFIFO_rtl_79": { "ram_style": "block", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_8": { + "ConvolutionInputGenerator_rtl_8": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_7": { + "VVAU_hls_7": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_33": { - "impl_style": "hls" - }, - "Thresholding_Batch_15": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_34": { - "impl_style": "hls" + "Thresholding_rtl_15": { + "PE": 1 }, - "StreamingFIFO_84": { + "StreamingFIFO_rtl_84": { "ram_style": "auto", "depth": 64, "impl_style": "rtl" }, - "MatrixVectorActivation_8": { + "MVAU_rtl_8": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", 
"runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_35": { - "impl_style": "hls" - }, - "Thresholding_Batch_16": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_16": { + "PE": 1 }, - "StreamingFIFO_87": { + "StreamingFIFO_rtl_87": { "ram_style": "block", "depth": 512, "impl_style": "vivado" }, - "FMPadding_Batch_8": { + "FMPadding_rtl_8": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_36": { - "impl_style": "hls" - }, - "StreamingFIFO_89": { + "StreamingFIFO_rtl_89": { "ram_style": "block", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_9": { + "ConvolutionInputGenerator_rtl_9": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_8": { + "VVAU_hls_8": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_37": { - "impl_style": "hls" - }, - "Thresholding_Batch_17": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_38": { - "impl_style": "hls" + "Thresholding_rtl_17": { + "PE": 1 }, - "StreamingFIFO_94": { + "StreamingFIFO_rtl_94": { "ram_style": "auto", "depth": 64, "impl_style": "rtl" }, - "MatrixVectorActivation_9": { + "MVAU_rtl_9": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_39": { - "impl_style": "hls" - }, - "Thresholding_Batch_18": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_18": { + "PE": 1 }, - "StreamingFIFO_97": { + "StreamingFIFO_rtl_97": { "ram_style": "block", "depth": 512, "impl_style": "vivado" }, - "FMPadding_Batch_9": { + "FMPadding_rtl_9": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_40": { - "impl_style": "hls" - }, - "StreamingFIFO_99": { + 
"StreamingFIFO_rtl_99": { "ram_style": "block", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_10": { + "ConvolutionInputGenerator_rtl_10": { "SIMD": 4, "ram_style": "distributed" }, - "VectorVectorActivation_9": { + "VVAU_hls_9": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_41": { - "impl_style": "hls" - }, - "Thresholding_Batch_19": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_42": { - "impl_style": "hls" + "Thresholding_rtl_19": { + "PE": 1 }, - "StreamingFIFO_104": { + "StreamingFIFO_rtl_104": { "ram_style": "auto", "depth": 64, "impl_style": "rtl" }, - "MatrixVectorActivation_10": { + "MVAU_rtl_10": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_43": { - "impl_style": "hls" - }, - "Thresholding_Batch_20": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_20": { + "PE": 1 }, - "StreamingFIFO_107": { + "StreamingFIFO_rtl_107": { "ram_style": "block", "depth": 512, "impl_style": "vivado" }, - "FMPadding_Batch_10": { + "FMPadding_rtl_10": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_44": { - "impl_style": "hls" - }, - "StreamingFIFO_109": { + "StreamingFIFO_rtl_109": { "ram_style": "block", "depth": 4096, "impl_style": "vivado" }, - "ConvolutionInputGenerator_11": { + "ConvolutionInputGenerator_rtl_11": { "SIMD": 4, "ram_style": "block" }, - "VectorVectorActivation_10": { + "VVAU_hls_10": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_45": { - "impl_style": "hls" - }, - "Thresholding_Batch_21": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_46": { - "impl_style": "hls" 
+ "Thresholding_rtl_21": { + "PE": 1 }, - "StreamingFIFO_114": { + "StreamingFIFO_rtl_114": { "ram_style": "auto", "depth": 64, "impl_style": "rtl" }, - "MatrixVectorActivation_11": { + "MVAU_rtl_11": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_47": { - "impl_style": "hls" - }, - "Thresholding_Batch_22": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_22": { + "PE": 1 }, - "StreamingFIFO_117": { + "StreamingFIFO_rtl_117": { "ram_style": "block", "depth": 512, "impl_style": "vivado" }, - "FMPadding_Batch_11": { + "FMPadding_rtl_11": { "SIMD": 1 }, - "StreamingFIFO_118": { + "StreamingFIFO_rtl_118": { "ram_style": "block", "depth": 16384, "impl_style": "vivado" }, - "ConvolutionInputGenerator_12": { + "ConvolutionInputGenerator_rtl_12": { "SIMD": 1, "ram_style": "block" }, - "VectorVectorActivation_11": { + "VVAU_hls_11": { "PE": 1, "resType": "lut" }, - "Thresholding_Batch_23": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_48": { - "impl_style": "hls" + "Thresholding_rtl_23": { + "PE": 1 }, - "StreamingFIFO_122": { + "StreamingFIFO_rtl_122": { "ram_style": "auto", "depth": 64, "impl_style": "rtl" }, - "MatrixVectorActivation_12": { + "MVAU_rtl_12": { "PE": 16, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_49": { - "impl_style": "hls" - }, - "Thresholding_Batch_24": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_24": { + "PE": 1 }, - "StreamingFIFO_125": { + "StreamingFIFO_rtl_125": { "ram_style": "block", 
"depth": 1024, "impl_style": "vivado" }, - "FMPadding_Batch_12": { + "FMPadding_rtl_12": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_50": { - "impl_style": "hls" - }, - "StreamingFIFO_127": { + "StreamingFIFO_rtl_127": { "ram_style": "block", "depth": 16384, "impl_style": "vivado" }, - "ConvolutionInputGenerator_13": { + "ConvolutionInputGenerator_rtl_13": { "SIMD": 2, "ram_style": "block" }, - "VectorVectorActivation_12": { + "VVAU_hls_12": { "PE": 2, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_51": { - "impl_style": "hls" - }, - "Thresholding_Batch_25": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 - }, - "StreamingDataWidthConverter_Batch_52": { - "impl_style": "hls" + "Thresholding_rtl_25": { + "PE": 1 }, - "StreamingFIFO_132": { + "StreamingFIFO_rtl_132": { "ram_style": "auto", "depth": 128, "impl_style": "rtl" }, - "MatrixVectorActivation_13": { + "MVAU_rtl_13": { "PE": 32, "SIMD": 8, "ram_style": "block", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_53": { - "impl_style": "hls" - }, - "Thresholding_Batch_26": { - "PE": 1, - "ram_style": "distributed", - "mem_mode": "const", - "runtime_writeable_weights": 0 + "Thresholding_rtl_26": { + "PE": 1 }, - "ConvolutionInputGenerator_14": { + "ConvolutionInputGenerator_rtl_14": { "SIMD": 1, "ram_style": "block" }, - "Pool_Batch_0": { + "Pool_hls_0": { "PE": 1 }, - "StreamingDataWidthConverter_Batch_54": { - "impl_style": "hls" - }, - "MatrixVectorActivation_14": { + "MVAU_rtl_14": { "PE": 1, "SIMD": 16, "ram_style": "block", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "ChannelwiseOp_Batch_0": { + "ChannelwiseOp_hls_0": { "PE": 1, "ram_style": "distributed" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } 
diff --git a/build/mobilenet-v1/folding_config/ZCU104_folding_config.json b/build/mobilenet-v1/folding_config/ZCU104_folding_config.json index 40a687a..e300886 100755 --- a/build/mobilenet-v1/folding_config/ZCU104_folding_config.json +++ b/build/mobilenet-v1/folding_config/ZCU104_folding_config.json @@ -1,816 +1,610 @@ { "Defaults": {}, - "StreamingFIFO_0": { + "StreamingFIFO_rtl_0": { "ram_style": "ultra", - "depth": 1024, - "impl_style": "vivado" + "depth": 1024 }, - "ConvolutionInputGenerator_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 1, "ram_style": "distributed" }, - "StreamingDataWidthConverter_Batch_0": { - "impl_style": "hls" - }, - "MatrixVectorActivation_0": { + "MVAU_rtl_0": { "PE": 16, "SIMD": 3, "ram_style": "auto", "resType": "dsp", - "mem_mode": "decoupled", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingFIFO_3": { + "StreamingFIFO_rtl_3": { "ram_style": "auto", - "depth": 64, - "impl_style": "rtl" - }, - "StreamingDataWidthConverter_Batch_1": { - "impl_style": "hls" + "depth": 64 }, - "Thresholding_Batch_0": { + "Thresholding_hls_0": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_2": { - "impl_style": "hls" - }, - "StreamingFIFO_6": { + "StreamingFIFO_rtl_6": { "ram_style": "auto", - "depth": 256, - "impl_style": "rtl" + "depth": 256 }, - "FMPadding_Batch_0": { + "FMPadding_rtl_0": { "SIMD": 2 }, - "StreamingDataWidthConverter_Batch_3": { - "impl_style": "hls" - }, - "StreamingFIFO_8": { + "StreamingFIFO_rtl_8": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "ConvolutionInputGenerator_1": { + "ConvolutionInputGenerator_rtl_1": { "SIMD": 16, "ram_style": "distributed" }, - "VectorVectorActivation_0": { + "VVAU_hls_0": { "PE": 16, "resType": "lut" }, - "StreamingFIFO_10": { + "StreamingFIFO_rtl_10": { "ram_style": "auto", - "depth": 256, - 
"impl_style": "rtl" - }, - "StreamingDataWidthConverter_Batch_4": { - "impl_style": "hls" + "depth": 256 }, - "Thresholding_Batch_1": { + "Thresholding_hls_1": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_5": { - "impl_style": "hls" - }, - "MatrixVectorActivation_1": { + "MVAU_rtl_1": { "PE": 8, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_6": { - "impl_style": "hls" - }, - "Thresholding_Batch_2": { + "Thresholding_hls_2": { "PE": 2, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_7": { - "impl_style": "hls" - }, - "StreamingFIFO_17": { + "StreamingFIFO_rtl_17": { "ram_style": "auto", - "depth": 32, - "impl_style": "rtl" + "depth": 32 }, - "FMPadding_Batch_1": { + "FMPadding_rtl_1": { "SIMD": 4 }, - "StreamingDataWidthConverter_Batch_8": { - "impl_style": "hls" - }, - "StreamingFIFO_19": { + "StreamingFIFO_rtl_19": { "ram_style": "ultra", - "depth": 1024, - "impl_style": "vivado" + "depth": 1024 }, - "ConvolutionInputGenerator_2": { + "ConvolutionInputGenerator_rtl_2": { "SIMD": 8, "ram_style": "distributed" }, - "VectorVectorActivation_1": { + "VVAU_hls_1": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_9": { - "impl_style": "hls" - }, - "Thresholding_Batch_3": { + "Thresholding_hls_3": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_10": { - "impl_style": "hls" - }, - "StreamingFIFO_24": { + "StreamingFIFO_rtl_24": { "ram_style": "auto", - "depth": 32, - "impl_style": "rtl" + "depth": 32 }, - "MatrixVectorActivation_2": { + "MVAU_rtl_2": { 
"PE": 16, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_11": { - "impl_style": "hls" - }, - "Thresholding_Batch_4": { + "Thresholding_hls_4": { "PE": 2, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_27": { + "StreamingFIFO_rtl_27": { "ram_style": "auto", - "depth": 128, - "impl_style": "rtl" + "depth": 128 }, - "FMPadding_Batch_2": { + "FMPadding_rtl_2": { "SIMD": 2 }, - "StreamingDataWidthConverter_Batch_12": { - "impl_style": "hls" - }, - "StreamingFIFO_29": { + "StreamingFIFO_rtl_29": { "ram_style": "ultra", - "depth": 1024, - "impl_style": "vivado" + "depth": 1024 }, - "ConvolutionInputGenerator_3": { + "ConvolutionInputGenerator_rtl_3": { "SIMD": 16, "ram_style": "block" }, - "VectorVectorActivation_2": { + "VVAU_hls_2": { "PE": 16, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_13": { - "impl_style": "hls" - }, - "Thresholding_Batch_5": { + "Thresholding_hls_5": { "PE": 2, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_14": { - "impl_style": "hls" - }, - "StreamingFIFO_34": { + "StreamingFIFO_rtl_34": { "ram_style": "auto", - "depth": 32, - "impl_style": "rtl" + "depth": 32 }, - "MatrixVectorActivation_3": { + "MVAU_rtl_3": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_15": { - "impl_style": "hls" - }, - "Thresholding_Batch_6": { + "Thresholding_hls_6": { "PE": 2, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_37": { + 
"StreamingFIFO_rtl_37": { "ram_style": "auto", - "depth": 128, - "impl_style": "rtl" + "depth": 128 }, - "FMPadding_Batch_3": { + "FMPadding_rtl_3": { "SIMD": 2 }, - "StreamingDataWidthConverter_Batch_16": { - "impl_style": "hls" - }, - "StreamingFIFO_39": { + "StreamingFIFO_rtl_39": { "ram_style": "ultra", - "depth": 4096, - "impl_style": "vivado" + "depth": 4096 }, - "ConvolutionInputGenerator_4": { + "ConvolutionInputGenerator_rtl_4": { "SIMD": 4, "ram_style": "block" }, - "VectorVectorActivation_3": { + "VVAU_hls_3": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_17": { - "impl_style": "hls" - }, - "Thresholding_Batch_7": { + "Thresholding_hls_7": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_18": { - "impl_style": "hls" - }, - "StreamingFIFO_44": { + "StreamingFIFO_rtl_44": { "ram_style": "auto", - "depth": 32, - "impl_style": "rtl" + "depth": 32 }, - "MatrixVectorActivation_4": { + "MVAU_rtl_4": { "PE": 16, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_19": { - "impl_style": "hls" - }, - "Thresholding_Batch_8": { + "Thresholding_hls_8": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_47": { + "StreamingFIFO_rtl_47": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "FMPadding_Batch_4": { + "FMPadding_rtl_4": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_20": { - "impl_style": "hls" - }, - "StreamingFIFO_49": { + "StreamingFIFO_rtl_49": { "ram_style": "ultra", - "depth": 1024, - "impl_style": "vivado" + "depth": 1024 }, - "ConvolutionInputGenerator_5": { + "ConvolutionInputGenerator_rtl_5": { "SIMD": 8, "ram_style": "block" 
}, - "VectorVectorActivation_4": { + "VVAU_hls_4": { "PE": 8, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_21": { - "impl_style": "hls" - }, - "Thresholding_Batch_9": { + "Thresholding_hls_9": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_22": { - "impl_style": "hls" - }, - "StreamingFIFO_54": { + "StreamingFIFO_rtl_54": { "ram_style": "auto", - "depth": 32, - "impl_style": "rtl" + "depth": 32 }, - "MatrixVectorActivation_5": { + "MVAU_rtl_5": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_23": { - "impl_style": "hls" - }, - "Thresholding_Batch_10": { + "Thresholding_hls_10": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_57": { + "StreamingFIFO_rtl_57": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "FMPadding_Batch_5": { + "FMPadding_rtl_5": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_24": { - "impl_style": "hls" - }, - "StreamingFIFO_59": { + "StreamingFIFO_rtl_59": { "ram_style": "ultra", - "depth": 8192, - "impl_style": "vivado" + "depth": 8192 }, - "ConvolutionInputGenerator_6": { + "ConvolutionInputGenerator_rtl_6": { "SIMD": 2, "ram_style": "block" }, - "VectorVectorActivation_5": { + "VVAU_hls_5": { "PE": 2, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_25": { - "impl_style": "hls" - }, - "Thresholding_Batch_11": { + "Thresholding_hls_11": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_26": { - "impl_style": "hls" - }, - "StreamingFIFO_64": { + "StreamingFIFO_rtl_64": { "ram_style": 
"auto", - "depth": 32, - "impl_style": "rtl" + "depth": 32 }, - "MatrixVectorActivation_6": { + "MVAU_rtl_6": { "PE": 16, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_27": { - "impl_style": "hls" - }, - "Thresholding_Batch_12": { + "Thresholding_hls_12": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_67": { + "StreamingFIFO_rtl_67": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "FMPadding_Batch_6": { + "FMPadding_rtl_6": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_28": { - "impl_style": "hls" - }, - "StreamingFIFO_69": { + "StreamingFIFO_rtl_69": { "ram_style": "ultra", - "depth": 4096, - "impl_style": "vivado" + "depth": 4096 }, - "ConvolutionInputGenerator_7": { + "ConvolutionInputGenerator_rtl_7": { "SIMD": 4, "ram_style": "block" }, - "VectorVectorActivation_6": { + "VVAU_hls_6": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_29": { - "impl_style": "hls" - }, - "Thresholding_Batch_13": { + "Thresholding_hls_13": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_30": { - "impl_style": "hls" - }, - "StreamingFIFO_74": { + "StreamingFIFO_rtl_74": { "ram_style": "auto", - "depth": 64, - "impl_style": "rtl" + "depth": 64 }, - "MatrixVectorActivation_7": { + "MVAU_rtl_7": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_31": { - "impl_style": "hls" - }, - "Thresholding_Batch_14": { + "Thresholding_hls_14": { "PE": 1, "ram_style": "distributed", - "mem_mode": 
"const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_77": { + "StreamingFIFO_rtl_77": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "FMPadding_Batch_7": { + "FMPadding_rtl_7": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_32": { - "impl_style": "hls" - }, - "StreamingFIFO_79": { + "StreamingFIFO_rtl_79": { "ram_style": "ultra", - "depth": 4096, - "impl_style": "vivado" + "depth": 4096 }, - "ConvolutionInputGenerator_8": { + "ConvolutionInputGenerator_rtl_8": { "SIMD": 4, "ram_style": "block" }, - "VectorVectorActivation_7": { + "VVAU_hls_7": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_33": { - "impl_style": "hls" - }, - "Thresholding_Batch_15": { + "Thresholding_hls_15": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_34": { - "impl_style": "hls" - }, - "StreamingFIFO_84": { + "StreamingFIFO_rtl_84": { "ram_style": "auto", - "depth": 64, - "impl_style": "rtl" + "depth": 64 }, - "MatrixVectorActivation_8": { + "MVAU_rtl_8": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_35": { - "impl_style": "hls" - }, - "Thresholding_Batch_16": { + "Thresholding_hls_16": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_87": { + "StreamingFIFO_rtl_87": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "FMPadding_Batch_8": { + "FMPadding_rtl_8": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_36": { - "impl_style": "hls" - }, - "StreamingFIFO_89": { + "StreamingFIFO_rtl_89": { "ram_style": "ultra", - "depth": 4096, - "impl_style": "vivado" + "depth": 
4096 }, - "ConvolutionInputGenerator_9": { + "ConvolutionInputGenerator_rtl_9": { "SIMD": 4, "ram_style": "block" }, - "VectorVectorActivation_8": { + "VVAU_hls_8": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_37": { - "impl_style": "hls" - }, - "Thresholding_Batch_17": { + "Thresholding_hls_17": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_38": { - "impl_style": "hls" - }, - "StreamingFIFO_94": { + "StreamingFIFO_rtl_94": { "ram_style": "auto", - "depth": 64, - "impl_style": "rtl" + "depth": 64 }, - "MatrixVectorActivation_9": { + "MVAU_rtl_9": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_39": { - "impl_style": "hls" - }, - "Thresholding_Batch_18": { + "Thresholding_hls_18": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_97": { + "StreamingFIFO_rtl_97": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "FMPadding_Batch_9": { + "FMPadding_rtl_9": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_40": { - "impl_style": "hls" - }, - "StreamingFIFO_99": { + "StreamingFIFO_rtl_99": { "ram_style": "ultra", - "depth": 4096, - "impl_style": "vivado" + "depth": 4096 }, - "ConvolutionInputGenerator_10": { + "ConvolutionInputGenerator_rtl_10": { "SIMD": 4, "ram_style": "block" }, - "VectorVectorActivation_9": { + "VVAU_hls_9": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_41": { - "impl_style": "hls" - }, - "Thresholding_Batch_19": { + "Thresholding_hls_19": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - 
"StreamingDataWidthConverter_Batch_42": { - "impl_style": "hls" - }, - "StreamingFIFO_104": { + "StreamingFIFO_rtl_104": { "ram_style": "auto", - "depth": 64, - "impl_style": "rtl" + "depth": 64 }, - "MatrixVectorActivation_10": { + "MVAU_rtl_10": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_43": { - "impl_style": "hls" - }, - "Thresholding_Batch_20": { + "Thresholding_hls_20": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_107": { + "StreamingFIFO_rtl_107": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "FMPadding_Batch_10": { + "FMPadding_rtl_10": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_44": { - "impl_style": "hls" - }, - "StreamingFIFO_109": { + "StreamingFIFO_rtl_109": { "ram_style": "ultra", - "depth": 4096, - "impl_style": "vivado" + "depth": 4096 }, - "ConvolutionInputGenerator_11": { + "ConvolutionInputGenerator_rtl_11": { "SIMD": 4, "ram_style": "block" }, - "VectorVectorActivation_10": { + "VVAU_hls_10": { "PE": 4, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_45": { - "impl_style": "hls" - }, - "Thresholding_Batch_21": { + "Thresholding_hls_21": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_46": { - "impl_style": "hls" - }, - "StreamingFIFO_114": { + "StreamingFIFO_rtl_114": { "ram_style": "auto", - "depth": 64, - "impl_style": "rtl" + "depth": 64 }, - "MatrixVectorActivation_11": { + "MVAU_rtl_11": { "PE": 32, "SIMD": 8, "ram_style": "auto", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - 
"StreamingDataWidthConverter_Batch_47": { - "impl_style": "hls" - }, - "Thresholding_Batch_22": { + "Thresholding_hls_22": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_117": { + "StreamingFIFO_rtl_117": { "ram_style": "ultra", - "depth": 512, - "impl_style": "vivado" + "depth": 512 }, - "FMPadding_Batch_11": { + "FMPadding_rtl_11": { "SIMD": 1 }, - "StreamingFIFO_118": { + "StreamingFIFO_rtl_118": { "ram_style": "ultra", - "depth": 16384, - "impl_style": "vivado" + "depth": 16384 }, - "ConvolutionInputGenerator_12": { + "ConvolutionInputGenerator_rtl_12": { "SIMD": 1, "ram_style": "block" }, - "VectorVectorActivation_11": { + "VVAU_hls_11": { "PE": 1, "resType": "lut" }, - "Thresholding_Batch_23": { + "Thresholding_hls_23": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_48": { - "impl_style": "hls" - }, - "StreamingFIFO_122": { + "StreamingFIFO_rtl_122": { "ram_style": "auto", - "depth": 64, - "impl_style": "rtl" + "depth": 64 }, - "MatrixVectorActivation_12": { + "MVAU_rtl_12": { "PE": 16, "SIMD": 8, "ram_style": "ultra", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 1 }, - "StreamingDataWidthConverter_Batch_49": { - "impl_style": "hls" - }, - "Thresholding_Batch_24": { + "Thresholding_hls_24": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingFIFO_125": { + "StreamingFIFO_rtl_125": { "ram_style": "ultra", - "depth": 1024, - "impl_style": "vivado" + "depth": 1024 }, - "FMPadding_Batch_12": { + "FMPadding_rtl_12": { "SIMD": 1 }, - "StreamingDataWidthConverter_Batch_50": { - "impl_style": "hls" - }, - "StreamingFIFO_127": { + "StreamingFIFO_rtl_127": { "ram_style": 
"ultra", - "depth": 16384, - "impl_style": "vivado" + "depth": 16384 }, - "ConvolutionInputGenerator_13": { + "ConvolutionInputGenerator_rtl_13": { "SIMD": 2, "ram_style": "block" }, - "VectorVectorActivation_12": { + "VVAU_hls_12": { "PE": 2, "resType": "lut" }, - "StreamingDataWidthConverter_Batch_51": { - "impl_style": "hls" - }, - "Thresholding_Batch_25": { + "Thresholding_hls_25": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "StreamingDataWidthConverter_Batch_52": { - "impl_style": "hls" - }, - "StreamingFIFO_132": { + "StreamingFIFO_rtl_132": { "ram_style": "auto", - "depth": 128, - "impl_style": "rtl" + "depth": 128 }, - "MatrixVectorActivation_13": { + "MVAU_rtl_13": { "PE": 32, "SIMD": 8, "ram_style": "ultra", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 1 }, - "StreamingDataWidthConverter_Batch_53": { - "impl_style": "hls" - }, - "Thresholding_Batch_26": { + "Thresholding_hls_26": { "PE": 1, "ram_style": "distributed", - "mem_mode": "const", + "mem_mode": "internal_embedded", "runtime_writeable_weights": 0 }, - "ConvolutionInputGenerator_14": { + "ConvolutionInputGenerator_rtl_14": { "SIMD": 1, "ram_style": "block" }, - "Pool_Batch_0": { + "Pool_hls_0": { "PE": 1 }, - "StreamingDataWidthConverter_Batch_54": { - "impl_style": "hls" - }, - "MatrixVectorActivation_14": { + "MVAU_rtl_14": { "PE": 1, "SIMD": 16, "ram_style": "ultra", - "resType": "lut", - "mem_mode": "decoupled", + "resType": "dsp", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 1 }, - "ChannelwiseOp_Batch_0": { + "ChannelwiseOp_hls_0": { "PE": 1, "ram_style": "distributed" }, - "LabelSelect_Batch_0": { + "LabelSelect_hls_0": { "PE": 1 } } diff --git a/build/mobilenet-v1/specialize_layers_config/ZCU104_specialize_layers_config.json 
b/build/mobilenet-v1/specialize_layers_config/ZCU104_specialize_layers_config.json new file mode 100644 index 0000000..f766d4a --- /dev/null +++ b/build/mobilenet-v1/specialize_layers_config/ZCU104_specialize_layers_config.json @@ -0,0 +1,261 @@ +{ + "Defaults": {}, + "ConvolutionInputGenerator_0": { + "preferred_impl_style": "rtl" + }, + "MVAU_0": { + "preferred_impl_style": "rtl" + }, + "Thresholding_0": { + "preferred_impl_style": "hls" + }, + "FMPadding_0": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_1": { + "preferred_impl_style": "rtl" + }, + "VVAU_0": { + "preferred_impl_style": "hls" + }, + "Thresholding_1": { + "preferred_impl_style": "hls" + }, + "MVAU_1": { + "preferred_impl_style": "rtl" + }, + "Thresholding_2": { + "preferred_impl_style": "hls" + }, + "FMPadding_1": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_2": { + "preferred_impl_style": "rtl" + }, + "VVAU_1": { + "preferred_impl_style": "hls" + }, + "Thresholding_3": { + "preferred_impl_style": "hls" + }, + "MVAU_2": { + "preferred_impl_style": "rtl" + }, + "Thresholding_4": { + "preferred_impl_style": "hls" + }, + "FMPadding_2": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_3": { + "preferred_impl_style": "rtl" + }, + "VVAU_2": { + "preferred_impl_style": "hls" + }, + "Thresholding_5": { + "preferred_impl_style": "hls" + }, + "MVAU_3": { + "preferred_impl_style": "rtl" + }, + "Thresholding_6": { + "preferred_impl_style": "hls" + }, + "FMPadding_3": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_4": { + "preferred_impl_style": "rtl" + }, + "VVAU_3": { + "preferred_impl_style": "hls" + }, + "Thresholding_7": { + "preferred_impl_style": "hls" + }, + "MVAU_4": { + "preferred_impl_style": "rtl" + }, + "Thresholding_8": { + "preferred_impl_style": "hls" + }, + "FMPadding_4": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_5": { + "preferred_impl_style": "rtl" + }, + "VVAU_4": { + 
"preferred_impl_style": "hls" + }, + "Thresholding_9": { + "preferred_impl_style": "hls" + }, + "MVAU_5": { + "preferred_impl_style": "rtl" + }, + "Thresholding_10": { + "preferred_impl_style": "hls" + }, + "FMPadding_5": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_6": { + "preferred_impl_style": "rtl" + }, + "VVAU_5": { + "preferred_impl_style": "hls" + }, + "Thresholding_11": { + "preferred_impl_style": "hls" + }, + "MVAU_6": { + "preferred_impl_style": "rtl" + }, + "Thresholding_12": { + "preferred_impl_style": "hls" + }, + "FMPadding_6": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_7": { + "preferred_impl_style": "rtl" + }, + "VVAU_6": { + "preferred_impl_style": "hls" + }, + "Thresholding_13": { + "preferred_impl_style": "hls" + }, + "MVAU_7": { + "preferred_impl_style": "rtl" + }, + "Thresholding_14": { + "preferred_impl_style": "hls" + }, + "FMPadding_7": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_8": { + "preferred_impl_style": "rtl" + }, + "VVAU_7": { + "preferred_impl_style": "hls" + }, + "Thresholding_15": { + "preferred_impl_style": "hls" + }, + "MVAU_8": { + "preferred_impl_style": "rtl" + }, + "Thresholding_16": { + "preferred_impl_style": "hls" + }, + "FMPadding_8": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_9": { + "preferred_impl_style": "rtl" + }, + "VVAU_8": { + "preferred_impl_style": "hls" + }, + "Thresholding_17": { + "preferred_impl_style": "hls" + }, + "MVAU_9": { + "preferred_impl_style": "rtl" + }, + "Thresholding_18": { + "preferred_impl_style": "hls" + }, + "FMPadding_9": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_10": { + "preferred_impl_style": "rtl" + }, + "VVAU_9": { + "preferred_impl_style": "hls" + }, + "Thresholding_19": { + "preferred_impl_style": "hls" + }, + "MVAU_10": { + "preferred_impl_style": "rtl" + }, + "Thresholding_20": { + "preferred_impl_style": "hls" + }, + "FMPadding_10": { + 
"preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_11": { + "preferred_impl_style": "rtl" + }, + "VVAU_10": { + "preferred_impl_style": "hls" + }, + "Thresholding_21": { + "preferred_impl_style": "hls" + }, + "MVAU_11": { + "preferred_impl_style": "rtl" + }, + "Thresholding_22": { + "preferred_impl_style": "hls" + }, + "FMPadding_11": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_12": { + "preferred_impl_style": "rtl" + }, + "VVAU_11": { + "preferred_impl_style": "hls" + }, + "Thresholding_23": { + "preferred_impl_style": "hls" + }, + "MVAU_12": { + "preferred_impl_style": "rtl" + }, + "Thresholding_24": { + "preferred_impl_style": "hls" + }, + "FMPadding_12": { + "preferred_impl_style": "rtl" + }, + "ConvolutionInputGenerator_13": { + "preferred_impl_style": "rtl" + }, + "VVAU_12": { + "preferred_impl_style": "hls" + }, + "Thresholding_25": { + "preferred_impl_style": "hls" + }, + "MVAU_13": { + "preferred_impl_style": "rtl" + }, + "Thresholding_26": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_14": { + "preferred_impl_style": "rtl" + }, + "Pool_0": { + "preferred_impl_style": "hls" + }, + "MVAU_14": { + "preferred_impl_style": "rtl" + }, + "ChannelwiseOp_0": { + "preferred_impl_style": "hls" + }, + "LabelSelect_0": { + "preferred_impl_style": "hls" + } + } diff --git a/build/resnet50/README.md b/build/resnet50/README.md index 3eab06f..8300f27 100644 --- a/build/resnet50/README.md +++ b/build/resnet50/README.md @@ -28,6 +28,6 @@ cd $FINN_EXAMPLES/build/finn ./run-docker.sh build_custom $FINN_EXAMPLES/build/resnet50 ``` -5. The generated outputs will be under `resnet50/output__`. You can find a description of the generated files [here](https://finn-dev.readthedocs.io/en/latest/command_line.html#simple-dataflow-build-mode). +3. The generated outputs will be under `resnet50/output__`. 
You can find a description of the generated files [here](https://finn-dev.readthedocs.io/en/latest/command_line.html#simple-dataflow-build-mode). diff --git a/build/resnet50/build.py b/build/resnet50/build.py index 5ed6b2c..6f0dfc8 100644 --- a/build/resnet50/build.py +++ b/build/resnet50/build.py @@ -38,8 +38,7 @@ from custom_steps import ( step_resnet50_tidy, step_resnet50_streamline, - step_resnet50_convert_to_hls, - step_resnet50_set_fifo_depths, + step_resnet50_convert_to_hw, step_resnet50_slr_floorplan, ) @@ -52,14 +51,15 @@ resnet50_build_steps = [ step_resnet50_tidy, step_resnet50_streamline, - step_resnet50_convert_to_hls, + step_resnet50_convert_to_hw, "step_create_dataflow_partition", + "step_specialize_layers", "step_apply_folding_config", "step_minimize_bit_width", "step_generate_estimate_reports", - "step_hls_codegen", - "step_hls_ipgen", - step_resnet50_set_fifo_depths, + "step_hw_codegen", + "step_hw_ipgen", + "step_set_fifo_depths", step_resnet50_slr_floorplan, "step_synthesize_bitfile", "step_make_pynq_driver", @@ -101,13 +101,7 @@ def platform_to_shell(platform): platform_dir = "release/%s" % release_platform_name os.makedirs(platform_dir, exist_ok=True) - # try: - # from finnexperimental.transformation.fpgadataflow.infer_doublepacked_dsp import InferDoublePackedConv # noqa: E501 - # folding_config_file="folding_config/U250_folding_config.json" - # print("DoublePackedConv detected") - # except: - # warn(" FINN Experimental not available. Using non-packed folded down convolution. 
This is 16 times slower per MHz ") # noqa: E501 - folding_config_file = "folding_config/U250_folding_config_no_doublepack_pe_folded_16.json" + folding_config_file = "folding_config/U250_folding_config.json" cfg = build_cfg.DataflowBuildConfig( steps=resnet50_build_steps, @@ -115,6 +109,8 @@ def platform_to_shell(platform): synth_clk_period_ns=synth_clk_period_ns, board=board, shell_flow_type=build_cfg.ShellFlowType.VITIS_ALVEO, + split_large_fifos=True, + specialize_layers_config_file="specialize_layers_config.json", vitis_platform=vitis_platform, # throughput parameters (auto-folding) mvau_wwidth_max=24, diff --git a/build/resnet50/custom_steps.py b/build/resnet50/custom_steps.py index 6bc6008..5e59514 100644 --- a/build/resnet50/custom_steps.py +++ b/build/resnet50/custom_steps.py @@ -1,4 +1,5 @@ -# Copyright (c) 2020, Xilinx +# Copyright (C) 2020-2022, Xilinx, Inc. +# Copyright (C) 2022-2024, Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -87,7 +88,7 @@ from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_data_layouts import InferDataLayouts from qonnx.transformation.insert_topk import InsertTopK -import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls +import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul from finn.builder.build_dataflow_config import ( @@ -95,23 +96,8 @@ ShellFlowType, ) -from finn.transformation.fpgadataflow.prepare_ip import PrepareIP -from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP -from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( - ReplaceVerilogRelPaths, -) - from finn.transformation.move_reshape import RemoveCNVtoFCFlatten -from qonnx.util.config import extract_model_config_to_json -from finn.transformation.fpgadataflow.set_fifo_depths import ( - InsertAndSetFIFODepths, - 
RemoveShallowFIFOs, - SplitLargeFIFOs, -) -from finn.transformation.fpgadataflow.insert_dwc import InsertDWC -from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO - def step_resnet50_tidy(model: ModelWrapper, cfg: DataflowBuildConfig): model = model.transform(GiveUniqueParameterTensors()) @@ -188,38 +174,28 @@ def step_resnet50_streamline(model: ModelWrapper, cfg: DataflowBuildConfig): return model -def step_resnet50_convert_to_hls(model: ModelWrapper, cfg: DataflowBuildConfig): +def step_resnet50_convert_to_hw(model: ModelWrapper, cfg: DataflowBuildConfig): model.set_tensor_datatype(model.graph.input[0].name, DataType["UINT8"]) model = model.transform(InferDataLayouts()) - - # try: - # from finnexperimental.transformation.fpgadataflow.infer_doublepacked_dsp import ( - # InferDoublePackedConv, - # ) - - # model = model.transform(InferDoublePackedConv([1])) - # except Exception: - # print(" FINN Experimental not available. Using non-packed convolution ") - model = model.transform(DoubleToSingleFloat()) model = model.transform(InferDataTypes()) model = model.transform(SortGraph()) - to_hls_transformations = [ - to_hls.InferAddStreamsLayer, + to_hw_transformations = [ + to_hw.InferAddStreamsLayer, LowerConvsToMatMul, - to_hls.InferChannelwiseLinearLayer, - to_hls.InferPool_Batch, + to_hw.InferChannelwiseLinearLayer, + to_hw.InferPool, AbsorbTransposeIntoMultiThreshold, RoundAndClipThresholds, - to_hls.InferQuantizedMatrixVectorActivation, - to_hls.InferThresholdingLayer, + to_hw.InferQuantizedMatrixVectorActivation, + to_hw.InferThresholdingLayer, AbsorbConsecutiveTransposes, - to_hls.InferConvInpGen, - to_hls.InferDuplicateStreamsLayer, - to_hls.InferLabelSelectLayer, + to_hw.InferConvInpGen, + to_hw.InferDuplicateStreamsLayer, + to_hw.InferLabelSelectLayer, ] - for trn in to_hls_transformations: + for trn in to_hw_transformations: model = model.transform(trn()) model = model.transform(InferDataLayouts()) model = model.transform(GiveUniqueNodeNames()) 
@@ -233,64 +209,6 @@ def step_resnet50_convert_to_hls(model: ModelWrapper, cfg: DataflowBuildConfig): return model -def step_resnet50_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): - """ - Depending on the auto_fifo_depths setting, do one of the following: - * if auto_fifo_depths=True: Run the `InsertAndSetFIFODepths` transformation - to attempt to determine the FIFO sizes that provide full throughput. Involves - running stitched-IP rtlsim and may take a long time. - * if auto_fifo_depths=False: Assume the folding config file contains FIFO - sizes as well. Runs the `InsertFIFO` transformation, then - `ApplyConfig(cfg.folding_config_file)`, and finally `RemoveShallowFIFOs`. - Coherency with config file node naming is ensured by calling - `GiveUniqueNodeNames`. - """ - - if cfg.auto_fifo_depths: - model = model.transform( - InsertAndSetFIFODepths( - cfg._resolve_fpga_part(), - cfg._resolve_hls_clk_period(), - vivado_ram_style=cfg.large_fifo_mem_style.value, - ) - ) - else: - # assume folding cfg json contains FIFO sizes too - # insert DWCs, FIFOs and run ApplyConfig once more - model = model.transform(InsertDWC()) - # need to make sure all FIFOs are created so that their depth can be - # set by ApplyConfig, so create_shallow_fifos=True - model = model.transform(InsertFIFO(create_shallow_fifos=True)) - model = model.transform(GiveUniqueNodeNames()) - model = model.transform(GiveReadableTensorNames()) - if cfg.folding_config_file is not None: - model = model.transform(ApplyConfig(cfg.folding_config_file)) - # split large FIFOs into multiple FIFOs - model = model.transform(SplitLargeFIFOs()) - # remove any shallow FIFOs - model = model.transform(RemoveShallowFIFOs()) - - # extract the final configuration and save it as json - hw_attrs = [ - "PE", - "SIMD", - "ram_style", - "depth", - "impl_style", - "resType", - "mem_mode", - "runtime_writeable_weights", - ] - extract_model_config_to_json(model, cfg.output_dir + "/final_hw_config.json", hw_attrs) - - # 
after FIFOs are ready to go, call PrepareIP and HLSSynthIP again - # this will only run for the new nodes (e.g. FIFOs and DWCs) - model = model.transform(PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())) - model = model.transform(HLSSynthIP()) - model = model.transform(ReplaceVerilogRelPaths()) - return model - - def step_resnet50_slr_floorplan(model: ModelWrapper, cfg: DataflowBuildConfig): if cfg.shell_flow_type == ShellFlowType.VITIS_ALVEO: try: diff --git a/build/resnet50/folding_config/U250_folding_config.json b/build/resnet50/folding_config/U250_folding_config.json index da4f7da..e25bfd8 100644 --- a/build/resnet50/folding_config/U250_folding_config.json +++ b/build/resnet50/folding_config/U250_folding_config.json @@ -1,616 +1,631 @@ { "Defaults": { - "outFIFODepths":[[32],"all"], - "inFIFODepths":[[32],"all"], - "mem_mode":["decoupled",["MatrixVectorActivation"]] - }, - "ConvDoublePacked_Batch_0": { + "outFIFODepths": [ + [32], + "all" + ], + "inFIFODepths": [ + [32], + "all" + ], + "mem_mode": [ + "internal_decoupled", + [ + "MVAU" + ] + ] + }, + "FMPadding_hls_0": { + "SIMD": 3 + }, + "ConvolutionInputGenerator_hls_0": { + "SIMD": 3 + }, + "MVAU_hls_0": { "SIMD": 3, - "PE": 64, - "MMV": 16 + "PE": 64 }, - "FMPadding_Batch_0": { - "SIMD": 64 + "FMPadding_hls_1": { + "SIMD": 4 }, - "ConvolutionInputGenerator_0": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_1": { + "SIMD": 4 }, - "Pool_Batch_0": { - "PE": 64 + "Pool_hls_0": { + "PE": 4 }, - "DuplicateStreams_Batch_0": { - "PE": 32, + "DuplicateStreams_hls_0": { + "PE": 2, "outFIFODepths": [32, 32] }, - "MatrixVectorActivation_1": { - "PE": 32, + "MVAU_hls_2": { + "PE": 2, "SIMD": 32 }, - "MatrixVectorActivation_0": { - "PE": 8, - "SIMD": 32 + "MVAU_hls_1": { + "PE": 1, + "SIMD": 16 }, - "FMPadding_Batch_1": { - "SIMD": 64 + "FMPadding_hls_2": { + "SIMD": 4 }, - "ConvolutionInputGenerator_1": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_2": { + "SIMD": 4 }, - "MatrixVectorActivation_2": 
{ - "PE": 32, + "MVAU_hls_3": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_3": { - "PE": 32, + "MVAU_hls_4": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_0": { - "PE": 32, + "AddStreams_hls_0": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_0": { - "PE": 32 + "Thresholding_hls_0": { + "PE": 2 }, - "DuplicateStreams_Batch_1": { - "PE": 32, + "DuplicateStreams_hls_1": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_1": { - "PE": 32 + "Thresholding_hls_1": { + "PE": 2 }, - "Thresholding_Batch_2": { - "PE": 32 + "Thresholding_hls_2": { + "PE": 2 }, - "MatrixVectorActivation_4": { - "PE": 32, + "MVAU_hls_5": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_2": { - "SIMD": 64 + "FMPadding_hls_3": { + "SIMD": 4 }, - "ConvolutionInputGenerator_2": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_3": { + "SIMD": 4 }, - "MatrixVectorActivation_5": { - "PE": 32, + "MVAU_hls_6": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_6": { - "PE": 32, + "MVAU_hls_7": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_1": { - "PE": 32, + "AddStreams_hls_1": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_3": { - "PE": 32 + "Thresholding_hls_3": { + "PE": 2 }, - "DuplicateStreams_Batch_2": { - "PE": 32, + "DuplicateStreams_hls_2": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_4": { - "PE": 32 + "Thresholding_hls_4": { + "PE": 2 }, - "Thresholding_Batch_5": { - "PE": 32 + "Thresholding_hls_5": { + "PE": 2 }, - "MatrixVectorActivation_7": { - "PE": 32, + "MVAU_hls_8": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_3": { - "SIMD": 64 + "FMPadding_hls_4": { + "SIMD": 4 }, - "ConvolutionInputGenerator_3": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_4": { + "SIMD": 4 }, - "MatrixVectorActivation_8": { - "PE": 32, + "MVAU_hls_9": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_9": { - "PE": 32, + "MVAU_hls_10": { + "PE": 2, "SIMD": 64 }, - "AddStreams_Batch_2": { - "PE":32, + "AddStreams_hls_2": { + "PE": 2, 
"inFIFODepths": [32, 32] }, - "Thresholding_Batch_6": { - "PE": 32 + "Thresholding_hls_6": { + "PE": 2 }, - "Thresholding_Batch_7": { - "PE": 32 + "Thresholding_hls_7": { + "PE": 2 }, - "DuplicateStreams_Batch_3": { - "PE": 32, + "DuplicateStreams_hls_3": { + "PE": 2, "outFIFODepths": [32, 32] }, - "DownSampler_0": { - "SIMD": 64 + "DownSampler_hls_0": { + "SIMD": 4 }, - "MatrixVectorActivation_10": { - "PE": 32, + "MVAU_hls_11": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_11": { - "PE": 32, + "MVAU_hls_12": { + "PE": 2, "SIMD": 64 }, - "FMPadding_Batch_4": { - "SIMD": 64 + "FMPadding_hls_5": { + "SIMD": 4 }, - "ConvolutionInputGenerator_4": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_5": { + "SIMD": 4 }, - "MatrixVectorActivation_12": { - "PE": 32, + "MVAU_hls_13": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_13": { - "PE": 32, + "MVAU_hls_14": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_3": { - "PE":32, + "AddStreams_hls_3": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_8": { - "PE": 32 + "Thresholding_hls_8": { + "PE": 2 }, - "DuplicateStreams_Batch_4": { - "PE": 32, + "DuplicateStreams_hls_4": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_9": { - "PE": 32 + "Thresholding_hls_9": { + "PE": 2 }, - "Thresholding_Batch_10": { - "PE": 32 + "Thresholding_hls_10": { + "PE": 2 }, - "MatrixVectorActivation_14": { - "PE": 32, + "MVAU_hls_15": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_5": { - "SIMD": 64 + "FMPadding_hls_6": { + "SIMD": 4 }, - "ConvolutionInputGenerator_5": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_6": { + "SIMD": 4 }, - "MatrixVectorActivation_15": { - "PE": 32, + "MVAU_hls_16": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_16": { - "PE": 32, + "MVAU_hls_17": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_4": { - "PE":32, + "AddStreams_hls_4": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_11": { - "PE": 32 + "Thresholding_hls_11": { + "PE": 2 }, - 
"DuplicateStreams_Batch_5": { - "PE": 32, + "DuplicateStreams_hls_5": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_12": { - "PE": 32 + "Thresholding_hls_12": { + "PE": 2 }, - "Thresholding_Batch_13": { - "PE": 32 + "Thresholding_hls_13": { + "PE": 2 }, - "MatrixVectorActivation_17": { - "PE": 32, + "MVAU_hls_18": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_6": { - "SIMD": 64 + "FMPadding_hls_7": { + "SIMD": 4 }, - "ConvolutionInputGenerator_6": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_7": { + "SIMD": 4 }, - "MatrixVectorActivation_18": { - "PE": 32, + "MVAU_hls_19": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_19": { - "PE": 32, + "MVAU_hls_20": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_5": { - "PE":32, + "AddStreams_hls_5": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_14": { - "PE": 32 + "Thresholding_hls_14": { + "PE": 2 }, - "DuplicateStreams_Batch_6": { - "PE": 32, + "DuplicateStreams_hls_6": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_15": { - "PE": 32 + "Thresholding_hls_15": { + "PE": 2 }, - "Thresholding_Batch_16": { - "PE": 32 + "Thresholding_hls_16": { + "PE": 2 }, - "MatrixVectorActivation_20": { - "PE": 32, + "MVAU_hls_21": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_7": { - "SIMD": 64 + "FMPadding_hls_8": { + "SIMD": 4 }, - "ConvolutionInputGenerator_7": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_8": { + "SIMD": 4 }, - "MatrixVectorActivation_21": { - "PE": 32, + "MVAU_hls_22": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_22": { - "PE": 32, + "MVAU_hls_23": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_6": { - "PE":32, + "AddStreams_hls_6": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_17": { - "PE": 32 + "Thresholding_hls_17": { + "PE": 2 }, - "Thresholding_Batch_18": { - "PE": 32 + "Thresholding_hls_18": { + "PE": 2 }, - "DuplicateStreams_Batch_7": { - "PE": 32, + "DuplicateStreams_hls_7": { + "PE": 2, "outFIFODepths": [32, 32] }, - 
"DownSampler_1": { - "SIMD": 64 + "DownSampler_hls_1": { + "SIMD": 4 }, - "MatrixVectorActivation_23": { - "PE": 32, + "MVAU_hls_24": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_24": { - "PE": 32, + "MVAU_hls_25": { + "PE": 2, "SIMD": 64 }, - "FMPadding_Batch_8": { - "SIMD": 64 + "FMPadding_hls_9": { + "SIMD": 4 }, - "ConvolutionInputGenerator_8": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_9": { + "SIMD": 4 }, - "MatrixVectorActivation_25": { - "PE": 32, + "MVAU_hls_26": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_26": { - "PE": 32, + "MVAU_hls_27": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_7": { - "PE":32, + "AddStreams_hls_7": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_19": { - "PE": 32 + "Thresholding_hls_19": { + "PE": 2 }, - "DuplicateStreams_Batch_8": { - "PE": 32, + "DuplicateStreams_hls_8": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_20": { - "PE": 32 + "Thresholding_hls_20": { + "PE": 2 }, - "Thresholding_Batch_21": { - "PE": 32 + "Thresholding_hls_21": { + "PE": 2 }, - "MatrixVectorActivation_27": { - "PE": 32, + "MVAU_hls_28": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_9": { - "SIMD": 64 + "FMPadding_hls_10": { + "SIMD": 4 }, - "ConvolutionInputGenerator_9": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_10": { + "SIMD": 4 }, - "MatrixVectorActivation_28": { - "PE": 32, + "MVAU_hls_29": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_29": { - "PE": 32, + "MVAU_hls_30": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_8": { - "PE":32, + "AddStreams_hls_8": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_22": { - "PE": 32 + "Thresholding_hls_22": { + "PE": 2 }, - "DuplicateStreams_Batch_9": { - "PE": 32, + "DuplicateStreams_hls_9": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_23": { - "PE": 32 + "Thresholding_hls_23": { + "PE": 2 }, - "Thresholding_Batch_24": { - "PE": 32 + "Thresholding_hls_24": { + "PE": 2 }, - "MatrixVectorActivation_30": { - 
"PE": 32, + "MVAU_hls_31": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_10": { - "SIMD": 64 + "FMPadding_hls_11": { + "SIMD": 4 }, - "ConvolutionInputGenerator_10": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_11": { + "SIMD": 4 }, - "MatrixVectorActivation_31": { - "PE": 32, + "MVAU_hls_32": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_32": { - "PE": 32, + "MVAU_hls_33": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_9": { - "PE":32, + "AddStreams_hls_9": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_25": { - "PE": 32 + "Thresholding_hls_25": { + "PE": 2 }, - "DuplicateStreams_Batch_10": { - "PE": 32, + "DuplicateStreams_hls_10": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_26": { - "PE": 32 + "Thresholding_hls_26": { + "PE": 2 }, - "Thresholding_Batch_27": { - "PE": 32 + "Thresholding_hls_27": { + "PE": 2 }, - "MatrixVectorActivation_33": { - "PE": 32, + "MVAU_hls_34": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_11": { - "SIMD": 64 + "FMPadding_hls_12": { + "SIMD": 4 }, - "ConvolutionInputGenerator_11": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_12": { + "SIMD": 4 }, - "MatrixVectorActivation_34": { - "PE": 32, + "MVAU_hls_35": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_35": { - "PE": 32, + "MVAU_hls_36": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_10": { - "PE":32, + "AddStreams_hls_10": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_28": { - "PE": 32 + "Thresholding_hls_28": { + "PE": 2 }, - "DuplicateStreams_Batch_11": { - "PE": 32, + "DuplicateStreams_hls_11": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_29": { - "PE": 32 + "Thresholding_hls_29": { + "PE": 2 }, - "Thresholding_Batch_30": { - "PE": 32 + "Thresholding_hls_30": { + "PE": 2 }, - "MatrixVectorActivation_36": { - "PE": 32, + "MVAU_hls_37": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_12": { - "SIMD": 64 + "FMPadding_hls_13": { + "SIMD": 4 }, - "ConvolutionInputGenerator_12": { - "SIMD": 64 + 
"ConvolutionInputGenerator_hls_13": { + "SIMD": 4 }, - "MatrixVectorActivation_37": { - "PE": 32, + "MVAU_hls_38": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_38": { - "PE": 32, + "MVAU_hls_39": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_11": { - "PE":32, + "AddStreams_hls_11": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_31": { - "PE": 32 + "Thresholding_hls_31": { + "PE": 2 }, - "DuplicateStreams_Batch_12": { - "PE": 32, + "DuplicateStreams_hls_12": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_32": { - "PE": 32 + "Thresholding_hls_32": { + "PE": 2 }, - "Thresholding_Batch_33": { - "PE": 32 + "Thresholding_hls_33": { + "PE": 2 }, - "MatrixVectorActivation_39": { - "PE": 32, + "MVAU_hls_40": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_13": { - "SIMD": 64 + "FMPadding_hls_14": { + "SIMD": 4 }, - "ConvolutionInputGenerator_13": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_14": { + "SIMD": 4 }, - "MatrixVectorActivation_40": { - "PE": 32, + "MVAU_hls_41": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_41": { - "PE": 32, + "MVAU_hls_42": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_12": { - "PE":32, + "AddStreams_hls_12": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_34": { - "PE": 32 + "Thresholding_hls_34": { + "PE": 2 }, - "Thresholding_Batch_35": { - "PE": 32 + "Thresholding_hls_35": { + "PE": 2 }, - "DuplicateStreams_Batch_13": { - "PE": 32, + "DuplicateStreams_hls_13": { + "PE": 2, "outFIFODepths": [32, 32] }, - "DownSampler_2": { - "SIMD": 64 + "DownSampler_hls_2": { + "SIMD": 4 }, - "MatrixVectorActivation_42": { - "PE": 32, + "MVAU_hls_43": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_43": { - "PE": 32, + "MVAU_hls_44": { + "PE": 2, "SIMD": 64 }, - "FMPadding_Batch_14": { - "SIMD": 64 + "FMPadding_hls_15": { + "SIMD": 4 }, - "ConvolutionInputGenerator_14": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_15": { + "SIMD": 4 }, - "MatrixVectorActivation_44": { - "PE": 32, + 
"MVAU_hls_45": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_45": { - "PE": 32, + "MVAU_hls_46": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_13": { - "PE":32, + "AddStreams_hls_13": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_36": { - "PE": 32 + "Thresholding_hls_36": { + "PE": 2 }, - "DuplicateStreams_Batch_14": { - "PE": 32, + "DuplicateStreams_hls_14": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_37": { - "PE": 32 + "Thresholding_hls_37": { + "PE": 2 }, - "Thresholding_Batch_38": { - "PE": 32 + "Thresholding_hls_38": { + "PE": 2 }, - "MatrixVectorActivation_46": { - "PE": 32, + "MVAU_hls_47": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_15": { - "SIMD": 64 + "FMPadding_hls_16": { + "SIMD": 4 }, - "ConvolutionInputGenerator_15": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_16": { + "SIMD": 4 }, - "MatrixVectorActivation_47": { - "PE": 32, + "MVAU_hls_48": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_48": { - "PE": 32, + "MVAU_hls_49": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_14": { - "PE":32, + "AddStreams_hls_14": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_39": { - "PE": 32 + "Thresholding_hls_39": { + "PE": 2 }, - "DuplicateStreams_Batch_15": { - "PE": 32, + "DuplicateStreams_hls_15": { + "PE": 2, "outFIFODepths": [32, 32] }, - "Thresholding_Batch_40": { - "PE": 32 + "Thresholding_hls_40": { + "PE": 2 }, - "Thresholding_Batch_41": { - "PE": 32 + "Thresholding_hls_41": { + "PE": 2 }, - "MatrixVectorActivation_49": { - "PE": 32, + "MVAU_hls_50": { + "PE": 2, "SIMD": 32 }, - "FMPadding_Batch_16": { - "SIMD": 64 + "FMPadding_hls_17": { + "SIMD": 4 }, - "ConvolutionInputGenerator_16": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_17": { + "SIMD": 4 }, - "MatrixVectorActivation_50": { - "PE": 32, + "MVAU_hls_51": { + "PE": 2, "SIMD": 64 }, - "MatrixVectorActivation_51": { - "PE": 32, + "MVAU_hls_52": { + "PE": 2, "SIMD": 32 }, - "AddStreams_Batch_15": { - "PE":32, + 
"AddStreams_hls_15": { + "PE": 2, "inFIFODepths": [32, 32] }, - "Thresholding_Batch_42": { - "PE": 32 + "Thresholding_hls_42": { + "PE": 2 }, - - "ConvolutionInputGenerator_17": { - "SIMD": 64 + "ConvolutionInputGenerator_hls_18": { + "SIMD": 4 }, - "Pool_Batch_1": { - "PE": 64 + "Pool_hls_1": { + "PE": 4 }, - "MatrixVectorActivation_52": { + "MVAU_hls_53": { "PE": 1, - "SIMD": 64, - "mem_mode" : "external" + "SIMD": 4, + "mem_mode": "external" }, - "LabelSelect_Batch_0": { - "outputDataType":"UINT16", + "LabelSelect_hls_0": { + "outputDataType": "UINT16", "PE": 1 }, - "ChannelwiseOp_Batch_0": { - "PE": 32 + "ChannelwiseOp_hls_0": { + "PE": 2 }, - "ChannelwiseOp_Batch_1": { - "PE": 32 + "ChannelwiseOp_hls_1": { + "PE": 2 }, - "ChannelwiseOp_Batch_2": { + "ChannelwiseOp_hls_2": { "PE": 1 } } diff --git a/build/resnet50/folding_config/U250_folding_config_no_doublepack_pe_folded_16.json b/build/resnet50/folding_config/U250_folding_config_no_doublepack_pe_folded_16.json deleted file mode 100644 index 09aa2dc..0000000 --- a/build/resnet50/folding_config/U250_folding_config_no_doublepack_pe_folded_16.json +++ /dev/null @@ -1,631 +0,0 @@ -{ - "Defaults": { - "outFIFODepths": [ - [32], - "all" - ], - "inFIFODepths": [ - [32], - "all" - ], - "mem_mode": [ - "decoupled", - [ - "MatrixVectorActivation" - ] - ] - }, - "FMPadding_Batch_0": { - "SIMD": 3 - }, - "ConvolutionInputGenerator_0": { - "SIMD": 3 - }, - "MatrixVectorActivation_0": { - "SIMD": 3, - "PE": 64 - }, - "FMPadding_Batch_1": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_1": { - "SIMD": 4 - }, - "Pool_Batch_0": { - "PE": 4 - }, - "DuplicateStreams_Batch_0": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "MatrixVectorActivation_2": { - "PE": 2, - "SIMD": 32 - }, - "MatrixVectorActivation_1": { - "PE": 1, - "SIMD": 16 - }, - "FMPadding_Batch_2": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_2": { - "SIMD": 4 - }, - "MatrixVectorActivation_3": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_4": { - 
"PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_0": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_0": { - "PE": 2 - }, - "DuplicateStreams_Batch_1": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_1": { - "PE": 2 - }, - "Thresholding_Batch_2": { - "PE": 2 - }, - "MatrixVectorActivation_5": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_3": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_3": { - "SIMD": 4 - }, - "MatrixVectorActivation_6": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_7": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_1": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_3": { - "PE": 2 - }, - "DuplicateStreams_Batch_2": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_4": { - "PE": 2 - }, - "Thresholding_Batch_5": { - "PE": 2 - }, - "MatrixVectorActivation_8": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_4": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_4": { - "SIMD": 4 - }, - "MatrixVectorActivation_9": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_10": { - "PE": 2, - "SIMD": 64 - }, - "AddStreams_Batch_2": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_6": { - "PE": 2 - }, - "Thresholding_Batch_7": { - "PE": 2 - }, - "DuplicateStreams_Batch_3": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "DownSampler_0": { - "SIMD": 4 - }, - "MatrixVectorActivation_11": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_12": { - "PE": 2, - "SIMD": 64 - }, - "FMPadding_Batch_5": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_5": { - "SIMD": 4 - }, - "MatrixVectorActivation_13": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_14": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_3": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_8": { - "PE": 2 - }, - "DuplicateStreams_Batch_4": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_9": { - "PE": 2 - }, - "Thresholding_Batch_10": 
{ - "PE": 2 - }, - "MatrixVectorActivation_15": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_6": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_6": { - "SIMD": 4 - }, - "MatrixVectorActivation_16": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_17": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_4": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_11": { - "PE": 2 - }, - "DuplicateStreams_Batch_5": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_12": { - "PE": 2 - }, - "Thresholding_Batch_13": { - "PE": 2 - }, - "MatrixVectorActivation_18": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_7": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_7": { - "SIMD": 4 - }, - "MatrixVectorActivation_19": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_20": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_5": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_14": { - "PE": 2 - }, - "DuplicateStreams_Batch_6": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_15": { - "PE": 2 - }, - "Thresholding_Batch_16": { - "PE": 2 - }, - "MatrixVectorActivation_21": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_8": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_8": { - "SIMD": 4 - }, - "MatrixVectorActivation_22": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_23": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_6": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_17": { - "PE": 2 - }, - "Thresholding_Batch_18": { - "PE": 2 - }, - "DuplicateStreams_Batch_7": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "DownSampler_1": { - "SIMD": 4 - }, - "MatrixVectorActivation_24": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_25": { - "PE": 2, - "SIMD": 64 - }, - "FMPadding_Batch_9": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_9": { - "SIMD": 4 - }, - "MatrixVectorActivation_26": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_27": { - 
"PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_7": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_19": { - "PE": 2 - }, - "DuplicateStreams_Batch_8": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_20": { - "PE": 2 - }, - "Thresholding_Batch_21": { - "PE": 2 - }, - "MatrixVectorActivation_28": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_10": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_10": { - "SIMD": 4 - }, - "MatrixVectorActivation_29": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_30": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_8": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_22": { - "PE": 2 - }, - "DuplicateStreams_Batch_9": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_23": { - "PE": 2 - }, - "Thresholding_Batch_24": { - "PE": 2 - }, - "MatrixVectorActivation_31": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_11": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_11": { - "SIMD": 4 - }, - "MatrixVectorActivation_32": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_33": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_9": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_25": { - "PE": 2 - }, - "DuplicateStreams_Batch_10": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_26": { - "PE": 2 - }, - "Thresholding_Batch_27": { - "PE": 2 - }, - "MatrixVectorActivation_34": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_12": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_12": { - "SIMD": 4 - }, - "MatrixVectorActivation_35": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_36": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_10": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_28": { - "PE": 2 - }, - "DuplicateStreams_Batch_11": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_29": { - "PE": 2 - }, - "Thresholding_Batch_30": { - "PE": 2 - }, - 
"MatrixVectorActivation_37": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_13": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_13": { - "SIMD": 4 - }, - "MatrixVectorActivation_38": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_39": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_11": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_31": { - "PE": 2 - }, - "DuplicateStreams_Batch_12": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_32": { - "PE": 2 - }, - "Thresholding_Batch_33": { - "PE": 2 - }, - "MatrixVectorActivation_40": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_14": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_14": { - "SIMD": 4 - }, - "MatrixVectorActivation_41": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_42": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_12": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_34": { - "PE": 2 - }, - "Thresholding_Batch_35": { - "PE": 2 - }, - "DuplicateStreams_Batch_13": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "DownSampler_2": { - "SIMD": 4 - }, - "MatrixVectorActivation_43": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_44": { - "PE": 2, - "SIMD": 64 - }, - "FMPadding_Batch_15": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_15": { - "SIMD": 4 - }, - "MatrixVectorActivation_45": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_46": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_13": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_36": { - "PE": 2 - }, - "DuplicateStreams_Batch_14": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_37": { - "PE": 2 - }, - "Thresholding_Batch_38": { - "PE": 2 - }, - "MatrixVectorActivation_47": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_16": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_16": { - "SIMD": 4 - }, - "MatrixVectorActivation_48": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_49": { - "PE": 
2, - "SIMD": 32 - }, - "AddStreams_Batch_14": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_39": { - "PE": 2 - }, - "DuplicateStreams_Batch_15": { - "PE": 2, - "outFIFODepths": [32, 32] - }, - "Thresholding_Batch_40": { - "PE": 2 - }, - "Thresholding_Batch_41": { - "PE": 2 - }, - "MatrixVectorActivation_50": { - "PE": 2, - "SIMD": 32 - }, - "FMPadding_Batch_17": { - "SIMD": 4 - }, - "ConvolutionInputGenerator_17": { - "SIMD": 4 - }, - "MatrixVectorActivation_51": { - "PE": 2, - "SIMD": 64 - }, - "MatrixVectorActivation_52": { - "PE": 2, - "SIMD": 32 - }, - "AddStreams_Batch_15": { - "PE": 2, - "inFIFODepths": [32, 32] - }, - "Thresholding_Batch_42": { - "PE": 2 - }, - "ConvolutionInputGenerator_18": { - "SIMD": 4 - }, - "Pool_Batch_1": { - "PE": 4 - }, - "MatrixVectorActivation_53": { - "PE": 1, - "SIMD": 4, - "mem_mode": "external" - }, - "LabelSelect_Batch_0": { - "outputDataType": "UINT16", - "PE": 1 - }, - "ChannelwiseOp_Batch_0": { - "PE": 2 - }, - "ChannelwiseOp_Batch_1": { - "PE": 2 - }, - "ChannelwiseOp_Batch_2": { - "PE": 1 - } -} diff --git a/build/resnet50/specialize_layers_config.json b/build/resnet50/specialize_layers_config.json new file mode 100644 index 0000000..2fb4afe --- /dev/null +++ b/build/resnet50/specialize_layers_config.json @@ -0,0 +1,528 @@ +{ + "Defaults": {}, + "FMPadding_0": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_0": { + "preferred_impl_style": "hls" + }, + "MVAU_0": { + "preferred_impl_style": "hls" + }, + "FMPadding_1": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_1": { + "preferred_impl_style": "hls" + }, + "Pool_0": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_0": { + "preferred_impl_style": "hls" + }, + "MVAU_1": { + "preferred_impl_style": "hls" + }, + "MVAU_2": { + "preferred_impl_style": "hls" + }, + "FMPadding_2": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_2": { + "preferred_impl_style": "hls" + }, + "MVAU_3": { + 
"preferred_impl_style": "hls" + }, + "MVAU_4": { + "preferred_impl_style": "hls" + }, + "AddStreams_0": { + "preferred_impl_style": "hls" + }, + "Thresholding_0": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_1": { + "preferred_impl_style": "hls" + }, + "Thresholding_1": { + "preferred_impl_style": "hls" + }, + "Thresholding_2": { + "preferred_impl_style": "hls" + }, + "MVAU_5": { + "preferred_impl_style": "hls" + }, + "FMPadding_3": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_3": { + "preferred_impl_style": "hls" + }, + "MVAU_6": { + "preferred_impl_style": "hls" + }, + "MVAU_7": { + "preferred_impl_style": "hls" + }, + "AddStreams_1": { + "preferred_impl_style": "hls" + }, + "Thresholding_3": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_2": { + "preferred_impl_style": "hls" + }, + "Thresholding_4": { + "preferred_impl_style": "hls" + }, + "Thresholding_5": { + "preferred_impl_style": "hls" + }, + "MVAU_8": { + "preferred_impl_style": "hls" + }, + "FMPadding_4": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_4": { + "preferred_impl_style": "hls" + }, + "MVAU_9": { + "preferred_impl_style": "hls" + }, + "MVAU_10": { + "preferred_impl_style": "hls" + }, + "AddStreams_2": { + "preferred_impl_style": "hls" + }, + "Thresholding_6": { + "preferred_impl_style": "hls" + }, + "Thresholding_7": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_3": { + "preferred_impl_style": "hls" + }, + "MVAU_11": { + "preferred_impl_style": "hls" + }, + "DownSampler_0": { + "preferred_impl_style": "hls" + }, + "MVAU_12": { + "preferred_impl_style": "hls" + }, + "FMPadding_5": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_5": { + "preferred_impl_style": "hls" + }, + "MVAU_13": { + "preferred_impl_style": "hls" + }, + "MVAU_14": { + "preferred_impl_style": "hls" + }, + "AddStreams_3": { + "preferred_impl_style": "hls" + }, + "Thresholding_8": { + "preferred_impl_style": "hls" + }, + 
"DuplicateStreams_4": { + "preferred_impl_style": "hls" + }, + "Thresholding_9": { + "preferred_impl_style": "hls" + }, + "Thresholding_10": { + "preferred_impl_style": "hls" + }, + "MVAU_15": { + "preferred_impl_style": "hls" + }, + "FMPadding_6": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_6": { + "preferred_impl_style": "hls" + }, + "MVAU_16": { + "preferred_impl_style": "hls" + }, + "MVAU_17": { + "preferred_impl_style": "hls" + }, + "AddStreams_4": { + "preferred_impl_style": "hls" + }, + "Thresholding_11": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_5": { + "preferred_impl_style": "hls" + }, + "Thresholding_12": { + "preferred_impl_style": "hls" + }, + "Thresholding_13": { + "preferred_impl_style": "hls" + }, + "MVAU_18": { + "preferred_impl_style": "hls" + }, + "FMPadding_7": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_7": { + "preferred_impl_style": "hls" + }, + "MVAU_19": { + "preferred_impl_style": "hls" + }, + "MVAU_20": { + "preferred_impl_style": "hls" + }, + "AddStreams_5": { + "preferred_impl_style": "hls" + }, + "Thresholding_14": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_6": { + "preferred_impl_style": "hls" + }, + "Thresholding_15": { + "preferred_impl_style": "hls" + }, + "Thresholding_16": { + "preferred_impl_style": "hls" + }, + "MVAU_21": { + "preferred_impl_style": "hls" + }, + "FMPadding_8": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_8": { + "preferred_impl_style": "hls" + }, + "MVAU_22": { + "preferred_impl_style": "hls" + }, + "MVAU_23": { + "preferred_impl_style": "hls" + }, + "AddStreams_6": { + "preferred_impl_style": "hls" + }, + "Thresholding_17": { + "preferred_impl_style": "hls" + }, + "Thresholding_18": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_7": { + "preferred_impl_style": "hls" + }, + "MVAU_24": { + "preferred_impl_style": "hls" + }, + "DownSampler_1": { + "preferred_impl_style": "hls" + }, + "MVAU_25": { + 
"preferred_impl_style": "hls" + }, + "FMPadding_9": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_9": { + "preferred_impl_style": "hls" + }, + "MVAU_26": { + "preferred_impl_style": "hls" + }, + "MVAU_27": { + "preferred_impl_style": "hls" + }, + "AddStreams_7": { + "preferred_impl_style": "hls" + }, + "Thresholding_19": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_8": { + "preferred_impl_style": "hls" + }, + "Thresholding_20": { + "preferred_impl_style": "hls" + }, + "Thresholding_21": { + "preferred_impl_style": "hls" + }, + "MVAU_28": { + "preferred_impl_style": "hls" + }, + "FMPadding_10": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_10": { + "preferred_impl_style": "hls" + }, + "MVAU_29": { + "preferred_impl_style": "hls" + }, + "MVAU_30": { + "preferred_impl_style": "hls" + }, + "AddStreams_8": { + "preferred_impl_style": "hls" + }, + "Thresholding_22": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_9": { + "preferred_impl_style": "hls" + }, + "Thresholding_23": { + "preferred_impl_style": "hls" + }, + "Thresholding_24": { + "preferred_impl_style": "hls" + }, + "MVAU_31": { + "preferred_impl_style": "hls" + }, + "FMPadding_11": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_11": { + "preferred_impl_style": "hls" + }, + "MVAU_32": { + "preferred_impl_style": "hls" + }, + "MVAU_33": { + "preferred_impl_style": "hls" + }, + "AddStreams_9": { + "preferred_impl_style": "hls" + }, + "Thresholding_25": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_10": { + "preferred_impl_style": "hls" + }, + "Thresholding_26": { + "preferred_impl_style": "hls" + }, + "Thresholding_27": { + "preferred_impl_style": "hls" + }, + "MVAU_34": { + "preferred_impl_style": "hls" + }, + "FMPadding_12": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_12": { + "preferred_impl_style": "hls" + }, + "MVAU_35": { + "preferred_impl_style": "hls" + }, + "MVAU_36": { + 
"preferred_impl_style": "hls" + }, + "AddStreams_10": { + "preferred_impl_style": "hls" + }, + "Thresholding_28": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_11": { + "preferred_impl_style": "hls" + }, + "Thresholding_29": { + "preferred_impl_style": "hls" + }, + "Thresholding_30": { + "preferred_impl_style": "hls" + }, + "MVAU_37": { + "preferred_impl_style": "hls" + }, + "FMPadding_13": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_13": { + "preferred_impl_style": "hls" + }, + "MVAU_38": { + "preferred_impl_style": "hls" + }, + "MVAU_39": { + "preferred_impl_style": "hls" + }, + "AddStreams_11": { + "preferred_impl_style": "hls" + }, + "Thresholding_31": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_12": { + "preferred_impl_style": "hls" + }, + "Thresholding_32": { + "preferred_impl_style": "hls" + }, + "Thresholding_33": { + "preferred_impl_style": "hls" + }, + "MVAU_40": { + "preferred_impl_style": "hls" + }, + "FMPadding_14": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_14": { + "preferred_impl_style": "hls" + }, + "MVAU_41": { + "preferred_impl_style": "hls" + }, + "MVAU_42": { + "preferred_impl_style": "hls" + }, + "AddStreams_12": { + "preferred_impl_style": "hls" + }, + "Thresholding_34": { + "preferred_impl_style": "hls" + }, + "ChannelwiseOp_0": { + "preferred_impl_style": "hls" + }, + "Thresholding_35": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_13": { + "preferred_impl_style": "hls" + }, + "MVAU_43": { + "preferred_impl_style": "hls" + }, + "DownSampler_2": { + "preferred_impl_style": "hls" + }, + "MVAU_44": { + "preferred_impl_style": "hls" + }, + "FMPadding_15": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_15": { + "preferred_impl_style": "hls" + }, + "MVAU_45": { + "preferred_impl_style": "hls" + }, + "MVAU_46": { + "preferred_impl_style": "hls" + }, + "AddStreams_13": { + "preferred_impl_style": "hls" + }, + "Thresholding_36": { + 
"preferred_impl_style": "hls" + }, + "DuplicateStreams_14": { + "preferred_impl_style": "hls" + }, + "Thresholding_37": { + "preferred_impl_style": "hls" + }, + "Thresholding_38": { + "preferred_impl_style": "hls" + }, + "MVAU_47": { + "preferred_impl_style": "hls" + }, + "FMPadding_16": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_16": { + "preferred_impl_style": "hls" + }, + "MVAU_48": { + "preferred_impl_style": "hls" + }, + "MVAU_49": { + "preferred_impl_style": "hls" + }, + "AddStreams_14": { + "preferred_impl_style": "hls" + }, + "Thresholding_39": { + "preferred_impl_style": "hls" + }, + "DuplicateStreams_15": { + "preferred_impl_style": "hls" + }, + "Thresholding_40": { + "preferred_impl_style": "hls" + }, + "Thresholding_41": { + "preferred_impl_style": "hls" + }, + "MVAU_50": { + "preferred_impl_style": "hls" + }, + "FMPadding_17": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_17": { + "preferred_impl_style": "hls" + }, + "MVAU_51": { + "preferred_impl_style": "hls" + }, + "MVAU_52": { + "preferred_impl_style": "hls" + }, + "AddStreams_15": { + "preferred_impl_style": "hls" + }, + "Thresholding_42": { + "preferred_impl_style": "hls" + }, + "ChannelwiseOp_1": { + "preferred_impl_style": "hls" + }, + "ConvolutionInputGenerator_18": { + "preferred_impl_style": "hls" + }, + "Pool_1": { + "preferred_impl_style": "hls" + }, + "MVAU_53": { + "preferred_impl_style": "hls" + }, + "ChannelwiseOp_2": { + "preferred_impl_style": "hls" + }, + "LabelSelect_0": { + "preferred_impl_style": "hls" + } +} diff --git a/build/vgg10-radioml/README.md b/build/vgg10-radioml/README.md index 17a4524..18df19d 100755 --- a/build/vgg10-radioml/README.md +++ b/build/vgg10-radioml/README.md @@ -12,7 +12,7 @@ Due to the 1-dimensional topology in VGG10 we use a specialized build script tha 0. Ensure you have performed the *Setup* steps in the top-level README for setting up the FINN requirements and environment variables. -1. 
Run the `download_vgg10.sh` script under the `models` directory to download the pretrained VGG10 ONNX model. You should have e.g. `vgg10-radioml/models/radioml_w4a4_small_tidy.onnx` as a result. +1. Run the `download_vgg10.sh` script under the `models` directory to download the pretrained VGG10 ONNX model. You should have `vgg10-radioml/models/radioml_w4a4_small_tidy.onnx` as a result. 2. Launch the build as follows: ```SHELL @@ -24,7 +24,7 @@ cd $FINN_EXAMPLES/build/finn ./run-docker.sh build_custom $FINN_EXAMPLES/build/vgg10 ``` -5. The generated outputs will be under `vgg10-radioml/output__`. You can find a description of the generated files [here](https://finn-dev.readthedocs.io/en/latest/command_line.html#simple-dataflow-build-mode). +3. The generated outputs will be under `vgg10-radioml/output__`. You can find a description of the generated files [here](https://finn-dev.readthedocs.io/en/latest/command_line.html#simple-dataflow-build-mode). ## Where did the ONNX model files come from? 
diff --git a/build/vgg10-radioml/build.py b/build/vgg10-radioml/build.py index 4641020..0567763 100755 --- a/build/vgg10-radioml/build.py +++ b/build/vgg10-radioml/build.py @@ -64,15 +64,16 @@ def select_build_steps(platform): "step_tidy_up", step_pre_streamline, "step_streamline", - "step_convert_to_hls", + "step_convert_to_hw", step_convert_final_layers, "step_create_dataflow_partition", + "step_specialize_layers", "step_target_fps_parallelization", "step_apply_folding_config", "step_minimize_bit_width", "step_generate_estimate_reports", - "step_hls_codegen", - "step_hls_ipgen", + "step_hw_codegen", + "step_hw_ipgen", "step_set_fifo_depths", "step_create_stitched_ip", "step_measure_rtlsim_performance", @@ -109,14 +110,13 @@ def select_build_steps(platform): shell_flow_type=shell_flow_type, vitis_platform=vitis_platform, folding_config_file="folding_config/%s_folding_config.json" % platform_name, - auto_fifo_depths=True, - standalone_thresholds=False, + split_large_fifos=True, + standalone_thresholds=True, # enable extra performance optimizations (physopt) vitis_opt_strategy=build_cfg.VitisOptStrategyCfg.PERFORMANCE_BEST, generate_outputs=[ build_cfg.DataflowOutputType.ESTIMATE_REPORTS, build_cfg.DataflowOutputType.STITCHED_IP, - # build_cfg.DataflowOutputType.OOC_SYNTH, # build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE, build_cfg.DataflowOutputType.BITFILE, build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE, diff --git a/build/vgg10-radioml/custom_steps.py b/build/vgg10-radioml/custom_steps.py index 8be2ef4..509efbc 100755 --- a/build/vgg10-radioml/custom_steps.py +++ b/build/vgg10-radioml/custom_steps.py @@ -29,7 +29,7 @@ from finn.builder.build_dataflow_config import DataflowBuildConfig from qonnx.transformation.change_3d_tensors_to_4d import Change3DTo4DTensors from qonnx.transformation.general import GiveUniqueNodeNames -import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls +import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw 
import finn.transformation.streamline.absorb as absorb @@ -40,7 +40,7 @@ def step_pre_streamline(model: ModelWrapper, cfg: DataflowBuildConfig): def step_convert_final_layers(model: ModelWrapper, cfg: DataflowBuildConfig): - model = model.transform(to_hls.InferChannelwiseLinearLayer()) - model = model.transform(to_hls.InferLabelSelectLayer()) + model = model.transform(to_hw.InferChannelwiseLinearLayer()) + model = model.transform(to_hw.InferLabelSelectLayer()) model = model.transform(GiveUniqueNodeNames()) return model diff --git a/build/vgg10-radioml/folding_config/ZCU104_folding_config.json b/build/vgg10-radioml/folding_config/ZCU104_folding_config.json index 14f908a..44fced1 100755 --- a/build/vgg10-radioml/folding_config/ZCU104_folding_config.json +++ b/build/vgg10-radioml/folding_config/ZCU104_folding_config.json @@ -1,136 +1,221 @@ { "Defaults": {}, - "FMPadding_Batch_0": { + "FMPadding_rtl_0": { "SIMD": 2 }, - "ConvolutionInputGenerator1D_0": { + "ConvolutionInputGenerator_rtl_0": { "SIMD": 2, - "ram_style": "auto" + "parallel_window": 0, + "ram_style": "distributed" }, - "MatrixVectorActivation_0": { - "PE": 32, + "MVAU_rtl_0": { + "PE": 16, "SIMD": 6, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingMaxPool_Batch_0": { + "Thresholding_rtl_0": { + "PE": 16, + "runtime_writeable_weights": 0, + "depth_trigger_uram": 0, + "depth_trigger_bram": 0 + }, + "StreamingMaxPool_hls_0": { + "PE": 16 }, - "FMPadding_Batch_1": { + "FMPadding_rtl_1": { "SIMD": 16 }, - "ConvolutionInputGenerator1D_1": { - "SIMD": 32, - "ram_style": "auto" + "ConvolutionInputGenerator_rtl_1": { + "SIMD": 16, + "parallel_window": 0, + "ram_style": "distributed" }, - "MatrixVectorActivation_1": { + "MVAU_rtl_1": { "PE": 16, - "SIMD": 96, + "SIMD": 48, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - 
"StreamingMaxPool_Batch_1": { + "Thresholding_rtl_1": { + "PE": 8, + "runtime_writeable_weights": 0, + "depth_trigger_uram": 0, + "depth_trigger_bram": 0 + }, + "StreamingMaxPool_hls_1": { + "PE": 8 }, - "FMPadding_Batch_2": { + "FMPadding_rtl_2": { "SIMD": 8 }, - "ConvolutionInputGenerator1D_2": { - "SIMD": 32, - "ram_style": "auto" + "ConvolutionInputGenerator_rtl_2": { + "SIMD": 8, + "parallel_window": 0, + "ram_style": "distributed" }, - "MatrixVectorActivation_2": { + "MVAU_rtl_2": { "PE": 8, - "SIMD": 96, + "SIMD": 48, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingMaxPool_Batch_2": { + "Thresholding_rtl_2": { + "PE": 4, + "runtime_writeable_weights": 0, + "depth_trigger_uram": 0, + "depth_trigger_bram": 0 }, - "FMPadding_Batch_3": { - "SIMD": 8 + "StreamingMaxPool_hls_2": { + "PE": 4 }, - "ConvolutionInputGenerator1D_3": { - "SIMD": 32, - "ram_style": "auto" + "FMPadding_rtl_3": { + "SIMD": 4 }, - "MatrixVectorActivation_3": { - "PE": 4, - "SIMD": 96, + "ConvolutionInputGenerator_rtl_3": { + "SIMD": 4, + "parallel_window": 0, + "ram_style": "distributed" + }, + "MVAU_rtl_3": { + "PE": 8, + "SIMD": 24, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingMaxPool_Batch_3": { + "Thresholding_rtl_3": { + "PE": 2, + "runtime_writeable_weights": 0, + "depth_trigger_uram": 0, + "depth_trigger_bram": 0 }, - "FMPadding_Batch_4": { - "SIMD": 4 + "StreamingMaxPool_hls_3": { + "PE": 2 }, - "ConvolutionInputGenerator1D_4": { - "SIMD": 32, - "ram_style": "auto" + "FMPadding_rtl_4": { + "SIMD": 2 }, - "MatrixVectorActivation_4": { - "PE": 2, - "SIMD": 96, + "ConvolutionInputGenerator_rtl_4": { + "SIMD": 2, + "parallel_window": 0, + "ram_style": "distributed" + }, + "MVAU_rtl_4": { + "PE": 4, + "SIMD": 24, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": 
"internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingMaxPool_Batch_4": { + "Thresholding_rtl_4": { + "PE": 1, + "runtime_writeable_weights": 0, + "depth_trigger_uram": 0, + "depth_trigger_bram": 0 }, - "FMPadding_Batch_5": { - "SIMD": 2 + "StreamingMaxPool_hls_4": { + "PE": 1 }, - "ConvolutionInputGenerator1D_5": { - "SIMD": 32, - "ram_style": "auto" + "FMPadding_rtl_5": { + "SIMD": 1 }, - "MatrixVectorActivation_5": { - "PE": 1, - "SIMD": 96, + "ConvolutionInputGenerator_rtl_5": { + "SIMD": 1, + "parallel_window": 0, + "ram_style": "distributed" + }, + "MVAU_rtl_5": { + "PE": 4, + "SIMD": 12, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingMaxPool_Batch_5": { + "Thresholding_rtl_5": { + "PE": 1, + "runtime_writeable_weights": 0, + "depth_trigger_uram": 0, + "depth_trigger_bram": 0 + }, + "StreamingMaxPool_hls_5": { + "PE": 1 }, - "FMPadding_Batch_6": { + "FMPadding_rtl_6": { "SIMD": 1 }, - "ConvolutionInputGenerator1D_6": { - "SIMD": 32, - "ram_style": "auto" + "ConvolutionInputGenerator_rtl_6": { + "SIMD": 1, + "parallel_window": 0, + "ram_style": "distributed" }, - "MatrixVectorActivation_6": { - "PE": 1, - "SIMD": 96, + "MVAU_rtl_6": { + "PE": 4, + "SIMD": 6, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "StreamingMaxPool_Batch_6": { + "Thresholding_rtl_6": { + "PE": 1, + "runtime_writeable_weights": 0, + "depth_trigger_uram": 0, + "depth_trigger_bram": 0 }, - "MatrixVectorActivation_7": { - "PE": 2, - "SIMD": 32, + "StreamingMaxPool_hls_6": { + "PE": 1 + }, + "MVAU_rtl_7": { + "PE": 4, + "SIMD": 4, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "MatrixVectorActivation_8": { + "Thresholding_rtl_7": { "PE": 1, - "SIMD": 32, + "runtime_writeable_weights": 0, + 
"depth_trigger_uram": 0, + "depth_trigger_bram": 0 + }, + "MVAU_rtl_8": { + "PE": 4, + "SIMD": 2, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 }, - "MatrixVectorActivation_9": { + "Thresholding_rtl_8": { "PE": 1, - "SIMD": 8, + "runtime_writeable_weights": 0, + "depth_trigger_uram": 0, + "depth_trigger_bram": 0 + }, + "MVAU_rtl_9": { + "PE": 4, + "SIMD": 1, "ram_style": "auto", - "mem_mode": "const", + "resType": "auto", + "mem_mode": "internal_decoupled", "runtime_writeable_weights": 0 + }, + "ChannelwiseOp_hls_0": { + "PE": 1, + "ram_style": "distributed" + }, + "LabelSelect_hls_0": { + "PE": 1 } } diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 60ac21d..1219b35 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -23,7 +23,8 @@ pipeline { "kws", "mobilenet-v1", "resnet50", - "vgg10-radioml"] + "vgg10-radioml", + "gtsrb"] createParallelBuilds(buildList) createReleaseArea(buildList) } diff --git a/finn_examples/models.py b/finn_examples/models.py index 8076124..bcacabf 100644 --- a/finn_examples/models.py +++ b/finn_examples/models.py @@ -67,6 +67,17 @@ "num_outputs": 1, } +_gtsrb_cnv_io_shape_dict = { + "idt": DataType["UINT8"], + "odt": DataType["INT16"], + "ishape_normal": (1, 32, 32, 3), + "oshape_normal": (1, 44), + "ishape_folded": (1, 1, 32, 32, 3, 1), + "oshape_folded": (1, 11, 4), + "ishape_packed": (1, 1, 32, 32, 3, 1), + "oshape_packed": (1, 11, 8), +} + _bincop_cnv_io_shape_dict = { "idt": [DataType["UINT8"]], "odt": [DataType["UINT8"]], @@ -179,24 +190,29 @@ def get_edge_or_pcie(): raise OSError("Platform is not supported.") -def find_bitfile(model_name, target_platform): - bitfile_exts = {"edge": "bit", "pcie": "xclbin"} - bitfile_ext = bitfile_exts[get_edge_or_pcie()] - bitfile_name = "%s.%s" % (model_name, bitfile_ext) - bitfile_candidates = [ - pk.resource_filename("finn_examples", "bitfiles/%s/%s" % (target_platform, bitfile_name)), - pk.resource_filename( - 
"finn_examples", - "bitfiles/bitfiles.zip.d/%s/%s" % (target_platform, bitfile_name), - ), - ] - for candidate in bitfile_candidates: - if os.path.isfile(candidate): - return candidate - raise Exception( - "Bitfile for model = %s target platform = %s not found. Looked in: %s" - % (model_name, target_platform, str(bitfile_candidates)) - ) +def find_bitfile(model_name, target_platform, bitfile_path): + if bitfile_path is not None: + return bitfile_path + else: + bitfile_exts = {"edge": "bit", "pcie": "xclbin"} + bitfile_ext = bitfile_exts[get_edge_or_pcie()] + bitfile_name = "%s.%s" % (model_name, bitfile_ext) + bitfile_candidates = [ + pk.resource_filename( + "finn_examples", "bitfiles/%s/%s" % (target_platform, bitfile_name) + ), + pk.resource_filename( + "finn_examples", + "bitfiles/bitfiles.zip.d/%s/%s" % (target_platform, bitfile_name), + ), + ] + for candidate in bitfile_candidates: + if os.path.isfile(candidate): + return candidate + raise Exception( + "Bitfile for model = %s target platform = %s not found. 
Looked in: %s" + % (model_name, target_platform, str(bitfile_candidates)) + ) def find_runtime_weights(model_name, target_platform): @@ -255,75 +271,75 @@ def resolve_target_platform(target_platform): return check_platform_is_valid(platform) -def kws_mlp(target_platform=None): +def kws_mlp(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "kwsmlp-w3a3" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) return FINNExampleOverlay(filename, driver_mode, _gscv2_mlp_io_shape_dict) -def tfc_w1a1_mnist(target_platform=None): +def tfc_w1a1_mnist(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "tfc-w1a1" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) return FINNExampleOverlay(filename, driver_mode, _mnist_fc_io_shape_dict) -def tfc_w1a2_mnist(target_platform=None): +def tfc_w1a2_mnist(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "tfc-w1a2" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) return FINNExampleOverlay(filename, driver_mode, _mnist_fc_io_shape_dict) -def tfc_w2a2_mnist(target_platform=None): +def tfc_w2a2_mnist(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "tfc-w2a2" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) return FINNExampleOverlay(filename, driver_mode, _mnist_fc_io_shape_dict) -def cnv_w1a1_cifar10(target_platform=None): +def cnv_w1a1_cifar10(target_platform=None, 
bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "cnv-w1a1" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) return FINNExampleOverlay(filename, driver_mode, _cifar10_cnv_io_shape_dict) -def cnv_w1a2_cifar10(target_platform=None): +def cnv_w1a2_cifar10(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "cnv-w1a2" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) return FINNExampleOverlay(filename, driver_mode, _cifar10_cnv_io_shape_dict) -def cnv_w2a2_cifar10(target_platform=None): +def cnv_w2a2_cifar10(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "cnv-w2a2" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) return FINNExampleOverlay(filename, driver_mode, _cifar10_cnv_io_shape_dict) -def bincop_cnv(target_platform=None): +def bincop_cnv(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "bincop-cnv" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) return FINNExampleOverlay(filename, driver_mode, _bincop_cnv_io_shape_dict) -def mobilenetv1_w4a4_imagenet(target_platform=None): +def mobilenetv1_w4a4_imagenet(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "mobilenetv1-w4a4" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) if target_platform in 
["ZCU104"]: runtime_weight_dir = find_runtime_weights(model_name, target_platform) else: @@ -339,11 +355,11 @@ def mobilenetv1_w4a4_imagenet(target_platform=None): ) -def resnet50_w1a2_imagenet(target_platform=None): +def resnet50_w1a2_imagenet(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "resnet50-w1a2" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) runtime_weight_dir = find_runtime_weights(model_name, target_platform) return FINNExampleOverlay( filename, @@ -353,11 +369,11 @@ def resnet50_w1a2_imagenet(target_platform=None): ) -def vgg10_w4a4_radioml(target_platform=None): +def vgg10_w4a4_radioml(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "radioml_w4a4_small_tidy" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) fclk_mhz = 250.0 return FINNExampleOverlay( filename, @@ -367,12 +383,20 @@ def vgg10_w4a4_radioml(target_platform=None): ) -def mlp_w2a2_unsw_nb15(target_platform=None): +def mlp_w2a2_unsw_nb15(target_platform=None, bitfile_path=None): target_platform = resolve_target_platform(target_platform) driver_mode = get_driver_mode() model_name = "unsw_nb15-mlp-w2a2" - filename = find_bitfile(model_name, target_platform) + filename = find_bitfile(model_name, target_platform, bitfile_path) fclk_mhz = 100.0 return FINNExampleOverlay( filename, driver_mode, _unsw_nb15_mlp_io_shape_dict, fclk_mhz=fclk_mhz ) + + +def cnv_w1a1_gtsrb(target_platform=None, bitfile_path=None): + target_platform = resolve_target_platform(target_platform) + driver_mode = get_driver_mode() + model_name = "cnv-gtsrb-w1a1" + filename = find_bitfile(model_name, target_platform, bitfile_path) + return FINNExampleOverlay(filename, driver_mode, 
_gtsrb_cnv_io_shape_dict) diff --git a/finn_examples/notebooks/2_imagenet_with_cnns.ipynb b/finn_examples/notebooks/2_imagenet_with_cnns.ipynb index a30607e..3ed5f3d 100755 --- a/finn_examples/notebooks/2_imagenet_with_cnns.ipynb +++ b/finn_examples/notebooks/2_imagenet_with_cnns.ipynb @@ -21,46 +21,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/javascript": [ - "\n", - "try {\n", - "require(['notebook/js/codecell'], function(codecell) {\n", - " codecell.CodeCell.options_default.highlight_modes[\n", - " 'magic_text/x-csrc'] = {'reg':[/^%%microblaze/]};\n", - " Jupyter.notebook.events.one('kernel_ready.Kernel', function(){\n", - " Jupyter.notebook.get_cells().map(function(cell){\n", - " if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ;\n", - " });\n", - "});\n", - "} catch (e) {};\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/javascript": [ - "\n", - "try {\n", - "require(['notebook/js/codecell'], function(codecell) {\n", - " codecell.CodeCell.options_default.highlight_modes[\n", - " 'magic_text/x-csrc'] = {'reg':[/^%%pybind11/]};\n", - " Jupyter.notebook.events.one('kernel_ready.Kernel', function(){\n", - " Jupyter.notebook.get_cells().map(function(cell){\n", - " if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ;\n", - " });\n", - "});\n", - "} catch (e) {};\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "#mobilenetv1_w4a4 is available on U250 and ZCU104\n", "accel = models.mobilenetv1_w4a4_imagenet()\n", diff --git a/finn_examples/notebooks/4_keyword_spotting.ipynb b/finn_examples/notebooks/4_keyword_spotting.ipynb index 914b372..7f903b0 100644 --- a/finn_examples/notebooks/4_keyword_spotting.ipynb +++ b/finn_examples/notebooks/4_keyword_spotting.ipynb @@ -18,7 +18,7 @@ "\n", "\n", "\n", - "A more in-depth explenation of MFCC features can be found on wikipedia: 
https://en.wikipedia.org/wiki/Mel-frequency_cepstrum\n", + "A more in-depth explanation of MFCC features can be found on wikipedia: https://en.wikipedia.org/wiki/Mel-frequency_cepstrum\n", "\n", "For this concrete case we used the python library [python_speech_features](https://github.com/jameslyons/python_speech_features) to produce these features.\n", "\n", @@ -194,7 +194,7 @@ "### Using the built-in performance benchmark\n", "\n", "To measure the performance of indivudual components of the PYNQ stack and the FINN accelerator on the FPGA,\n", - "FINN comes with a buit-in benchmark. This benchmark computes the throughput of the FINN accelerator as seen on the FPGA." + "FINN comes with a built-in benchmark. This benchmark computes the throughput of the FINN accelerator as seen on the FPGA." ] }, { diff --git a/finn_examples/notebooks/7_traffic_sign_recognition_gtsrb.ipynb b/finn_examples/notebooks/7_traffic_sign_recognition_gtsrb.ipynb new file mode 100644 index 0000000..8d938d5 --- /dev/null +++ b/finn_examples/notebooks/7_traffic_sign_recognition_gtsrb.ipynb @@ -0,0 +1,280 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Initialize the accelerator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from finn_examples import models\n", + "accel = models.cnv_w1a1_gtsrb()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Expected input shape and datatype: %s %s\" % (str(accel.ishape_normal), str(accel.idt)))\n", + "print(\"Expected output shape and datatype: %s %s\" % (str(accel.oshape_normal), str(accel.odt)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load the GTSRB dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from os import path\n", + "import urllib.request\n", + "import numpy as np\n",
+ "dataset_local = \"/tmp/traffic-signs-data.zip\"\n", + "if not path.isfile(dataset_local):\n", + " dataset_url = \"https://d17h27t6h515a5.cloudfront.net/topher/2017/February/5898cd6f_traffic-signs-data/traffic-signs-data.zip\"\n", + " urllib.request.urlretrieve(dataset_url, dataset_local)\n", + " ! unzip {dataset_local} -d /tmp\n", + "\n", + "dataset_dict = np.load(\"/tmp/test.p\", allow_pickle=True)\n", + "testx = dataset_dict[\"features\"]\n", + "testy = dataset_dict[\"labels\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gtsrb_classes = [\n", + " '20 Km/h', \n", + " '30 Km/h', \n", + " '50 Km/h', \n", + " '60 Km/h', \n", + " '70 Km/h', \n", + " '80 Km/h', \n", + " 'End 80 Km/h', \n", + " '100 Km/h', \n", + " '120 Km/h', \n", + " 'No overtaking', \n", + " 'No overtaking for large trucks', \n", + " 'Priority crossroad', \n", + " 'Priority road', \n", + " 'Give way', \n", + " 'Stop', \n", + " 'No vehicles', \n", + " 'Prohibited for vehicles with a permitted gross weight over 3.5t including their trailers, and for tractors except passenger cars and buses', \n", + " 'No entry for vehicular traffic', \n", + " 'Danger Ahead', \n", + " 'Bend to left', \n", + " 'Bend to right', \n", + " 'Double bend (first to left)', \n", + " 'Uneven road', \n", + " 'Road slippery when wet or dirty', \n", + " 'Road narrows (right)', \n", + " 'Road works', \n", + " 'Traffic signals', \n", + " 'Pedestrians in road ahead', \n", + " 'Children crossing ahead', \n", + " 'Bicycles prohibited', \n", + " 'Risk of snow or ice', \n", + " 'Wild animals', \n", + " 'End of all speed and overtaking restrictions', \n", + " 'Turn right ahead', \n", + " 'Turn left ahead', \n", + " 'Ahead only', \n", + " 'Ahead or right only', \n", + " 'Ahead or left only', \n", + " 'Pass by on right', \n", + " 'Pass by on left', \n", + " 'Roundabout', \n", + " 'End of no-overtaking zone', \n", + " 'End of no-overtaking zone for vehicles with a permitted gross weight 
over 3.5t including their trailers, and for tractors except passenger cars and buses', \n", + " 'Not a roadsign'\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Dataset shape is \" + str(testx.shape))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Classify a single image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_single_x = testx[0]\n", + "test_single_y = testy[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "from matplotlib import pyplot as plt\n", + "\n", + "plt.imshow(test_single_x)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Expected class is:\\n%s\" % (gtsrb_classes[test_single_y]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "accel_y = accel.execute(test_single_x.reshape(accel.ishape_normal))\n", + "print(\"Accelerator result is:\\n%s\" % (gtsrb_classes[np.argmax(accel_y)]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Validate accuracy on GTSRB test set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "batch_size = 421\n", + "total = testx.shape[0]\n", + "accel.batch_size = batch_size\n", + "n_batches = int(total / batch_size)\n", + "\n", + "batch_imgs = testx.reshape(n_batches, batch_size, -1)\n", + "batch_labels = testy.reshape(n_batches, batch_size)\n", + "obuf_normal = np.empty_like(accel.obuf_packed_device)\n", + "print(\"Ready to run validation, test images tensor has shape %s\" % str(batch_imgs.shape))\n", + "print(\"Accelerator buffer shapes are %s for input, %s for output\" % (str(accel.ishape_packed), 
str(accel.oshape_packed)) )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ok = 0\n", + "nok = 0\n", + "for i in range(n_batches):\n", + " ibuf_normal = batch_imgs[i].reshape(accel.ibuf_packed_device.shape)\n", + " exp = batch_labels[i]\n", + " # to avoid the slower software implementation during data unpacking,\n", + " # we make manual calls to buffer copies and execute_on_buffers\n", + " # all this could have been replaced with accel.execute() otherwise\n", + " accel.copy_input_data_to_device(ibuf_normal)\n", + " accel.execute_on_buffers()\n", + " obuf_normal = np.empty_like(accel.obuf_packed_device)\n", + " accel.copy_output_data_from_device(obuf_normal)\n", + " # this line provides fast unpacking using numpy primitives\n", + " # instead of using FINN's unpack functions\n", + " quick_out = obuf_normal.view(np.uint16).reshape(accel.batch_size, 44)\n", + " obuf_argmax = np.argmax(quick_out, axis=-1)\n", + " ok_batch = (obuf_argmax == exp).sum()\n", + " nok_batch = (batch_size-ok_batch)\n", + " ok += ok_batch\n", + " nok += nok_batch\n", + " \n", + " print(\"batch %d / %d : total OK %d NOK %d\" % (i+1, n_batches, ok, nok))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "acc = 100.0 * ok / (total)\n", + "print(\"Final accuracy: {}%\".format(acc))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Run built-in benchmarks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "accel.batch_size = 100\n", + "accel.throughput_test()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}