diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000000..88ae4659c8
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,46 @@
+---
+name: Bug report
+about: Something isn't working as expected
+title: ''
+labels: bug
+assignees: ''
+
+---
+
+
+## Prerequisites
+Please make sure to check off these prerequisites before submitting a bug report.
+- [ ] Test that the bug appears on the current version of the dev branch. Make sure to include the commit hash of the commit you checked out.
+- [ ] Check the currently open issues to make sure the bug hasn't already been reported.
+- [ ] If there are steps to reproduce the problem, make sure to write them down below.
+- [ ] If relevant, please include the ONNX files that were created directly before and/or after the bug occurred.
+
+## Quick summary
+Please give a brief description of the bug.
+
+## Details
+Please fill in the following sections to describe the bug as accurately as possible.
+
+### Steps to Reproduce
+Add what needs to be done to reproduce the bug. Add code examples where useful,
+and make sure to include the resulting ONNX files and the commit hash you are working on.
+
+1. Clone the FINN repository
+2. Checkout the dev branch, with commit hash: [...]
+3. Start the docker container with the command: [...]
+4. Run transformation [...] on ONNX file [...] or run the dataflow builder with the following settings: [...]
+5. [Further steps ...]
+
+### Expected behavior
+Please add a brief description of what you expected to happen.
+
+### Actual behavior
+Describe what actually happens instead.
+
+## Optional
+
+### Possible fix
+If you already know where the issue stems from, or if you have a hint, please let us know.
+
+### Additional context
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000000..57e3d54952
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,8 @@
+blank_issues_enabled: false
+contact_links:
+ - name: Getting started with FINN
+ url: https://finn.readthedocs.io/en/latest/getting_started.html
+ about: Documentation about how to get up and running with FINN.
+ - name: Ask for help and get in touch with the community
+ url: https://gitter.im/xilinx-finn/community
+   about: Check out our Gitter channel if you have a question about FINN or a general problem that is likely not a bug.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000000..dfd71f4308
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,27 @@
+---
+name: Feature request
+about: Suggest an idea for FINN
+title: ''
+labels: enhancement
+assignees: ''
+
+---
+
+## Prerequisites
+Please make sure to check that the idea is not already being worked on
+by looking at the currently open issues and the [project Kanbans](https://github.com/Xilinx/finn/projects).
+
+Even if an idea is already being worked on, you can still create a feature request
+if you would like to open a discussion about the feature or want to contribute to it.
+
+## Details
+Please add to the following sections to describe the feature as accurately as possible.
+
+### New behavior
+Please add a brief description of what you would like to happen in FINN in the future.
+
+### Motivation
+Please tell us why this feature is important to the FINN community.
+
+### Parts of FINN affected
+Please describe which parts of FINN would be affected by this feature.
diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
new file mode 100644
index 0000000000..4374111f22
--- /dev/null
+++ b/.github/workflows/docker-image.yml
@@ -0,0 +1,33 @@
+name: DockerImage
+
+on:
+ push:
+ branches:
+ - 'dev'
+
+jobs:
+ docker:
+ runs-on: ubuntu-18.04
+ steps:
+ -
+ name: checkout
+ uses: actions/checkout@v2
+ -
+ name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v1
+ -
+ name: Login to DockerHub
+ uses: docker/login-action@v1
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+    - name: Build and push
+      id: docker_build
+      uses: docker/build-push-action@v2
+ with:
+ file: docker/Dockerfile.finn
+ context: .
+ push: true
+ tags: maltanar/finn:dev_latest
+ -
+ name: Image digest
+ run: echo ${{ steps.docker_build.outputs.digest }}
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
new file mode 100644
index 0000000000..2fbb9265be
--- /dev/null
+++ b/.github/workflows/pre-commit.yml
@@ -0,0 +1,22 @@
+name: Pre-commit
+
+on:
+ pull_request:
+ branches: [ main, dev ]
+ push:
+ branches: [ main, dev ]
+
+jobs:
+ lint:
+ name: Lint PR or Push to DEV
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v2
+
+ - name: Setup Python
+ uses: actions/setup-python@v2
+
+ - name: Run Lint
+ uses: pre-commit/action@v2.0.0
diff --git a/.github/workflows/quicktest-dev-pr.yml b/.github/workflows/quicktest-dev-pr.yml
index 051fd506ca..80ac0b61e6 100644
--- a/.github/workflows/quicktest-dev-pr.yml
+++ b/.github/workflows/quicktest-dev-pr.yml
@@ -17,7 +17,37 @@ jobs:
- name: checkout
uses: actions/checkout@v2
+ - name: set up Docker Buildx
+ uses: docker/setup-buildx-action@v1
+
+ - name: cache Docker layers
+ uses: actions/cache@v2
+ with:
+ path: /tmp/.buildx-cache
+ key: ${{ runner.os }}-buildx-${{ github.sha }}
+ restore-keys: |
+ ${{ runner.os }}-buildx-
+
+ - name: Build and push
+ uses: docker/build-push-action@v2
+ with:
+ file: docker/Dockerfile.finn
+ context: .
+ push: false
+ load: true
+ tags: finn_gha
+ cache-from: type=local,src=/tmp/.buildx-cache
+ cache-to: type=local,dest=/tmp/.buildx-cache-new
+ -
+ # Temp fix
+ # https://github.com/docker/build-push-action/issues/252
+ # https://github.com/moby/buildkit/issues/1896
+ name: Move cache
+ run: |
+ rm -rf /tmp/.buildx-cache
+ mv /tmp/.buildx-cache-new /tmp/.buildx-cache
+
+
- name: DockerRunQuicktest
run: |
- docker build -t finn_gha -f docker/Dockerfile.finn_ci --build-arg BUILD_PATH=/tmp/finn_gha .
docker run --init --hostname finn_gha -v $(pwd):/workspace/finn -e FINN_BUILD_DIR=/tmp/finn_gha -e FINN_INST_NAME=finn_gha finn_gha quicktest.sh
diff --git a/.isort.cfg b/.isort.cfg
new file mode 100644
index 0000000000..6cfe1c8919
--- /dev/null
+++ b/.isort.cfg
@@ -0,0 +1,11 @@
+[settings]
+line_length=88
+indent=' '
+skip=.tox,.venv,build,dist
+known_standard_library=setuptools,pkg_resources
+known_test=pytest
+known_first_party=finn
+sections=FUTURE,STDLIB,TEST,THIRDPARTY,FIRSTPARTY,LOCALFOLDER
+default_section=THIRDPARTY
+multi_line_output=3
+profile=black
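
Editorial note: for illustration, a module's imports would be grouped under this configuration roughly as follows (a hypothetical sketch; the module names are placeholders, except ``finn`` itself):

```python
# Hypothetical module showing the section order configured above:
# FUTURE, STDLIB, TEST, THIRDPARTY, FIRSTPARTY, LOCALFOLDER
from __future__ import annotations  # FUTURE

import os  # STDLIB
import setuptools  # forced into STDLIB via known_standard_library

import pytest  # TEST, a custom section defined via known_test

import numpy as np  # THIRDPARTY, the default section

import finn.core.onnx_exec as oxe  # FIRSTPARTY via known_first_party

from .local_helpers import do_thing  # LOCALFOLDER (hypothetical module)
```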
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c513c5493d..143514b36b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -27,28 +27,44 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
exclude: '^docs/conf.py'
+
default_language_version:
python: python3
+
repos:
-# black
-- repo: https://github.com/ambv/black
- rev: stable
- hooks:
- - id: black
- language_version: python3
- repo: git://github.com/pre-commit/pre-commit-hooks
- rev: v2.2.3
+ rev: v3.2.0
hooks:
- id: trailing-whitespace
+ exclude: '\.dat$'
+ - id: check-added-large-files
- id: check-ast
- id: check-json
- id: check-merge-conflict
- id: check-xml
- id: check-yaml
- id: debug-statements
+ exclude: '^src/finn/builder/build_dataflow.py$'
- id: end-of-file-fixer
- id: requirements-txt-fixer
- id: mixed-line-ending
args: ['--fix=no']
+
+- repo: git://github.com/PyCQA/isort
+ rev: 5.5.3
+ hooks:
+ - id: isort
+
+- repo: git://github.com/psf/black
+ rev: stable
+ hooks:
+ - id: black
+ language_version: python3
+
+- repo: https://gitlab.com/pycqa/flake8
+ rev: 3.8.3
+ hooks:
- id: flake8
- args: ['--max-line-length=88'] # default of Black
+ # black-compatible flake-8 config
+ args: ['--max-line-length=88', # black default
+ '--extend-ignore=E203'] # E203 is not PEP8 compliant
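
Editorial note: the ``E203`` exception is needed because Black formats slice bounds with a space before the colon, which flake8 would otherwise flag; a minimal illustration:

```python
# Black produces slices like these for complex bounds; flake8's E203
# ("whitespace before ':'") would reject them unless it is ignored.
x = list(range(10))
mid = len(x) // 2
left = x[: mid - 1]   # space after "[" and before ":" is Black style
right = x[mid + 1 :]  # E203 would flag the space before ":" here
```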
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 0000000000..3601fcdccf
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,43 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# .readthedocs.yaml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+version: 2
+
+sphinx:
+ configuration: docs/finn/conf.py
+
+python:
+ version: 3.7
+ install:
+ - method: pip
+ path: .
+ extra_requirements:
+ - docs
diff --git a/AUTHORS.rst b/AUTHORS.rst
index 533ed62e1d..1d42d35a3b 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -13,3 +13,12 @@ Contributors
* Suranga Mahesh (@surangamh)
* Peter Lehnhardt (@pete-lennart)
* Neil Kim Nielsen (@neilkimn)
+* Jon Ander Lezeta (@jalezeta)
+* John Terry (@jterry-x)
+* Alina Vasilciuc (@alinavalinav)
+* Alessandro Pappalardo (@volcacius)
+* Giuseppe Franco (@Giuseppe5)
+* Syed Asad Alam (@asadalam)
+* Javier Duarte (@jmduarte)
+* Uma Maheshwari (@umav1511)
+* José Rosa (@pinxau1000)
diff --git a/README.md b/README.md
index 10ac25cb8f..f36eac3a91 100644
--- a/README.md
+++ b/README.md
@@ -24,9 +24,9 @@ Please see the [Getting Started](https://finn.readthedocs.io/en/latest/getting_s
## What's New in FINN?
+* **2021-11-05:** v0.7 is released, introducing QONNX support, three new example networks and many other improvements. Read more on the [v0.7 release blog post](https://xilinx.github.io/finn//2021/11/05/finn-v07-is-released.html).
* **2021-06-15:** v0.6 is released, with ResNet-50 on U250 and ZCU104 MobileNet-v1 in finn-examples showcasing new features plus a lot more. Read more on the [v0.6 release blog post](https://xilinx.github.io/finn//2021/06/15/finn-v06-is-released.html).
* **2020-12-17:** v0.5b (beta) is released, with a new [examples repo](https://github.com/Xilinx/finn-examples) including MobileNet-v1. Read more on the release blog post.
-* **2020-09-21:** v0.4b (beta) is released. Read more on the release blog post .
## Documentation
diff --git a/custom_hls/lookup.hpp b/custom_hls/lookup.hpp
new file mode 100644
index 0000000000..3001f6613e
--- /dev/null
+++ b/custom_hls/lookup.hpp
@@ -0,0 +1,60 @@
+/******************************************************************************
+* Copyright (c) 2021, Xilinx
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+*
+* * Redistributions of source code must retain the above copyright notice, this
+* list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above copyright notice,
+* this list of conditions and the following disclaimer in the documentation
+* and/or other materials provided with the distribution.
+*
+* * Neither the name of FINN nor the names of its
+* contributors may be used to endorse or promote products derived from
+* this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ *******************************************************************************/
+
+#include <ap_int.h>
+#include <hls_stream.h>
+
+#ifndef LOOKUP_HPP
+#define LOOKUP_HPP
+
+template <
+ unsigned NumEmbeddings,
+ unsigned EmbeddingDim,
+ unsigned NumInputs,
+ typename InputType,
+ typename EmbeddingType,
+    typename InputPackedType = ap_uint<InputType::width>,
+    typename OutputPackedType = ap_uint<EmbeddingType::width * EmbeddingDim>>
+void StreamingLookup(
+    hls::stream<InputPackedType> &in,
+    hls::stream<OutputPackedType> &out,
+ OutputPackedType const embeddings[NumEmbeddings]
+) {
+ for(unsigned i = 0; i < NumInputs; i++) {
+#pragma HLS PIPELINE II=1
+ InputPackedType inPackedElem = in.read();
+        InputType inElem = *(reinterpret_cast<InputType*>(&inPackedElem));
+ OutputPackedType outElem = embeddings[inElem];
+ out.write(outElem);
+ }
+}
+
+#endif
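
Editorial note: to make the kernel's semantics concrete, here is a minimal numpy model of what ``StreamingLookup`` computes (an illustration only, with made-up sizes; the HLS version additionally packs each embedding row into a single wide output word):

```python
import numpy as np

def streaming_lookup_model(indices, embeddings):
    # indices: integer array of shape (NumInputs,)
    # embeddings: table of shape (NumEmbeddings, EmbeddingDim)
    # Each input element selects one embedding row, streamed out in order.
    return embeddings[indices]

# Hypothetical sizes: 8 embeddings of dimension 4, 3 lookups.
table = np.arange(32).reshape(8, 4)
print(streaming_lookup_model(np.array([0, 5, 2]), table))
```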
diff --git a/docker/Dockerfile.finn_dev b/docker/Dockerfile.finn
similarity index 55%
rename from docker/Dockerfile.finn_dev
rename to docker/Dockerfile.finn
index 1feeb51a53..4d03e2fbb5 100644
--- a/docker/Dockerfile.finn_dev
+++ b/docker/Dockerfile.finn
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, Xilinx
+# Copyright (c) 2021, Xilinx
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -28,11 +28,9 @@
FROM pytorch/pytorch:1.7.1-cuda11.0-cudnn8-runtime
LABEL maintainer="Yaman Umuroglu "
-ARG GID
-ARG GNAME
-ARG UNAME
-ARG UID
-ARG PASSWD
+
+# XRT version to be installed
+ARG XRT_DEB_VERSION="xrt_202010.2.7.766_18.04-amd64-xrt"
WORKDIR /workspace
@@ -41,7 +39,6 @@ WORKDIR /workspace
ENV TZ="Europe/Dublin"
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
-
RUN apt-get update
RUN apt-get -y upgrade
RUN apt-get install -y build-essential
@@ -56,14 +53,24 @@ RUN apt-get install -y rsync
RUN apt-get install -y git
RUN apt-get install -y sshpass
RUN apt-get install -y wget
+RUN apt-get install -y sudo
RUN apt-get install -y unzip
RUN apt-get install -y zip
RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config
+# install XRT
+RUN wget https://www.xilinx.com/bin/public/openDownload?filename=$XRT_DEB_VERSION.deb -O /tmp/$XRT_DEB_VERSION.deb
+RUN apt install -y /tmp/$XRT_DEB_VERSION.deb
+RUN rm /tmp/$XRT_DEB_VERSION.deb
+
+# versioned Python package requirements for FINN compiler
+# these are given in requirements.txt
COPY requirements.txt .
RUN pip install -r requirements.txt
RUN rm requirements.txt
+# extra Python package dependencies (for testing and interaction)
RUN pip install pygments==2.4.1
+RUN pip install ipykernel==5.5.5
RUN pip install jupyter==1.0.0
RUN pip install matplotlib==3.3.1 --ignore-installed
RUN pip install pytest-dependency==0.5.1
@@ -71,67 +78,80 @@ RUN pip install sphinx==3.1.2
RUN pip install sphinx_rtd_theme==0.5.0
RUN pip install pytest-xdist==2.0.0
RUN pip install pytest-parallel==0.1.0
-RUN pip install netron>=4.7.9
+RUN pip install "netron>=5.0.0"
RUN pip install pandas==1.1.5
RUN pip install scikit-learn==0.24.1
RUN pip install tqdm==4.31.1
RUN pip install -e git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading
+# git-based Python repo dependencies
+# these are installed in editable mode for easier co-development
+ARG FINN_BASE_COMMIT="e8facdd719b55839cca46da2cc4f4a4a372afb41"
+ARG QONNX_COMMIT="9f9eff95227cc57aadc6eafcbd44b7acda89f067"
+ARG FINN_EXP_COMMIT="af6102769226b82b639f243dc36f065340991513"
+ARG BREVITAS_COMMIT="a5b71d6de1389d3e7db898fef72e014842670f03"
+ARG PYVERILATOR_COMMIT="0c3eb9343500fc1352a02c020a736c8c2db47e8e"
+ARG CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4"
+ARG HLSLIB_COMMIT="966d17d3fddd801927b2167627d23a9a15ed1461"
+ARG OMX_COMMIT="1dfc4aa2f2895632742cd5751520c6b472feb74e"
+ARG AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b"
-# switch user
-RUN groupadd -g $GID $GNAME
-RUN useradd -M -u $UID $UNAME -g $GNAME
-RUN usermod -aG sudo $UNAME
-RUN echo "$UNAME:$PASSWD" | chpasswd
-RUN echo "root:$PASSWD" | chpasswd
-RUN chown -R $UNAME:$GNAME /workspace
-RUN ln -s /workspace /home/$UNAME
-USER $UNAME
-
-
-# cloning dependency repos (as user)
# finn-base
RUN git clone https://github.com/Xilinx/finn-base.git /workspace/finn-base
+RUN git -C /workspace/finn-base checkout $FINN_BASE_COMMIT
+RUN pip install -e /workspace/finn-base
+# Install qonnx without dependencies, currently its only dependency is finn-base
+RUN git clone https://github.com/fastmachinelearning/qonnx.git /workspace/qonnx
+RUN git -C /workspace/qonnx checkout $QONNX_COMMIT
+RUN pip install --no-dependencies -e /workspace/qonnx
# finn-experimental
RUN git clone https://github.com/Xilinx/finn-experimental.git /workspace/finn-experimental
-# Brevitas
+RUN git -C /workspace/finn-experimental checkout $FINN_EXP_COMMIT
+RUN pip install -e /workspace/finn-experimental
+# brevitas
RUN git clone https://github.com/Xilinx/brevitas.git /workspace/brevitas
-# CNPY
+RUN git -C /workspace/brevitas checkout $BREVITAS_COMMIT
+RUN pip install -e /workspace/brevitas
+# pyverilator
+RUN git clone https://github.com/maltanar/pyverilator.git /workspace/pyverilator
+RUN git -C /workspace/pyverilator checkout $PYVERILATOR_COMMIT
+RUN pip install -e /workspace/pyverilator
+# other git-based dependencies (non-Python)
+# cnpy
RUN git clone https://github.com/rogersce/cnpy.git /workspace/cnpy
-# FINN hlslib
+RUN git -C /workspace/cnpy checkout $CNPY_COMMIT
+# finn-hlslib
RUN git clone https://github.com/Xilinx/finn-hlslib.git /workspace/finn-hlslib
-# PyVerilator
-RUN git clone https://github.com/maltanar/pyverilator /workspace/pyverilator
+RUN git -C /workspace/finn-hlslib checkout $HLSLIB_COMMIT
# oh-my-xilinx
RUN git clone https://bitbucket.org/maltanar/oh-my-xilinx.git /workspace/oh-my-xilinx
+RUN git -C /workspace/oh-my-xilinx checkout $OMX_COMMIT
+# board files
+RUN cd /tmp; \
+ wget -q https://github.com/cathalmccabe/pynq-z1_board_files/raw/master/pynq-z1.zip; \
+ wget -q https://dpoauwgwqsy2x.cloudfront.net/Download/pynq-z2.zip; \
+ unzip -q pynq-z1.zip; \
+ unzip -q pynq-z2.zip; \
+ mkdir /workspace/board_files; \
+ mv pynq-z1/ /workspace/board_files/; \
+ mv pynq-z2/ /workspace/board_files/; \
+ rm pynq-z1.zip; \
+ rm pynq-z2.zip; \
+ git clone https://github.com/Avnet/bdf.git /workspace/avnet-bdf; \
+ git -C /workspace/avnet-bdf checkout $AVNET_BDF_COMMIT; \
+ mv /workspace/avnet-bdf/* /workspace/board_files/;
+
-# for this developer-oriented Docker container we assume the FINN repo is cloned and mounted from the host
-# at /workspace/finn -- see run-docker.sh for an example of how to do this.
-ENV PATH "${PATH}:/workspace/oh-my-xilinx:/home/$UNAME/.local/bin"
+# extra environment variables for FINN compiler
+ENV VIVADO_IP_CACHE "/tmp/vivado_ip_cache"
+ENV PATH "${PATH}:/workspace/oh-my-xilinx"
ENV OHMYXILINX "/workspace/oh-my-xilinx"
-WORKDIR /home/$UNAME/finn
-RUN echo "PS1='\[\033[1;36m\]\u\[\033[1;31m\]@\[\033[1;32m\]\h:\[\033[1;35m\]\w\[\033[1;31m\]\$\[\033[0m\] '" >> /home/$UNAME/.bashrc
-RUN echo "source \$VIVADO_PATH/settings64.sh" >> /home/$UNAME/.bashrc
+WORKDIR /workspace/finn
-# copy entrypoint script
-USER root
COPY docker/finn_entrypoint.sh /usr/local/bin/
COPY docker/quicktest.sh /usr/local/bin/
RUN chmod 755 /usr/local/bin/finn_entrypoint.sh
RUN chmod 755 /usr/local/bin/quicktest.sh
-# install vitis deps if required
-ARG INSTALL_XRT_DEPS
-ARG XRT_DEB_VERSION
-RUN if [ "$INSTALL_XRT_DEPS" = "1" ] ; then \
- echo "Installing XRT: $XRT_DEB_VERSION"; \
- wget https://www.xilinx.com/bin/public/openDownload?filename=$XRT_DEB_VERSION.deb -O /tmp/$XRT_DEB_VERSION.deb; \
- apt install -y /tmp/$XRT_DEB_VERSION.deb; \
- else \
- echo "Skipping installation of XRT dependencies"; \
- fi
-
-USER $UNAME
-
ENTRYPOINT ["finn_entrypoint.sh"]
CMD ["bash"]
diff --git a/docker/Dockerfile.finn_ci b/docker/Dockerfile.finn_ci
deleted file mode 100644
index c424891969..0000000000
--- a/docker/Dockerfile.finn_ci
+++ /dev/null
@@ -1,107 +0,0 @@
-# Copyright (c) 2020, Xilinx
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# * Redistributions of source code must retain the above copyright notice, this
-# list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# * Neither the name of FINN nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-FROM pytorch/pytorch:1.1.0-cuda10.0-cudnn7.5-devel
-LABEL maintainer="Yaman Umuroglu "
-
-WORKDIR /workspace
-
-# some Vitis deps require a timezone to be specified, which hangs in Docker
-# use workaround from https://grigorkh.medium.com/fix-tzdata-hangs-docker-image-build-cdb52cc3360d
-ENV TZ="Europe/Dublin"
-RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
-
-RUN apt-get update
-RUN apt-get -y upgrade
-RUN apt-get install -y build-essential
-RUN apt-get install -y libglib2.0-0
-RUN apt-get install -y libsm6
-RUN apt-get install -y libxext6
-RUN apt-get install -y libxrender-dev
-RUN apt-get install -y verilator
-RUN apt-get install -y nano
-RUN apt-get install -y zsh
-RUN apt-get install -y rsync
-RUN apt-get install -y git
-RUN apt-get install -y sshpass
-RUN apt-get install -y wget
-RUN apt-get install -y unzip
-RUN apt-get install -y zip
-RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config
-
-# XRT deps
-# install vitis deps if required
-ARG INSTALL_XRT_DEPS="0"
-ARG XRT_DEB_VERSION="xrt_202010.2.7.766_18.04-amd64-xrt"
-RUN if [ "$INSTALL_XRT_DEPS" = "1" ] ; then \
- echo "Installing XRT: $XRT_DEB_VERSION"; \
- wget https://www.xilinx.com/bin/public/openDownload?filename=$XRT_DEB_VERSION.deb -O /tmp/$XRT_DEB_VERSION.deb; \
- apt install -y /tmp/$XRT_DEB_VERSION.deb; \
- else \
- echo "Skipping installation of XRT dependencies"; \
- fi
-
-# cloning dependency repos
-# finn-base
-RUN git clone https://github.com/Xilinx/finn-base.git /workspace/finn-base
-# finn-experimental
-RUN git clone https://github.com/Xilinx/finn-experimental.git /workspace/finn-experimental
-# Brevitas
-RUN git clone https://github.com/Xilinx/brevitas.git /workspace/brevitas
-# CNPY
-RUN git clone https://github.com/rogersce/cnpy.git /workspace/cnpy
-# FINN hlslib
-RUN git clone https://github.com/Xilinx/finn-hlslib.git /workspace/finn-hlslib
-# PyVerilator
-RUN git clone https://github.com/maltanar/pyverilator /workspace/pyverilator
-# oh-my-xilinx
-RUN git clone https://bitbucket.org/maltanar/oh-my-xilinx.git /workspace/oh-my-xilinx
-
-COPY requirements.txt .
-RUN pip install -r requirements.txt
-RUN rm requirements.txt
-RUN pip install pytest-dependency
-RUN pip install pytest-xdist
-RUN pip install pytest-parallel
-RUN pip install -e git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading
-
-ENV VIVADO_IP_CACHE "/tmp/vivado_ip_cache"
-ENV PATH "${PATH}:/workspace/oh-my-xilinx"
-ENV OHMYXILINX "/workspace/oh-my-xilinx"
-
-# colorful terminal output
-RUN echo "PS1='\[\033[1;36m\]\u\[\033[1;31m\]@\[\033[1;32m\]\h:\[\033[1;35m\]\w\[\033[1;31m\]\$\[\033[0m\] '" >> /root/.bashrc
-
-WORKDIR /workspace/finn
-
-COPY docker/finn_entrypoint.sh /usr/local/bin/
-COPY docker/quicktest.sh /usr/local/bin/
-RUN chmod 755 /usr/local/bin/finn_entrypoint.sh
-RUN chmod 755 /usr/local/bin/quicktest.sh
-ENTRYPOINT ["finn_entrypoint.sh"]
-CMD ["bash"]
diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh
index 7edb3f9a9e..a2312d025b 100644
--- a/docker/finn_entrypoint.sh
+++ b/docker/finn_entrypoint.sh
@@ -1,12 +1,48 @@
#!/bin/bash
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
-export SHELL=/bin/bash
export FINN_ROOT=/workspace/finn
+export HOME=/tmp/home_dir
+export SHELL=/bin/bash
+# colorful terminal output
+export PS1='\[\033[1;36m\]\u\[\033[1;31m\]@\[\033[1;32m\]\h:\[\033[1;35m\]\w\[\033[1;31m\]\$\[\033[0m\] '
+YELLOW='\033[0;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
NC='\033[0m' # No Color
+yecho () {
+ echo -e "${YELLOW}WARNING: $1${NC}"
+}
+
gecho () {
echo -e "${GREEN}$1${NC}"
}
@@ -15,97 +51,44 @@ recho () {
echo -e "${RED}ERROR: $1${NC}"
}
-# checkout the correct dependency repo commits
-# the repos themselves are cloned in the Dockerfile
-FINN_BASE_COMMIT=ac0b86a63eb937b869bfa453a996a8a8b8506546
-FINN_EXP_COMMIT=f82c0d9868bb88ea045dfadb28508d327d287221
-BREVITAS_COMMIT=d7ded80fa9557da2998ea310669edee7fb2d9526
-CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4
-HLSLIB_COMMIT=4d74baefa79df48b5a0348d63f39a26df075de51
-PYVERILATOR_COMMIT=e2ff74030de3992dcac54bf1b6aad2915946e8cb
-OMX_COMMIT=1bae737669901e762f581af73348332b5c4b2ada
-AVNET_BDF_COMMIT=2d49cfc25766f07792c0b314489f21fe916b639b
-
-gecho "Setting up known-good commit versions for FINN dependencies"
-# finn-base
-gecho "finn-base @ $FINN_BASE_COMMIT"
-git -C /workspace/finn-base pull --quiet
-git -C /workspace/finn-base checkout $FINN_BASE_COMMIT --quiet
-pip install --user -e /workspace/finn-base
-# finn-experimental
-gecho "finn-experimental @ $FINN_EXP_COMMIT"
-git -C /workspace/finn-experimental pull --quiet
-git -C /workspace/finn-experimental checkout $FINN_EXP_COMMIT --quiet
-pip install --user -e /workspace/finn-experimental
-# Brevitas
-gecho "brevitas @ $BREVITAS_COMMIT"
-git -C /workspace/brevitas pull --quiet
-git -C /workspace/brevitas checkout $BREVITAS_COMMIT --quiet
-pip install --user -e /workspace/brevitas
-# CNPY
-gecho "cnpy @ $CNPY_COMMIT"
-git -C /workspace/cnpy pull --quiet
-git -C /workspace/cnpy checkout $CNPY_COMMIT --quiet
-# FINN hlslib
-gecho "finn-hlslib @ $HLSLIB_COMMIT"
-git -C /workspace/finn-hlslib pull --quiet
-git -C /workspace/finn-hlslib checkout $HLSLIB_COMMIT --quiet
-# PyVerilator
-gecho "PyVerilator @ $PYVERILATOR_COMMIT"
-git -C /workspace/pyverilator pull --quiet
-git -C /workspace/pyverilator checkout $PYVERILATOR_COMMIT --quiet
-pip install --user -e /workspace/pyverilator
-# oh-my-xilinx
-gecho "oh-my-xilinx @ $OMX_COMMIT"
-git -C /workspace/oh-my-xilinx pull --quiet
-git -C /workspace/oh-my-xilinx checkout $OMX_COMMIT --quiet
-# remove old version egg-info, if any
-rm -rf $FINN_ROOT/src/FINN.egg-info
-# run pip install for finn
-pip install --user -e $FINN_ROOT
-
-if [ ! -z "$VIVADO_PATH" ];then
- # source Vivado env.vars
- export XILINX_VIVADO=$VIVADO_PATH
- source $VIVADO_PATH/settings64.sh
+if [ -f "$FINN_ROOT/setup.py" ];then
+ # run pip install for finn
+ pip install --user -e $FINN_ROOT
+else
+ recho "Unable to find FINN source code in /workspace/finn"
+  recho "Ensure you have passed -v <path-to-finn-repo>:/workspace/finn to the docker run command"
+ exit -1
fi
-# download PYNQ board files if not already there
-if [ ! -d "/workspace/finn/board_files" ]; then
- gecho "Downloading PYNQ board files for Vivado"
- OLD_PWD=$(pwd)
- cd /workspace/finn
- wget -q https://github.com/cathalmccabe/pynq-z1_board_files/raw/master/pynq-z1.zip
- wget -q https://d2m32eurp10079.cloudfront.net/Download/pynq-z2.zip
- unzip -q pynq-z1.zip
- unzip -q pynq-z2.zip
- mkdir /workspace/finn/board_files
- mv pynq-z1/ board_files/
- mv pynq-z2/ board_files/
- rm pynq-z1.zip
- rm pynq-z2.zip
- cd $OLD_PWD
-fi
-if [ ! -d "/workspace/finn/board_files/ultra96v2" ]; then
- gecho "Downloading Avnet BDF files from known-good commit into board_files"
- OLD_PWD=$(pwd)
- cd /workspace/finn
- git clone https://github.com/Avnet/bdf.git
- git -C /workspace/finn/bdf checkout $AVNET_BDF_COMMIT --quiet
- mv /workspace/finn/bdf/* /workspace/finn/board_files/
- rm -rf /workspace/finn/bdf
- cd $OLD_PWD
-fi
-if [ ! -z "$VITIS_PATH" ];then
+if [ -f "$VITIS_PATH/settings64.sh" ];then
# source Vitis env.vars
export XILINX_VITIS=$VITIS_PATH
export XILINX_XRT=/opt/xilinx/xrt
source $VITIS_PATH/settings64.sh
- if [ ! -z "$XILINX_XRT" ];then
+ gecho "Found Vitis at $VITIS_PATH"
+ if [ -f "$XILINX_XRT/setup.sh" ];then
# source XRT
source $XILINX_XRT/setup.sh
+ gecho "Found XRT at $XILINX_XRT"
else
recho "XRT not found on $XILINX_XRT, did the installation fail?"
+ exit -1
+ fi
+else
+ yecho "Unable to find $VITIS_PATH/settings64.sh"
+ yecho "Functionality dependent on Vitis will not be available."
+ yecho "If you need Vitis, ensure VITIS_PATH is set correctly and mounted into the Docker container."
+ if [ -f "$VIVADO_PATH/settings64.sh" ];then
+ # source Vivado env.vars
+ export XILINX_VIVADO=$VIVADO_PATH
+ source $VIVADO_PATH/settings64.sh
+ gecho "Found Vivado at $VIVADO_PATH"
+ else
+ yecho "Unable to find $VIVADO_PATH/settings64.sh"
+ yecho "Functionality dependent on Vivado will not be available."
+ yecho "If you need Vivado, ensure VIVADO_PATH is set correctly and mounted into the Docker container."
fi
fi
+
+# execute the provided command(s) as root
exec "$@"
diff --git a/docker/Dockerfile.jenkins b/docker/jenkins/Dockerfile.jenkins
similarity index 100%
rename from docker/Dockerfile.jenkins
rename to docker/jenkins/Dockerfile.jenkins
diff --git a/docker/Jenkinsfile b/docker/jenkins/Jenkinsfile
similarity index 85%
rename from docker/Jenkinsfile
rename to docker/jenkins/Jenkinsfile
index b2d3102bd4..f321194189 100644
--- a/docker/Jenkinsfile
+++ b/docker/jenkins/Jenkinsfile
@@ -2,7 +2,8 @@ pipeline {
agent any
parameters {
string(name: 'FINN_CI_BRANCH', defaultValue: '', description: 'FINN branch to build')
- string(name: 'VIVADO_PATH', defaultValue: '', description: 'Path to Vivado installation')
+ string(name: 'FINN_XILINX_PATH', defaultValue: '', description: 'Path to Xilinx tool installation')
+ string(name: 'FINN_XILINX_VERSION', defaultValue: '2020.1', description: 'Xilinx tool version')
string(name: 'PYNQ_BOARD', defaultValue: 'Pynq-Z1', description: 'PYNQ board type')
string(name: 'PYNQ_IP', defaultValue: '', description: 'PYNQ board IP address')
string(name: 'PYNQ_USERNAME', defaultValue: 'xilinx', description: 'PYNQ board username')
@@ -22,6 +23,8 @@ pipeline {
DOCKER_TAG='finn_ci:$BUILD_ID'
DOCKER_INST_NAME='finn_ci'
BUILD_PATH='/tmp/finn_ci'
+    VIVADO_PATH="${params.FINN_XILINX_PATH}/Vivado/${params.FINN_XILINX_VERSION}"
+    VITIS_PATH="${params.FINN_XILINX_PATH}/Vitis/${params.FINN_XILINX_VERSION}"
}
stages {
stage("Clone") {
@@ -45,10 +48,11 @@ pipeline {
docker run --init \
--hostname $DOCKER_INST_NAME \
-v ${params.WORKSPACE_MOUNT}:/workspace/finn \
- -v ${params.VIVADO_PATH}:${params.VIVADO_PATH}:ro \
+ -v ${params.FINN_XILINX_PATH}:${params.FINN_XILINX_PATH}:ro \
-e NUM_DEFAULT_WORKERS=1 \
-e FINN_INST_NAME=$DOCKER_INST_NAME \
- -e VIVADO_PATH=${params.VIVADO_PATH} \
+ -e VIVADO_PATH=$VIVADO_PATH \
+ -e VITIS_PATH=$VITIS_PATH \
-e PYNQ_BOARD=${params.PYNQ_BOARD} \
-e PYNQ_IP=${params.PYNQ_IP} \
-e PYNQ_USERNAME=${params.PYNQ_USERNAME} \
@@ -65,10 +69,11 @@ pipeline {
docker run --init \
--hostname $DOCKER_INST_NAME \
-v ${params.WORKSPACE_MOUNT}:/workspace/finn \
- -v ${params.VIVADO_PATH}:${params.VIVADO_PATH}:ro \
+ -v $VIVADO_PATH:$VIVADO_PATH:ro \
-e NUM_DEFAULT_WORKERS=1 \
-e FINN_INST_NAME=$DOCKER_INST_NAME \
- -e VIVADO_PATH=${params.VIVADO_PATH} \
+ -e VIVADO_PATH=$VIVADO_PATH \
+ -e VITIS_PATH=$VITIS_PATH \
-e PYNQ_BOARD=${params.PYNQ_BOARD} \
-e PYNQ_IP=${params.PYNQ_IP} \
-e PYNQ_USERNAME=${params.PYNQ_USERNAME} \
@@ -85,10 +90,11 @@ pipeline {
docker run --init \
--hostname $DOCKER_INST_NAME \
-v ${params.WORKSPACE_MOUNT}:/workspace/finn \
- -v ${params.VIVADO_PATH}:${params.VIVADO_PATH}:ro \
+ -v $VIVADO_PATH:$VIVADO_PATH:ro \
-e NUM_DEFAULT_WORKERS=${params.NUM_DEFAULT_WORKERS} \
-e FINN_INST_NAME=$DOCKER_INST_NAME \
- -e VIVADO_PATH=${params.VIVADO_PATH} \
+ -e VIVADO_PATH=$VIVADO_PATH \
+ -e VITIS_PATH=$VITIS_PATH \
-e PYNQ_BOARD=${params.PYNQ_BOARD} \
-e PYNQ_IP=${params.PYNQ_IP} \
-e PYNQ_USERNAME=${params.PYNQ_USERNAME} \
diff --git a/docker/launch-jenkins.sh b/docker/jenkins/launch-jenkins.sh
similarity index 100%
rename from docker/launch-jenkins.sh
rename to docker/jenkins/launch-jenkins.sh
diff --git a/docs/finn/brevitas_export.rst b/docs/finn/brevitas_export.rst
index 65f6ab6b30..408b14fd2b 100644
--- a/docs/finn/brevitas_export.rst
+++ b/docs/finn/brevitas_export.rst
@@ -8,7 +8,13 @@ Brevitas Export
:scale: 70%
:align: center
-FINN expects an ONNX model as input. This can be a model trained with `Brevitas `_. Brevitas is a PyTorch library for quantization-aware training and the FINN Docker image comes with several `example Brevitas networks `_. Brevitas provides an export of a quantized network in ONNX representation. The resulting model consists only of `ONNX standard nodes `_, but also contains additional attributes for the ONNX nodes to represent low precision datatypes. To work with the model it is wrapped into :ref:`modelwrapper` provided by FINN.
+FINN expects an ONNX model as input. This can be a model trained with `Brevitas <https://github.com/Xilinx/brevitas>`_. Brevitas is a PyTorch library for quantization-aware training, and the FINN Docker image comes with several `example Brevitas networks `_. Brevitas provides an export of a quantized network in ONNX representation in several flavors.
+Two of the Brevitas-exported ONNX variants can be ingested by FINN:
+
+ * FINN-ONNX: Quantized weights exported as tensors with additional attributes to mark low-precision datatypes. Quantized activations exported as MultiThreshold nodes.
+ * QONNX: All quantization is represented using Quant, BinaryQuant or Trunc nodes. QONNX must be converted into FINN-ONNX by :py:mod:`finn.transformation.qonnx.convert_qonnx_to_finn`.
+
+To work with either type of ONNX model, it is loaded into a :ref:`modelwrapper` provided by FINN.
At this stage we can already use the functional verification flow to simulate the model using Python; this is marked in the graphic with the dotted arrow. For more details please have a look at :ref:`verification`.
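
Editorial note: as a sketch of the two ingestion paths described above (file names are placeholders; ``ConvertQONNXtoFINN`` is the transformation assumed to live in the module referenced above):

```python
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN

# FINN-ONNX models can be wrapped and used directly:
model = ModelWrapper("model_finn_onnx.onnx")  # placeholder file name

# QONNX models are wrapped the same way, then converted to FINN-ONNX first:
qonnx_model = ModelWrapper("model_qonnx.onnx")  # placeholder file name
finn_model = qonnx_model.transform(ConvertQONNXtoFINN())
```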
diff --git a/docs/finn/conf.py b/docs/finn/conf.py
index 1bd179c3f7..47ba99fb5f 100644
--- a/docs/finn/conf.py
+++ b/docs/finn/conf.py
@@ -12,14 +12,15 @@
#
import os
import sys
-sys.path.insert(0, os.path.abspath('../../src/'))
+
+sys.path.insert(0, os.path.abspath("../../src/"))
# -- Project information -----------------------------------------------------
-project = 'FINN'
-copyright = '2020, Xilinx'
-author = 'Y. Umuroglu and J. Petri-Koenig'
+project = "FINN"
+copyright = "2020, Xilinx"
+author = "Y. Umuroglu and J. Petri-Koenig"
# -- General configuration ---------------------------------------------------
@@ -27,17 +28,17 @@
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
-extensions = [
-]
-extensions.append('sphinx.ext.autodoc')
+extensions = []
+extensions.append("sphinx.ext.autodoc")
+extensions.append("sphinx.ext.autosectionlabel")
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
# -- Options for HTML output -------------------------------------------------
@@ -45,11 +46,11 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
-html_theme = 'sphinx_rtd_theme'
+html_theme = "sphinx_rtd_theme"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
-master_doc = 'index'
+master_doc = "index"
diff --git a/docs/finn/developers.rst b/docs/finn/developers.rst
index 6e7fa0d920..508cd86a31 100644
--- a/docs/finn/developers.rst
+++ b/docs/finn/developers.rst
@@ -7,7 +7,7 @@ Developer documentation
This page is intended to serve as a starting point for new FINN developers.
Power users may also find this information useful.
-Getting started
+Prerequisites
================
Before starting to do development on FINN it's a good idea to start
diff --git a/docs/finn/faq.rst b/docs/finn/faq.rst
index 093344e703..e426bdb4e2 100644
--- a/docs/finn/faq.rst
+++ b/docs/finn/faq.rst
@@ -4,68 +4,109 @@
Frequently Asked Questions
***********************
-.. note:: **This page is under construction.**
+Can't find the answer to your question here? Check `FINN GitHub Discussions <https://github.com/Xilinx/finn/discussions>`_.
-Can I install FINN out of the Docker container?
-===============================================
-We do not support out of the Docker implementations at the moment. This is due
-to the high complexity of the FINN project dependencies.
+Can I install FINN out of the Docker container?
+ We do not support out of the Docker implementations at the moment. This is due
+ to the high complexity of the FINN project dependencies.
Since FINN uses ONNX, can I compile any model from the ONNX Model Zoo to an FPGA accelerator?
-=============================================================================================
+ The short answer is no. FINN uses ONNX in a specific (non-standard) way, including custom layer
+ types and quantization annotations. Networks must be first quantized using Brevitas and exported
+ to FINN-ONNX to be converted to FPGA accelerators.
-The short answer is no. FINN uses ONNX in a specific (non-standard) way, including custom layer
-types and quantization annotations. Networks must be first quantized using Brevitas and exported
-to FINN-ONNX to be converted to FPGA accelerators.
-Can I deploy custom NNs with arbitrary precisions and layers using FINN?
-=========================================================================
-Yes, though the effort required and quality of results will vary.
-Although we do support arbitrary
-precision, the way we create the hardware isn't typically practical for more than
-4 bits, or very large networks for a single FPGA.
-In terms of layers, only a subset of quantized layers covered by the various FINN examples
-are currently supported.
-It is possible to add support for new layers, though we don't have tutorials for this in place
-just yet.
-Does FINN only work with the example networks?
-==============================================
+Can I deploy custom NNs with arbitrary precisions and layers using FINN?
+ Yes, though the effort required and quality of results will vary.
+ Although we do support arbitrary
+ precision, the way we create the hardware isn't typically practical for more than
+ 4 bits, or for very large networks on a single FPGA.
+ In terms of layers, only a subset of quantized layers covered by the various FINN examples
+ are currently supported.
+ It is possible to add support for new layers, though we don't have tutorials for this in place
+ just yet.
-FINN isn't restricted to the example networks;
-rather, it's restricted to certain patterns (e.g. certain layer types and their combinations).
-The current best practice for custom networks is to take a working network and gradually modify it.
+Does FINN only work with the example networks?
+ FINN isn't restricted to the example networks;
+ rather, it's restricted to certain patterns (e.g. certain layer types and their combinations).
+ The current best practice for custom networks is to take a working network and gradually modify it.
What is the expected background for using FINN?
-===============================================
-
-Some general knowledge of Python, Docker, machine learning with neural networks and Jupyter notebooks
-is expected.
-Our goal is to make the tool in a shape and form so that no hardware/FPGA background
-should be necessary, although having some knowledge would give better results.
+ Some general knowledge of Python, Docker, machine learning with neural networks and Jupyter notebooks
+ is expected.
+ Our goal is to shape the tool so that no hardware/FPGA background
+ is necessary, although having some knowledge will give better results.
What operating systems are supported by FINN?
-=============================================
-
-FINN should work fine under any Linux-based OS capable of running Vivado/Vitis, as long
-as you install Docker (``docker-ce``) on your machine .
+ FINN should work fine under any Linux-based OS capable of running Vivado/Vitis, as long
+ as you install Docker (``docker-ce``) on your machine.
I am getting DocNav and Model_Composer errors when launching the Docker image.
-==============================================================================
-
-We do not mount those particular directories into the Docker container because they are not
-used. The errors are Vivado related but you can safely ignore them.
+ We do not mount those particular directories into the Docker container because they are not
+ used. The errors are Vivado related but you can safely ignore them.
What board do you recommend to start working with FINN?
-=======================================================
-
-Our preferred target platforms are those supported by `PYNQ `_.
-For those boards we can offer end-to-end (DNN-to-bitstream) deployment,
-see the `finn-examples `_ repository for some examples.
-However, FINN also supports Vivado IP Integrator designs. The IPs connect using AXI stream (FIFO)
-in-and-out interfaces. This means that it can be integrated onto any Xilinx FPGA board,
-though you will have to do the system integration manually.
+ Our preferred target platforms are those supported by `PYNQ <http://www.pynq.io/>`_.
+ For those boards we can offer end-to-end (DNN-to-bitstream) deployment;
+ see the `finn-examples <https://github.com/Xilinx/finn-examples>`_ repository for some examples.
+ However, FINN also supports Vivado IP Integrator designs. The IPs connect using AXI stream (FIFO)
+ in-and-out interfaces. This means that it can be integrated onto any Xilinx FPGA board,
+ though you will have to do the system integration manually.
+
+FINN-generated builds break after I restart my computer because ``/tmp`` gets wiped.
+ See https://github.com/Xilinx/finn/discussions/404
+
+How can I target an arbitrary Xilinx FPGA without PYNQ support?
+ See https://github.com/Xilinx/finn/discussions/387
+
+Why do FINN-generated architectures need FIFOs between layers?
+ See https://github.com/Xilinx/finn/discussions/383
+
+How do I tell FINN to utilize DSPs instead of LUTs for MAC operations in particular layers?
+ This is done with the ``resType="dsp"`` attribute on ``StreamingFCLayer`` and ``Vector_Vector_Activate`` instances.
+ When using the ``build_dataflow`` system, this can be specified on a per-layer basis as part of one or more layers’
+ folding config (:py:mod:`finn.builder.build_dataflow_config.DataflowBuildConfig.folding_config_file`).
+ This is a good idea for layers with more weight/input activation bits and high PE*SIMD.
+ See the `MobileNet-v1 build config for ZCU104 in finn-examples `_ for reference.
+
+
+How do I tell FINN to utilize a particular type of memory resource in particular layers?
+ This is done with the ``ram_style`` attribute. Check the particular ``HLSCustomOp`` attribute definition to see
+ which modes are supported (`example for StreamingFCLayer `_).
+ When using the ``build_dataflow`` system, this can be specified on a per-layer basis as part of one or more layers’
+ folding config (:py:mod:`finn.builder.build_dataflow_config.DataflowBuildConfig.folding_config_file`).
+ See the `MobileNet-v1 build config for ZCU104 in finn-examples `_ for reference.
+
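Editorial note: as a sketch of what a folding config carrying the ``resType`` and ``ram_style`` attributes from the two entries above could look like (node names and folding factors here are hypothetical):

```python
import json

# Hypothetical per-layer folding config; keys are node names in the graph.
folding_cfg = {
    "Defaults": {},
    "StreamingFCLayer_Batch_0": {
        "PE": 16,
        "SIMD": 16,
        "resType": "dsp",      # map this layer's MACs to DSPs
        "ram_style": "block",  # use BRAM for this layer's weight memory
    },
}
with open("folding_config.json", "w") as f:
    json.dump(folding_cfg, f, indent=2)
```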
+Which data layout do FINN-generated accelerators use? Big-endian? Little-endian?
+ The data layout used by FINN does not correspond to system-level big or little endian due to difficulties in defining what
+ the “word size” is and bit packing for smaller datatypes. FINN’s “word size” is dependent on the parallelization of the
+ first/last layers. For instance, if the first HLS layer is using SIMD=3 this means the “innermost dimension” in the
+ data packing functions will be of size 3.
+ When you use the verification infrastructure or the generated PYNQ Python drivers that FINN provides, the tool normally
+ takes care of any required data layout conversion on standard numpy arrays before presenting the data to the accelerator,
+ and vice versa on the output side. Doing this data packing and layout conversion manually can be messy at the moment.
+ If you need to do this manually, first examine how the `FINN PYNQ Python drivers `_ do this – notice how the input data is
+ first reshaped to create the “folded input shape” that reflects the word size of the first layer based on how much it
+ was parallelized, then data packing is applied to obtain a raw byte array (with some reversals going on) that can be
+ fed directly to the hardware. Another example of this is the `npy_to_rtlsim_input `_ function, which converts npy arrays to lists of Python arbitrary-precision integers that we feed into pyverilator for RTL simulation.
+
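Editorial note: as a sketch of the folded-shape idea (shapes and SIMD value are made up):

```python
import numpy as np

# Suppose the first HLS layer consumes a (1, 32, 32, 3) NHWC input with SIMD=3.
# The "folded input shape" splits the channel dimension so that the innermost
# dimension equals SIMD -- this innermost dimension is the accelerator's "word".
simd = 3
x = np.random.rand(1, 32, 32, 3).astype(np.float32)
x_folded = x.reshape(1, 32, 32, 3 // simd, simd)  # -> (1, 32, 32, 1, 3)
# The PYNQ driver then bit-packs x_folded into a raw byte buffer (with some
# reversals) before handing it to the hardware, as described above.
```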
+Why does FIFO sizing take so long for my network? Is something wrong?
+ The automatic FIFO sizing in FINN can take quite a long time. It unfortunately doesn’t really parallelize on multiple cores since
+ it’s based on running an RTL simulation with lots of inputs and very large FIFOs, then observing the max occupancy/count
+ in each FIFO.
+
+What's a good starting point for the folding configuration if I want to make manual changes?
+ First, enable automatic folding options in ``build_dataflow`` such as ``target_fps``. This should find a decent set of
+ folding factors and save them to ``output_folder/auto_folding_config.json``, which you can use as a basis for creating the desired config.
diff --git a/docs/finn/getting_started.rst b/docs/finn/getting_started.rst
index f7ca6af31c..af7a05751b 100644
--- a/docs/finn/getting_started.rst
+++ b/docs/finn/getting_started.rst
@@ -4,90 +4,65 @@
Getting Started
***************
-How to use the FINN compiler
-============================
-Currently, it's best to think of the FINN compiler as *compiler infrastructure*
-instead of a full *compiler* like `gcc` (although the aim is to get there).
-Although we provide a :ref:`command_line` entry for building dataflow
-accelerators, this only exposes a basic flow that works for simpler networks.
-A better way of looking at the FINN compiler is as a collection of scripts/tools that will help
-you convert a QNN into a custom FPGA accelerator that performs high-performance inference.
-
-**So where do I get started?** The best way of getting started with the FINN
-compiler is to follow the existing
-`Jupyter notebooks `_ and check out the prebuilt
-`examples `_.
-
-**How do I compile my custom network?**
-This depends on how similar your custom network is to the examples we provide.
+Quickstart
+==========
+
+1. Install Docker and set it up to run `without root <https://docs.docker.com/engine/install/linux-postinstall/>`_
+2. Set up ``FINN_XILINX_PATH`` and ``FINN_XILINX_VERSION`` environment variables pointing respectively to the Xilinx tools installation directory and version (e.g. ``FINN_XILINX_PATH=/opt/Xilinx`` and ``FINN_XILINX_VERSION=2020.1``)
+3. Clone the FINN compiler from the repo: ``git clone https://github.com/Xilinx/finn/`` and go into the directory where it is cloned
+4. Execute ``./run-docker.sh quicktest`` to verify your installation.
+5. Optionally, follow the instructions on :ref:`PYNQ board first-time setup` or :ref:`Alveo first-time setup` for board setup.
+6. Optionally, set up a `Vivado/Vitis license`_.
+7. All done! See :ref:`Running FINN in Docker` for the various options on how to run the FINN compiler.
+
+
+How do I use FINN?
+==================
+
+We strongly recommend that you first watch one of the pre-recorded `FINN tutorial `_
+videos, then follow the Jupyter notebook tutorials for `training and deploying an MLP for network intrusion detection `_.
+You may also want to check out the other :ref:`tutorials`, and the `FINN examples repository <https://github.com/Xilinx/finn-examples>`_.
+
+Our aim in FINN is *not* to accelerate common off-the-shelf neural networks, but instead provide you with a set of tools
+to train *customized* networks and create highly-efficient FPGA implementations from them.
+In general, the approach for using the FINN framework is as follows:
+
+1. Train your own quantized neural network (QNN) in `Brevitas <https://github.com/Xilinx/brevitas>`_. We have some `guidelines `_ on quantization-aware training (QAT).
+2. Export to FINN-ONNX by following `this tutorial `_.
+3. Use FINN's ``build_dataflow`` system on the exported model by following this `tutorial `_; a minimal sketch follows after this list.
+4. Adjust your QNN topology, quantization settings and ``build_dataflow`` configuration to get the desired results.
+
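Editorial note: a minimal sketch of step 3 (the model file name, board and performance targets are placeholders; check :py:mod:`finn.builder.build_dataflow_config` for the full set of options):

```python
from finn.builder.build_dataflow import build_dataflow_cfg
from finn.builder.build_dataflow_config import (
    DataflowBuildConfig,
    DataflowOutputType,
)

# Placeholder targets; adjust for your network and board.
cfg = DataflowBuildConfig(
    output_dir="output_my_network",
    target_fps=100000,         # drives automatic folding
    synth_clk_period_ns=10.0,  # 100 MHz clock target
    board="Pynq-Z1",
    generate_outputs=[DataflowOutputType.ESTIMATE_REPORTS],
)
build_dataflow_cfg("my_network_finn_onnx.onnx", cfg)
```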
+Please note that the framework is still under development, and how well this works will depend on how similar your custom network is to the examples we provide.
If there are substantial differences, you will most likely have to write your own
Python scripts that call the appropriate FINN compiler
functions that process your design correctly, or add new functions (including
Vivado HLS layers)
as required.
-For custom networks, we recommend making a copy of the end-to-end
-Jupyter notebook as a starting point, visualizing the model at intermediate
+The `advanced FINN tutorials `_ can be useful here.
+For custom networks, we recommend making a copy of the `BNN-PYNQ end-to-end
+Jupyter notebook tutorials `_ as a starting point, visualizing the model at intermediate
steps and adding calls to new transformations as needed.
Once you have a working flow, you can implement a command line entry for this
by using the "advanced mode" described in the :ref:`command_line` section.
-
-
-
-System Requirements
-====================
-
-* Ubuntu 18.04 with ``bash`` installed
-* Docker `without root `_
-* A working Vivado 2019.1 or 2020.1 installation
-* A ``VIVADO_PATH`` environment variable pointing to the Vivado installation directory (e.g. the directory where settings64.sh is located)
-* *(optional)* A PYNQ board with a network connection, see `PYNQ board first-time setup`_ below
-* *(optional)* An Alveo board, and a working Vitis 2020.1 installation if you want to use Vitis and Alveo (see `Alveo first-time setup`_ below)
-
-We also recommend running the FINN compiler on a system with sufficiently
-strong hardware:
-
-* **RAM.** Depending on your target FPGA platform, your system must have sufficient RAM to be
- able to run Vivado/Vitis synthesis for that part. See `this page `_
- for more information. For targeting Zynq and Zynq UltraScale+ parts, at least 8 GB is recommended. Larger parts may require up to 16 GB.
- For targeting Alveo parts with Vitis, at least 64 GB RAM is recommended.
-
-* **CPU.** FINN can parallelize HLS synthesis and several other operations for different
- layers, so using a multi-core CPU is recommended. However, this should be balanced
- against the memory usage as a high degree of parallelization will require more
- memory. See the ``NUM_DEFAULT_WORKERS`` environment variable below for more on
- how to control the degree of parallelization.
-
-* **Storage.** While going through the build steps, FINN will generate many files as part of
- the process. For larger networks, you may need 10s of GB of space for the temporary
- files generated during the build.
- By default, these generated files will be placed under ``/tmp/finn_dev_``.
- You can override this location by using the ``FINN_HOST_BUILD_DIR`` environment
- variable.
- Mapping the generated file dir to a fast SSD will result in quicker builds.
-
-
Running FINN in Docker
======================
-We use Docker extensively for developing and deploying FINN. If you are not familiar with Docker, there are many excellent `online resources `_ to get started. There is a Dockerfile in the root of the repository, as well as a `run-docker.sh` script that can be launched in the following modes:
+FINN runs only inside a Docker container, and comes with a script to easily build and launch the container. If you are not familiar with Docker, there are many excellent `online resources `_ to get started.
+You may want to review the :ref:`General FINN Docker tips` and :ref:`Environment variables` as well.
+If you want to use prebuilt images, read :ref:`Using a prebuilt image`.
+The ``run-docker.sh`` script can be launched in the following modes:
-Getting an interactive shell for development or experimentation
-***************************************************************
-.. warning:: Do not use ``sudo`` to launch the FINN Docker. Instead, setup Docker to run `without root `_
+Launch interactive shell
+************************
+Simply running ``bash ./run-docker.sh`` without any additional arguments will create a Docker container with all dependencies and give you a terminal that you can use for development and experimentation:
::
  bash ./run-docker.sh
-Simply running sh run-docker.sh without any additional arguments will clone the dependency repos, create a Docker container and give you a terminal with you can use for development for experimentation.
-If you want a new terminal on an already-running container, you can do this with `docker exec -it finn_dev_ bash`.
-
-.. warning:: The Docker container is spawned with the `--rm` option, so make sure that any important files you created inside the container are either in the /workspace/finn folder (which is mounted from the host computer) or otherwise backed up.
-
-.. note:: **Develop from host, run inside container:** The FINN repository directory will be mounted from the host, so that you can use a text editor on your host computer to develop and the changes will be reflected directly inside the container.
-Command Line Entry
-*******************
+Launch a Build with ``build_dataflow``
+**************************************
FINN is currently more compiler infrastructure than compiler, but we do offer
a :ref:`command_line` entry for certain use-cases. These run a predefined flow
or a user-defined flow from the command line as follows:
@@ -98,16 +73,17 @@ or a user-defined flow from the command line as follows:
  bash ./run-docker.sh build_custom <path/to/custom_build_dir/>
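+
+For example, a user-defined flow could be launched on a build folder as
+follows. The path is illustrative, and the folder is assumed to contain the
+``build.py`` entry point described in the :ref:`command_line` section:
+
+::
+
+  bash ./run-docker.sh build_custom /home/user/my_custom_build
+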
-Running the Jupyter notebooks
-*****************************
+Launch Jupyter notebooks
+************************
+FINN comes with numerous Jupyter notebook tutorials, which you can launch with:
+
::
bash ./run-docker.sh notebook
This will launch the `Jupyter notebook `_ server inside a Docker container, and print a link on the terminal that you can open in your browser to run the FINN notebooks or create new ones.
.. note:: The link will look something like this (the token you get will be different):
-http://127.0.0.1:8888/?token=f5c6bd32ae93ec103a88152214baedff4ce1850d81065bfc
-
+http://127.0.0.1:8888/?token=f5c6bd32ae93ec103a88152214baedff4ce1850d81065bfc
The ``run-docker.sh`` script forwards ports 8888 for Jupyter and 8081 for Netron, and launches the notebook server with appropriate arguments.
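+
+If the default ports clash with other services on your host, you can move
+them before launching; for example (port numbers are illustrative):
+
+::
+
+  export JUPYTER_PORT=8889
+  export NETRON_PORT=8082
+  bash ./run-docker.sh notebook
+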
@@ -117,23 +93,50 @@ Environment variables
Prior to running the `run-docker.sh` script, there are several environment variables you can set to configure certain aspects of FINN.
These are summarized below:
-* ``VIVADO_PATH`` points to your Vivado installation on the host
-* (optional, for Vitis & Alveo only) ``VITIS_PATH``, and ``PLATFORM_REPO_PATHS`` respectively point to your Vitis installation, and the Vitis platform files.
-* (optional, for Vitis & Alveo only) ``XRT_DEB_VERSION`` specifies the .deb to be installed for XRT inside the container (see default value in ``run-docker.sh``).
+* (required) ``FINN_XILINX_PATH`` points to your Xilinx tools installation on the host (e.g. ``/opt/Xilinx``)
+* (required) ``FINN_XILINX_VERSION`` sets the Xilinx tools version to be used (e.g. ``2020.1``)
+* (required for Alveo) ``PLATFORM_REPO_PATHS`` points to the Vitis platform files (DSA).
+* (required for Alveo) ``XRT_DEB_VERSION`` specifies the .deb to be installed for XRT inside the container (see default value in ``run-docker.sh``).
+* (optional) ``NUM_DEFAULT_WORKERS`` (default 4) specifies the degree of parallelization for the transformations that can be run in parallel, potentially reducing build time
+* (optional) ``FINN_HOST_BUILD_DIR`` specifies which directory on the host will be used as the build directory. Defaults to ``/tmp/finn_dev_<username>``
* (optional) ``JUPYTER_PORT`` (default 8888) changes the port for Jupyter inside Docker
* (optional) ``JUPYTER_PASSWD_HASH`` (default "") Set the Jupyter notebook password hash. If set to empty string, token authentication will be used (token printed in terminal on launch).
* (optional) ``LOCALHOST_URL`` (default localhost) sets the base URL for accessing e.g. Netron from inside the container. Useful when running FINN remotely.
* (optional) ``NETRON_PORT`` (default 8081) changes the port for Netron inside Docker
-* (optional) ``NUM_DEFAULT_WORKERS`` (default 1) specifies the degree of parallelization for the transformations that can be run in parallel
* (optional) ``PYNQ_BOARD`` or ``ALVEO_BOARD`` specifies the type of PYNQ/Alveo board used (see "supported hardware" below) for the test suite
* (optional) ``PYNQ_IP`` and ``PYNQ_PORT`` (or ``ALVEO_IP`` and ``ALVEO_PORT``) specify ip address and port number to access the PYNQ board / Alveo target
* (optional) ``PYNQ_USERNAME`` and ``PYNQ_PASSWORD`` (or ``ALVEO_USERNAME`` and ``ALVEO_PASSWORD``) specify the PYNQ board / Alveo host access credentials for the test suite. For PYNQ, password is always needed to run as sudo. For Alveo, you can leave the password empty and place your ssh private key in the ``finn/ssh_keys`` folder to use keypair authentication.
* (optional) ``PYNQ_TARGET_DIR`` (or ``ALVEO_TARGET_DIR``) specifies the target dir on the PYNQ board / Alveo host for the test suite
-* (optional) ``FINN_HOST_BUILD_DIR`` specifies which directory on the host will be used as the build directory. Defaults to ``/tmp/finn_dev_``
* (optional) ``IMAGENET_VAL_PATH`` specifies the path to the ImageNet validation directory for tests.
+* (optional) ``FINN_DOCKER_PREBUILT`` (default 0) if set to 1, skips Docker image building and uses the image tagged with ``FINN_DOCKER_TAG``.
+* (optional) ``FINN_DOCKER_TAG`` (autogenerated) specifies the Docker image tag to use.
+* (optional) ``FINN_DOCKER_RUN_AS_ROOT`` (default 0) if set to 1, runs the Docker container as root; by default it runs as the current user.
+* (optional) ``FINN_DOCKER_GPU`` (autodetected) if not 0, exposes all Nvidia GPUs or those selected by ``NVIDIA_VISIBLE_DEVICES`` to the Docker container for accelerated DNN training. Requires `Nvidia Container Toolkit `_
+* (optional) ``FINN_DOCKER_EXTRA`` (default "") passes extra arguments to the ``docker run`` command when executing ``./run-docker.sh``
+* (optional) ``NVIDIA_VISIBLE_DEVICES`` (default "") selects specific Nvidia GPUs to use in the Docker container. Possible values are a comma-separated list of GPU UUID(s) or index(es), e.g. ``0,1,2``, ``all``, ``none``, or empty/unset.
+* (optional) ``DOCKER_BUILDKIT`` (default "1") enables `Docker BuildKit `_ for faster Docker image rebuilding (recommended).
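+
+As a minimal sketch of a first launch, assuming the Xilinx tools live under
+``/opt/Xilinx`` on the host (adjust paths and values for your system):
+
+::
+
+  export FINN_XILINX_PATH=/opt/Xilinx
+  export FINN_XILINX_VERSION=2020.1
+  export NUM_DEFAULT_WORKERS=8
+  bash ./run-docker.sh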
+
+General FINN Docker tips
+************************
+* Several folders, including the root directory of the FINN compiler and the ``FINN_HOST_BUILD_DIR``, will be mounted into the Docker container and can be used to exchange files.
+* Do not use ``sudo`` to launch the FINN Docker. Instead, set up Docker to run `without root `_.
+* If you want a new terminal on an already-running container, you can do this with ``docker exec -it finn_dev_<username> bash``.
+* The container is spawned with the ``--rm`` option, so make sure that any important files you created inside the container are either in the ``/workspace/finn`` folder (which is mounted from the host computer) or otherwise backed up.
+
+Using a prebuilt image
+**********************
+
+By default the ``run-docker.sh`` script tries to re-build the Docker image with each run. After the first run this should go quite fast thanks to Docker caching.
+If you are having trouble building the Docker image or need offline access, you can use prebuilt images by following these steps:
-Supported Hardware
-===================
+1. Pull a prebuilt Docker image with ``docker pull maltanar/finn:<tag>``, where ``<tag>`` can be ``dev_latest`` or ``main_latest``
+2. Set ``FINN_DOCKER_TAG`` to the name of the image you just pulled, e.g. ``FINN_DOCKER_TAG=maltanar/finn:dev_latest``
+3. Set ``FINN_DOCKER_PREBUILT=1``
+4. You can now launch the Docker image in all modes without re-building or any internet access, as in the sketch below.
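+
+Put together, the steps above could look like this in a shell session:
+
+::
+
+  docker pull maltanar/finn:dev_latest
+  export FINN_DOCKER_TAG=maltanar/finn:dev_latest
+  export FINN_DOCKER_PREBUILT=1
+  bash ./run-docker.sh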
+
+
+Supported FPGA Hardware
+=======================
**Shell-integrated accelerator + driver:** For quick deployment, we target boards supported by `PYNQ `_ . For these platforms, we can build a full bitfile including DMAs to move data into and out of the FINN-generated accelerator, as well as a Python driver to launch the accelerator. We support the Pynq-Z1, Pynq-Z2, Ultra96, ZCU102 and ZCU104 boards.
As of FINN v0.4b we also have preliminary support for `Xilinx Alveo boards `_ using PYNQ and Vitis, see instructions below for Alveo setup.
@@ -178,4 +181,49 @@ On the host side:
3. Install the Vitis platform files for Alveo and set up the ``PLATFORM_REPO_PATHS`` environment variable to point to your installation. *This must be the same path as the target's platform files (target step 2)*
4. Set up the ``ALVEO_*`` environment variables accordingly for your target, see description of environment variables above.
5. `Set up public key authentication `_. Copy your private key to the ``finn/ssh_keys`` folder on the host to get password-less deployment and remote execution.
-5. Done! You can try the ``test_end2end_vitis`` tests in the FINN Docker to verify your setup, although this will take some time.
+6. Done! You can try the ``test_end2end_vitis`` tests in the FINN Docker to verify your setup, although this will take some time.
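+
+For example, the key setup in step 5 could look like this (the hostname and
+key paths are illustrative):
+
+::
+
+  ssh-keygen -t rsa -b 4096 -f ~/.ssh/id_rsa
+  ssh-copy-id -i ~/.ssh/id_rsa.pub alveo_user@alveo-host
+  cp ~/.ssh/id_rsa finn/ssh_keys/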
+
+Vivado/Vitis license
+*********************
+If you are targeting Xilinx FPGA parts that need specific licenses (non-WebPack), you can make these available to the
+FINN Docker container by passing extra arguments. To do this, you can use the ``FINN_DOCKER_EXTRA`` environment variable as follows:
+
+::
+
+ export FINN_DOCKER_EXTRA=" -v /path/to/licenses:/path/to/licenses -e XILINXD_LICENSE_FILE=/path/to/licenses "
+
+The above example mounts ``/path/to/licenses`` from the host into the same path on the Docker container, and sets the
+value of the ``XILINXD_LICENSE_FILE`` environment variable.
+
+System Requirements
+====================
+
+* Ubuntu 18.04 with ``bash`` installed
+* Docker `without root `_
+* A working Vivado 2020.1 installation
+* ``FINN_XILINX_PATH`` and ``FINN_XILINX_VERSION`` environment variables correctly set, see `Quickstart`_
+* *(optional)* `Vivado/Vitis license`_ if targeting non-WebPack FPGA parts.
+* *(optional)* A PYNQ board with a network connection, see `PYNQ board first-time setup`_
+* *(optional)* An Alveo board, and a working Vitis 2020.1 installation if you want to use Vitis and Alveo (see `Alveo first-time setup`_)
+
+We also recommend running the FINN compiler on a system with sufficiently
+strong hardware:
+
+* **RAM.** Depending on your target FPGA platform, your system must have sufficient RAM to be
+ able to run Vivado/Vitis synthesis for that part. See `this page `_
+ for more information. For targeting Zynq and Zynq UltraScale+ parts, at least 8 GB is recommended. Larger parts may require up to 16 GB.
+ For targeting Alveo parts with Vitis, at least 64 GB RAM is recommended.
+
+* **CPU.** FINN can parallelize HLS synthesis and several other operations for different
+ layers, so using a multi-core CPU is recommended. However, this should be balanced
+ against the memory usage as a high degree of parallelization will require more
+  memory. See the ``NUM_DEFAULT_WORKERS`` environment variable above for more on
+ how to control the degree of parallelization.
+
+* **Storage.** While going through the build steps, FINN will generate many files as part of
+  the process. For larger networks, you may need tens of GB of space for the temporary
+  files generated during the build.
+  By default, these generated files will be placed under ``/tmp/finn_dev_<username>``.
+  You can override this location by using the ``FINN_HOST_BUILD_DIR`` environment
+  variable, as in the example below.
+  Mapping the generated file directory to a fast SSD will result in quicker builds.
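+
+For example, to redirect the generated files to a larger or faster drive
+(the path is illustrative):
+
+::
+
+  export FINN_HOST_BUILD_DIR=/scratch/finn_builds
+  bash ./run-docker.sh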
diff --git a/docs/finn/index.rst b/docs/finn/index.rst
index 320cd88fe9..751b105bb4 100644
--- a/docs/finn/index.rst
+++ b/docs/finn/index.rst
@@ -12,20 +12,20 @@ What is FINN?
'FINN' is colloquially used to refer to two separate but highly related things:
-* The FINN **project**, which is an experimental framework from Xilinx Research Labs
-to explore deep neural network inference on FPGAs. It specifically targets
-quantized neural networks (QNNs), with emphasis on generating dataflow-style
-architectures customized for each network.
-The key components are illustrated in the figure above;
-including tools for training
-quantized neural networks (Brevitas), the FINN compiler, and the finn-hlslib
-Vivado HLS library of FPGA components for QNNs.
-Read more on the `FINN project homepage `_.
-
-* The FINN **compiler**, which this Read the Docs website is the documentation for.
-The compiler is a central part of the FINN project (above) that maps QNNs to
-dataflow-style FPGA architectures.
-You can find the FINN compiler in this `GitHub repository `_.
+* The FINN **project**, which is an experimental framework from Xilinx Research Labs
+ to explore deep neural network inference on FPGAs. It specifically targets
+ quantized neural networks (QNNs), with emphasis on generating dataflow-style
+ architectures customized for each network.
+ The key components are illustrated in the figure above;
+ including tools for training
+ quantized neural networks (Brevitas), the FINN compiler, and the finn-hlslib
+ Vivado HLS library of FPGA components for QNNs.
+ Read more on the `FINN project homepage `_.
+
+* The FINN **compiler**, which this Read the Docs website is the documentation for.
+ The compiler is a central part of the FINN project (above) that maps QNNs to
+ dataflow-style FPGA architectures.
+ You can find the FINN compiler in this `GitHub repository `_.
More FINN Resources
diff --git a/docs/finn/internals.rst b/docs/finn/internals.rst
index 0fbc3cf727..9305f78402 100644
--- a/docs/finn/internals.rst
+++ b/docs/finn/internals.rst
@@ -4,12 +4,12 @@
Internals
*********
-Intermediate Representation: FINN-ONNX
-======================================
+Intermediate Representation: QONNX and FINN-ONNX
+================================================
FINN uses `ONNX `_ as an intermediate representation (IR) for neural networks. As such, almost every component inside FINN uses ONNX and its `Python API `_, so you may want to familiarize yourself with how ONNX represents DNNs. Specifically, the `ONNX protobuf description `_ (or its `human-readable documentation `_) and the `operator schemas `_ are useful as reference documents. We also provide a Jupyter notebook that can help to get familiar with ONNX by showing how to work with a simple ONNX model in FINN, see chapter :ref:`tutorials` for details.
-.. note:: FINN uses ONNX is a specific way that we refer to as FINN-ONNX, and not all ONNX graphs are supported by FINN (and vice versa).
+.. note:: FINN supports two specialized variants of ONNX called QONNX and FINN-ONNX, and not all ONNX graphs are supported by FINN (and vice versa).
Custom Quantization Annotations
===============================
diff --git a/docs/finn/source_code/finn.analysis.rst b/docs/finn/source_code/finn.analysis.rst
index 7312150657..1de42ac32b 100644
--- a/docs/finn/source_code/finn.analysis.rst
+++ b/docs/finn/source_code/finn.analysis.rst
@@ -23,6 +23,13 @@ finn.analysis.base
:undoc-members:
:show-inheritance:
+finn.analysis.inference\_cost
+-----------------------------
+
+.. automodule:: finn.analysis.inference_cost
+ :members:
+ :undoc-members:
+ :show-inheritance:
finn.analysis.topology
-----------------------------
diff --git a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
index 7b4e7bfa05..34a6285f22 100644
--- a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
+++ b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
@@ -13,6 +13,23 @@ Base Class
:undoc-members:
:show-inheritance:
+finn.custom\_op.fpgadataflow.addstreams\_batch
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.addstreams_batch
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.custom\_op.fpgadataflow.channelwise\_op\_batch
+----------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.channelwise_op_batch
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+
finn.custom\_op.fpgadataflow.convolutioninputgenerator
-------------------------------------------------------------
@@ -21,6 +38,87 @@ finn.custom\_op.fpgadataflow.convolutioninputgenerator
:undoc-members:
:show-inheritance:
+finn.custom\_op.fpgadataflow.convolutioninputgenerator1d
+-------------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.convolutioninputgenerator1d
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.custom\_op.fpgadataflow.downsampler
+-----------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.downsampler
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.custom\_op.fpgadataflow.duplicatestreams\_batch
+------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.duplicatestreams_batch
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.custom\_op.fpgadataflow.fmpadding\_batch
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.fmpadding_batch
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.custom\_op.fpgadataflow.globalaccpool\_batch
+---------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.globalaccpool_batch
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.custom\_op.fpgadataflow.iodma
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.iodma
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.custom\_op.fpgadataflow.labelselect\_batch
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.labelselect_batch
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.custom\_op.fpgadataflow.lookup
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.lookup
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.custom\_op.fpgadataflow.pool\_batch
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.pool_batch
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.custom\_op.fpgadataflow.streamingdataflowpartition
+--------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.streamingdataflowpartition
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+
finn.custom\_op.fpgadataflow.streamingdatawidthconverter\_batch
----------------------------------------------------------------------
@@ -61,6 +159,15 @@ finn.custom\_op.fpgadataflow.templates
:undoc-members:
:show-inheritance:
+finn.custom\_op.fpgadataflow.thresholding\_batch
+-------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.thresholding_batch
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+
finn.custom\_op.fpgadataflow.tlastmarker
-----------------------------------------------
@@ -68,3 +175,19 @@ finn.custom\_op.fpgadataflow.tlastmarker
:members:
:undoc-members:
:show-inheritance:
+
+finn.custom\_op.fpgadataflow.upsampler
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.upsampler
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.custom\_op.fpgadataflow.vector\_vector\_activate\_batch
+--------------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.vector_vector_activate_batch
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/finn/source_code/finn.custom_op.general.rst b/docs/finn/source_code/finn.custom_op.general.rst
index e86774a48e..87749fd69e 100644
--- a/docs/finn/source_code/finn.custom_op.general.rst
+++ b/docs/finn/source_code/finn.custom_op.general.rst
@@ -5,6 +5,14 @@ Custom Op - General
General Custom Ops
===================
+finn.custom\_op.general.bipolar\_quant
+--------------------------------------
+
+.. automodule:: finn.custom_op.general.bipolar_quant
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
finn.custom\_op.general.debugmarker
-----------------------------------
@@ -13,6 +21,14 @@ finn.custom\_op.general.debugmarker
:undoc-members:
:show-inheritance:
+finn.custom\_op.general.genericpartition
+-----------------------------------------
+
+.. automodule:: finn.custom_op.general.genericpartition
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
finn.custom\_op.general.im2col
------------------------------
@@ -37,6 +53,14 @@ finn.custom\_op.general.multithreshold
:undoc-members:
:show-inheritance:
+finn.custom\_op.general.quant
+------------------------------
+
+.. automodule:: finn.custom_op.general.quant
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
finn.custom\_op.general.quantavgpool2d
--------------------------------------
@@ -45,13 +69,13 @@ finn.custom\_op.general.quantavgpool2d
:undoc-members:
:show-inheritance:
-finn.custom\_op.general.streamingdataflowpartition
----------------------------------------------------
+finn.custom\_op.general.trunc
+------------------------------
-.. automodule:: finn.custom_op.general.streamingdataflowpartition
- :members:
- :undoc-members:
- :show-inheritance:
+.. automodule:: finn.custom_op.general.trunc
+ :members:
+ :undoc-members:
+ :show-inheritance:
finn.custom\_op.general.xnorpopcount
-------------------------------------
diff --git a/docs/finn/source_code/finn.transformation.fpgadataflow.rst b/docs/finn/source_code/finn.transformation.fpgadataflow.rst
index 42bc7fb531..b1e7075bdc 100644
--- a/docs/finn/source_code/finn.transformation.fpgadataflow.rst
+++ b/docs/finn/source_code/finn.transformation.fpgadataflow.rst
@@ -62,6 +62,14 @@ finn.transformation.fpgadataflow.create\_stitched\_ip
:undoc-members:
:show-inheritance:
+finn.transformation.fpgadataflow.externalize\_params
+------------------------------------------------------------
+
+.. automodule:: finn.transformation.fpgadataflow.externalize_params
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
finn.transformation.fpgadataflow.floorplan
----------------------------------------------------
diff --git a/docs/finn/source_code/finn.transformation.qonnx.rst b/docs/finn/source_code/finn.transformation.qonnx.rst
new file mode 100644
index 0000000000..8320e19efb
--- /dev/null
+++ b/docs/finn/source_code/finn.transformation.qonnx.rst
@@ -0,0 +1,51 @@
+************************
+Transformation - QONNX
+************************
+
+Transformation (QONNX)
+===========================
+
+.. automodule:: finn.transformation.qonnx
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.transformation.qonnx.convert\_qonnx\_to\_finn
+---------------------------------------------------
+
+.. automodule:: finn.transformation.qonnx.convert_qonnx_to_finn
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.transformation.qonnx.fold\_quant\_weights
+-----------------------------------------------
+
+.. automodule:: finn.transformation.qonnx.fold_quant_weights
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.transformation.qonnx.infer\_quant\_avg\_pool\_2d
+------------------------------------------------------
+
+.. automodule:: finn.transformation.qonnx.infer_quant_avg_pool_2d
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.transformation.qonnx.qonnx\_activation\_handlers
+-------------------------------------------------------
+
+.. automodule:: finn.transformation.qonnx.qonnx_activation_handlers
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.transformation.qonnx.quant\_act\_to\_multithreshold
+---------------------------------------------------------
+
+.. automodule:: finn.transformation.qonnx.quant_act_to_multithreshold
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/finn/source_code/finn.transformation.rst b/docs/finn/source_code/finn.transformation.rst
index aeb0d76142..cffb0fd0f9 100644
--- a/docs/finn/source_code/finn.transformation.rst
+++ b/docs/finn/source_code/finn.transformation.rst
@@ -11,6 +11,7 @@ Submodules
:maxdepth: 2
finn.transformation.fpgadataflow
+ finn.transformation.qonnx
finn.transformation.streamline
Transformation Passes
@@ -40,6 +41,14 @@ finn.transformation.bipolar\_to\_xnor
:undoc-members:
:show-inheritance:
+finn.transformation.change\_3d\_tensors\_to\_4d
+------------------------------------------------
+
+.. automodule:: finn.transformation.change_3d_tensors_to_4d
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
finn.transformation.change\_datalayout
--------------------------------------------
@@ -48,6 +57,13 @@ finn.transformation.change\_datalayout
:undoc-members:
:show-inheritance:
+finn.transformation.create\_generic\_partitions
+------------------------------------------------
+
+.. automodule:: finn.transformation.create_generic_partitions
+ :members:
+ :undoc-members:
+ :show-inheritance:
finn.transformation.double\_to\_single\_float
----------------------------------------------------
@@ -57,6 +73,23 @@ finn.transformation.double\_to\_single\_float
:undoc-members:
:show-inheritance:
+finn.transformation.extend\_partition
+------------------------------------------
+
+.. automodule:: finn.transformation.extend_partition
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+finn.transformation.extract\_conv\_bias
+------------------------------------------
+
+.. automodule:: finn.transformation.extract_conv_bias
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+
finn.transformation.fold\_constants
------------------------------------------
@@ -65,6 +98,14 @@ finn.transformation.fold\_constants
:undoc-members:
:show-inheritance:
+finn.transformation.gemm\_to\_matmul
+------------------------------------------
+
+.. automodule:: finn.transformation.gemm_to_matmul
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
finn.transformation.general
----------------------------------
@@ -113,6 +154,13 @@ finn.transformation.lower\_convs\_to\_matmul
:undoc-members:
:show-inheritance:
+finn.transformation.make\_input\_chanlast
+------------------------------------------
+
+.. automodule:: finn.transformation.make_input_chanlast
+ :members:
+ :undoc-members:
+ :show-inheritance:
finn.transformation.merge\_onnx\_models
----------------------------------------
@@ -130,3 +178,11 @@ finn.transformation.move\_reshape
:members:
:undoc-members:
:show-inheritance:
+
+finn.transformation.remove
+-------------------------------------
+
+.. automodule:: finn.transformation.remove
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/finn/source_code/finn.transformation.streamline.rst b/docs/finn/source_code/finn.transformation.streamline.rst
index f43d6d1231..9ed4bbe1d8 100644
--- a/docs/finn/source_code/finn.transformation.streamline.rst
+++ b/docs/finn/source_code/finn.transformation.streamline.rst
@@ -26,13 +26,6 @@ finn.transformation.streamline.collapse\_repeated
:undoc-members:
:show-inheritance:
-finn.transformation.streamline.remove
--------------------------------------
-
-.. automodule:: finn.transformation.streamline.remove
- :members:
- :undoc-members:
- :show-inheritance:
finn.transformation.streamline.reorder
---------------------------------------------
diff --git a/docs/finn/source_code/finn.util.rst b/docs/finn/source_code/finn.util.rst
index 82e4bf3261..62b72c2ac8 100644
--- a/docs/finn/source_code/finn.util.rst
+++ b/docs/finn/source_code/finn.util.rst
@@ -72,6 +72,15 @@ finn.util.onnx
:undoc-members:
:show-inheritance:
+finn.util.platforms
+--------------------
+
+.. automodule:: finn.util.platforms
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+
finn.util.pytorch
------------------
diff --git a/finn-rtllib/memstream/component.xml b/finn-rtllib/memstream/component.xml
index 3d6767abfc..1e5b710dc8 100644
--- a/finn-rtllib/memstream/component.xml
+++ b/finn-rtllib/memstream/component.xml
@@ -1662,9 +1662,27 @@
- zynq
- virtexuplusHBM
+ aartix7
+ akintex7
+ artix7
+ artix7l
+ azynq
+ kintex7
+ kintex7l
+ kintexu
+ kintexuplus
+ qkintex7
+ qkintex7l
+ qvirtex7
+ qzynq
+ qzynqplus
+ versal
+ virtex7
+ virtexu
virtexuplus
+ virtexuplusHBM
+ virtexupluse58g
+ zynq
zynquplus
diff --git a/finn-rtllib/memstream/hdl/mux.v b/finn-rtllib/memstream/hdl/mux.v
index c5b89aeb4e..f7087f9735 100644
--- a/finn-rtllib/memstream/hdl/mux.v
+++ b/finn-rtllib/memstream/hdl/mux.v
@@ -1,44 +1,44 @@
-/*
- Copyright (c) 2020, Xilinx
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
- * Neither the name of FINN nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-module mux
-#(
- parameter NINPUTS = 1,
- parameter WIDTH = 16
-)
-(
- input [NINPUTS*WIDTH-1:0] in,
- output [WIDTH-1:0] out,
- input [$clog2(NINPUTS)-1:0] sel
-);
-
-assign out = in >> (sel*WIDTH);
-
-endmodule
\ No newline at end of file
+/*
+ Copyright (c) 2020, Xilinx
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of FINN nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+module mux
+#(
+ parameter NINPUTS = 1,
+ parameter WIDTH = 16
+)
+(
+ input [NINPUTS*WIDTH-1:0] in,
+ output [WIDTH-1:0] out,
+ input [$clog2(NINPUTS)-1:0] sel
+);
+
+assign out = in >> (sel*WIDTH);
+
+endmodule
diff --git a/finn-rtllib/memstream/sim/gen_memblocks.sh b/finn-rtllib/memstream/sim/gen_memblocks.sh
index 05962f7be8..b6e6b656ad 100644
--- a/finn-rtllib/memstream/sim/gen_memblocks.sh
+++ b/finn-rtllib/memstream/sim/gen_memblocks.sh
@@ -36,4 +36,4 @@ for (( i=0; i<$NBLOCKS; i++ ))
do
START=$(( 1 + $i * 1024 ))
tail -n +$START $1 | head -n 1024 >> memblock_$i.dat
-done
\ No newline at end of file
+done
diff --git a/finn-rtllib/memstream/sim/tb_memstream.v b/finn-rtllib/memstream/sim/tb_memstream.v
index d63fa30046..ad3efad5bd 100644
--- a/finn-rtllib/memstream/sim/tb_memstream.v
+++ b/finn-rtllib/memstream/sim/tb_memstream.v
@@ -1,369 +1,369 @@
-/*
- Copyright (c) 2020, Xilinx
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
- * Neither the name of FINN nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-`timescale 1ns/10ps
-
-module tb_memstream;
-
-//parameters to enable/disable axi-mm, set number of streams, set readmemh for memory, set per-stream offsets in memory, set per-stream widths
-parameter CONFIG_EN = 1;
-parameter NSTREAMS = 4;//1 up to 6
-
-parameter MEM_DEPTH = 9216;
-parameter MEM_WIDTH = 32;
-parameter MEM_INIT = "./";
-parameter MEM_CHECK = "golden.dat";
-
-//widths per stream
-parameter STRM0_WIDTH = 32;
-parameter STRM1_WIDTH = 32;
-parameter STRM2_WIDTH = 32;
-parameter STRM3_WIDTH = 32;
-parameter STRM4_WIDTH = 1;
-parameter STRM5_WIDTH = 1;
-
-//depths per stream
-parameter STRM0_DEPTH = 2304;
-parameter STRM1_DEPTH = 2304;
-parameter STRM2_DEPTH = 2304;
-parameter STRM3_DEPTH = 2304;
-parameter STRM4_DEPTH = 1;
-parameter STRM5_DEPTH = 1;
-
-//offsets for each stream
-parameter STRM0_OFFSET = 0;
-parameter STRM1_OFFSET = 2304;
-parameter STRM2_OFFSET = 4608;
-parameter STRM3_OFFSET = 6912;
-parameter STRM4_OFFSET = 0;
-parameter STRM5_OFFSET = 0;
-
-
-reg clk;
-reg rst;
-
-reg [31:0] config_address = 0;
-reg config_ce = 0;
-reg config_we = 0;
-reg [31:0] config_d0 = 0;
-wire [31:0] config_q0;
-
-//multiple wire AXI Streams
-reg m_axis_0_afull;
-reg m_axis_0_tready;
-wire m_axis_0_tvalid;
-wire [STRM0_WIDTH-1:0] m_axis_0_tdata;
-
-reg m_axis_1_afull;
-reg m_axis_1_tready;
-wire m_axis_1_tvalid;
-wire [STRM1_WIDTH-1:0] m_axis_1_tdata;
-
-reg m_axis_2_afull;
-reg m_axis_2_tready;
-wire m_axis_2_tvalid;
-wire [STRM2_WIDTH-1:0] m_axis_2_tdata;
-
-reg m_axis_3_afull;
-reg m_axis_3_tready;
-wire m_axis_3_tvalid;
-wire [STRM3_WIDTH-1:0] m_axis_3_tdata;
-
-reg m_axis_4_afull;
-reg m_axis_4_tready;
-wire m_axis_4_tvalid;
-wire [STRM4_WIDTH-1:0] m_axis_4_tdata;
-
-reg m_axis_5_afull;
-reg m_axis_5_tready;
-wire m_axis_5_tvalid;
-wire [STRM5_WIDTH-1:0] m_axis_5_tdata;
-
-reg [MEM_WIDTH-1:0] golden[MEM_DEPTH-1:0];
-integer ptr0, ptr1, ptr2, ptr3, ptr4, ptr5;
-integer done = 0;
-reg [5:0] rng;
-
-//clock
-initial begin
- clk = 0;
- forever #5 clk = ~clk;
-end
-
-initial begin
- rst = 1;
- config_ce = 0;
- m_axis_0_afull = 0;
- m_axis_1_afull = 0;
- m_axis_2_afull = 0;
- m_axis_3_afull = 0;
- m_axis_4_afull = 0;
- m_axis_5_afull = 0;
- m_axis_0_tready = 1;
- m_axis_1_tready = 1;
- m_axis_2_tready = 1;
- m_axis_3_tready = 1;
- m_axis_4_tready = 1;
- m_axis_5_tready = 1;
- repeat(100) @(negedge clk);
- rst = 0;
- #100
- fork
- begin
- $display("Starting to generate random AFULL");
- while(~done) begin
- rng = $random;
- m_axis_0_afull = rng[0];
- m_axis_1_afull = rng[1];
- m_axis_2_afull = rng[2];
- m_axis_3_afull = rng[3];
- m_axis_4_afull = rng[4];
- m_axis_5_afull = rng[5];
- @(negedge clk);
- end
- end
- join
-end
-
-
-//DUT
-memstream
-#(
- CONFIG_EN,
- NSTREAMS,
- MEM_DEPTH,
- MEM_WIDTH,
- MEM_INIT,
-
- //widths per stream
- STRM0_WIDTH,
- STRM1_WIDTH,
- STRM2_WIDTH,
- STRM3_WIDTH,
- STRM4_WIDTH,
- STRM5_WIDTH,
-
- //depths per stream
- STRM0_DEPTH,
- STRM1_DEPTH,
- STRM2_DEPTH,
- STRM3_DEPTH,
- STRM4_DEPTH,
- STRM5_DEPTH,
-
- //offsets for each stream
- STRM0_OFFSET,
- STRM1_OFFSET,
- STRM2_OFFSET,
- STRM3_OFFSET,
- STRM4_OFFSET,
- STRM5_OFFSET
-)
-dut
-(
- clk,
- ~rst,
-
- //optional AXI-Lite interface
- config_address,
- config_ce,
- config_we,
- config_d0,
- config_q0,
-
- //multiple output AXI Streams
- m_axis_0_afull,
- m_axis_0_tready,
- m_axis_0_tvalid,
- m_axis_0_tdata,
-
- m_axis_1_afull,
- m_axis_1_tready,
- m_axis_1_tvalid,
- m_axis_1_tdata,
-
- m_axis_2_afull,
- m_axis_2_tready,
- m_axis_2_tvalid,
- m_axis_2_tdata,
-
- m_axis_3_afull,
- m_axis_3_tready,
- m_axis_3_tvalid,
- m_axis_3_tdata,
-
- m_axis_4_afull,
- m_axis_4_tready,
- m_axis_4_tvalid,
- m_axis_4_tdata,
-
- m_axis_5_afull,
- m_axis_5_tready,
- m_axis_5_tvalid,
- m_axis_5_tdata
-
-
-);
-
-//stream checkers
-initial begin
- ptr0 = STRM0_OFFSET;
- ptr1 = STRM1_OFFSET;
- ptr2 = STRM2_OFFSET;
- ptr3 = STRM3_OFFSET;
- ptr4 = STRM4_OFFSET;
- ptr5 = STRM5_OFFSET;
- fork
- //check stream 0
- begin
- $display("Starting stream 0 checker");
- while(~done & (NSTREAMS > 0)) begin
- @(negedge clk);
- if(m_axis_0_tvalid) begin
- if(m_axis_0_tdata != golden[ptr0]) begin
- $display("Mismatch on stream 0");
- $stop();
- end
- //increment pointer
- ptr0 = ptr0 + 1;
- //rewind pointer if it's reached end
- if(ptr0 == (STRM0_OFFSET + STRM0_DEPTH))
- ptr0 = STRM0_OFFSET;
- end
- end
- end
- //check stream 1
- begin
- $display("Starting stream 1 checker");
- while(~done & (NSTREAMS > 1)) begin
- @(negedge clk);
- if(m_axis_1_tvalid) begin
- if(m_axis_1_tdata != golden[ptr1]) begin
- $display("Mismatch on stream 1");
- $stop();
- end
- //increment pointer
- ptr1 = ptr1 + 1;
- //rewind pointer if it's reached end
- if(ptr1 == (STRM1_OFFSET + STRM1_DEPTH))
- ptr1 = STRM1_OFFSET;
- end
- end
- end
-
- //check stream 2
- begin
- $display("Starting stream 2 checker");
- while(~done & (NSTREAMS > 2)) begin
- @(negedge clk);
- if(m_axis_2_tvalid) begin
- if(m_axis_2_tdata != golden[ptr2]) begin
- $display("Mismatch on stream 2");
- $stop();
- end
- //increment pointer
- ptr2 = ptr2 + 1;
- //rewind pointer if it's reached end
- if(ptr2 == (STRM2_OFFSET + STRM2_DEPTH))
- ptr2 = STRM2_OFFSET;
- end
- end
- end
- //check stream 3
- begin
- $display("Starting stream 3 checker");
- while(~done & (NSTREAMS > 3)) begin
- @(negedge clk);
- if(m_axis_3_tvalid) begin
- if(m_axis_3_tdata != golden[ptr3]) begin
- $display("Mismatch on stream 3");
- $stop();
- end
- //increment pointer
- ptr3 = ptr3 + 1;
- //rewind pointer if it's reached end
- if(ptr3 == (STRM3_OFFSET + STRM3_DEPTH))
- ptr3 = STRM3_OFFSET;
- end
- end
- end
- //check stream 4
- begin
- $display("Starting stream 4 checker");
- while(~done & (NSTREAMS > 4)) begin
- @(negedge clk);
- if(m_axis_4_tvalid) begin
- if(m_axis_4_tdata != golden[ptr4]) begin
- $display("Mismatch on stream 4");
- $stop();
- end
- //increment pointer
- ptr4 = ptr4 + 1;
- //rewind pointer if it's reached end
- if(ptr4 == (STRM4_OFFSET + STRM4_DEPTH))
- ptr4 = STRM4_OFFSET;
- end
- end
- end
- //check stream 5
- begin
- $display("Starting stream 5 checker");
- while(~done & (NSTREAMS > 5)) begin
- @(negedge clk);
- if(m_axis_5_tvalid) begin
- if(m_axis_5_tdata != golden[ptr5]) begin
- $display("Mismatch on stream 5");
- $stop();
- end
- //increment pointer
- ptr5 = ptr5 + 1;
- //rewind pointer if it's reached end
- if(ptr5 == (STRM5_OFFSET + STRM5_DEPTH))
- ptr5 = STRM5_OFFSET;
- end
- end
- end
- join
-end
-
-initial begin
- done = 0;
- $readmemh(MEM_CHECK,golden);
-// $dumpfile("wave.vcd");
-// $dumpvars(0,tb_memstream);
- @(negedge rst);
- #10000000
- $display("Test done!");
- done = 1;
- #1000
- $finish();
-end
-
-endmodule
+/*
+ Copyright (c) 2020, Xilinx
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of FINN nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+`timescale 1ns/10ps
+
+module tb_memstream;
+
+//parameters to enable/disable axi-mm, set number of streams, set readmemh for memory, set per-stream offsets in memory, set per-stream widths
+parameter CONFIG_EN = 1;
+parameter NSTREAMS = 4;//1 up to 6
+
+parameter MEM_DEPTH = 9216;
+parameter MEM_WIDTH = 32;
+parameter MEM_INIT = "./";
+parameter MEM_CHECK = "golden.dat";
+
+//widths per stream
+parameter STRM0_WIDTH = 32;
+parameter STRM1_WIDTH = 32;
+parameter STRM2_WIDTH = 32;
+parameter STRM3_WIDTH = 32;
+parameter STRM4_WIDTH = 1;
+parameter STRM5_WIDTH = 1;
+
+//depths per stream
+parameter STRM0_DEPTH = 2304;
+parameter STRM1_DEPTH = 2304;
+parameter STRM2_DEPTH = 2304;
+parameter STRM3_DEPTH = 2304;
+parameter STRM4_DEPTH = 1;
+parameter STRM5_DEPTH = 1;
+
+//offsets for each stream
+parameter STRM0_OFFSET = 0;
+parameter STRM1_OFFSET = 2304;
+parameter STRM2_OFFSET = 4608;
+parameter STRM3_OFFSET = 6912;
+parameter STRM4_OFFSET = 0;
+parameter STRM5_OFFSET = 0;
+
+
+reg clk;
+reg rst;
+
+reg [31:0] config_address = 0;
+reg config_ce = 0;
+reg config_we = 0;
+reg [31:0] config_d0 = 0;
+wire [31:0] config_q0;
+
+//multiple wire AXI Streams
+reg m_axis_0_afull;
+reg m_axis_0_tready;
+wire m_axis_0_tvalid;
+wire [STRM0_WIDTH-1:0] m_axis_0_tdata;
+
+reg m_axis_1_afull;
+reg m_axis_1_tready;
+wire m_axis_1_tvalid;
+wire [STRM1_WIDTH-1:0] m_axis_1_tdata;
+
+reg m_axis_2_afull;
+reg m_axis_2_tready;
+wire m_axis_2_tvalid;
+wire [STRM2_WIDTH-1:0] m_axis_2_tdata;
+
+reg m_axis_3_afull;
+reg m_axis_3_tready;
+wire m_axis_3_tvalid;
+wire [STRM3_WIDTH-1:0] m_axis_3_tdata;
+
+reg m_axis_4_afull;
+reg m_axis_4_tready;
+wire m_axis_4_tvalid;
+wire [STRM4_WIDTH-1:0] m_axis_4_tdata;
+
+reg m_axis_5_afull;
+reg m_axis_5_tready;
+wire m_axis_5_tvalid;
+wire [STRM5_WIDTH-1:0] m_axis_5_tdata;
+
+reg [MEM_WIDTH-1:0] golden[MEM_DEPTH-1:0];
+integer ptr0, ptr1, ptr2, ptr3, ptr4, ptr5;
+integer done = 0;
+reg [5:0] rng;
+
+//clock
+initial begin
+ clk = 0;
+ forever #5 clk = ~clk;
+end
+
+initial begin
+ rst = 1;
+ config_ce = 0;
+ m_axis_0_afull = 0;
+ m_axis_1_afull = 0;
+ m_axis_2_afull = 0;
+ m_axis_3_afull = 0;
+ m_axis_4_afull = 0;
+ m_axis_5_afull = 0;
+ m_axis_0_tready = 1;
+ m_axis_1_tready = 1;
+ m_axis_2_tready = 1;
+ m_axis_3_tready = 1;
+ m_axis_4_tready = 1;
+ m_axis_5_tready = 1;
+ repeat(100) @(negedge clk);
+ rst = 0;
+ #100
+ fork
+ begin
+ $display("Starting to generate random AFULL");
+ while(~done) begin
+ rng = $random;
+ m_axis_0_afull = rng[0];
+ m_axis_1_afull = rng[1];
+ m_axis_2_afull = rng[2];
+ m_axis_3_afull = rng[3];
+ m_axis_4_afull = rng[4];
+ m_axis_5_afull = rng[5];
+ @(negedge clk);
+ end
+ end
+ join
+end
+
+
+//DUT
+memstream
+#(
+ CONFIG_EN,
+ NSTREAMS,
+ MEM_DEPTH,
+ MEM_WIDTH,
+ MEM_INIT,
+
+ //widths per stream
+ STRM0_WIDTH,
+ STRM1_WIDTH,
+ STRM2_WIDTH,
+ STRM3_WIDTH,
+ STRM4_WIDTH,
+ STRM5_WIDTH,
+
+ //depths per stream
+ STRM0_DEPTH,
+ STRM1_DEPTH,
+ STRM2_DEPTH,
+ STRM3_DEPTH,
+ STRM4_DEPTH,
+ STRM5_DEPTH,
+
+ //offsets for each stream
+ STRM0_OFFSET,
+ STRM1_OFFSET,
+ STRM2_OFFSET,
+ STRM3_OFFSET,
+ STRM4_OFFSET,
+ STRM5_OFFSET
+)
+dut
+(
+ clk,
+ ~rst,
+
+ //optional AXI-Lite interface
+ config_address,
+ config_ce,
+ config_we,
+ config_d0,
+ config_q0,
+
+ //multiple output AXI Streams
+ m_axis_0_afull,
+ m_axis_0_tready,
+ m_axis_0_tvalid,
+ m_axis_0_tdata,
+
+ m_axis_1_afull,
+ m_axis_1_tready,
+ m_axis_1_tvalid,
+ m_axis_1_tdata,
+
+ m_axis_2_afull,
+ m_axis_2_tready,
+ m_axis_2_tvalid,
+ m_axis_2_tdata,
+
+ m_axis_3_afull,
+ m_axis_3_tready,
+ m_axis_3_tvalid,
+ m_axis_3_tdata,
+
+ m_axis_4_afull,
+ m_axis_4_tready,
+ m_axis_4_tvalid,
+ m_axis_4_tdata,
+
+ m_axis_5_afull,
+ m_axis_5_tready,
+ m_axis_5_tvalid,
+ m_axis_5_tdata
+
+
+);
+
+//stream checkers
+initial begin
+ ptr0 = STRM0_OFFSET;
+ ptr1 = STRM1_OFFSET;
+ ptr2 = STRM2_OFFSET;
+ ptr3 = STRM3_OFFSET;
+ ptr4 = STRM4_OFFSET;
+ ptr5 = STRM5_OFFSET;
+ fork
+ //check stream 0
+ begin
+ $display("Starting stream 0 checker");
+ while(~done & (NSTREAMS > 0)) begin
+ @(negedge clk);
+ if(m_axis_0_tvalid) begin
+ if(m_axis_0_tdata != golden[ptr0]) begin
+ $display("Mismatch on stream 0");
+ $stop();
+ end
+ //increment pointer
+ ptr0 = ptr0 + 1;
+ //rewind pointer if it's reached end
+ if(ptr0 == (STRM0_OFFSET + STRM0_DEPTH))
+ ptr0 = STRM0_OFFSET;
+ end
+ end
+ end
+ //check stream 1
+ begin
+ $display("Starting stream 1 checker");
+ while(~done & (NSTREAMS > 1)) begin
+ @(negedge clk);
+ if(m_axis_1_tvalid) begin
+ if(m_axis_1_tdata != golden[ptr1]) begin
+ $display("Mismatch on stream 1");
+ $stop();
+ end
+ //increment pointer
+ ptr1 = ptr1 + 1;
+ //rewind pointer if it's reached end
+ if(ptr1 == (STRM1_OFFSET + STRM1_DEPTH))
+ ptr1 = STRM1_OFFSET;
+ end
+ end
+ end
+
+ //check stream 2
+ begin
+ $display("Starting stream 2 checker");
+ while(~done & (NSTREAMS > 2)) begin
+ @(negedge clk);
+ if(m_axis_2_tvalid) begin
+ if(m_axis_2_tdata != golden[ptr2]) begin
+ $display("Mismatch on stream 2");
+ $stop();
+ end
+ //increment pointer
+ ptr2 = ptr2 + 1;
+ //rewind pointer if it's reached end
+ if(ptr2 == (STRM2_OFFSET + STRM2_DEPTH))
+ ptr2 = STRM2_OFFSET;
+ end
+ end
+ end
+ //check stream 3
+ begin
+ $display("Starting stream 3 checker");
+ while(~done & (NSTREAMS > 3)) begin
+ @(negedge clk);
+ if(m_axis_3_tvalid) begin
+ if(m_axis_3_tdata != golden[ptr3]) begin
+ $display("Mismatch on stream 3");
+ $stop();
+ end
+ //increment pointer
+ ptr3 = ptr3 + 1;
+ //rewind pointer if it's reached end
+ if(ptr3 == (STRM3_OFFSET + STRM3_DEPTH))
+ ptr3 = STRM3_OFFSET;
+ end
+ end
+ end
+ //check stream 4
+ begin
+ $display("Starting stream 4 checker");
+ while(~done & (NSTREAMS > 4)) begin
+ @(negedge clk);
+ if(m_axis_4_tvalid) begin
+ if(m_axis_4_tdata != golden[ptr4]) begin
+ $display("Mismatch on stream 4");
+ $stop();
+ end
+ //increment pointer
+ ptr4 = ptr4 + 1;
+ //rewind pointer if it's reached end
+ if(ptr4 == (STRM4_OFFSET + STRM4_DEPTH))
+ ptr4 = STRM4_OFFSET;
+ end
+ end
+ end
+ //check stream 5
+ begin
+ $display("Starting stream 5 checker");
+ while(~done & (NSTREAMS > 5)) begin
+ @(negedge clk);
+ if(m_axis_5_tvalid) begin
+ if(m_axis_5_tdata != golden[ptr5]) begin
+ $display("Mismatch on stream 5");
+ $stop();
+ end
+ //increment pointer
+ ptr5 = ptr5 + 1;
+ //rewind pointer if it's reached end
+ if(ptr5 == (STRM5_OFFSET + STRM5_DEPTH))
+ ptr5 = STRM5_OFFSET;
+ end
+ end
+ end
+ join
+end
+
+initial begin
+ done = 0;
+ $readmemh(MEM_CHECK,golden);
+// $dumpfile("wave.vcd");
+// $dumpvars(0,tb_memstream);
+ @(negedge rst);
+ #10000000
+ $display("Test done!");
+ done = 1;
+ #1000
+ $finish();
+end
+
+endmodule
diff --git a/finn-rtllib/memstream/sim/tb_memstream_writes.v b/finn-rtllib/memstream/sim/tb_memstream_writes.v
index a6ac747e96..c66807454b 100644
--- a/finn-rtllib/memstream/sim/tb_memstream_writes.v
+++ b/finn-rtllib/memstream/sim/tb_memstream_writes.v
@@ -1,486 +1,486 @@
-/*
- Copyright (c) 2020, Xilinx
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
- * Neither the name of FINN nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-`timescale 1ns/10ps
-
-module tb_memstream_writes;
-
-//parameters to enable/disable axi-mm, set number of streams, set readmemh for memory, set per-stream offsets in memory, set per-stream widths
-parameter CONFIG_EN = 1;
-parameter NSTREAMS = 2;//1 up to 6
-
-parameter MEM_DEPTH = 40;
-parameter MEM_WIDTH = 70;
-
-//widths per stream
-parameter STRM0_WIDTH = 70;
-parameter STRM1_WIDTH = 32;
-parameter STRM2_WIDTH = 32;
-parameter STRM3_WIDTH = 32;
-parameter STRM4_WIDTH = 1;
-parameter STRM5_WIDTH = 1;
-
-//depths per stream
-parameter STRM0_DEPTH = 20;
-parameter STRM1_DEPTH = 20;
-parameter STRM2_DEPTH = 2304;
-parameter STRM3_DEPTH = 2304;
-parameter STRM4_DEPTH = 1;
-parameter STRM5_DEPTH = 1;
-
-//offsets for each stream
-parameter STRM0_OFFSET = 0;
-parameter STRM1_OFFSET = 20;
-parameter STRM2_OFFSET = 4608;
-parameter STRM3_OFFSET = 6912;
-parameter STRM4_OFFSET = 0;
-parameter STRM5_OFFSET = 0;
-
-
-reg clk;
-reg rst;
-
-wire awready;
-reg awvalid;
-reg [31:0] awaddr;
-reg [2:0] awprot;
-//write data
-wire wready;
-reg wvalid;
-reg [31:0] wdata;
-reg [3:0] wstrb;
-//burst response
-reg bready;
-wire bvalid;
-wire [1:0] bresp;
-
-//Read channels
-//read address
-wire arready;
-reg arvalid;
-reg [31:0] araddr;
-reg [2:0] arprot;
-//read data
-reg rready;
-wire rvalid;
-wire [1:0] rresp;
-wire [31:0] rdata;
-
-//multiple wire AXI Streams
-reg m_axis_0_afull;
-reg m_axis_0_tready;
-wire m_axis_0_tvalid;
-wire [STRM0_WIDTH-1:0] m_axis_0_tdata;
-
-reg m_axis_1_afull;
-reg m_axis_1_tready;
-wire m_axis_1_tvalid;
-wire [STRM1_WIDTH-1:0] m_axis_1_tdata;
-
-reg m_axis_2_afull;
-reg m_axis_2_tready;
-wire m_axis_2_tvalid;
-wire [STRM2_WIDTH-1:0] m_axis_2_tdata;
-
-reg m_axis_3_afull;
-reg m_axis_3_tready;
-wire m_axis_3_tvalid;
-wire [STRM3_WIDTH-1:0] m_axis_3_tdata;
-
-reg m_axis_4_afull;
-reg m_axis_4_tready;
-wire m_axis_4_tvalid;
-wire [STRM4_WIDTH-1:0] m_axis_4_tdata;
-
-reg m_axis_5_afull;
-reg m_axis_5_tready;
-wire m_axis_5_tvalid;
-wire [STRM5_WIDTH-1:0] m_axis_5_tdata;
-
-reg [MEM_WIDTH-1:0] golden[MEM_DEPTH-1:0];
-reg [MEM_WIDTH-1:0] gword;
-integer ptr0, ptr1, ptr2, ptr3, ptr4, ptr5;
-integer done = 0;
-integer i, j;
-reg [5:0] rng;
-
-parameter NFOLDS_PER_WORD = (MEM_WIDTH+31)/32;
-
-task axi_write;
- input [MEM_WIDTH-1:0] data;
- input [31:0] adr;
- begin
- for(j=0; j<(1<<$clog2(NFOLDS_PER_WORD)); j=j+1) begin
- @(negedge clk);
- awvalid = 1;
- wvalid = 1;
- wdata = data>>(j*32);
- awaddr = (adr*(1<<$clog2(NFOLDS_PER_WORD))+j)*4;
- fork
- begin
- @(posedge awready);
- @(posedge clk) awvalid = 0;
- end
- begin
- @(posedge wready);
- @(posedge clk) wvalid = 0;
- end
- join
- @(posedge clk);
- end
- end
-endtask
-
-task axi_read;
- input [31:0] adr;
- output [MEM_WIDTH-1:0] data;
- begin
- data = 0;
- for(j=0; j 0)) begin
- @(negedge clk);
- if(m_axis_0_tvalid & m_axis_0_tready) begin
- if(m_axis_0_tdata != golden[ptr0]) begin
- $display("Mismatch on stream 0");
- $stop();
- end
- //increment pointer
- ptr0 = ptr0 + 1;
- //rewind pointer if it's reached end
- if(ptr0 == (STRM0_OFFSET + STRM0_DEPTH))
- ptr0 = STRM0_OFFSET;
- end
- end
- end
- //check stream 1
- begin
- $display("Starting stream 1 checker");
- while(~done & (NSTREAMS > 1)) begin
- @(negedge clk);
- if(m_axis_1_tvalid & m_axis_1_tready) begin
- if(m_axis_1_tdata != golden[ptr1]) begin
- $display("Mismatch on stream 1");
- $stop();
- end
- //increment pointer
- ptr1 = ptr1 + 1;
- //rewind pointer if it's reached end
- if(ptr1 == (STRM1_OFFSET + STRM1_DEPTH))
- ptr1 = STRM1_OFFSET;
- end
- end
- end
- //check stream 2
- begin
- $display("Starting stream 2 checker");
- while(~done & (NSTREAMS > 2)) begin
- @(negedge clk);
- if(m_axis_2_tvalid & m_axis_2_tready) begin
- if(m_axis_2_tdata != golden[ptr2]) begin
- $display("Mismatch on stream 2");
- $stop();
- end
- //increment pointer
- ptr2 = ptr2 + 1;
- //rewind pointer if it's reached end
- if(ptr2 == (STRM2_OFFSET + STRM2_DEPTH))
- ptr2 = STRM2_OFFSET;
- end
- end
- end
- //check stream 3
- begin
- $display("Starting stream 3 checker");
- while(~done & (NSTREAMS > 3)) begin
- @(negedge clk);
- if(m_axis_3_tvalid & m_axis_3_tready) begin
- if(m_axis_3_tdata != golden[ptr3]) begin
- $display("Mismatch on stream 3");
- $stop();
- end
- //increment pointer
- ptr3 = ptr3 + 1;
- //rewind pointer if it's reached end
- if(ptr3 == (STRM3_OFFSET + STRM3_DEPTH))
- ptr3 = STRM3_OFFSET;
- end
- end
- end
- //check stream 4
- begin
- $display("Starting stream 4 checker");
- while(~done & (NSTREAMS > 4)) begin
- @(negedge clk);
- if(m_axis_4_tvalid & m_axis_4_tready) begin
- if(m_axis_4_tdata != golden[ptr4]) begin
- $display("Mismatch on stream 4");
- $stop();
- end
- //increment pointer
- ptr4 = ptr4 + 1;
- //rewind pointer if it's reached end
- if(ptr4 == (STRM4_OFFSET + STRM4_DEPTH))
- ptr4 = STRM4_OFFSET;
- end
- end
- end
- //check stream 5
- begin
- $display("Starting stream 5 checker");
- while(~done & (NSTREAMS > 5)) begin
- @(negedge clk);
- if(m_axis_5_tvalid & m_axis_5_tready) begin
- if(m_axis_5_tdata != golden[ptr5]) begin
- $display("Mismatch on stream 5");
- $stop();
- end
- //increment pointer
- ptr5 = ptr5 + 1;
- //rewind pointer if it's reached end
- if(ptr5 == (STRM5_OFFSET + STRM5_DEPTH))
- ptr5 = STRM5_OFFSET;
- end
- end
- end
- join
-end
-
-initial begin
- done = 0;
- @(negedge rst);
- $dumpfile("wave.vcd");
- $dumpvars(0,tb_memstream_writes);
- #50000
- $display("Test done!");
- done = 1;
- #1000
- $finish();
-end
-
-endmodule
+/*
+ Copyright (c) 2020, Xilinx
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of FINN nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+`timescale 1ns/10ps
+
+module tb_memstream_writes;
+
+//parameters to enable/disable axi-mm, set number of streams, set readmemh for memory, set per-stream offsets in memory, set per-stream widths
+parameter CONFIG_EN = 1;
+parameter NSTREAMS = 2;//1 up to 6
+
+parameter MEM_DEPTH = 40;
+parameter MEM_WIDTH = 70;
+
+//widths per stream
+parameter STRM0_WIDTH = 70;
+parameter STRM1_WIDTH = 32;
+parameter STRM2_WIDTH = 32;
+parameter STRM3_WIDTH = 32;
+parameter STRM4_WIDTH = 1;
+parameter STRM5_WIDTH = 1;
+
+//depths per stream
+parameter STRM0_DEPTH = 20;
+parameter STRM1_DEPTH = 20;
+parameter STRM2_DEPTH = 2304;
+parameter STRM3_DEPTH = 2304;
+parameter STRM4_DEPTH = 1;
+parameter STRM5_DEPTH = 1;
+
+//offsets for each stream
+parameter STRM0_OFFSET = 0;
+parameter STRM1_OFFSET = 20;
+parameter STRM2_OFFSET = 4608;
+parameter STRM3_OFFSET = 6912;
+parameter STRM4_OFFSET = 0;
+parameter STRM5_OFFSET = 0;
+
+
+reg clk;
+reg rst;
+
+wire awready;
+reg awvalid;
+reg [31:0] awaddr;
+reg [2:0] awprot;
+//write data
+wire wready;
+reg wvalid;
+reg [31:0] wdata;
+reg [3:0] wstrb;
+//burst response
+reg bready;
+wire bvalid;
+wire [1:0] bresp;
+
+//Read channels
+//read address
+wire arready;
+reg arvalid;
+reg [31:0] araddr;
+reg [2:0] arprot;
+//read data
+reg rready;
+wire rvalid;
+wire [1:0] rresp;
+wire [31:0] rdata;
+
+//multiple wire AXI Streams
+reg m_axis_0_afull;
+reg m_axis_0_tready;
+wire m_axis_0_tvalid;
+wire [STRM0_WIDTH-1:0] m_axis_0_tdata;
+
+reg m_axis_1_afull;
+reg m_axis_1_tready;
+wire m_axis_1_tvalid;
+wire [STRM1_WIDTH-1:0] m_axis_1_tdata;
+
+reg m_axis_2_afull;
+reg m_axis_2_tready;
+wire m_axis_2_tvalid;
+wire [STRM2_WIDTH-1:0] m_axis_2_tdata;
+
+reg m_axis_3_afull;
+reg m_axis_3_tready;
+wire m_axis_3_tvalid;
+wire [STRM3_WIDTH-1:0] m_axis_3_tdata;
+
+reg m_axis_4_afull;
+reg m_axis_4_tready;
+wire m_axis_4_tvalid;
+wire [STRM4_WIDTH-1:0] m_axis_4_tdata;
+
+reg m_axis_5_afull;
+reg m_axis_5_tready;
+wire m_axis_5_tvalid;
+wire [STRM5_WIDTH-1:0] m_axis_5_tdata;
+
+reg [MEM_WIDTH-1:0] golden[MEM_DEPTH-1:0];
+reg [MEM_WIDTH-1:0] gword;
+integer ptr0, ptr1, ptr2, ptr3, ptr4, ptr5;
+integer done = 0;
+integer i, j;
+reg [5:0] rng;
+
+parameter NFOLDS_PER_WORD = (MEM_WIDTH+31)/32;
+
+task axi_write;
+ input [MEM_WIDTH-1:0] data;
+ input [31:0] adr;
+ begin
+ for(j=0; j<(1<<$clog2(NFOLDS_PER_WORD)); j=j+1) begin
+ @(negedge clk);
+ awvalid = 1;
+ wvalid = 1;
+ wdata = data>>(j*32);
+ awaddr = (adr*(1<<$clog2(NFOLDS_PER_WORD))+j)*4;
+ fork
+ begin
+ @(posedge awready);
+ @(posedge clk) awvalid = 0;
+ end
+ begin
+ @(posedge wready);
+ @(posedge clk) wvalid = 0;
+ end
+ join
+ @(posedge clk);
+ end
+ end
+endtask
+
+task axi_read;
+ input [31:0] adr;
+ output [MEM_WIDTH-1:0] data;
+ begin
+ data = 0;
+      for(j=0; j<(1<<$clog2(NFOLDS_PER_WORD)); j=j+1) begin
+         @(negedge clk);
+         arvalid = 1;
+         rready = 1;
+         araddr = (adr*(1<<$clog2(NFOLDS_PER_WORD))+j)*4;
+         @(posedge arready);
+         @(posedge clk) arvalid = 0;
+         @(posedge rvalid);
+         @(posedge clk) begin
+            data = data | (rdata<<(j*32));
+            rready = 0;
+         end
+      end
+   end
+endtask
+
+// [gap: clock/reset generation, DUT instantiation and write stimulus not recoverable]
+
+initial begin
+   fork
+   //check stream 0
+   begin
+      $display("Starting stream 0 checker");
+      while(~done & (NSTREAMS > 0)) begin
+ @(negedge clk);
+ if(m_axis_0_tvalid & m_axis_0_tready) begin
+ if(m_axis_0_tdata != golden[ptr0]) begin
+ $display("Mismatch on stream 0");
+ $stop();
+ end
+ //increment pointer
+ ptr0 = ptr0 + 1;
+ //rewind pointer if it's reached end
+ if(ptr0 == (STRM0_OFFSET + STRM0_DEPTH))
+ ptr0 = STRM0_OFFSET;
+ end
+ end
+ end
+ //check stream 1
+ begin
+ $display("Starting stream 1 checker");
+ while(~done & (NSTREAMS > 1)) begin
+ @(negedge clk);
+ if(m_axis_1_tvalid & m_axis_1_tready) begin
+ if(m_axis_1_tdata != golden[ptr1]) begin
+ $display("Mismatch on stream 1");
+ $stop();
+ end
+ //increment pointer
+ ptr1 = ptr1 + 1;
+ //rewind pointer if it's reached end
+ if(ptr1 == (STRM1_OFFSET + STRM1_DEPTH))
+ ptr1 = STRM1_OFFSET;
+ end
+ end
+ end
+ //check stream 2
+ begin
+ $display("Starting stream 2 checker");
+ while(~done & (NSTREAMS > 2)) begin
+ @(negedge clk);
+ if(m_axis_2_tvalid & m_axis_2_tready) begin
+ if(m_axis_2_tdata != golden[ptr2]) begin
+ $display("Mismatch on stream 2");
+ $stop();
+ end
+ //increment pointer
+ ptr2 = ptr2 + 1;
+ //rewind pointer if it's reached end
+ if(ptr2 == (STRM2_OFFSET + STRM2_DEPTH))
+ ptr2 = STRM2_OFFSET;
+ end
+ end
+ end
+ //check stream 3
+ begin
+ $display("Starting stream 3 checker");
+ while(~done & (NSTREAMS > 3)) begin
+ @(negedge clk);
+ if(m_axis_3_tvalid & m_axis_3_tready) begin
+ if(m_axis_3_tdata != golden[ptr3]) begin
+ $display("Mismatch on stream 3");
+ $stop();
+ end
+ //increment pointer
+ ptr3 = ptr3 + 1;
+ //rewind pointer if it's reached end
+ if(ptr3 == (STRM3_OFFSET + STRM3_DEPTH))
+ ptr3 = STRM3_OFFSET;
+ end
+ end
+ end
+ //check stream 4
+ begin
+ $display("Starting stream 4 checker");
+ while(~done & (NSTREAMS > 4)) begin
+ @(negedge clk);
+ if(m_axis_4_tvalid & m_axis_4_tready) begin
+ if(m_axis_4_tdata != golden[ptr4]) begin
+ $display("Mismatch on stream 4");
+ $stop();
+ end
+ //increment pointer
+ ptr4 = ptr4 + 1;
+ //rewind pointer if it's reached end
+ if(ptr4 == (STRM4_OFFSET + STRM4_DEPTH))
+ ptr4 = STRM4_OFFSET;
+ end
+ end
+ end
+ //check stream 5
+ begin
+ $display("Starting stream 5 checker");
+ while(~done & (NSTREAMS > 5)) begin
+ @(negedge clk);
+ if(m_axis_5_tvalid & m_axis_5_tready) begin
+ if(m_axis_5_tdata != golden[ptr5]) begin
+ $display("Mismatch on stream 5");
+ $stop();
+ end
+ //increment pointer
+ ptr5 = ptr5 + 1;
+ //rewind pointer if it's reached end
+ if(ptr5 == (STRM5_OFFSET + STRM5_DEPTH))
+ ptr5 = STRM5_OFFSET;
+ end
+ end
+ end
+ join
+end
+
+initial begin
+ done = 0;
+ @(negedge rst);
+ $dumpfile("wave.vcd");
+ $dumpvars(0,tb_memstream_writes);
+ #50000
+ $display("Test done!");
+ done = 1;
+ #1000
+ $finish();
+end
+
+endmodule
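
Note on the `axi_write`/`axi_read` tasks above: each MEM_WIDTH-bit memory word is folded into 32-bit AXI-Lite beats, and the beat count is padded up to a power of two before byte addresses are computed, so word `adr` starts at byte address `(adr*(1<<$clog2(NFOLDS_PER_WORD)))*4`. Below is a minimal Python model of that addressing, a sketch using the testbench's parameter values (the helper names `clog2` and `axi_lite_writes` are ours, not part of the patch):

```python
def clog2(x):
    # ceil(log2(x)), matching Verilog's $clog2 for x >= 1
    return (x - 1).bit_length()

MEM_WIDTH = 70                                # from the testbench parameters
NFOLDS_PER_WORD = (MEM_WIDTH + 31) // 32      # 32-bit beats per word -> 3
FOLDS_PADDED = 1 << clog2(NFOLDS_PER_WORD)    # padded to a power of two -> 4

def axi_lite_writes(adr, data):
    """(byte address, 32-bit chunk) pairs for one memory word, as in axi_write."""
    return [((adr * FOLDS_PADDED + j) * 4, (data >> (32 * j)) & 0xFFFFFFFF)
            for j in range(FOLDS_PADDED)]

# word 1 occupies byte addresses 16, 20, 24, 28; the padding beat is zero
for addr, chunk in axi_lite_writes(1, (1 << MEM_WIDTH) - 1):
    print(hex(addr), hex(chunk))
```
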
diff --git a/notebooks/advanced/2_custom_op.ipynb b/notebooks/advanced/2_custom_op.ipynb
index 7d7bc5c50b..57f2601c73 100644
--- a/notebooks/advanced/2_custom_op.ipynb
+++ b/notebooks/advanced/2_custom_op.ipynb
@@ -58,13 +58,11 @@
" '__repr__',\n",
" '__setattr__',\n",
" '__sizeof__',\n",
+ " '__slots__',\n",
" '__str__',\n",
" '__subclasshook__',\n",
" '__weakref__',\n",
- " '_abc_cache',\n",
- " '_abc_negative_cache',\n",
- " '_abc_negative_cache_version',\n",
- " '_abc_registry',\n",
+ " '_abc_impl',\n",
" 'execute_node',\n",
" 'get_nodeattr',\n",
" 'get_nodeattr_allowed_values',\n",
@@ -211,7 +209,7 @@
"{'DebugMarker': finn.custom_op.general.debugmarker.DebugMarker,\n",
" 'QuantAvgPool2d': finn.custom_op.general.quantavgpool2d.QuantAvgPool2d,\n",
" 'MaxPoolNHWC': finn.custom_op.general.maxpoolnhwc.MaxPoolNHWC,\n",
- " 'StreamingDataflowPartition': finn.custom_op.general.streamingdataflowpartition.StreamingDataflowPartition,\n",
+ " 'GenericPartition': finn.custom_op.general.genericpartition.GenericPartition,\n",
" 'MultiThreshold': finn.custom_op.general.multithreshold.MultiThreshold,\n",
" 'XnorPopcountMatMul': finn.custom_op.general.xnorpopcount.XnorPopcountMatMul,\n",
" 'Im2Col': finn.custom_op.general.im2col.Im2Col,\n",
@@ -335,8 +333,8 @@
{
"data": {
"text/plain": [
- "array([[[-6., 2., -3., -6.],\n",
- " [-6., 0., 1., -2.]]], dtype=float32)"
+ "array([[[ 0., -3., 1., -8.],\n",
+ " [ 2., -2., -4., -8.]]], dtype=float32)"
]
},
"execution_count": 7,
@@ -349,7 +347,7 @@
"from finn.util.basic import gen_finn_dt_tensor\n",
"\n",
"# generate a random input of e.g signed 4-bit values\n",
- "random_input = gen_finn_dt_tensor(DataType.INT4, input_shape)\n",
+ "random_input = gen_finn_dt_tensor(DataType[\"INT4\"], input_shape)\n",
"random_input\n"
]
},
@@ -368,8 +366,8 @@
{
"data": {
"text/plain": [
- "{'outp': array([[[36., 4., 9., 36.],\n",
- " [36., 0., 1., 4.]]], dtype=float32)}"
+ "{'outp': array([[[ 0., 9., 1., 64.],\n",
+ " [ 4., 4., 16., 64.]]], dtype=float32)}"
]
},
"execution_count": 8,
@@ -576,7 +574,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Available functions: ['__abstractmethods__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abc_cache', '_abc_negative_cache', '_abc_negative_cache_version', '_abc_registry', 'execute_node', 'get_nodeattr', 'get_nodeattr_allowed_values', 'get_nodeattr_def', 'get_nodeattr_types', 'infer_node_datatype', 'make_shape_compatible_op', 'my_custom_cpp_gen', 'onnx_node', 'set_nodeattr', 'verify_node']\n",
+ "Available functions: ['__abstractmethods__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', 'execute_node', 'get_nodeattr', 'get_nodeattr_allowed_values', 'get_nodeattr_def', 'get_nodeattr_types', 'infer_node_datatype', 'make_shape_compatible_op', 'my_custom_cpp_gen', 'onnx_node', 'set_nodeattr', 'verify_node']\n",
"codegen_dir: \n",
"exec_mode: python\n"
]
@@ -666,7 +664,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "/tmp/finn_dev_jalezeta/my_custom_oppaxpincq\n"
+ "/tmp/finn_dev_maltanar/my_custom_oppswiou3i\n"
]
}
],
@@ -820,8 +818,8 @@
{
"data": {
"text/plain": [
- "array([[[-8., 4., 7., 2.],\n",
- " [-5., -1., 2., 0.]]], dtype=float32)"
+ "array([[[-6., 3., 2., -5.],\n",
+ " [ 5., 2., 0., -2.]]], dtype=float32)"
]
},
"execution_count": 21,
@@ -831,7 +829,7 @@
],
"source": [
"# generate a random input of e.g signed 4-bit values\n",
- "random_input = gen_finn_dt_tensor(DataType.INT4, input_shape)\n",
+ "random_input = gen_finn_dt_tensor(DataType[\"INT4\"], input_shape)\n",
"random_input"
]
},
@@ -850,8 +848,8 @@
{
"data": {
"text/plain": [
- "{'outp': array([[[64., 16., 49., 4.],\n",
- " [25., 1., 4., 0.]]], dtype=float32)}"
+ "{'outp': array([[[36., 9., 4., 25.],\n",
+ " [25., 4., 0., 4.]]], dtype=float32)}"
]
},
"execution_count": 22,
@@ -882,8 +880,8 @@
{
"data": {
"text/plain": [
- "{'outp': array([[[64., 16., 49., 4.],\n",
- " [25., 1., 4., 0.]]])}"
+ "{'outp': array([[[36., 9., 4., 25.],\n",
+ " [25., 4., 0., 4.]]])}"
]
},
"execution_count": 23,
@@ -897,6 +895,13 @@
"ret = execute_onnx(mixedop_graph_new, inp_dict)\n",
"ret"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
@@ -915,7 +920,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.8"
+ "version": "3.8.5"
}
},
"nbformat": 4,
diff --git a/notebooks/basics/1_brevitas_network_import.ipynb b/notebooks/basics/1_brevitas_network_import.ipynb
index 8ba7d00a17..b6d6c3bdfd 100644
--- a/notebooks/basics/1_brevitas_network_import.ipynb
+++ b/notebooks/basics/1_brevitas_network_import.ipynb
@@ -17,7 +17,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -36,121 +36,9 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "# MIT License\n",
- "#\n",
- "# Copyright (c) 2019 Xilinx\n",
- "#\n",
- "# Permission is hereby granted, free of charge, to any person obtaining a copy\n",
- "# of this software and associated documentation files (the \"Software\"), to deal\n",
- "# in the Software without restriction, including without limitation the rights\n",
- "# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n",
- "# copies of the Software, and to permit persons to whom the Software is\n",
- "# furnished to do so, subject to the following conditions:\n",
- "#\n",
- "# The above copyright notice and this permission notice shall be included in all\n",
- "# copies or substantial portions of the Software.\n",
- "#\n",
- "# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n",
- "# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n",
- "# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n",
- "# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n",
- "# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n",
- "# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n",
- "# SOFTWARE.\n",
- "\n",
- "import ast\n",
- "from functools import reduce\n",
- "from operator import mul\n",
- "\n",
- "from torch.nn import Module, ModuleList, BatchNorm1d, Dropout\n",
- "import torch\n",
- "\n",
- "from brevitas.nn import QuantIdentity, QuantLinear\n",
- "from .common import CommonWeightQuant, CommonActQuant\n",
- "from .tensor_norm import TensorNorm\n",
- "\n",
- "DROPOUT = 0.2\n",
- "\n",
- "\n",
- "class FC(Module):\n",
- "\n",
- " def __init__(\n",
- " self,\n",
- " num_classes,\n",
- " weight_bit_width,\n",
- " act_bit_width,\n",
- " in_bit_width,\n",
- " in_channels,\n",
- " out_features,\n",
- " in_features=(28, 28)):\n",
- " super(FC, self).__init__()\n",
- "\n",
- " self.features = ModuleList()\n",
- " self.features.append(QuantIdentity(act_quant=CommonActQuant, bit_width=in_bit_width))\n",
- " self.features.append(Dropout(p=DROPOUT))\n",
- " in_features = reduce(mul, in_features)\n",
- " for out_features in out_features:\n",
- " self.features.append(QuantLinear(\n",
- " in_features=in_features,\n",
- " out_features=out_features,\n",
- " bias=False,\n",
- " weight_bit_width=weight_bit_width,\n",
- " weight_quant=CommonWeightQuant))\n",
- " in_features = out_features\n",
- " self.features.append(BatchNorm1d(num_features=in_features))\n",
- " self.features.append(QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))\n",
- " self.features.append(Dropout(p=DROPOUT))\n",
- " self.features.append(QuantLinear(\n",
- " in_features=in_features,\n",
- " out_features=num_classes,\n",
- " bias=False,\n",
- " weight_bit_width=weight_bit_width,\n",
- " weight_quant=CommonWeightQuant))\n",
- " self.features.append(TensorNorm())\n",
- "\n",
- " for m in self.modules():\n",
- " if isinstance(m, QuantLinear):\n",
- " torch.nn.init.uniform_(m.weight.data, -1, 1)\n",
- "\n",
- " def clip_weights(self, min_val, max_val):\n",
- " for mod in self.features:\n",
- " if isinstance(mod, QuantLinear):\n",
- " mod.weight.data.clamp_(min_val, max_val)\n",
- " \n",
- " def forward(self, x):\n",
- " x = x.view(x.shape[0], -1)\n",
- " x = 2.0 * x - torch.tensor([1.0], device=x.device)\n",
- " for mod in self.features:\n",
- " x = mod(x)\n",
- " return x\n",
- "\n",
- "\n",
- "def fc(cfg):\n",
- " weight_bit_width = cfg.getint('QUANT', 'WEIGHT_BIT_WIDTH')\n",
- " act_bit_width = cfg.getint('QUANT', 'ACT_BIT_WIDTH')\n",
- " in_bit_width = cfg.getint('QUANT', 'IN_BIT_WIDTH')\n",
- " num_classes = cfg.getint('MODEL', 'NUM_CLASSES')\n",
- " in_channels = cfg.getint('MODEL', 'IN_CHANNELS')\n",
- " out_features = ast.literal_eval(cfg.get('MODEL', 'OUT_FEATURES'))\n",
- " net = FC(\n",
- " weight_bit_width=weight_bit_width,\n",
- " act_bit_width=act_bit_width,\n",
- " in_bit_width=in_bit_width,\n",
- " in_channels=in_channels,\n",
- " out_features=out_features,\n",
- " num_classes=num_classes)\n",
- " return net\n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"from brevitas_examples import bnn_pynq\n",
"showSrc(bnn_pynq.models.FC)"
@@ -165,255 +53,9 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "FC(\n",
- " (features): ModuleList(\n",
- " (0): QuantIdentity(\n",
- " (input_quant): IdentityQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " )\n",
- " (act_quant): ActQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " (fused_activation_quant_proxy): FusedActivationQuantProxy(\n",
- " (activation_impl): Identity()\n",
- " (tensor_quant): ClampedBinaryQuant(\n",
- " (scaling_impl): ConstScaling(\n",
- " (restrict_clamp_scaling): _RestrictClampValue(\n",
- " (restrict_value_impl): FloatRestrictValue()\n",
- " (clamp_min_ste): Identity()\n",
- " )\n",
- " (value): StatelessBuffer()\n",
- " )\n",
- " (bit_width): BitWidthConst(\n",
- " (bit_width): StatelessBuffer()\n",
- " )\n",
- " (delay_wrapper): DelayWrapper(\n",
- " (delay_impl): _NoDelay()\n",
- " )\n",
- " )\n",
- " )\n",
- " )\n",
- " )\n",
- " (1): Dropout(p=0.2)\n",
- " (2): QuantLinear(\n",
- " in_features=784, out_features=1024, bias=False\n",
- " (input_quant): IdentityQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " )\n",
- " (output_quant): IdentityQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " )\n",
- " (weight_quant): WeightQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " (tensor_quant): BinaryQuant(\n",
- " (scaling_impl): ConstScaling(\n",
- " (restrict_clamp_scaling): _RestrictClampValue(\n",
- " (restrict_value_impl): FloatRestrictValue()\n",
- " (clamp_min_ste): Identity()\n",
- " )\n",
- " (value): StatelessBuffer()\n",
- " )\n",
- " (bit_width): BitWidthConst(\n",
- " (bit_width): StatelessBuffer()\n",
- " )\n",
- " (delay_wrapper): DelayWrapper(\n",
- " (delay_impl): _NoDelay()\n",
- " )\n",
- " )\n",
- " )\n",
- " (bias_quant): BiasQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " )\n",
- " )\n",
- " (3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (4): QuantIdentity(\n",
- " (input_quant): IdentityQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " )\n",
- " (act_quant): ActQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " (fused_activation_quant_proxy): FusedActivationQuantProxy(\n",
- " (activation_impl): Identity()\n",
- " (tensor_quant): ClampedBinaryQuant(\n",
- " (scaling_impl): ConstScaling(\n",
- " (restrict_clamp_scaling): _RestrictClampValue(\n",
- " (restrict_value_impl): FloatRestrictValue()\n",
- " (clamp_min_ste): Identity()\n",
- " )\n",
- " (value): StatelessBuffer()\n",
- " )\n",
- " (bit_width): BitWidthConst(\n",
- " (bit_width): StatelessBuffer()\n",
- " )\n",
- " (delay_wrapper): DelayWrapper(\n",
- " (delay_impl): _NoDelay()\n",
- " )\n",
- " )\n",
- " )\n",
- " )\n",
- " )\n",
- " (5): Dropout(p=0.2)\n",
- " (6): QuantLinear(\n",
- " in_features=1024, out_features=1024, bias=False\n",
- " (input_quant): IdentityQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " )\n",
- " (output_quant): IdentityQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " )\n",
- " (weight_quant): WeightQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " (tensor_quant): BinaryQuant(\n",
- " (scaling_impl): ConstScaling(\n",
- " (restrict_clamp_scaling): _RestrictClampValue(\n",
- " (restrict_value_impl): FloatRestrictValue()\n",
- " (clamp_min_ste): Identity()\n",
- " )\n",
- " (value): StatelessBuffer()\n",
- " )\n",
- " (bit_width): BitWidthConst(\n",
- " (bit_width): StatelessBuffer()\n",
- " )\n",
- " (delay_wrapper): DelayWrapper(\n",
- " (delay_impl): _NoDelay()\n",
- " )\n",
- " )\n",
- " )\n",
- " (bias_quant): BiasQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " )\n",
- " )\n",
- " (7): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (8): QuantIdentity(\n",
- " (input_quant): IdentityQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " )\n",
- " (act_quant): ActQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " (fused_activation_quant_proxy): FusedActivationQuantProxy(\n",
- " (activation_impl): Identity()\n",
- " (tensor_quant): ClampedBinaryQuant(\n",
- " (scaling_impl): ConstScaling(\n",
- " (restrict_clamp_scaling): _RestrictClampValue(\n",
- " (restrict_value_impl): FloatRestrictValue()\n",
- " (clamp_min_ste): Identity()\n",
- " )\n",
- " (value): StatelessBuffer()\n",
- " )\n",
- " (bit_width): BitWidthConst(\n",
- " (bit_width): StatelessBuffer()\n",
- " )\n",
- " (delay_wrapper): DelayWrapper(\n",
- " (delay_impl): _NoDelay()\n",
- " )\n",
- " )\n",
- " )\n",
- " )\n",
- " )\n",
- " (9): Dropout(p=0.2)\n",
- " (10): QuantLinear(\n",
- " in_features=1024, out_features=1024, bias=False\n",
- " (input_quant): IdentityQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " )\n",
- " (output_quant): IdentityQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " )\n",
- " (weight_quant): WeightQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " (tensor_quant): BinaryQuant(\n",
- " (scaling_impl): ConstScaling(\n",
- " (restrict_clamp_scaling): _RestrictClampValue(\n",
- " (restrict_value_impl): FloatRestrictValue()\n",
- " (clamp_min_ste): Identity()\n",
- " )\n",
- " (value): StatelessBuffer()\n",
- " )\n",
- " (bit_width): BitWidthConst(\n",
- " (bit_width): StatelessBuffer()\n",
- " )\n",
- " (delay_wrapper): DelayWrapper(\n",
- " (delay_impl): _NoDelay()\n",
- " )\n",
- " )\n",
- " )\n",
- " (bias_quant): BiasQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " )\n",
- " )\n",
- " (11): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
- " (12): QuantIdentity(\n",
- " (input_quant): IdentityQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " )\n",
- " (act_quant): ActQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " (fused_activation_quant_proxy): FusedActivationQuantProxy(\n",
- " (activation_impl): Identity()\n",
- " (tensor_quant): ClampedBinaryQuant(\n",
- " (scaling_impl): ConstScaling(\n",
- " (restrict_clamp_scaling): _RestrictClampValue(\n",
- " (restrict_value_impl): FloatRestrictValue()\n",
- " (clamp_min_ste): Identity()\n",
- " )\n",
- " (value): StatelessBuffer()\n",
- " )\n",
- " (bit_width): BitWidthConst(\n",
- " (bit_width): StatelessBuffer()\n",
- " )\n",
- " (delay_wrapper): DelayWrapper(\n",
- " (delay_impl): _NoDelay()\n",
- " )\n",
- " )\n",
- " )\n",
- " )\n",
- " )\n",
- " (13): Dropout(p=0.2)\n",
- " (14): QuantLinear(\n",
- " in_features=1024, out_features=10, bias=False\n",
- " (input_quant): IdentityQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " )\n",
- " (output_quant): IdentityQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " )\n",
- " (weight_quant): WeightQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " (tensor_quant): BinaryQuant(\n",
- " (scaling_impl): ConstScaling(\n",
- " (restrict_clamp_scaling): _RestrictClampValue(\n",
- " (restrict_value_impl): FloatRestrictValue()\n",
- " (clamp_min_ste): Identity()\n",
- " )\n",
- " (value): StatelessBuffer()\n",
- " )\n",
- " (bit_width): BitWidthConst(\n",
- " (bit_width): StatelessBuffer()\n",
- " )\n",
- " (delay_wrapper): DelayWrapper(\n",
- " (delay_impl): _NoDelay()\n",
- " )\n",
- " )\n",
- " )\n",
- " (bias_quant): BiasQuantProxyFromInjector(\n",
- " (_zero_hw_sentinel): StatelessBuffer()\n",
- " )\n",
- " )\n",
- " (15): TensorNorm()\n",
- " )\n",
- ")"
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"from finn.util.test import get_test_model\n",
"lfc = get_test_model(netname = \"LFC\", wbits = 1, abits = 1, pretrained = True)\n",
@@ -429,22 +71,9 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
-      "image/png": "iVBORw0KGgo... [base64-encoded matplotlib PNG elided] ...\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"import torch\n",
"import matplotlib.pyplot as plt\n",
@@ -460,21 +89,9 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "tensor([0.1020, 0.0113, 0.4806, 0.0571, 0.0482, 0.0079, 0.0450, 0.0076, 0.1851,\n",
- " 0.0552])"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"from torch.nn.functional import softmax\n",
"# do forward pass in PyTorch/Brevitas\n",
@@ -485,22 +102,9 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
-      "image/png": "iVBORw0KGgo... [base64-encoded matplotlib PNG elided] ...\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"import numpy as np\n",
"objects = [str(x) for x in range(10)]\n",
@@ -529,7 +133,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -548,39 +152,9 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Stopping http://0.0.0.0:8081\n",
- "Serving '/tmp/LFCW1A1.onnx' at http://0.0.0.0:8081\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"showInNetron('/tmp/LFCW1A1.onnx')"
]
@@ -603,27 +177,13 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "input: \"37\"\n",
- "input: \"38\"\n",
- "output: \"40\"\n",
- "op_type: \"MatMul\""
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"from finn.core.modelwrapper import ModelWrapper\n",
"model = ModelWrapper(export_onnx_path)\n",
- "model.graph.node[9]"
+ "model.graph.node[8]"
]
},
{
@@ -635,28 +195,11 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([[-1., -1., 1., ..., -1., 1., -1.],\n",
- " [ 1., 1., -1., ..., 1., -1., 1.],\n",
- " [-1., -1., -1., ..., 1., -1., 1.],\n",
- " ...,\n",
- " [ 1., -1., -1., ..., -1., -1., 1.],\n",
- " [ 1., -1., -1., ..., 1., 1., 1.],\n",
- " [ 1., -1., 1., ..., 1., -1., 1.]], dtype=float32)"
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "model.get_initializer(model.graph.node[9].input[1])"
+ "model.get_initializer(model.graph.node[8].input[1])"
]
},
{
@@ -668,42 +211,20 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 19,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "model.get_tensor_datatype(model.graph.node[9].input[1])"
+ "model.get_tensor_datatype(model.graph.node[8].input[1]).name"
]
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[784, 1024]"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "model.get_tensor_shape(model.graph.node[9].input[1])"
+ "model.get_tensor_shape(model.graph.node[8].input[1])"
]
},
{
@@ -715,7 +236,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -729,39 +250,9 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Stopping http://0.0.0.0:8081\n",
- "Serving '/tmp/LFCW1A1-clean.onnx' at http://0.0.0.0:8081\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "execution_count": 22,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"showInNetron('/tmp/LFCW1A1-clean.onnx')"
]
@@ -775,22 +266,9 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([[-1.3736125, -3.5715756, 0.1768887, -1.9529207, -2.1233053,\n",
- " -3.9293835, -2.1914592, -3.9634604, -0.7772659, -1.9869976]],\n",
- " dtype=float32)"
- ]
- },
- "execution_count": 23,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"import finn.core.onnx_exec as oxe\n",
"input_dict = {\"0\": nph.to_array(input_tensor)}\n",
@@ -802,20 +280,9 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"np.isclose(produced, produced_finn).all()"
]
@@ -844,7 +311,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.8"
+ "version": "3.8.5"
}
},
"nbformat": 4,
diff --git a/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb b/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb
index a141caf423..2d668f3e04 100644
--- a/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb
+++ b/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb
@@ -71,14 +71,25 @@
"source": [
"## 1. Brevitas Export, FINN Import and Tidy-Up\n",
"\n",
- "Similar to what we did in the TFC-w1a1 end-to-end notebook, we will start by exporting the [pretrained CNV-w1a1 network](https://github.com/Xilinx/brevitas/tree/master/brevitas_examples/bnn_pynq) to ONNX, importing that into FINN and running the \"tidy-up\" transformations to have a first look at the topology."
+ "Similar to what we did in the TFC-w1a1 end-to-end notebook, we will start by exporting the [pretrained CNV-w1a1 network](https://github.com/Xilinx/brevitas/tree/master/src/brevitas_examples/bnn_pynq) to ONNX, importing that into FINN and running the \"tidy-up\" transformations to have a first look at the topology."
]
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 2,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/workspace/brevitas/src/brevitas_examples/bnn_pynq/models/CNV.py:106: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+ " x = 2.0 * x - torch.tensor([1.0], device=x.device)\n",
+ "/workspace/brevitas/src/brevitas/quant_tensor/__init__.py:74: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+ " training = torch.tensor(training, dtype=torch.bool)\n"
+ ]
+ }
+ ],
"source": [
"import onnx\n",
"from finn.util.test import get_test_model_trained\n",
@@ -108,7 +119,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -125,17 +136,17 @@
" \n",
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 27,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -157,19 +168,19 @@
"source": [
"### Adding Pre- and Postprocessing \n",
"\n",
- "TODO"
+    "Preprocessing and postprocessing steps can be added directly in the ONNX graph. In this case, the preprocessing step divides the input `uint8` data by 255 so the inputs to the CNV-w1a1 network lie within the interval [0, 1]. The postprocessing step takes the output of the network and returns the index (0-9) of the image category with the highest probability (top-1); a toy illustration follows after this hunk. "
]
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "/workspace/finn/src/finn/transformation/infer_data_layouts.py:113: UserWarning: Assuming 4D input is NCHW\n",
+ "/workspace/finn-base/src/finn/transformation/infer_data_layouts.py:114: UserWarning: Assuming 4D input is NCHW\n",
" warnings.warn(\"Assuming 4D input is NCHW\")\n"
]
}
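
The pre- and postprocessing described in the markdown cell above correspond to two simple array operations; a toy numpy illustration (shapes assumed for a CIFAR-10-sized input, and the logits are a stand-in for the network output):

```python
import numpy as np

img_u8 = np.random.randint(0, 256, size=(1, 3, 32, 32), dtype=np.uint8)
pre = img_u8.astype(np.float32) / 255.0             # preprocessing: uint8 -> [0, 1]
logits = np.random.randn(1, 10).astype(np.float32)  # placeholder network output
top1 = int(np.argmax(logits, axis=1)[0])            # postprocessing: top-1 class index (0-9)
print(top1)
```
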
@@ -192,19 +203,40 @@
"model = model.transform(MergeONNXModels(pre_model))\n",
"# add input quantization annotation: UINT8 for all BNN-PYNQ models\n",
"global_inp_name = model.graph.input[0].name\n",
- "model.set_tensor_datatype(global_inp_name, DataType.UINT8)"
+ "model.set_tensor_datatype(global_inp_name, DataType[\"UINT8\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from finn.transformation.insert_topk import InsertTopK\n",
+ "from finn.transformation.infer_datatypes import InferDataTypes\n",
+ "\n",
+ "# postprocessing: insert Top-1 node at the end\n",
+ "model = model.transform(InsertTopK(k=1))\n",
+ "chkpt_name = build_dir+\"/end2end_cnv_w1a1_pre_post.onnx\"\n",
+ "# tidy-up again\n",
+ "model = model.transform(InferShapes())\n",
+ "model = model.transform(FoldConstants())\n",
+ "model = model.transform(GiveUniqueNodeNames())\n",
+ "model = model.transform(GiveReadableTensorNames())\n",
+ "model = model.transform(InferDataTypes())\n",
+ "model = model.transform(RemoveStaticGraphInputs())\n",
+ "model.save(chkpt_name)"
]
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "\n",
"Stopping http://0.0.0.0:8081\n",
"Serving '/workspace/finn/end2end_cnv_w1a1_pre_post.onnx' at http://0.0.0.0:8081\n"
]
@@ -216,37 +248,22 @@
" \n",
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 29,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "from finn.transformation.insert_topk import InsertTopK\n",
- "from finn.transformation.infer_datatypes import InferDataTypes\n",
- "\n",
- "# postprocessing: insert Top-1 node at the end\n",
- "model = model.transform(InsertTopK(k=1))\n",
- "chkpt_name = build_dir+\"/end2end_cnv_w1a1_pre_post.onnx\"\n",
- "# tidy-up again\n",
- "model = model.transform(InferShapes())\n",
- "model = model.transform(FoldConstants())\n",
- "model = model.transform(GiveUniqueNodeNames())\n",
- "model = model.transform(GiveReadableTensorNames())\n",
- "model = model.transform(InferDataTypes())\n",
- "model = model.transform(RemoveStaticGraphInputs())\n",
- "model.save(chkpt_name)\n",
- "\n",
"showInNetron(build_dir+\"/end2end_cnv_w1a1_pre_post.onnx\")"
]
},
@@ -268,7 +285,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -311,14 +328,13 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "\n",
"Stopping http://0.0.0.0:8081\n",
"Serving '/workspace/finn/end2end_cnv_w1a1_streamlined.onnx' at http://0.0.0.0:8081\n"
]
@@ -330,17 +346,17 @@
" \n",
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 31,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -360,9 +376,18 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 9,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/workspace/finn/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py:591: UserWarning: Clipping some thresholds in \n",
+ " warnings.warn(\"Clipping some thresholds in %s\" % self.onnx_node.name)\n"
+ ]
+ }
+ ],
"source": [
"import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls\n",
"from finn.transformation.fpgadataflow.create_dataflow_partition import (\n",
@@ -409,7 +434,7 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 10,
"metadata": {
"scrolled": false
},
@@ -418,7 +443,6 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "\n",
"Stopping http://0.0.0.0:8081\n",
"Serving '/workspace/finn/end2end_cnv_w1a1_dataflow_parent.onnx' at http://0.0.0.0:8081\n"
]
@@ -430,17 +454,17 @@
" \n",
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 36,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -458,14 +482,13 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "\n",
"Stopping http://0.0.0.0:8081\n",
"Serving '/workspace/finn/end2end_cnv_w1a1_dataflow_model.onnx' at http://0.0.0.0:8081\n"
]
@@ -477,17 +500,17 @@
" \n",
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 33,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -505,7 +528,7 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -549,14 +572,13 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "\n",
"Stopping http://0.0.0.0:8081\n",
"Serving '/workspace/finn/end2end_cnv_w1a1_folded.onnx' at http://0.0.0.0:8081\n"
]
@@ -568,17 +590,17 @@
" \n",
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 35,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -605,11 +627,24 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 14,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/workspace/finn/src/finn/transformation/fpgadataflow/floorplan.py:107: UserWarning: 32 nodes have no entry in the provided floorplan, SLR was set to -1\n",
+ " warnings.warn(\n",
+ "/workspace/finn/src/finn/transformation/fpgadataflow/insert_fifo.py:154: UserWarning: Overriding input FIFO depth to 32\n",
+ " warnings.warn(\"Overriding input FIFO depth to 32\")\n",
+ "/workspace/finn/src/finn/transformation/fpgadataflow/insert_fifo.py:200: UserWarning: Overriding output FIFO depth to 32\n",
+ " warnings.warn(\"Overriding output FIFO depth to 32\")\n"
+ ]
+ }
+ ],
"source": [
- "test_pynq_board = \"Pynq-Z1\"\n",
+ "test_pynq_board = \"Pynq-Z2\"\n",
"target_clk_ns = 10\n",
"\n",
"from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild\n",
@@ -631,18 +666,19 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Welcome to PYNQ Linux, based on Ubuntu 18.04 (GNU/Linux 5.4.0-xilinx-v2020.1 armv7l)\r\n",
+ "Welcome to PYNQ Linux, based on Ubuntu 18.04 (GNU/Linux 4.19.0-xilinx-v2019.1 armv7l)\r\n",
"\r\n",
- " * Pure upstream Kubernetes 1.21, smallest, simplest cluster ops!\r\n",
+ " * Super-optimized for small spaces - read how we shrank the memory\r\n",
+ " footprint of MicroK8s to make it the smallest full K8s around.\r\n",
"\r\n",
- " https://microk8s.io/\r\n"
+ " https://ubuntu.com/blog/microk8s-memory-optimisation\r\n"
]
}
],
@@ -665,7 +701,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
@@ -678,16 +714,16 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "'/home/xilinx/finn_dev_maltanar/pynq_deployment_obskagv5'"
+ "'/home/xilinx/finn_dev_jduarte/pynq_deployment_yrxnwrak'"
]
},
- "execution_count": 23,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -699,19 +735,21 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "total 4216\r\n",
- "-rw-r--r-- 1 xilinx xilinx 8508 Sep 21 13:19 driver.py\r\n",
- "drwxr-xr-x 4 xilinx xilinx 4096 Sep 21 13:19 finn\r\n",
- "-rw-r--r-- 1 xilinx xilinx 4045671 Sep 21 13:19 resizer.bit\r\n",
- "-rw-r--r-- 1 xilinx xilinx 246205 Sep 21 13:19 resizer.hwh\r\n",
- "-rw-r--r-- 1 xilinx xilinx 1727 Sep 21 13:19 validate.py\r\n"
+ "total 4240\r\n",
+ "-rw-rw-r-- 1 xilinx xilinx 18616 Jun 28 20:42 driver_base.py\r\n",
+ "-rw-r--r-- 1 xilinx xilinx 4868 Jun 28 20:42 driver.py\r\n",
+ "drwxr-xr-x 4 xilinx xilinx 4096 Jun 28 20:42 finn\r\n",
+ "-rw-r--r-- 1 xilinx xilinx 4045671 Jun 28 20:42 resizer.bit\r\n",
+ "-rw-r--r-- 1 xilinx xilinx 247083 Jun 28 20:42 resizer.hwh\r\n",
+ "drwxr-xr-x 2 xilinx xilinx 4096 Jun 28 20:42 runtime_weights\r\n",
+ "-rw-rw-r-- 1 xilinx xilinx 4107 Jun 28 20:42 validate.py\r\n"
]
}
],
@@ -728,16 +766,16 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 2,
+ "execution_count": 19,
"metadata": {},
"output_type": "execute_result"
},
@@ -774,7 +812,7 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
@@ -791,7 +829,7 @@
},
{
"cell_type": "code",
- "execution_count": 44,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -800,7 +838,7 @@
"array([[3.]], dtype=float32)"
]
},
- "execution_count": 44,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -836,7 +874,7 @@
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -846,7 +884,7 @@
"[sudo] password for xilinx: Requirement already satisfied: dataset_loading from git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading in /usr/local/lib/python3.6/dist-packages\n",
"Requirement already satisfied: Pillow in /usr/lib/python3/dist-packages (from dataset_loading)\n",
"Requirement already satisfied: scipy in /usr/lib/python3/dist-packages (from dataset_loading)\n",
- "Connection to 192.168.2.99 closed.\n"
+ "Connection to 99.121.248.96 closed.\n"
]
}
],
@@ -867,7 +905,7 @@
},
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -877,18 +915,18 @@
"[sudo] password for xilinx: Tar File found in dest_dir. Not Downloading again\n",
"Extracting Python CIFAR10 data.\n",
"Files extracted\n",
- "batch 0 / 10 : total OK 851 NOK 149\n",
- "batch 1 / 10 : total OK 1683 NOK 317\n",
- "batch 2 / 10 : total OK 2522 NOK 478\n",
- "batch 3 / 10 : total OK 3370 NOK 630\n",
- "batch 4 / 10 : total OK 4207 NOK 793\n",
- "batch 5 / 10 : total OK 5044 NOK 956\n",
- "batch 6 / 10 : total OK 5887 NOK 1113\n",
- "batch 7 / 10 : total OK 6728 NOK 1272\n",
- "batch 8 / 10 : total OK 7570 NOK 1430\n",
- "batch 9 / 10 : total OK 8419 NOK 1581\n",
+ "batch 1 / 10 : total OK 851 NOK 149\n",
+ "batch 2 / 10 : total OK 1683 NOK 317\n",
+ "batch 3 / 10 : total OK 2522 NOK 478\n",
+ "batch 4 / 10 : total OK 3370 NOK 630\n",
+ "batch 5 / 10 : total OK 4207 NOK 793\n",
+ "batch 6 / 10 : total OK 5044 NOK 956\n",
+ "batch 7 / 10 : total OK 5887 NOK 1113\n",
+ "batch 8 / 10 : total OK 6728 NOK 1272\n",
+ "batch 9 / 10 : total OK 7570 NOK 1430\n",
+ "batch 10 / 10 : total OK 8419 NOK 1581\n",
"Final accuracy: 84.190000\n",
- "Connection to 192.168.2.99 closed.\n"
+ "Connection to 99.121.248.96 closed.\n"
]
}
],
@@ -900,7 +938,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "We see that the final top-1 accuracy is 84.19%, which is very close to the 84.22% reported on the [BNN-PYNQ accuracy table in Brevitas](https://github.com/Xilinx/brevitas/tree/master/brevitas_examples/bnn_pynq). "
+ "We see that the final top-1 accuracy is 84.19%, which is very close to the 84.22% reported on the [BNN-PYNQ accuracy table in Brevitas](https://github.com/Xilinx/brevitas/tree/master/src/brevitas_examples/bnn_pynq). "
]
},
{
@@ -927,7 +965,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.8"
+ "version": "3.8.5"
}
},
"nbformat": 4,
diff --git a/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb b/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb
index 5ed4b170b4..a1a8450225 100644
--- a/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb
+++ b/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb
@@ -384,7 +384,7 @@
"model = model.transform(MergeONNXModels(pre_model))\n",
"# add input quantization annotation: UINT8 for all BNN-PYNQ models\n",
"global_inp_name = model.graph.input[0].name\n",
- "model.set_tensor_datatype(global_inp_name, DataType.UINT8)\n",
+ "model.set_tensor_datatype(global_inp_name, DataType[\"UINT8\"])\n",
"\n",
"model.save(build_dir+\"/tfc_w1_a1_with_preproc.onnx\")\n",
"showInNetron(build_dir+\"/tfc_w1_a1_with_preproc.onnx\")"
@@ -1799,7 +1799,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.8"
+ "version": "3.8.5"
}
},
"nbformat": 4,
diff --git a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb
index e0ce00c1be..2c9f4a99ed 100644
--- a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb
+++ b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb
@@ -98,7 +98,7 @@
"\n",
"Following Murovic and Trost's open-source implementation provided as a Matlab script [here](https://github.com/TadejMurovic/BNN_Deployment/blob/master/cybersecurity_dataset_unswb15.m), we've created a [Python version](dataloader_quantized.py).\n",
"\n",
- "**FPGA'21 tutorial:** Downloading the original dataset and quantizing it can take some time, so we provide a download link to the pre-quantized version for your convenience. "
+ "**Live FINN tutorial:** Downloading the original dataset and quantizing it can take some time, so we provide a download link to the pre-quantized version for your convenience. "
]
},
{
@@ -110,16 +110,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "--2021-05-10 18:14:00-- https://zenodo.org/record/4519767/files/unsw_nb15_binarized.npz?download=1\n",
+ "--2021-10-12 15:49:17-- https://zenodo.org/record/4519767/files/unsw_nb15_binarized.npz?download=1\n",
"Resolving zenodo.org (zenodo.org)... 137.138.76.77\n",
"Connecting to zenodo.org (zenodo.org)|137.138.76.77|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 13391907 (13M) [application/octet-stream]\n",
"Saving to: ‘unsw_nb15_binarized.npz’\n",
"\n",
- "unsw_nb15_binarized 100%[===================>] 12.77M 3.96MB/s in 3.4s \n",
+ "unsw_nb15_binarized 100%[===================>] 12.77M 3.56MB/s in 3.7s \n",
"\n",
- "2021-05-10 18:14:04 (3.77 MB/s) - ‘unsw_nb15_binarized.npz’ saved [13391907/13391907]\n",
+ "2021-10-12 15:49:22 (3.44 MB/s) - ‘unsw_nb15_binarized.npz’ saved [13391907/13391907]\n",
"\n"
]
}
@@ -422,9 +422,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "Training loss: 0%| | 0/10 [00:00, ?it/s]/opt/conda/lib/python3.8/site-packages/torch/autograd/__init__.py:130: UserWarning: CUDA initialization: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx (Triggered internally at /opt/conda/conda-bld/pytorch_1607370172916/work/c10/cuda/CUDAFunctions.cpp:100.)\n",
- " Variable._execution_engine.run_backward(\n",
- "Training loss = 0.131708 test accuracy = 0.805398: 100%|██████████| 10/10 [01:04<00:00, 6.42s/it]\n"
+ "Training loss = 0.132918 test accuracy = 0.798341: 100%|██████████| 10/10 [00:44<00:00, 4.45s/it]\n"
]
}
],
@@ -459,7 +457,7 @@
"outputs": [
{
"data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEWCAYAAABxMXBSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAApFUlEQVR4nO3df5xddX3n8dd77vzK/LyTzJCfMyRAAAPi3O6AP6i0KrbAtsB2qYK/oGtLbYvVRa2ou9pl5bFWWrXuUhdWxVqpqKglVRCsAtJWMIGEQIKBGAJJSMjk9ySTzM/P/nHPTO5MJsncZG7unZn38/G4j3vP9/y4n3Mheeec7znfo4jAzMxsvMqKXYCZmU0uDg4zM8uLg8PMzPLi4DAzs7w4OMzMLC8ODjMzy4uDw+w4SLpf0rUTvWyeNfympE0TvV2zYykvdgFmJ4ukfTmTNUAPMJBM/3FE3DXebUXEpYVY1mwycHDYtBERdUOfJW0A/jAi/mX0cpLKI6L/ZNZmNpn4VJVNe0OnfCR9VNJW4E5JTZJ+IKlT0q7k84KcdR6W9IfJ5+sk/aukv06WfUHSpce57CJJP5PUJelfJN0m6Rvj3I9XJd+1W9JqSZfnzLtM0ppku5slfThpb072bbeknZIeleS/F+yo/D+IWdYcYCZwKnA92T8bdybTbcAB4P8cZf3XAmuBZuCzwFck6TiW/UfgF8As4C+Bd4+neEkVwD8DDwKnAO8H7pJ0VrLIV8iejqsHzgV+mrR/CNgEtACzgY8DHofIjsrBYZY1CHwqInoi4kBE7IiI70ZEd0R0AbcAv3GU9V+MiP8XEQPA3wNzyf5FPO5lJbUB5wOfjIjeiPhXYOk4638dUAd8Jln3p8APgGuS+X3AEkkNEbErIp7MaZ8LnBoRfRHxaHgAOzsGB4dZVmdEHByakFQj6XZJL0raC/wMSEtKHWH9rUMfIqI7+ViX57LzgJ05bQAbx1n/PGBjRAzmtL0IzE8+/2fgMuBFSY9Ien3SfiuwDnhQ0npJN43z+2wac3CYZY3+V/aHgLOA10ZEA3BR0n6k008TYQswU1JNTlvrONd9GWgd1T/RBmwGiIhlEXEF2dNY/wR8O2nviogPRcRpwOXAjZLecmK7YVOdg8NsbPVk+zV2S5oJfKrQXxgRLwLLgb+UVJkcFfzuOFd/HOgG/kJShaTfTNa9O9nWOyU1RkQfsJfsqTkk/Y6kM5I+lj1kL08eHPMbzBIODrOxfQGYAWwHHgN+dJK+953A64EdwKeBb5G93+SoIqKXbFBcSrbmvwPeExG/TBZ5N7AhOe32vuR7ABYD/wLsA34O/F1EPDRhe2NTktwPZla6JH0L+GVEFPyIx2y8fMRhVkIknS/pdEllki4BriDbJ2FWMnznuFlpmQN8j+x9HJuAP4mIFcUtyWwkn6oyM7O8+FSVmZnlZVqcqmpubo6FCxcWuwwzs0nliSee2B4RLaPbp0VwLFy4kOXLlxe7DDOzSUXSi2O1+1SVmZnlxcFhZmZ5cXCYmVleHBxmZpYXB4eZmeXFwWFmZnlxcJiZWV4cHEdx78rNfOOxMS9jNjObthwcR/HA6q3c/rNfFbsMM7OS4uA4ivbWNBt3HmD7vmM+R8fMbNooaHBIukTSWknrJN00xvz3SXpa0kpJ/yppSc68jyXrrZX02+Pd5kTKtDUBsPKl3YX8GjOzSaVgwSEpBdxG9lGWS4BrcoMh8Y8R8eqIaAc+C3wuWXcJcDVwDnAJ8HeSUuPc5oQ5d14j5WVixcZdhfoKM7NJp5BHHBcA6yJiffI85LvJPs1sWETszZmsBYYeDnIFcHdE9ETEC8C6ZHvH3OZEmlGZ4lVzG1jhIw4zs2GFDI75wMac6U1J2wiS/kzSr8gecfz5MdYd1zaT7V4vabmk5Z2dnce9E5m2NE9t3M3AoB94ZWYGJdA5HhG3RcTpwEeB/zaB270jIjoioqOl5bDh5Mct05Zmf+8Az2/rmqjSzMwmtUIGx2agNWd6QdJ2JHcDVx5j3Xy3ecLaW91BbmaWq5DBsQxYLGmRpEqynd1LcxeQtDhn8j8CzyeflwJXS6qStAhYDPxiPNucaAtn1ZCuqXA/h5lZomBPAIyIfkk3AA8AKeCrEbFa0s3A8ohYCtwg6WKgD9gFXJusu1rSt4E1QD/wZxExADDWNgu1D8n3kWlN+8oqM7NEQR8dGxH3AfeNavtkzucPHGXdW4BbxrPNQsu0NfHwc53sPdhHQ3XFyfxqM7OSU/TO8ckg05YmAlZt3FPsUszMis7BMQ6vaU0jwYqXfLrKzMzBMQ4N1RWc3lLHyo27i12KmVnROTjGKdtBvpsI3whoZtObg2OcMm1N7Nzfy0s7u4tdiplZUTk4xinTlgbw/RxmNu05OMbpzNn11FSm3EFuZtOeg2OcUmXivAWNrHAHuZlNcw6OPGTamljz8l4O9g0UuxQzs6JxcOQh05qmfzBY/bJvBDSz6cvBkYd2d5CbmTk48nFKfTULmmY4OMxsWnNw5CnT1uQrq8xsWnNw5Km9Nc3Lew7yyt6DxS7FzKwoHBx58o2AZjbdOTjydM68BipTZX6wk5lNWw6OPFWVp1gyr8FHHGY2bTk4jkOmLc2qTbvpHxgsdilmZiedg+M4ZNqaONg3yC+3dhW7FDOzk87BcRwyrWkAP9jJzKalggaHpEskrZW0TtJNY8y/UdIaSask/UTSqUn7myStzHkdlHRlMu9rkl7ImddeyH0Yy4KmGTTXVbqfw8ympfJCbVhSCrgNeCuwCVgmaWlErMlZbAXQERHdkv4E+Czw9oh4CGhPtjMTWAc8mLPeRyLinkLVfiySaG9t8pVVZjYtFfKI4wJgXUSsj4he4G7gitwFIuKhiBh6pN5jwIIxtnMVcH/OciUh05Zmfed+dnf3FrsUM7OTqpDBMR/YmDO9KWk7kvcC94/RfjXwzVFttySntz4vqWqsjUm6XtJyScs7OzvzqXtchm4EdD+HmU03JdE5LuldQAdw66j2ucCrgQdymj8GnA2cD8wEPjrWNiPijojoiIiOlpaWCa/5vAVpJN9BbmbTTyGDYzPQmjO9IGkbQdLFwCeAyyOiZ9TstwHfj4i+oYaI2BJZPcCdZE+JnXR1VeWcNbveRxxmNu0UMjiWAYslLZJUSfaU09LcBSRlgNvJhsa2MbZxDaNOUyVHIUgScCXwzMSXPj6ZtjQrN+5mcDCKVYKZ2UlXsOCIiH7gBrKnmZ4Fvh0RqyXdLOnyZLFbgTrgO8mltcPBImkh2SOWR0Zt+i5JTwNPA83Apwu1D8eSaW1iz4E+Xtixv1glmJmddAW7HBcgIu4D7hvV9smczxcfZd0NjNGZHhFvnsAST0juSLmnt9QVtxgzs5OkJDrHJ6vTW+qoryr3g53MbFpxcJyAsjLxmta0r6wys2nFwXGCMm1p1r7SRXdvf7FLMTM7KRwcJyjTlmZgMHh6055il2JmdlI4OE5Qe2sTACt8P4eZTRMOjhM0s7aShbNq3EFuZtOGg2MCZNqaWPHSbiJ8I6CZTX0
OjgnQ3ppmW1cPW/YcLHYpZmYF5+CYALk3ApqZTXUOjglw9pwGqsrL3M9hZtOCg2MCVJaX8er5jb6yysymBQfHBMm0pXl68x56+weLXYqZWUE5OCZIe2sTvf2DPLtlb7FLMTMrKAfHBPGjZM1sunBwTJC5jdXMbqhyB7mZTXkOjgkiiUxrkzvIzWzKc3BMoExbmhd3dLNj3+hHp5uZTR0OjgmUacsOeOh+DjObyhwcE+jV8xtJlcl3kJvZlFbQ4JB0iaS1ktZJummM+TdKWiNplaSfSDo1Z96ApJXJa2lO+yJJjyfb/JakykLuQz5mVKY4e069jzjMbEorWHBISgG3AZcCS4BrJC0ZtdgKoCMizgPuAT6bM+9ARLQnr8tz2v8K+HxEnAHsAt5bqH04Hpm2NCs37mZg0CPlmtnUVMgjjguAdRGxPiJ6gbuBK3IXiIiHIqI7mXwMWHC0DUoS8GayIQPw98CVE1n0icq0NrGvp59fde4rdilmZgVRyOCYD2zMmd6UtB3Je4H7c6arJS2X9JikK5O2WcDuiBh6wPextnnSHRop1/dzmNnUVBKd45LeBXQAt+Y0nxoRHcA7gC9IOj3PbV6fBM/yzs7OCaz26BY119I4o8Id5GY2ZRUyODYDrTnTC5K2ESRdDHwCuDwihm+AiIjNyft64GEgA+wA0pLKj7bNZL07IqIjIjpaWlpOfG/GSRLtrWl3kJvZlFXI4FgGLE6ugqoErgaW5i4gKQPcTjY0tuW0N0mqSj43AxcCayL7bNaHgKuSRa8F7i3gPhyXTFuata90sa+n/9gLm5lNMgULjqQf4gbgAeBZ4NsRsVrSzZKGrpK6FagDvjPqsttXAcslPUU2KD4TEWuSeR8FbpS0jmyfx1cKtQ/HK9PWRASs8lGHmU1B5cde5PhFxH3AfaPaPpnz+eIjrPfvwKuPMG892Su2Slb7gjQAKzbu5g1nNBe3GDOzCVYSneNTTWNNBae31PrKKjObkhwcBdLe2sSKl3aT7ZYxM5s6HBwFkmlLs2N/L5t2HSh2KWZmE8rBUSBDNwI+6dNVZjbFODgK5KzZ9cyoSPlGQDObchwcBVKeKuO8BY1+IqCZTTkOjgLKtDWx5uU9HOwbKHYpZmYTxsFRQO2tafoGgtUv7y12KWZmE8bBUUBDHeQet8rMphIHRwHNbqhmfnqGbwQ0synFwVFg7W1pX1llZlOKg6PAMq1pNu8+wLa9B4tdipnZhHBwFFimrQnAl+Wa2ZTh4Ciwc+Y1UJGST1eZ2ZTh4Ciw6ooUS+Y2sHKjO8jNbGpwcJwEmbYmVm3aQ//AYLFLMTM7YQ6OkyDTlqa7d4DnXtlX7FLMzE6Yg+MkyLQOdZD7dJWZTX4OjpOgdeYMZtZWuoPczKYEB8dJIIlMa9pDj5jZlFDQ4JB0iaS1ktZJummM+TdKWiNplaSfSDo1aW+X9HNJq5N5b89Z52uSXpC0Mnm1F3IfJkqmLc26bfvYc6Cv2KWYmZ2QggWHpBRwG3ApsAS4RtKSUYutADoi4jzgHuCzSXs38J6IOAe4BPiCpHTOeh+JiPbktbJQ+zCRhm4EfMpHHWY2yRXyiOMCYF1ErI+IXuBu4IrcBSLioYjoTiYfAxYk7c9FxPPJ55eBbUBLAWstuPMWNCLhfg4zm/TGFRySaiWVJZ/PlHS5pIpjrDYf2JgzvSlpO5L3AveP8d0XAJXAr3Kab0lOYX1eUtURar5e0nJJyzs7O49RauHVV1dw5in1vrLKzCa98R5x/AyoljQfeBB4N/C1iSpC0ruADuDWUe1zgX8A/iAihu6e+xhwNnA+MBP46FjbjIg7IqIjIjpaWkrjYKU96SCPiGKXYmZ23MYbHEpOKf0e8HcR8fvAOcdYZzPQmjO9IGkbuWHpYuATwOUR0ZPT3gD8EPhERDw21B4RWyKrB7iT7CmxSSHTlmZ3dx8bdnQfe2EzsxI17uCQ9HrgnWT/MgdIHWOdZcBiSYskVQJXA0tHbTQD3E42NLbltFcC3we+HhH3jFpn7lBBwJXAM+Pch6IbHinXD3Yys0lsvMHxQbKniL4fEaslnQY8dLQVIqIfuAF4AHgW+Hay7s2SLk8WuxWoA76TXFo7FCxvAy4Crhvjstu7JD0NPA00A58e5z4U3Rmn1FFXVe4OcjOb1JTv+fakk7wuIvYWpqSJ19HREcuXLy92GQC888uPsedAHz94/xuLXYqZ2VFJeiIiOka3j/eqqn+U1CCpluypoTWSPjLRRU4HmdYmnt3SxYHegWKXYmZ2XMZ7qmpJcoRxJdlLZheRvbLK8tTemmZgMHh6855il2JmdlzGGxwVyX0bVwJLI6IP8DWlx6G9LQ3gBzuZ2aQ13uC4HdgA1AI/S8aUmjR9HKWkua6Ktpk17iA3s0lrXMEREV+MiPkRcVlyD8WLwJsKXNuUlWlLOzjMbNIab+d4o6TPDQ3hIelvyB592HHItKbZuvcgW/YcKHYpZmZ5G++pqq8CXWTvr3gb2dNUdxaqqKmuffhGwN3FLcTM7DiMNzhOj4hPJSPdro+I/wGcVsjCprIlcxuoLC/zg53MbFIab3AckPTrQxOSLgR8nuU4VZaXce68Bg89YmaTUvk4l3sf8HVJjcn0LuDawpQ0PWTamvjGYy/SNzBIRcpP8DWzyWO8V1U9FRGvAc4DzouIDPDmglY2xWXa0vT0D/LLLV3FLsXMLC95/VM3IvbmjFF1YwHqmTaGR8r1jYBmNsmcyDkSTVgV09C8xmpa6qt8ZZWZTTonEhwecuQESCKTPBHQzGwyOWrnuKQuxg4IATMKUtE0kmlr4sE1r7Brfy9NtZXFLsfMbFyOesQREfUR0TDGqz4ixntFlh1BZnjAw91FrcPMLB++DrSIzlvQSJn8KFkzm1wcHEVUU1nO2XMaWOEjDjObRBwcRdbelmblS7sZHPS1BmY2OTg4iizTmqarp5/12/cVuxQzs3EpaHBIukTSWknrJN00xvwbJa2RtErST5IHRA3Nu1bS88nr2pz2/yDp6WSbX5Q0qe8nGboR8Enfz2Fmk0TBgkNSCrgNuBRYAlwjacmoxVYAHRFxHnAP8Nlk3ZnAp4DXAhcAn5LUlKzzJeCPgMXJ65JC7cPJcFpzLQ3V5b4R0MwmjUIecVwArEuGYe8F7gauyF0gIh6KiO5k8jFgQfL5t4EfR8TOiNgF/Bi4RNJcoCEiHouIAL5O9jnok1ZZmWhva/KVVWY2aRQyOOYDG3OmNyVtR/Je4P5jrDs/+XzMbUq6fuiJhZ2dnXmWfnK1t6Z57pUu9vX0F7sUM7NjKonOcUnvAjqAWydqmxFxR0R0RERHS0vLRG22IDJtaQYDVm3aXexSzMyOqZDBsRlozZlekLSNIOli4BPA5RHRc4x1N3PodNYRtznZtC9IA76D3Mwmh0IGxzJgsaRFkiqBq4GluQtIygC3kw2NbTmzHgB+S1JT0in+W8ADEbEF2CvpdcnVVO8B7i3gPpwUTbWVnNZc6w5yM5sUCjbeVET0S7qBbAikgK9GxGpJNwPLI2Ip2VNTdcB3kqtqX4qIyy
Nip6T/STZ8AG6OiJ3J5z8FvkZ2kMX7OdQvMqm1t6X52XPbiQgm+RXGZjbFFXSgwoi4D7hvVNsncz5ffJR1vwp8dYz25cC5E1hmSci0NfG9JzezadcBWmfWFLscM7MjKonOccveQQ543CozK3kOjhJx1px6qivKWOl+DjMrcQ6OElGRKuO8+Wk/g9zMSp6Do4Rk2tKs3ryXnv6BYpdiZnZEDo4SkmlL0zswyJqX9xa7FDOzI3JwlJChkXJ9P4eZlTIHRwmZ3VDN3MZqX1llZiXNwVFiMm1pVrqD3MxKmIOjxGRam9i48wCdXT3HXtjMrAgcHCUm05YGPOChmZUuB0eJOXd+I+Vl8oOdzKxkOThKTHVFilfNbfCVVWZWshwcJSjTlmbVpt0MDEaxSzEzO4yDowRl2tLs7x3g+W1dxS7FzOwwDo4SlGn1jYBmVrocHCXo1Fk1NNVUuIPczEqSg6MESSLT1uQjDjMrSQ6OEtXemmZd5z72HuwrdilmZiM4OEpUpi1NBKzauKfYpZiZjVDQ4JB0iaS1ktZJummM+RdJelJSv6SrctrfJGllzuugpCuTeV+T9ELOvPZC7kOxvKY1jYT7Ocys5JQXasOSUsBtwFuBTcAySUsjYk3OYi8B1wEfzl03Ih4C2pPtzATWAQ/mLPKRiLinULWXgobqCs5oqfNIuWZWcgp5xHEBsC4i1kdEL3A3cEXuAhGxISJWAYNH2c5VwP0R0V24UktTpi3Nipd2EeEbAc2sdBQyOOYDG3OmNyVt+boa+OaotlskrZL0eUlVY60k6XpJyyUt7+zsPI6vLb5MWxO7uvt4cce0y0wzK2El3TkuaS7wauCBnOaPAWcD5wMzgY+OtW5E3BERHRHR0dLSUvBaC6G9NQ3ACj+fw8xKSCGDYzPQmjO9IGnLx9uA70fE8DWpEbElsnqAO8meEpuSzpxdT01lipW+n8PMSkghg2MZsFjSIkmVZE85Lc1zG9cw6jRVchSCJAFXAs+ceKmlKVUmXrMg7Q5yMyspBQuOiOgHbiB7mulZ4NsRsVrSzZIuB5B0vqRNwO8Dt0taPbS+pIVkj1geGbXpuyQ9DTwNNAOfLtQ+lIJMW5o1L+/lYN9AsUsxMwMKeDkuQETcB9w3qu2TOZ+XkT2FNda6GxijMz0i3jyxVZa2TFsT/YPBM5v30LFwZrHLMTMr7c5xO9RBvmyDO8jNrDQ4OEpcS30V58xr4K8fXMv/uv9ZDvT6lJWZFZeDYxK46w9fy1W/toDbH1nPWz//CA+v3VbsksxsGnNwTALpmkr+6qrz+Nb1r6OyvIzr7lzG+7+5gs6unmKXZmbTkINjEnntabO4/wNv5IMXL+aBZ7bylr95mG/+4iUG/WxyMzuJHByTTFV5ig9efCb3f/CNvGpuAx/73tO8/Y6f8/wrfj65mZ0cDo5J6vSWOu6+/nV89qrzeH7bPi774qP8zYNrfb+HmRWcg2MSk8TbOlr5yY2/we+eN4///dN1XPq3j/Lv67YXuzQzm8IcHFPArLoqPvf2dr7x3tcyGME7vvw4N357JTv39xa7NDObghwcU8ivL27mgQ9exJ+96XSWrnyZt/zNw9zzxCY/z8PMJpSDY4qprkjxkd8+mx/++Rs5raWOD3/nKd7x/x5nfee+YpdmZlOEg2OKOmtOPd/549dzy386l2de3sMlf/soX/zJ8/T2H+1hi2Zmx+bgmMLKysQ7X3sqP7nxN3jrktl87sfPcdkXH2XZhp3FLs3MJjEHxzRwSkM1t73j17jzuvM50DvA7//fn3PTd1exp7vv2CubmY3i4JhG3nT2Kfz4xou4/qLT+M4Tm3jL5x7m3pWb3XluZnlxcEwzNZXlfPyyV7H0hguZl57BB+5eybV3LmPjzu5il2Zmk4SDY5o6Z14j3//TC/nU7y7hiQ07eevnH+FLD/+KvgF3npvZ0Tk4prFUmfiDCxfxLx/6DS5a3MJf/eiX/O7//ldWvOSHRpnZkTk4jLmNM7jjPR3c/u7/wO7uPn7vS//OJ+99hr0H3XluZodzcNiw3z5nDj++8SKuff1C/uGxF3nr5x7h/qe3uPPczEYoaHBIukTSWknrJN00xvyLJD0pqV/SVaPmDUhambyW5rQvkvR4ss1vSaos5D5MN/XVFfzl5efwT396ITNrq/iTu57kj76+nM27DxS7NDMrEQULDkkp4DbgUmAJcI2kJaMWewm4DvjHMTZxICLak9flOe1/BXw+Is4AdgHvnfDijde0pvnnGy7k45edzb+t28FbP/cIX350PXsO+PSV2XRXXsBtXwCsi4j1AJLuBq4A1gwtEBEbknnjupRHkoA3A+9Imv4e+EvgSxNVtB1Snirj+otO59Jz5/Lf732GT//wWT79w2eZ21jNmbPrOWtOffZ9dj1nnFLHjMpUsUs2s5OgkMExH9iYM70JeG0e61dLWg70A5+JiH8CZgG7I6I/Z5vzx1pZ0vXA9QBtbW35VW4jtM6s4c7rzufn63ewatMe1m7tYu3WLn6+fsfw2FcSLJxVy5mz6zhrdj1nzskGysLmWipS7kozm0oKGRwn6tSI2CzpNOCnkp4G9ox35Yi4A7gDoKOjw727J0gSbzi9mTec3jzc1j8wyIs7u3luaxdrX+niuVeygfLjNa8w9Bj0ipQ4vaVu+AjlrOR9fnoGZWUq0t6Y2YkoZHBsBlpzphckbeMSEZuT9/WSHgYywHeBtKTy5Kgjr23axCpPlXF6Sx2nt9Rx6avnDrcf7BvgV537kiDJvj/x4i6WPvXy8DI1lSkWz67nrNkjQ6WlvorsGUkzK1WFDI5lwGJJi8j+5X41h/omjkpSE9AdET2SmoELgc9GREh6CLgKuBu4Fri3INXbcauuSHHOvEbOmdc4or3rYB/Pb9s3fISydmsXP/3lNr69fNPwMk01FSP7T+bUc+Yp9TTWVJzs3TCzI1Ahr9GXdBnwBSAFfDUibpF0M7A8IpZKOh/4PtAEHAS2RsQ5kt4A3A4Mkr3y6wsR8ZVkm6eRDY2ZwArgXRHRc7Q6Ojo6Yvny5QXZRztx2/f18NwrXUmg7Bv+3NXTP7zMnIbqpN8ke4RyWkstp86qZVZtpY9QzApE0hMR0XFY+3S4ucvBMflEBFv2HBw+Mhk6Snl+274RD6OqrypnYXNt9jWrhoWzDn2e6VAxOyFHCo5S7hy3aUwS89IzmJeewZvOOmW4fWAweGlnNxu27+eF7ft5ccd+XtjRzVMbd/PDVS8Pd8oD1FeXs6g5e2SyaFYNC4c+N9fSVFPhUDE7Tg4Om1RSZWJRc/Yv/zeNmtfbP8imXd1s2LGfF7Z3Z0Nl+35Wbtx1WKg0VCdHKskRyqLmmiRgammq9WAEZkfj4LApo7K8jNNa6jitpe6web39g2zclXukkg2YJ1/axQ9GhUrjjIrsaa/hYMmeAlvUXEu6xqFi5uCwaaGy/NClw6P19A+wcecBNmzfz4YdyWt79/AlxLndgOmaiuFTX22zapnXWM3sxmrmNlYzt2EGDTPKfQrMpjwHh017VeUpzjiljjNOOVKodI849
bVhx36WbdjFvaNCBWBGRYo5jdXMaciGyZyhV0M1cxtnMKexmlm1lb750SY1B4fZUWRDpZ4zTqk/bF7fwCDbunrYuucAW/YcZGvy2rI3+/74Czt5Ze9B+gdHpktFSpxSnw2W2Y3VzG3Ihks2WKqY0ziDU+qrPFSLlSwHh9lxqkiVMT89g/npGUdcZnAw2L6/ZzhUtu49yJY9B3llT/Z9zct7+cmzr3Cwb+Q4nxK01FWNOnpJgqVhxvDRTHWFB5a0k8/BYVZAZWXZo4tT6qs5b8HYy0QEew/0s2XvyCOXoaOXDTv289j6Hew92H/YuumaCprrqqitTFFbVU5NZTm1Vanse9I2PF2VorayPFkuNfxel6xXWe4jHBsfB4dZkUmisaaCxpoKzp7TcMTl9vf0s3XvwVFHLwfYub+X/T0DdPf28/LuA+zv7R+e7u4dGHcdFSmNCJyaquznmspy6qpSI6Zrq5JQqjwUQo0zKjilvoqZtZWU+zTblObgMJskaqvKj3hl2JEMDgbdfQN09/Szv3eA/T3ZMNnf08/+3n66eway770D7OvpH16uOwmf/T397Oo+MDw9njCSYGZNJS31VbTUV9Fcl31vqauiub6SlrrqpL2SphpfKDAZOTjMprCyMlFXVU5d1cT9UR8YDA6MCqN9Pf3s7u6jc18P27t66NzXQ2dXD9v39fDC9v10dvXQ03/489pSZaK5rnJUuGTfRwdPQ7UvdS4VDg4zy0vqOMIoItjX009n11Cg9NLZdTAJmt7hoPnlli627+s57Eo0yN6LMzJYKocDpjnnvb66nBmVKarLUz6aKRAHh5kVnCTqqyuor64Y887+XIODwZ4Dhx+9DL939bB59wFWbtzNjv09h91Lk6uqvGw4RGZUpqiuSDGjIts2oyI7nW3LnX9omdHzD61TdmidaRhQDg4zKyllZaKptpKm2krOnH34/TO5BgaDnft7h4Nle1cP+3v7OdA7wIG+7Otg7wAH+wYPTfcNcKB3gN3dfRzsy5mXrHM8hgIqN4yqysuoLC+jKnlVlpdRmSqjqjyV/ZzTPrqtamjZijIqU6kRy45YL5VKlik7qeHl4DCzSStVpuG+kIkQEfT0D3Kgd4CD/YfCJBs2g4c+5wTQUCD19A2OnO4fpLc/2we0c/8gvf2DSdsgvQOD9PQN0DswSN/AxDzaorxMo8IlGzhffk8HC5trJ+Q7hr9rQrdmZjaJSRo+YjhZBgcjGyT9g/T0D2SD5bCQGaR3YCB5H1p2MGfZgZHrDLUNDDKjcuL3xcFhZlZEZWWiumworCbHI5J9l46ZmeXFwWFmZnkpaHBIukTSWknrJN00xvyLJD0pqV/SVTnt7ZJ+Lmm1pFWS3p4z72uSXpC0Mnm1F3IfzMxspIL1cUhKAbcBbwU2AcskLY2INTmLvQRcB3x41OrdwHsi4nlJ84AnJD0QEbuT+R+JiHsKVbuZmR1ZITvHLwDWRcR6AEl3A1cAw8ERERuSeSPGIoiI53I+vyxpG9AC7C5gvWZmNg6FPFU1H9iYM70pacuLpAuASuBXOc23JKewPi9pYi7gNjOzcSnpznFJc4F/AP4gIoaOSj4GnA2cD8wEPnqEda+XtFzS8s7OzpNSr5nZdFDI4NgMtOZML0jaxkVSA/BD4BMR8dhQe0Rsiawe4E6yp8QOExF3RERHRHS0tLQc1w6YmdnhCtnHsQxYLGkR2cC4GnjHeFaUVAl8H/j66E5wSXMjYouy4ytfCTxzrO098cQT2yW9mGf9Q5qB7ce57lTk3+MQ/xYj+fcYaSr8HqeO1ag42tCSJ0jSZcAXgBTw1Yi4RdLNwPKIWCrpfLIB0QQcBLZGxDmS3kX2aGJ1zuaui4iVkn5KtqNcwErgfRGxr4D7sDwiOgq1/cnGv8ch/i1G8u8x0lT+PQoaHFPBVP6Pfzz8exzi32Ik/x4jTeXfo6Q7x83MrPQ4OI7tjmIXUGL8exzi32Ik/x4jTdnfw6eqzMwsLz7iMDOzvDg4zMwsLw6OozjW6L7ThaRWSQ9JWpOMWPyBYtdUCiSlJK2Q9INi11JsktKS7pH0S0nPSnp9sWsqFkn/Nflz8oykb0qqLnZNE83BcQQ5o/teCiwBrpG0pLhVFU0/8KGIWAK8Dvizafxb5PoA8GyxiygRfwv8KCLOBl7DNP1dJM0H/hzoiIhzyd7DdnVxq5p4Do4jGx7dNyJ6gaHRfaedZJiXJ5PPXWT/Ush7wMqpRNIC4D8CXy52LcUmqRG4CPgKQET05jwCYToqB2ZIKgdqgJeLXM+Ec3Ac2YSM7jvVSFoIZIDHi1xKsX0B+Atg8BjLTQeLgE7gzuTU3Zcl1Ra7qGKIiM3AX5N91tAWYE9EPFjcqiaeg8PGTVId8F3ggxGxt9j1FIuk3wG2RcQTxa6lRJQDvwZ8KSIywH5gWvYJSmoie2ZiETAPqE2GUJpSHBxHdkKj+041kirIhsZdEfG9YtdTZBcCl0vaQPYU5pslfaO4JRXVJmBTRAwdhd5DNkimo4uBFyKiMyL6gO8BbyhyTRPOwXFkw6P7JqP1Xg0sLXJNRZGMRPwV4NmI+Fyx6ym2iPhYRCyIiIVk/7/4aURMuX9VjldEbAU2SjoraXoLOU/6nGZeAl4nqSb5c/MWpuCFAoUcVn1Si4h+STcAD3BodN/Vx1htqroQeDfwtKSVSdvHI+K+4pVkJeb9wF3JP7LWA39Q5HqKIiIel3QP8CTZqxFXMAWHHvGQI2ZmlhefqjIzs7w4OMzMLC8ODjMzy4uDw8zM8uLgMDOzvDg4zI5B0r7kfaGkd0zwtj8+avrfJ3L7ZoXg4DAbv4VAXsGRDHR3NCOCIyKm3F3GNvU4OMzG7zPAGyWtTJ65kJJ0q6RlklZJ+mMASb8p6VFJS0nuoJb0T5KeSJ7TcH3S9hmyo6iulHRX0jZ0dKNk289IelrS23O2/XDOsy/uSu5QRtJnkmemrJL01yf917Fpw3eOm43fTcCHI+J3AJIA2BMR50uqAv5N0tBIqL8GnBsRLyTT/yUidkqaASyT9N2IuEnSDRHRPsZ3/R7QTvbZFs3JOj9L5mWAc8gO1/1vwIWSngX+E3B2RISk9MTuutkhPuIwO36/BbwnGYblcWAWsDiZ94uc0AD4c0lPAY+RHTxzMUf368A3I2IgIl4BHgHOz9n2pogYBFaSPYW2BzgIfEXS7wHdJ7hvZkfk4DA7fgLeHxHtyWtRzrMX9g8vJP0m2VFTXx8RryE7ftGJPE60J+fzAFAeEf1kHz52D/A7wI9OYPtmR+XgMBu/LqA+Z/oB4E+SIeeRdOYRHmDUCOyKiG5JZ5N9/O6QvqH1R3kUeHvSj9JC9gl7vzhSYcmzUhqTgSf/K9lTXGYF4T4Os/FbBQwkp5y+RvY52wuBJ5MO6k7gyjHW+xHwvqQfYi3Z01VD7gBWSXoyIt6Z0/594PXAU0AAfxERW5PgGUs9cK+karJHQjce1x6ajYNHxzUzs7z4VJWZmeXFwWFmZnlxcJiZWV4cHGZmlhcHh5mZ5cXBYWZmeXFwmJlZXv4//lzH8IMQHB8A
AAAASUVORK5CYII=\n",
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEWCAYAAABxMXBSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAofElEQVR4nO3de3Rd5X3m8e+jo5slSzq2JRMsHWMbTIi5SLSGXEsTSlpIO8C0uUCbhLRpmXRKmpY2DWlmpR2mWSuFTpJ2SjowJSSZkFJCksaTQsiNQNKEBBOMb9yM8N1g+Spbsu6/+eNsiSMhyTq2js+R9HzW0tLe776c3z4herz3u/e7FRGYmZlNVVmxCzAzs5nFwWFmZnlxcJiZWV4cHGZmlhcHh5mZ5cXBYWZmeXFwmJ0ASQ9Ium66182zhjdL2jnd+zU7nvJiF2B2qkg6mjNbA/QCg8n8f4mIu6e6r4i4ohDrms0EDg6bMyJi/vC0pK3A70fEd8euJ6k8IgZOZW1mM4kvVdmcN3zJR9JHJL0I3CVpgaRvSuqQdDCZbsnZ5geSfj+Zfp+kH0n6u2TdFyRdcYLrLpf0iKQjkr4r6TZJX5ricbwm+axDkjZJujJn2dskbU72u0vSnyftjcmxHZJ0QNIPJfnvgk3K/4GYZb0KWAicAVxP9v8bdyXzS4FjwD9Osv1rgWeARuAW4E5JOoF1vwz8DFgE/DXwnqkUL6kC+H/At4HFwAeBuyW9OlnlTrKX4+qA84DvJ+1/BuwEmoDTgL8EPA6RTcrBYZY1BPxVRPRGxLGI2B8RX42I7og4AnwC+OVJtt8WEf8nIgaBLwCnk/1DPOV1JS0FLgI+HhF9EfEjYM0U638dMB/4ZLLt94FvAtcmy/uBVZLqI+JgRPw8p/104IyI6I+IH4YHsLPjcHCYZXVERM/wjKQaSbdL2iapE3gESEtKTbD9i8MTEdGdTM7Pc90lwIGcNoAdU6x/CbAjIoZy2rYBzcn0bwFvA7ZJeljS65P2W4EtwLcltUu6aYqfZ3OYg8Msa+y/sv8MeDXw2oioBy5J2ie6/DQd9gALJdXktGWmuO1uIDOmf2IpsAsgIh6LiKvIXsb6N+DepP1IRPxZRKwArgRulPQrJ3cYNts5OMzGV0e2X+OQpIXAXxX6AyNiG7AW+GtJlclZwX+a4uY/BbqBv5BUIenNybb3JPv6HUkNEdEPdJK9NIek35B0VtLHcpjs7clD436CWcLBYTa+zwDzgH3Ao8C3TtHn/g7wemA/8DfAv5J93mRSEdFHNiiuIFvzZ4H3RsTTySrvAbYml90+kHwOwErgu8BR4CfAZyPioWk7GpuV5H4ws9Il6V+BpyOi4Gc8ZlPlMw6zEiLpIklnSiqTdDlwFdk+CbOS4SfHzUrLq4CvkX2OYyfwhxHxRHFLMhvNl6rMzCwvvlRlZmZ5mROXqhobG2PZsmXFLsPMbEZ5/PHH90VE09j2OREcy5YtY+3atcUuw8xsRpG0bbx2X6oyM7O8ODjMzCwvDg4zM8uLg8PMzPLi4DAzs7w4OMzMLC8ODjMzy4uDYxLfWLeLLz067m3MZmZzloNjEt/a+CJ3PNJe7DLMzEqKg2MSrZk02w90c6Crr9ilmJmVDAfHJFpb0gA8ueNQUeswMyslDo5JXNDSQJlgnYPDzGyEg2MStVXlrFxcx5M7DxW7FDOzkuHgOI62TJondxzCL7wyM8tycBxHaybNwe5+th/oLnYpZmYloaDBIelySc9I2iLppnGWf0DSBknrJP1I0qqcZR9NtntG0q9NdZ/TrTXTALifw8xsWMGCQ1IKuA24AlgFXJsbDIkvR8T5EdEG3AJ8Ktl2FXANcC5wOfBZSakp7nNavfq0OqoryhwcZmaJQp5xXAxsiYj2iOgD7gGuyl0hIjpzZmuB4Y6Eq4B7IqI3Il4AtiT7O+4+p1t5qozzmxt8S66ZWaKQwdEM7MiZ35m0jSLpjyQ9T/aM44+Ps+2U9pns93pJayWt7ejoOOGDgOzzHBt3d9I/OHRS+zEzmw2K3jkeEbdFxJnAR4D/No37vSMiVkfE6qamV7xrPS9tS9P0DQzx9J4j01SdmdnMVcjg2AVkcuZbkraJ3ANcfZxt893ntBh+gnydn+cwMytocDwGrJS0XFIl2c7uNbkrSFqZM/vrwHPJ9BrgGklVkpYDK4GfTWWfhdCyYB6LaitZt/1QoT/KzKzklRdqxxExIOkG4EEgBXwuIjZJuhlYGxFrgBskXQb0AweB65JtN0m6F9gMDAB/FBGDAOPts1DHMExS9kFAn3GYmRUuOAAi4n7g/jFtH8+Z/tAk234C+MRU9nkqtGbSfP+ZvXT29FNfXXGqP97MrGQUvXN8pmjLpImADTsPF7sUM7OicnBM0QUtfoLczAwcHFOWrqlkeWOtHwQ0sznPwZGHtkyadR4p18zmOAdHHlpbGth7pJcXO3uKXYqZWdE4OPLQmkkDfpWsmc1tDo48rFpST0VKPOHgMLM5zMGRh6ryFKtOr/cZh5nNaQ6OPLVm0mzYeZjBIXeQm9nc5ODIU1smTVffIFv2Hi12KWZmReHgyJM7yM1srnNw5Gn5olrqq8s9xLqZzVkOjjyVlYnWTNpDrJvZnOXgOAGtLWmeeekIx/oGi12Kmdkp5+A4AW2ZNINDwcbdHinXzOYeB8cJuCCTHSnXHeRmNhc5OE7A4rpqmtPzPMS6mc1JDo4TNDxSrpnZXOPgOEGtmQZ2HjzGvqO9xS7FzOyUKmhwSLpc0jOStki6aZzlN0raLGm9pO9JOiNpf4ukdTk/PZKuTpZ9XtILOcvaCnkME2ltSQOw3s9zmNkcU7DgkJQCbgOuAFYB10paNWa1J4DVEXEBcB9wC0BEPBQRbRHRBlwKdAPfztnuw8PLI2JdoY5hMue3NFAm/DyHmc05hTzjuBjYEhHtEdEH3ANclbtCEhDdyeyjQMs4+3k78EDOeiWhprKcs0+rY91O35JrZnNLIYOjGdiRM78zaZvI+4EHxmm/BviXMW2fSC5vfVpS1Xg7k3S9pLWS1nZ0dORT95S1ZdI86VfJmtkcUxKd45LeDawGbh3TfjpwPvBgTvNHgXOAi4CFwEfG22dE3BERqyNidVNTU0HqbsukOXysn637S+pkyMysoAoZHLuATM58S9I2iqTLgI8BV0bE2FuU3gl8PSL6hxsiYk9k9QJ3kb0kVhQeKdfM5qJCBsdjwEpJyyVVkr3ktCZ3BUkXAreTDY294+zjWsZcpkrOQpAk4Gpg4/SXPjVnn1ZHTWXKz3OY2ZxSXqgdR8SApBvIXmZKAZ+LiE2SbgbWRsQaspem5gNfyeYA2yPiSgBJy8iesTw8Ztd3S2oCBKwDPlCoYzieVJk4r7nBwWFmc0rBggMgIu4H7h/T9vGc6csm2XYr43SmR8Sl01jiSWvLpPn8f2ylb2CIyvKS6DIyMyso/6U7SW2ZNH2DQzy1p7PYpZiZnRIOjpM00kHuJ8jNbI5wcJykJQ3VNM6vcj+Hmc0ZDo6TJMkj5ZrZnOLgmAZtmQb
aO7o4fKz/+Cubmc1wDo5pMNzPscHjVpnZHODgmAYXJEOsr9txsLiFmJmdAg6OadAwr4IVTbWs2+EzDjOb/Rwc02S4g9wj5ZrZbOfgmCZtmTT7jvay+3BPsUsxMysoB8c0GX6VrEfKNbPZzsExTV5zej2VqTIHh5nNeg6OaVJZXsaqJfU84eAws1nOwTGN2jJpNuw8zMDgULFLMTMrGAfHNGrLpDnWP8hze48WuxQzs4JxcEwjv0rWzOYCB8c0WraohoZ5FR5i3cxmNQfHNJJEaybNE9sPFbsUM7OCcXBMs7aWBp596QjdfQPFLsXMrCAKGhySLpf0jKQtkm4aZ/mNkjZLWi/pe5LOyFk2KGld8rMmp325pJ8m+/xXSZWFPIZ8tS1NMxSwcZdfJWtms1PBgkNSCrgNuAJYBVwradWY1Z4AVkfEBcB9wC05y45FRFvyc2VO+98Cn46Is4CDwPsLdQwnwiPlmtlsV8gzjouBLRHRHhF9wD3AVbkrRMRDEdGdzD4KtEy2Q0kCLiUbMgBfAK6ezqJPVuP8KloWzONJj5RrZrNUIYOjGdiRM78zaZvI+4EHcuarJa2V9Kikq5O2RcChiBjuQJhwn5KuT7Zf29HRcUIHcKL8Klkzm81KonNc0ruB1cCtOc1nRMRq4LeBz0g6M599RsQdEbE6IlY3NTVNY7XH15ZJs+vQMTqO9J7SzzUzOxUKGRy7gEzOfEvSNoqky4CPAVdGxMhf2ojYlfxuB34AXAjsB9KSyifbZ7H5QUAzm80KGRyPASuTu6AqgWuANbkrSLoQuJ1saOzNaV8gqSqZbgTeCGyO7FuSHgLenqx6HfCNAh7DCTlvSQOpMvlBQDOblQoWHEk/xA3Ag8BTwL0RsUnSzZKG75K6FZgPfGXMbbevAdZKepJsUHwyIjYnyz4C3ChpC9k+jzsLdQwnal5lilefVud+DjOblcqPv8qJi4j7gfvHtH08Z/qyCbb7MXD+BMvayd6xVdJaM2n+ff1uhoaCsjIVuxwzs2lTEp3js9GFmTSdPQNs3d9V7FLMzKaVg6NAhjvIfbnKzGYbB0eBnLV4PrWVKd9ZZWazjoOjQFJl4vyWBp9xmNms4+AooNZMms17OukdGCx2KWZm08bBUUAXZtL0DwZP7TlS7FLMzKaNg6OARjrIt3ukXDObPRwcBfSq+moW11Xx5E6PlGtms4eDo4Ak0ZZJ+84qM5tVHBwF1ppJ076vi8Pd/cUuxcxsWjg4CqxteKRcD3hoZrOEg6PAzm9pQPIQ62Y2ezg4Cqy+uoIzm+b7QUAzmzUcHKdAa0uaJ3ceIvs6ETOzmc3BcQq0LU2z72gfuw4dK3YpZmYnzcFxCrS1pAGPlGtms4OD4xQ45/Q6KsvL3EFuZrOCg+MUqEiVcd6Sep9xmNms4OA4RVozaTbsOszA4FCxSzEzOylTCg5JtZLKkumzJV0pqWIK210u6RlJWyTdNM7yGyVtlrRe0vcknZG0t0n6iaRNybJ35WzzeUkvSFqX/LRN+WiLqC2Tpqd/iGdfOlrsUszMTspUzzgeAaolNQPfBt4DfH6yDSSlgNuAK4BVwLWSVo1Z7QlgdURcANwH3JK0dwPvjYhzgcuBz0hK52z34YhoS37WTfEYiqrNr5I1s1liqsGhiOgGfhP4bES8Azj3ONtcDGyJiPaI6APuAa7KXSEiHkr2C/Ao0JK0PxsRzyXTu4G9QNMUay1JSxfWsKCmwh3kZjbjTTk4JL0e+B3g35O21HG2aQZ25MzvTNom8n7ggXE++GKgEng+p/kTySWsT0uqmqDg6yWtlbS2o6PjOKUWniRaM2mPWWVmM95Ug+NPgI8CX4+ITZJWAA9NVxGS3g2sBm4d03468H+B342I4V7ljwLnABcBC4GPjLfPiLgjIlZHxOqmptI4WWltSfPsS0fo6h0odilmZidsSsEREQ9HxJUR8bdJJ/m+iPjj42y2C8jkzLckbaNIugz4GHBlRPTmtNeTPbv5WEQ8mlPLnsjqBe4ie0lsRmjLpBkK2LDLL3Yys5lrqndVfVlSvaRaYCOwWdKHj7PZY8BKScslVQLXAGvG7PdC4HayobE3p70S+DrwxYi4b8w2pye/BVyd1DMjDL9K1v0cZjaTTfVS1aqI6CT7h/oBYDnZO6smFBEDwA3Ag8BTwL3JZa6bJV2ZrHYrMB/4SnJr7XCwvBO4BHjfOLfd3i1pA7ABaAT+ZorHUHQLaytZurDGd1aZ2YxWPsX1KpLnNq4G/jEi+iUdd6jXiLgfuH9M28dzpi+bYLsvAV+aYNmlU6y5JLVl0qzdeqDYZZiZnbCpnnHcDmwFaoFHkgf1OgtV1GzWmkmz+3APezt7il2KmdkJmWrn+D9ERHNEvC3pmN4GvKXAtc1KbZkGwA8CmtnMNdXO8QZJnxp+LkLS/yR79mF5OndJA+Vl8vMcZjZjTfVS1eeAI2Q7rd9J9jLVXYUqajarrkhxzul1PuMwsxlrqp3jZ0bEb+XM/3dJ6wpQz5zQ2pJmzbrdDA0FZWUqdjlmZnmZ6hnHMUlvGp6R9EbA70E9QW2ZNEd6B2jf11XsUszM8jbVM44PAF+U1JDMHwSuK0xJs1/uSLlnLZ5f3GLMzPI01buqnoyIVuAC4IKIuBCY0c9TFNOKpvnMryr3E+RmNiPl9QbAiOhMniAHuLEA9cwJqTJxQUuD76wysxnpZF4d617dk9CaSfPUnk56+geLXYqZWV5OJjiOO+SITay1JU3/YLB5jx/AN7OZZdLOcUlHGD8gBMwrSEVzxIVL00B2pNxfWLqguMWYmeVh0uCIiLpTVchcc1p9Na+qr/aDgGY245zMpSo7SW2ZtO+sMrMZx8FRRK2ZNFv3d3Oou6/YpZiZTZmDo4haPVKumc1ADo4iuqAljQRP7vA7yM1s5nBwFNH8qnJWLp7vBwHNbEZxcBRZa0uadTsOEeHHYsxsZihocEi6XNIzkrZIummc5TdK2ixpvaTvJa+kHV52naTnkp/rctp/UdKGZJ//IGlGP8HetjTNga4+dh70YMNmNjMULDgkpYDbgCuAVcC1klaNWe0JYHVEXADcB9ySbLsQ+CvgtcDFwF9JGn5K7p+APwBWJj+XF+oYToXWljQAT7iD3MxmiEKecVwMbImI9ojoA+4BrspdISIeiojuZPZRoCWZ/jXgOxFxICIOAt8BLpd0OlAfEY9G9trOF4GrC3gMBffqV9VRVV7m5znMbMYoZHA0Azty5ncmbRN5P/DAcbZtTqaPu09J1w+/I72joyPP0k+dilQZ5zc3ODjMbMYoic5xSe8GVgO3Ttc+I+KOiFgdEaubmpqma7cF0ZpJs2HXYfoHh4pdipnZcRUyOHYBmZz5lqRtFEmXAR8DroyI3uNsu4uXL2dNuM+Zpi2TpndgiGdePFLsUszMjquQwfEYsFLSckmVwDXAmtwVJF0I3E42NPbmLHoQ+FVJC5JO8V8FHoyIPUCnpNcld1O9F/hGAY/hlBh+layf5zCzmaBgwRERA8ANZEPgKeDeiNgk6WZJVyar3QrMB74iaZ2kNcm2B4
D/QTZ8HgNuTtoA/ivwz8AW4Hle7heZsVoWzGNhbSXrth8qdilmZsc16bDqJysi7gfuH9P28ZzpyybZ9nPA58ZpXwucN41lFp2k7Ei5PuMwsxmgJDrHLfs8x3N7j3K0d6DYpZiZTcrBUSJaMw1EwHqfdZhZiXNwlIiRDnKPlGtmJc7BUSLSNZUsW1TjBwHNrOQ5OEpIaybtlzqZWclzcJSQtkyaFzt7ePFwT7FLMTObkIOjhLQm/Rw+6zCzUubgKCGrTq+nIiU/z2FmJc3BUUKqK1K85vR6d5CbWUlzcJSY1pY063ceZnDIr5I1s9Lk4CgxbZk0R3sHaO84WuxSzMzG5eAoMe4gN7NS5+AoMSsaa6mrLndwmFnJcnCUmLIy0drikXLNrHQ5OEpQa6aBp/ccoad/sNilmJm9goOjBLW2pBkYCjbt9oCHZlZ6HBwlqG2kg9zBYWalx8FRghbXV7OkodoPAppZSXJwlCiPlGtmpaqgwSHpcknPSNoi6aZxll8i6eeSBiS9Paf9LZLW5fz0SLo6WfZ5SS/kLGsr5DEUS1smzfYD3Rzo6it2KWZmoxQsOCSlgNuAK4BVwLWSVo1ZbTvwPuDLuY0R8VBEtEVEG3Ap0A18O2eVDw8vj4h1hTmC4modeSPgoaLWYWY2ViHPOC4GtkREe0T0AfcAV+WuEBFbI2I9MDTJft4OPBAR3YUrtfSc39xAmfwEuZmVnkIGRzOwI2d+Z9KWr2uAfxnT9glJ6yV9WlLViRZYymqryjn7tDo/CGhmJaekO8clnQ6cDzyY0/xR4BzgImAh8JEJtr1e0lpJazs6OgpeayG0tqR5cschIjxSrpmVjkIGxy4gkzPfkrTl453A1yOif7ghIvZEVi9wF9lLYq8QEXdExOqIWN3U1JTnx5aGtqVpDnb3s/3AnLpKZ2YlrpDB8RiwUtJySZVkLzmtyXMf1zLmMlVyFoIkAVcDG0++1NLU2pIG3M9hZqWlYMEREQPADWQvMz0F3BsRmyTdLOlKAEkXSdoJvAO4XdKm4e0lLSN7xvLwmF3fLWkDsAFoBP6mUMdQbGefNp95FSkHh5mVlPJC7jwi7gfuH9P28Zzpx8hewhpv262M05keEZdOb5WlqzxVxvnNDb4l18xKSkl3jlt2pNyNuzvpH5zsjmUzs1PHwVHiWjNp+gaGeHrPkWKXYmYGODhK3shIuX6ew8xKhIOjxDWn59Gcnsct33qaf/jecxzp6T/+RmZmBeTgKHGS+MLvXcwbzlzEp77zLJfc8hC3P/w8x/r8dkAzKw7NhaeSV69eHWvXri12GSdt/c5DfOo7z/KDZzponF/FDW85k2tfu5Sq8lSxSzOzWUjS4xGx+hXtDo6ZZ+3WA/zdt5/h0fYDLGmo5oZLV/KO1S1UpHwCaWbTx8Exi4Jj2I+37OPWbz/DE9sPsXRhDX9y2UquamsmVaZil2Zms8BEweF/os5gbzirka/94Ru4630XUVddzo33Psmvfvphvrl+N0NDs/8fBGZWHA6OGU4SbzlnMd/84Jv43+/+BVJl4oYvP8Hb/uGHfGfzSx5Z18ymnYNjlpDE5eedzgMfuoS/v6aNnv5B/uCLa7n6tv/gkWc7HCBmNm0cHLNMqkxc1dbMd2/8ZW75rQvYd7SP937uZ7zr9kf5afv+YpdnZrOAO8dnud6BQe59bAf/6/tb2Hukl19a2ciNbz2bC5cuKHZpZlbifFfVHA2OYT39g3zp0W189gfPc6Crj8tes5g/fevZnLukodilmVmJcnDM8eAY1tU7wOd/vJXbH36ezp4Bfv380/nTt67krMV1xS7NzEqMg8PBMcrhY/3c+cN27vzRCxzrH+TqtmY+dNlKzlhUW+zSzKxEODgcHOM60NXH7Q8/zxd+spX+weAdv9jCB39lJc3pecUuzcyKzMHh4JjU3s4ePvuD5/nyT7cDcO3FGf7oLWexuL66yJWZWbE4OBwcU7Lr0DH+8fvP8ZW1O0mVievesIwP/PKZLKytLHZpZnaKOTgcHHnZtr+Lv//uc3x93S5qKlL83puW8/u/tIKGeRXFLs3MTpGiBIeky4G/B1LAP0fEJ8csvwT4DHABcE1E3JezbBDYkMxuj4grk/blwD3AIuBx4D0R0TdZHQ6OE/fcS0f4zHef49837KGuupxLVjZxbnM95zc3cN6SBhb4TMRs1jrlwSEpBTwLvBXYCTwGXBsRm3PWWQbUA38OrBkTHEcjYv44+70X+FpE3CPpfwNPRsQ/TVaLg+Pkbdp9mP/zSDuPbz/IjgPHRtqb0/M4LwmSc5MwaaqrKmKlZjZdJgqO8gJ+5sXAlohoTwq4B7gKGAmOiNiaLBuayg4lCbgU+O2k6QvAXwOTBoedvHOXNPCZay4E4FB3H5t2d7Jh12E27jrMpt2dPLjppZF1X1VfzXnN9Zy7pCF7ZtLcwGn1VWT/5zOzma6QwdEM7MiZ3wm8No/tqyWtBQaAT0bEv5G9PHUoIgZy9tk83saSrgeuB1i6dGl+lduk0jWVvPGsRt54VuNIW2dPP5t3d7IxCZONuzv53tN7GT6hbZxfxXnN9Zy3JBsk5zXX05ye5zAxm4EKGRwn64yI2CVpBfB9SRuAw1PdOCLuAO6A7KWqAtVoifrqCl63YhGvW7FopK2rd4Cn9mTDZMOuTjbtPswPn9vHYPKukAU1FZzX3JBzZlLP0oU1DhOzElfI4NgFZHLmW5K2KYmIXcnvdkk/AC4EvgqkJZUnZx157dNOrdqqclYvW8jqZQtH2nr6B7NhsruTjTsPs3H3Ye78UTv9g9kwqasuT85K6pMzkwaWL6qlzG81NCsZhQyOx4CVyV1Qu4BreLlvYlKSFgDdEdErqRF4I3BLRISkh4C3k72z6jrgGwWp3gqiuiLFhUsXjBqdt3dgkGdfPMrG3YfZsOswm3Yd5gs/2UbfQLbrq7YyxblLGkbu5jp3SQPLGmuoKk8V6zDM5rRC3477NrK326aAz0XEJyTdDKyNiDWSLgK+DiwAeoAXI+JcSW8AbgeGyL4z5DMRcWeyzxVkQ2Mh8ATw7ojonawO31U18/QPDvHcS9kwGe432bynk57+bJiUCTILa1jeWMuKxvmsaKplRWMtK5rmuyPebJr4AUAHx4w3MDhE+74uNu0+zAsdXTy/r4v2ji5e2Hd0JFAAaipT2UBpms/yxlrObMqGy/KmWuZXlXK3nllpKcbtuGbTqjxVxtmn1XH2aaOHgB8aCl7s7BkJkec7umjf18W6HQf55vrd5P7baHFdFSuaalneOD8bKMl0ZsE8ylN+IabZVDg4bMYrKxNL0vNYkp7Hm1Y2jlrW0z/I9gPdtHdkA+WFfV20dxzlgY17ONTdP7JeRUosXVgzEijDZywrmmpZVFvpS19mORwcNqtVV6TGPUsBONjVR/u+o7QnZyjtHUd5YV8XjzzbQd/gy5e+6qrLWdE0nzMba0ddAlveWMu8SnfQ29zj4LA5a0FtJb9Yu5BfPGPhqPbBoWDXwWMjofLCvi7a9x3lJ+37+doTo+/+flV9NWcsqmHZolrOaEx+L
6rhjEXuT7HZy/9lm42RKhNLF9WwdFENb3716GXdfQPJ5a5soGzb3822/V187+m97Ds6+ua+xvlVLEtCZFmyv2WLalm2qJaGGo8ybDOXg8MsDzWV5dlnSpY0vGLZ0d4Btu3PhsnW/V1s29fNtgNd/Pj5fXz15z2j1k3XVIwEytjfC92nYiXOwWE2TeZXTRwqw530W/flBMv+bh7fdpD/9+RuhnLu/KqrKueMxrGBkr0EtrjOz6hY8Tk4zE6ByTrpewcG2XnwGNv2d7F1X3c2YPZ3sXl3Jw9ufJGBnFSZV5FK+lCG+1NqWTS/kqGhYGAoGBz5PUT/4Oj5gaFgcPDl9fqHhkbNj1ovmR8YHMpZFgyM7DPb3p8zX1NZzuK6KhbXV3NafRWnJb8X11WzuL6KRbVVpDx0zKzg4DArsqryFGc2zefMple8foaBwSF2H+pJzlC62Jr0qTzf0cVDT4+++ysfqTJRnvykykR5qmykLfd3xSvas/NVFeU5+8i2He0dYPfhHtbtOMT+rle+Wy1VJprmV2XDpL6axXU54VJfzWlJwCysqfTYZCXOwWFWwspTZSMd9dA0atlg8uDjwa4+ylPDf9zLcsLg5T/qYwOh0Je7+gaG6Djay0udPezt7GXvkR5e6uzhpc5s244D3azdeoCDOc/SjBxzmUaduSyuywmXnLYFNRW+bFckDg6zGSpVJprT82hOzyt2Ka9QWV42pdp6+gfpOJINlr1JqLx05OXAeWFfF4+2H+DwsVcGTGWqjKa6qpHLYsNhU1ddTlV5GVXlqezvipenqytSLy+rKBuZriwv82W0PDg4zKxoqitSZBbWkFlYM+l6Pf2D2WAZFTAvTz+39yg/2rKPIz0Dk+5nMhUpvRw25WVUDYdMRU7bmMAZG0zD4VRZXkZFaswZYHJJcOzZ38hZYWr89orUmPXKVPRLeQ4OMyt51RWpnEt2EzvWN0hX3wC9A0P09g9mf4+dHhiktz873TPSnvzuz5ketd0gR3sH2H+0b2R5T866w68AOFUkXhk8EwTUnddddNzvLV8ODjObNeZVpooyDMzQUNA3ODqYcu9G6x97d9pQzvzgOO2D49/l9vKdba9sf+VnZNerqpj+wTsdHGZmJ6msTFSXpaiuSAGzf1QAjyNtZmZ5cXCYmVleHBxmZpYXB4eZmeWloMEh6XJJz0jaIummcZZfIunnkgYkvT2nvU3STyRtkrRe0rtyln1e0guS1iU/bYU8BjMzG61gd1VJSgG3AW8FdgKPSVoTEZtzVtsOvA/48zGbdwPvjYjnJC0BHpf0YEQcSpZ/OCLuK1TtZmY2sULejnsxsCUi2gEk3QNcBYwER0RsTZaNenomIp7Nmd4taS/ZgXoOFbBeMzObgkJeqmoGduTM70za8iLpYqASeD6n+RPJJaxPS6qaYLvrJa2VtLajoyPfjzUzswmU9AOAkk4H/i9wXUQMn5V8FHiRbJjcAXwEuHnsthFxR7IcSR2Stp1gGY3AvhPcdjby9/Eyfxej+fsYbTZ8H2eM11jI4NgFZHLmW5K2KZFUD/w78LGIeHS4PSL2JJO9ku7ilf0jrxARTcdbZ5I61kbE6hPdfrbx9/Eyfxej+fsYbTZ/H4W8VPUYsFLSckmVwDXAmqlsmKz/deCLYzvBk7MQlB2I/2pg43QWbWZmkytYcETEAHAD8CDwFHBvRGySdLOkKwEkXSRpJ/AO4HZJm5LN3wlcArxvnNtu75a0AdhA9lTwbwp1DGZm9kqKiOOvNYdJuj7pLzH8feTydzGav4/RZvP34eAwM7O8eMgRMzPLi4PDzMzy4uCYxPHG2porJGUkPSRpczJ+2IeKXVMpkJSS9ISkbxa7lmKTlJZ0n6SnJT0l6fXFrqlYJP1p8v+TjZL+RVJ1sWuabg6OCeSMtXUFsAq4VtKq4lZVNAPAn0XEKuB1wB/N4e8i14fI3jFo8PfAtyLiHKCVOfq9SGoG/hhYHRHnASmyjyLMKg6OiY2MtRURfcDwWFtzTkTsiYifJ9NHyP5RyHv4mNlEUgvw68A/F7uWYpPUQPb2+TsBIqIvZ0DSuagcmCepHKgBdhe5nmnn4JjYtIy1NdtIWgZcCPy0yKUU22eAvwCGjrPeXLAc6ADuSi7d/bOk2mIXVQwRsQv4O7Ijf+8BDkfEt4tb1fRzcNiUSZoPfBX4k4joLHY9xSLpN4C9EfF4sWspEeXALwD/FBEXAl3AnOwTlLSA7JWJ5cASoFbSu4tb1fRzcEzspMbamm0kVZANjbsj4mvFrqfI3ghcKWkr2UuYl0r6UnFLKqqdwM6IGD4LvY9skMxFlwEvRERHRPQDXwPeUOSapp2DY2InPNbWbJOMC3Yn8FREfKrY9RRbRHw0IloiYhnZ/y6+HxGz7l+VUxURLwI7JL06afoVct67M8dsB14nqSb5/82vMAtvFCjpYdWLKSIGJA2PtZUCPhcRm46z2Wz1RuA9wAZJ65K2v4yI+4tXkpWYD5IdR64SaAd+t8j1FEVE/FTSfcDPyd6N+ATJ6x1mEw85YmZmefGlKjMzy4uDw8zM8uLgMDOzvDg4zMwsLw4OMzPLi4PD7DgkHU1+L5P029O8778cM//j6dy/WSE4OMymbhmQV3AkA91NZlRwRMSse8rYZh8Hh9nUfRL4JUnrkncupCTdKukxSesl/RcASW+W9ENJa0ieoJb0b5IeT97TcH3S9kmyo6iuk3R30jZ8dqNk3xslbZD0rpx9/yDn3Rd3J08oI+mTyTtT1kv6u1P+7dic4SfHzabuJuDPI+I3AJIAOBwRF0mqAv5D0vBIqL8AnBcRLyTzvxcRByTNAx6T9NWIuEnSDRHRNs5n/SbQRvbdFo3JNo8kyy4EziU7XPd/AG+U9BTwn4FzIiIkpaf30M1e5jMOsxP3q8B7k2FYfgosAlYmy36WExoAfyzpSeBRsoNnrmRybwL+JSIGI+Il4GHgopx974yIIWAd2Utoh4Ee4E5Jvwl0n+SxmU3IwWF24gR8MCLakp/lOe9e6BpZSXoz2VFTXx8RrWTHLzqZ14n25kwPAuURMUD25WP3Ab8BfOsk9m82KQeH2dQdAepy5h8E/jAZch5JZ0/wAqMG4GBEdEs6h+zrd4f1D28/xg+BdyX9KE1k37D3s4kKS96V0pAMPPmnZC9xmRWE+zjMpm49MJhccvo82fdsLwN+nnRQdwBXj7Pdt4APJP0Qz5C9XDXsDmC9pJ9HxO/ktH8deD3wJBDAX0TEi0nwjKcO+IakarJnQjee0BGaTYFHxzUzs7z4UpWZmeXFwWFmZnlxcJiZWV4cHGZmlhcHh5mZ5cXBYWZmeXFwmJlZXv4/QAgzW/yBXxUAAAAASUVORK5CYII=\n",
"text/plain": [
""
]
@@ -485,7 +483,7 @@
"outputs": [
{
"data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAn/klEQVR4nO3de5xdZ13v8c937pOZyaXJ5J4maZs2DRRSGCtYxQtQq2KL4MEUEVCx4KGAqHCKx4NYjp56AeFgX0qBKsqlQFEMnkoFBURuJoVy6eyWpmmhSXaSyXXPJHOf3/ljrZnsTPbM7DTZWXvv+b5fr51Z61nrWfs3+5VZv72eZ63nUURgZmY2XUPWAZiZWXVygjAzs5KcIMzMrCQnCDMzK8kJwszMSnKCMDOzkpwgzMysJCcIq3mSBopeE5IGi9Z/+Ukc7wuSXlWJWM1qSVPWAZidq4jonFyW9Djwqoj4XHYRVZakpogYyzoOq3++grC6JalB0q2SHpV0WNLHJV2UbmuT9KG0/JikHZJWSPoj4MeAv0yvQP5yhmN/QtJ+Sccl/YekpxRta5f0DknfT7f/p6T2dNuPSvpK+p5PSHplWn7aVYukV0r6z6L1kPRaSY8Aj6Rl706PUZB0v6QfK9q/UdLvpb97f7p9naQ7JL1j2u+yXdIbz/0Tt3rjBGH17HXAC4EfB1YDR4E70m2vABYB64ClwGuAwYj4n8CXgFsiojMibpnh2P8CbAKWA98APly07c+BZwI/AlwEvBmYkLQ+rfceoBvYCjxwFr/PC4EfBrak6zvSY1wEfAT4hKS2dNtvAzcBPwssBH4NOAl8ELhJUgOApGXA89L6ZqdxE5PVs9eQnOj3AEh6G/ADSb8CjJIkhssi4tvA/Wdz4Ii4a3I5Pe5RSYuAfpKT8bMiYm+6y1fS/V4KfC4iPpqWH05f5fo/EXGkKIYPFW17h6TfB64AvgW8CnhzRDycbv/W5HtKOg48F/gssA34QkQcOIs4bJ7wFYTVs/XAP6bNOceAHDAOrAD+HrgPuFvSPkl/Kqm5nIOmzTe3p803BeDxdNOy9NUGPFqi6roZysv1xLQ4fldSLm3GOkZyRbSsjPf6IPCydPllJJ+F2RmcIKyePQH8TEQsLnq1RcTeiBiNiD+MiC0kTUEvAF6e1ptriOOXAjeSNM0sAjak5QIOAUPApTPEU6oc4ASwoGh9ZYl9puJK+xveDLwEWBIRi4HjaQxzvdeHgBslPR24EvjUDPvZPOcEYfXsr4E/Stv+kdQt6cZ0+SclXSWpESiQNDlNpPUOAJfMctwuYJikeWgB8MeTGyJiArgLeKek1enVxrMltZL0UzxP0kskNUlaKmlrWvUB4EWSFki6DPj1OX63LmAM6AOaJL2VpK9h0vuBt0vapMTTJC1NY9xD0n/x98AnI2JwjveyecoJwurZu4HtwL9K6ge+RtLJC8k39HtIkkMO+CKnmlreDfyipKOS/m+J4/4d8H1gL9CbHrfY7wLfITkJHwH+BGiIiB+QdBr/Tlr+APD0tM5fACMkyemDnN7pXcp9wGeA76WxDHF6E9Q7gY8D/5r+jh8A2ou2fxC4Cjcv2SzkCYPM5h9JzyFpalofPgnYDHwFYTbPpJ3xbwDe7+Rgs3GCMJtHJF0JHANWAe/KNBirem5iMjOzknwFYWZmJdXNk9TLli2LDRs2ZB2GmVlNuf/++w9FRHepbXWTIDZs2MDOnTuzDsPMrKZI+v5M29zEZGZmJTlBmJlZSU4QZmZWkhOEmZmV5ARhZmYlOUGYmVlJThBmZlZS3TwHYWb1ISIYGp2gf2iUwtAYhaFR+ofGkvXB5OfI2ATdXa2sWtzOqkVtrFzUxsK2siYErBsTE8GhE8McLAwzEcHT1i4+7+9R0QQh6XqSsfUbSUaOvH3a9otJxqVfnO5za0Tcm257C8mkKePA6yPivkrGambnx8jYqZN7/7ST++TJvvikf/r6GIXBUcYmzn6MuM7WJlYuamNV+lq5qH1qedWi9jSJNCFp7oNlKCIoDI1xsDDE/sIQBwrDHCgMTb32F4Y5WBiir3946nN6+rrF/NNrrz3vsVQsQaQzdd0BPB/YA+yQtD0ieot2+33g4xHxV5K2APcCG9LlbcBTgNXA5yRdHhHjlYrXzE6JCAaGxzg8MMLhE8McGhhJlgeGTz+ZT0sEhcFRhscm5jx+Z2sTC9ua6GprpqutieVdbVza3URXWrYwLe9qa2Jhe/Np+y5sa6a5sYGD/UPsPz5E/vgQ+eOD5I8n6/uOD/G9A30c7B9m+likHS2NaRJpPz2RLE4TycJ2FrZXLokMjY6nJ/ph9heGOFh00j+Qru8vDDE0euZnuKi9mRULW1mxsI1Ny5dNLa9Y2MbaJe0l3u3cVfIK4hpgV0TsBpB0N8k8vsUJIjg1TeIiYF+6fCNwd0QMA49J2pUe76sVjNesrg2PjXPkRHKiPzQwPHXyT9ZPLR8eGObQiRFGZjjRtzU3FJ3Am1nU3szaJe1TJ/HpJ/Opk3578rOztYnGhnM/Aa9dsoC1SxbMuH10fIKD/cPsT5NH/tjpyeRLjxziYP8Q0y9WFkwlkVOJZOWiNlanVyGrFrWxqL35tCQyNj7BoYGR9Bv/0Azf/oc5Pjh6RpytTQ2sXJSc6K9au5jndbWyclEbyxe2sWJyuauN9pbGc/7MzlYlE8QaTp8CcQ+npnuc9DaS6SBfB3SQTAI/Wbd4Gsc9adlpJN0M3Axw8cUXn5egzWrFxERwbHA0OaGXOMEfnkoCSULoHxoreZyWpgaWdbSwtLOVpZ0tXL6ii2WdLSztbGFpR1K2rLOVZZ2tLOloprXpwp+onozmxgbWLG5nzeKZv12PpUlkMnFMvyL58q5DHCicmUTamxtZtaiNBa2NHCgMc2jgzKuVxgaxvKuV5Qvb2LC0g2ddsnTqG3/xt/9qbvbKupP6JuBvI+Idkp4N/L2kp5ZbOSLuBO4E6Onp8cQWVhcmOx/3HRsif2yQvceSk9WhgeGpb/6HBkY4enKE8RJt9RJctODUCf4pqxeyrLOVpUVJYFnRyb+ztXpPUJXW1NjA6sXtrF7cDiwpuc/Y+AR9A8NFVyGnEsnJkTGesmoRKxalJ/2utvTbfytLO1rPy5VSliqZIPYC64rW16ZlxX4duB4gIr4qqQ1YVmZds5pUGBpl37FB8seG0pP/IPuODSVl6bfX0fHTT/xtzQ0s72pjaWcLa5csYOu6xSW/4S/tbGHJgpaaPzFVk6bGhrSpqR3mWUNFJRPEDmCTpI0kJ/dtwEun7fMD4LnA36ZTIbYBfcB24COS3knSSb0J+K8Kxmp2XgyPjbP/eHriT0/6+45PnvyTRDAwfHpTT2ODWLkwadveum4xP3PVStYsbp9q/16zuJ3FC5rn7bd8y07FEkREjEm6BbiP5BbWuyLiQUm3ATsjYjvwO8D7JL2RpMP6lekk6g9K+jhJh/YY8FrfwWRZm5gI+gaGk5N+2tQwlQjSk/+hgeEz6i3taGHV4qQd+kcuXcbqxUnn5+rFbaxe3M7
yrjZ/47eqVDdzUvf09IQnDLJzNTY+wQ+OnOTRvhPsOjjAroMDPHH0JPuODXKgMHRG009HSyOr0jbs1YuSE/7kt/7Jh7jammujU9fmJ0n3R0RPqW1Zd1KbZWJwZJxH+wZ4tC9JApM/Hz90kpHxU7d3rljYyvqLOuhZvyQ5+S9uZ83kFcCiyt4zb5Y1Jwira0dOjEyd/Cdfj/YNsPfY4NRtiQ2C9Us7uLS7k5/avILLlndyaXcHly7vnHfDN5gVc4KwmjcxEew7Ppie/JOmoUcPDrCrb4AjJ0am9mtrbuCSZZ084+IlvKRnHZct7+Sy5Z2sX7qgZu7tN7uQnCCsZoyMTfD9wydOuxLY1TfAowdPMDh66h6GJQuauWx5J9dtSa8GlndyWXcnaxa30+DOYLOyOUFYVTpyYoTPP3SQXZN9BAcH+P6Rk6c9GLZmcTuXLu/kh665KLka6E6uCJZ2tmYYuVn9cIKwqvS//7mXf/jmXpoaxIZlHVy+ooufvWoVly7v4LLuLi7p7qCj1f99zSrJf2FWlb699zjPubybD7yih+ZGz2tllgX/5VnVGRodZ3ffAFvXLnJyMMuQ//qs6nzvQD8TAVeuWjj3zmZWMU4QVnVy+QLgBGGWNScIqzq5fD8dLY1cfNHMk8GYWeU5QVjV6d1X4IqVXX5mwSxjThBWVSKC3P4CW1a7ecksa04QVlX2HB2kf2jM/Q9mVcAJwqqKO6jNqocThFWV3nwBCTav7Mo6FLN5zwnCqkouX2Dj0g4WtPghf7OsOUFYVcnl+928ZFYlnCCsavQPjfKDIye5cpWbl8yqQUUThKTrJT0saZekW0ts/wtJD6Sv70k6VrRtvGjb9krGadXh4f39gDuozapFxRp6JTUCdwDPB/YAOyRtj4jeyX0i4o1F+78OuLroEIMRsbVS8Vn16fUdTGZVpZJXENcAuyJid0SMAHcDN86y/03ARysYj1W5XL7A4gXNrFrUlnUoZkZlE8Qa4Imi9T1p2RkkrQc2Av9eVNwmaaekr0l64Qz1bk732dnX13eewras9Ob7uXLlQiQPsWFWDaqlk3obcE9EjBeVrY+IHuClwLskXTq9UkTcGRE9EdHT3d19oWK1ChifCB7eX3DzklkVqWSC2AusK1pfm5aVso1pzUsRsTf9uRv4Aqf3T1idefzwCYZGJ3wHk1kVqWSC2AFskrRRUgtJEjjjbiRJm4ElwFeLypZIak2XlwHXAr3T61r96N3nDmqzalOxu5giYkzSLcB9QCNwV0Q8KOk2YGdETCaLbcDdERFF1a8E3itpgiSJ3V5895PVn1y+QFOD2LSiM+tQzCxV0fEMIuJe4N5pZW+dtv62EvW+AlxVydisuuTyBS5b3klrU2PWoZhZqlo6qW2e8xAbZtXHCcIyd+TECPsLQ+6gNqsyThCWuck5ILasWpRxJGZWzAnCMndqkiBfQZhVEycIy1xvvsDyrlaWdrZmHYqZFXGCsMy5g9qsOjlBWKZGxibYddAJwqwaOUFYpnYdHGB0PNiy2gnCrNo4QVimTt3B5A5qs2rjBGGZyuULtDY1sGFpR9ahmNk0ThCWqd58gStWdtHU6P+KZtXGf5WWmYggly9w5Ur3P5hVIycIy8yBwjBHT466g9qsSjlBWGZOPUHtBGFWjZwgLDO9aYLY7DuYzKqSE4RlpjdfYO2Sdha2NWcdipmV4ARhmcnlC2xx85JZ1XKCsEwMjozz+KET7n8wq2JOEJaJhw/0MxHuoDarZhVNEJKul/SwpF2Sbi2x/S8kPZC+vifpWNG2V0h6JH29opJx2oV3aogNJwizatVUqQNLagTuAJ4P7AF2SNoeEb2T+0TEG4v2fx1wdbp8EfAHQA8QwP1p3aOVitcurN59BTpbm1i7pD3rUMxsBpW8grgG2BURuyNiBLgbuHGW/W8CPpou/zTw2Yg4kiaFzwLXVzBWu8By+QJXruqioUFZh2JmM6hkglgDPFG0victO4Ok9cBG4N/Ppq6kmyXtlLSzr6/vvARtlTcxETy033NAmFW7aumk3gbcExHjZ1MpIu6MiJ6I6Onu7q5QaHa+7Tk6yMDwmBOEWZWrZILYC6wrWl+blpWyjVPNS2db12pMb/444DuYzKpdJRPEDmCTpI2SWkiSwPbpO0naDCwBvlpUfB9wnaQlkpYA16VlVgd68/00CK5Y4SE2zKpZxe5iiogxSbeQnNgbgbsi4kFJtwE7I2IyWWwD7o6IKKp7RNLbSZIMwG0RcaRSsdqFlcsX2Lisg/aWxqxDMbNZVCxBAETEvcC908reOm39bTPUvQu4q2LBWWZy+QJb1y3OOgwzm0O1dFLbPHF8cJQ9Rwfd/2BWA5wg7IJ6yE9Qm9UMJwi7oKaG2PAscmZVb8Y+CEkvKqP+UNrPYFaWXL6fizpaWN7VmnUoZjaH2Tqp3wf8EzDbWAjPYVontNlscvuTITYkD7FhVu1mSxD/EhG/NltlSR86z/FYHRsbn+Ch/f28/Fnrsw7FzMowYx9ERLxsrsrl7GM26bFDJxgZm/AdTGY1ouxOakmXSfqQpE9KenYlg7L61OsOarOaMlsndVtEDBUVvR14c7r8aWBrBeOyOpTL99PcKC7t7sw6FDMrw2xXEJ+W9PKi9VFgA7AeOKtRV80gucX1suVdtDT57mqzWjDbX+r1wEJJn5H0HOB3SSby+QXgly9EcFZfetNJgsysNszYxJTOzfCXkv4e+F/AbwK/HxGPXqjgrH4cGhimr3/YT1Cb1ZDZ+iB+GHgTMAL8MTAI/JGkvcDbI+LYBYnQ6kLOQ2yY1ZzZnoN4L/CzQCfwNxFxLbBN0o8DHyNpbjIry2SC8C2uZrVjtgQxRtIp3UFyFQFARHwR+GJlw7J6k8v3s3JhG0s6WrIOxczKNFuCeCnwapLk8PJZ9jObU+8+d1Cb1ZrZOqm/B/zOBYzF6tTw2DiP9g3wvC3Lsw7FzM7CjLe5SvrnuSqXs4/ZIwcGGJsI9z+Y1ZjZmph+VNL2WbYL2HKe47E65A5qs9o0W4K4sYz6I7NtlHQ98G6gEXh/RNxeYp+XAG8DAvhWRLw0LR8HvpPu9oOIuKGMeKwK9eYLtDU3sGFpR9ahmNlZmK0P4pzuVJLUCNwBPB/YA+yQtD0ieov22QS8Bbg2Io5KKm6kHoyIrecSg1WHXL7AFSsX0tjgOSDMakklB8W5BtgVEbsjYgS4mzOvSn4DuCMijgJExMEKxmMZiAhy+X4/IGdWgyqZINYATxSt70nLil0OXC7py5K+ljZJTWqTtDMtf2GpN5B0c7rPzr6+vvMavJ0f+eNDHB8cZYtvcTWrOXMmCEk/L6lSiaQJ2AT8BHAT8D5Ji9Nt6yOih+R5jHdJunR65Yi4MyJ6IqKnu7u7QiHauXAHtVntKufE/0vAI5L+VNLmszj2XmBd0fratKzYHmB7RIxGxGPA90gSBhGxN/25G/gCcPVZvLdVid59SYLY7ARhVnPmTBDptKJXA48Cfyvpq2nTzlxtBjuATZI2SmoBtgHTb5v9FMnVA5KWkTQ57Za0RFJrUfm1QC9Wc3
L7C6xfuoDO1tlumDOzalRW01FEFIB7SDqaV5HMCfENSa+bpc4YcAtwH5ADPh4RD0q6TdLkLav3AYcl9QKfB94UEYeBK4Gdkr6Vlt9efPeT1Y5cvp8rV/rqwawWzfm1Lj2Z/ypwGfB3wDURcVDSApJv9e+ZqW5E3AvcO63srUXLAfx2+ire5yvAVeX/GlaNTo6M8fjhE7xw6/R7E8ysFpRz3f9i4C8i4j+KCyPipKRfr0xYVg8e2t9PBB6kz6xGlZMg3gbkJ1cktQMrIuLxiPi3SgVmtW+yg9p3MJnVpnL6ID4BTBStj6dlZrPK5Qt0tTWxdkl71qGY2ZNQToJoSp+EBiBd9qwvNqdcvsCVqxYieYgNs1pUToLoK7rrCEk3AocqF5LVg4mJ4KH9HmLDrJaV0wfxGuDDkv6SZIjvJ/AMczaH7x85ycmRcXdQm9WwORNERDwKPEtSZ7o+UPGorOZ5iA2z2lfW462Sfg54CskAegBExG0VjMtqXC5foLFBXL7CVxBmtaqcwfr+mmQ8pteRNDH9N2B9heOyGpfLF7hkWQdtzY1Zh2JmT1I5ndQ/EhEvB45GxB8CzyYZM8lsRrl8v5uXzGpcOQliKP15UtJqYJRkPCazko6dHGHvsUEnCLMaV04fxKfTORr+DPgGydzR76tkUFbbcvl+ALasdoIwq2WzJoh0oqB/i4hjwCcl/TPQFhHHL0RwVptO3cHkDmqzWjZrE1NETAB3FK0POznYXHL5Ass6W1je1ZZ1KGZ2Dsrpg/g3SS+Wx0uwMvWmQ2yYWW0rJ0G8mmRwvmFJBUn9kgoVjstq1Oj4BI8cGHCCMKsD5TxJ7YZkK9vuvhOMjE94DCazOlDOjHLPKVU+fQIhM/AQG2b1pJwmpjcVvf4X8GmSSYTmJOl6SQ9L2iXp1hn2eYmkXkkPSvpIUfkrJD2Svl5RzvtZ9nL5Ai2NDVzS3ZF1KGZ2jsppYvr54nVJ64B3zVVPUiPJHVDPB/YAOyRtj4jeon02AW8Bro2Io5KWp+UXAX8A9JA8d3F/Wvdoub+YZaM3X2DTik6aG8v57mFm1ezJ/BXvAa4sY79rgF0RsTudZOhu4MZp+/wGcMfkiT8iDqblPw18NiKOpNs+C1z/JGK1CyznO5jM6kY5fRDvIfkWD0lC2UryRPVc1pDMHTFpD/DD0/a5PH2PLwONwNsi4jMz1F1TIrabgZsBLr744jJCsko62D/EoYERd1Cb1YlyhtrYWbQ8Bnw0Ir58Ht9/E/ATwFrgPyRdVW7liLgTuBOgp6cn5tjdKmxyiA1fQZjVh3ISxD3AUESMQ9K3IGlBRJyco95eYF3R+tq0rNge4OsRMQo8Jul7JAljL0nSKK77hTJitQxN3sHkKwiz+lDWk9RAe9F6O/C5MurtADZJ2iipBdgGbJ+2z6dIE4GkZSRNTruB+4DrJC2RtAS4Li2zKta7r8DqRW0sWtCcdShmdh6UcwXRVjzNaEQMSFowV6WIGJN0C8mJvRG4KyIelHQbsDMitnMqEfQC48CbIuIwgKS3kyQZgNsi4shZ/WZ2weXyBY/galZHykkQJyQ9IyK+ASDpmcBgOQePiHuBe6eVvbVoOYDfTl/T694F3FXO+1j2hkbH2X3oBNc/dWXWoZjZeVJOgvgt4BOS9pFMObqSZApSsymPHBhgfCLcQW1WR8p5UG6HpM3AFWnRw2mnstmU3nwyCrwThFn9mLOTWtJrgY6I+G5EfBfolPTfKx+a1ZJcvp8FLY2sv2jO7ikzqxHl3MX0G+mMcgCkTzb/RsUisprUmy+weWUXDQ2eNsSsXpSTIBqLJwtKx1hqqVxIVmsiwkNsmNWhcjqpPwN8TNJ70/VXp2VmAOw9Nkj/0JgThFmdKSdB/A+S8Y5+M13/LPC+ikVkNad3n+eAMKtHczYxRcRERPx1RPxiRPwi0Au8p/KhWa3I5fuRYPNKTz5oVk/KuYJA0tXATcBLgMeAf6hkUFZbcvkCG5Z20NFa1n8nM6sRM/5FS7qcJCncBBwCPgYoIn7yAsVmNSK3v8BTPMSGWd2ZrYnpIeCngBdExI9GxHtIxksymzIwPMb3D5/kypVOEGb1ZrYE8SIgD3xe0vskPZdkqA2zKQ/l3UFtVq9mTBAR8amI2AZsBj5PMibTckl/Jem6CxSfVbmpOSDcxGRWd8q5i+lERHwkIn6eZOKeb5Lc+mpGb76fRe3NrFrUlnUoZnaelfMk9ZSIOBoRd0bEcysVkNWW5AnqLooetjezOnFWCcKs2PhE8NB+D7FhVq+cIOxJe/zwCYZGJ5wgzOqUE4Q9aVMd1E4QZnXJCcKetFy+QFOD2LSiM+tQzKwCKpogJF0v6WFJuyTdWmL7KyX1SXogfb2qaNt4Ufn2SsZpT04u38+l3Z20NjVmHYqZVUDFBs9J5424A3g+sAfYIWl7RPRO2/VjEXFLiUMMRsTWSsVn5653X4FnXXJR1mGYWYVU8griGmBXROyOiBHgbuDGCr6fXUBHT4ywvzDkDmqzOlbJBLEGeKJofU9aNt2LJX1b0j2S1hWVt0naKelrkl5Y6g0k3Zzus7Ovr+/8RW5z8hPUZvUv607qTwMbIuJpJBMRfbBo2/qI6AFeCrxL0qXTK6cP7fVERE93d/eFidiAZA5q8BhMZvWskgliL1B8RbA2LZsSEYcjYjhdfT/wzKJte9Ofu4EvAFdXMFY7S735At1drSzrbM06FDOrkEomiB3AJkkbJbUA24DT7kaStKpo9QYgl5YvkdSaLi8DriWZyc6qRC7f76sHszpXsbuYImJM0i3AfUAjcFdEPCjpNmBnRGwHXi/pBmAMOAK8Mq1+JfBeSRMkSez2Enc/WUZGxibYdbCfH7/czXpm9ayic0RGxL3AvdPK3lq0/BbgLSXqfQW4qpKx2ZP3aN8Ao+PBlas8B7VZPcu6k9pqkIfYMJsfnCDsrPXuK9DS1MDGZR1Zh2JmFeQEYWctt7/AFSu6aGr0fx+zeua/cDsrEUEu3+/mJbN5wAnCzsrB/mGOnBhxB7XZPOAEYWfFT1CbzR9OEHZWevclCWKzE4RZ3XOCsLOSyxdYs7idRe3NWYdiZhXmBGFnJZcveARXs3nCCcLKNjgyzmOHTrj/wWyecIKwsj18oJ+JgC2+g8lsXnCCsLLlfAeT2bziBGFly+ULdLY2sW7JgqxDMbMLwAnCypbLF9i8souGBmUdipldAE4QVpaJifAkQWbzjBOElWXP0UEGhsecIMzmEScIK8upITZ8B5PZfOEEYWXJ5Qs0CDav9BWE2XzhBGFlyeULbFjWQXtLY9ahmNkFUtEEIel6SQ9L2iXp1hLbXympT9ID6etVRdteIemR9PWKSsZpc+vNF9z/YDbPNFXqwJIagTuA5wN7gB2StkdE77RdPxYRt0yrexHwB0APEMD9ad2jlYrXZlYYGmXP0UFuuubirEMxswuoklcQ1wC7ImJ3RIwAdwM3lln3p4HPRsSRNCl8Fri+QnHaHB7K9wN4FjmzeaaSCWIN8ETR+p60bLoXS/q2pHskrTubupJulrRT0s6+vr7zF
bdN4yE2zOanrDupPw1siIinkVwlfPBsKkfEnRHRExE93d3dFQnQkgSxZEEzKxa2Zh2KmV1AlUwQe4F1Retr07IpEXE4IobT1fcDzyy3rl04kx3UkofYMJtPKpkgdgCbJG2U1AJsA7YX7yBpVdHqDUAuXb4PuE7SEklLgOvSMrvAxsYneHi/h9gwm48qdhdTRIxJuoXkxN4I3BURD0q6DdgZEduB10u6ARgDjgCvTOsekfR2kiQDcFtEHKlUrDazxw+fYHhswh3UZvNQxRIEQETcC9w7reytRctvAd4yQ927gLsqGZ/NrTe9g8lXEGbzT9ad1FblcvkCzY3isuWdWYdiZheYE4TNqndfgUu7O2lp8n8Vs/nGf/U2q1y+4P4Hs3nKCcJmdHhgmIP9w2xZ7QRhNh85QdiMcu6gNpvXnCBsRr3544AThNl85QRhM8rl+1mxsJWLOlqyDsXMMuAEYTNyB7XZ/OYEYSUNj42z6+CAm5fM5jEnCCtp18EBxibCCcJsHnOCsJJ693kOCLP5zgnCSsrl+2lrbmDjso6sQzGzjDhBWEm5fIErVi6kscFzQJjNV04QdoaIILe/wJZVXVmHYmYZcoKwM+wvDHHs5Kj7H8zmOScIO4M7qM0MnCCshFw+SRCbV7qJyWw+c4KwM+Ty/Vx80QK62pqzDsXMMlTRBCHpekkPS9ol6dZZ9nuxpJDUk65vkDQo6YH09deVjNNOl8sXuNId1GbzXsXmpJbUCNwBPB/YA+yQtD0ieqft1wW8Afj6tEM8GhFbKxWflXZyZIzHDp/ghq2rsw7FzDJWySuIa4BdEbE7IkaAu4EbS+z3duBPgKEKxmJlemh/PxHuoDazCl5BAGuAJ4rW9wA/XLyDpGcA6yLi/0l607T6GyV9EygAvx8RX6pEkEdPjPCC9/znaWXSDMtohvLi/VWyfHrBTHUiIvk59U/yIyImV4mAIIg4tV5cP4rKJvcrrju5Nlle/J6jYxMAHsXVzCqaIGYlqQF4J/DKEpvzwMURcVjSM4FPSXpKRBSmHeNm4GaAiy+++EnF0dQonnXJ0qn1U6dhmGFx6oR6Znnp/Werc/p7RJKE0nwhkuQxmT6kU2WT20/tq6ntp/bVVCJTurNOO/appFd87NWL21i7pB0zm98qmSD2AuuK1temZZO6gKcCX0hPeCuB7ZJuiIidwDBARNwv6VHgcmBn8RtExJ3AnQA9PT3Tz8ll6Wpr5h0vefqTqWpmVtcq2QexA9gkaaOkFmAbsH1yY0Qcj4hlEbEhIjYAXwNuiIidkrrTTm4kXQJsAnZXMFYzM5umYlcQETEm6RbgPqARuCsiHpR0G7AzIrbPUv05wG2SRoEJ4DURcaRSsZqZ2ZlU3DZey3p6emLnzp1z72hmZlMk3R8RPaW2+UlqMzMryQnCzMxKcoIwM7OSnCDMzKwkJwgzMyupbu5iktQHfP8cDrEMOHSewql1/ixO58/jdP48TqmHz2J9RHSX2lA3CeJcSdo5061e840/i9P58zidP49T6v2zcBOTmZmV5ARhZmYlOUGccmfWAVQRfxan8+dxOn8ep9T1Z+E+CDMzK8lXEGZmVpIThJmZlTTvE4Sk6yU9LGmXpFuzjidLktZJ+rykXkkPSnpD1jFlTVKjpG9K+uesY8mapMWS7pH0kKScpGdnHVOWJL0x/Tv5rqSPSmrLOqbzbV4niHRSojuAnwG2ADdJ2pJtVJkaA34nIrYAzwJeO88/D4A3ALmsg6gS7wY+ExGbgaczjz8XSWuA1wM9EfFUkjlvtmUb1fk3rxMEcA2wKyJ2R8QIcDdwY8YxZSYi8hHxjXS5n+QEsCbbqLIjaS3wc8D7s44la5IWkUzk9QGAiBiJiGOZBpW9JqBdUhOwANiXcTzn3XxPEGuAJ4rW9zCPT4jFJG0Arga+nnEoWXoX8GaSWQ3nu41AH/A3aZPb+yV1ZB1UViJiL/DnwA+APHA8Iv4126jOv/meIKwESZ3AJ4HfiohC1vFkQdILgIMRcX/WsVSJJuAZwF9FxNXACWDe9tlJWkLS2rARWA10SHpZtlGdf/M9QewF1hWtr03L5i1JzSTJ4cMR8Q9Zx5Oha4EbJD1O0vT4U5I+lG1ImdoD7ImIySvKe0gSxnz1POCxiOiLiFHgH4AfyTim826+J4gdwCZJGyW1kHQybc84psxIEkkbcy4i3pl1PFmKiLdExNqI2EDy/+LfI6LuviGWKyL2A09IuiItei7Qm2FIWfsB8CxJC9K/m+dSh532TVkHkKWIGJN0C3AfyV0Id0XEgxmHlaVrgV8BviPpgbTs9yLi3uxCsiryOuDD6Zep3cCvZhxPZiLi65LuAb5BcvffN6nDYTc81IaZmZU035uYzMxsBk4QZmZWkhOEmZmV5ARhZmYlOUGYmVlJThBmKUkD6c8Nkl56no/9e9PWv3I+j29WCU4QZmfaAJxVgkgHbJvNaQkiIuruqVurP04QZme6HfgxSQ+kY/43SvozSTskfVvSqwEk/YSkL0naTvpUsaRPSbo/nSfg5rTsdpJRPx+Q9OG0bPJqRemxvyvpO5J+qejYXyiaf+HD6RO7SLo9nbPj25L+/IJ/OjZvzOsnqc1mcCvwuxHxAoD0RH88In5IUivwZUmTI3c+A3hqRDyWrv9aRByR1A7skPTJiLhV0i0RsbXEe70I2Eoyv8KytM5/pNuuBp5CMoz0l4FrJeWAXwA2R0RIWnx+f3WzU3wFYTa364CXp8OPfB1YCmxKt/1XUXIAeL2kbwFfIxkIchOz+1HgoxExHhEHgC8CP1R07D0RMQE8QNL0dRwYAj4g6UXAyXP83cxm5ARhNjcBr4uIrelrY9HY/yemdpJ+gmSUz2dHxNNJxuc5l2koh4uWx4GmiBgjmejqHuAFwGfO4fhms3KCMDtTP9BVtH4f8JvpUOhIunyGyXIWAUcj4qSkzSTTtk4anaw/zZeAX0r7ObpJZm37r5kCS+fqWJQOoPhGkqYps4pwH4TZmb4NjKdNRX9LMhfzBuAbaUdxH/DCEvU+A7wm7Sd4mKSZadKdwLclfSMifrmo/B+BZwPfAgJ4c0TsTxNMKV3AP0lqI7my+e0n9RualcGjuZqZWUluYjIzs5KcIMzMrCQnCDMzK8kJwszMSnKCMDOzkpwgzMysJCcIMzMr6f8DtnEhqj6H3isAAAAASUVORK5CYII=\n",
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAot0lEQVR4nO3deXxcZ33v8c9Xuyx5X+LEexI7djYSUMOSsrQ0kFJIKFDqpBRyWxrgEkqhhZv09lIaXuXSjcItebUNNMAtS6CBUtObktKyFgq1Q0xCrHHiOIttNIls2dZYsvbf/eMcyWN5JI1tjWak+b5fr3nNOc9Z5qeJc37znOc5z6OIwMzMbLyacgdgZmaVyQnCzMwKcoIwM7OCnCDMzKwgJwgzMyvICcLMzApygjAzs4KcIGzWk3Qs7zUi6Xje+q+dwfm+JenNpYjVbDapK3cAZmcrIlpHlyU9Abw5Iv6tfBGVlqS6iBgqdxw297kGYXOWpBpJt0p6TNIhSV+UtCTd1iTpM2n5EUnbJZ0j6Y+BFwIfS2sgH5vg3P8gKSvpqKTvSLokb1uzpL+Q9GS6/T8kNafbflbS99PP3CfpprT8pFqLpJsk/Ufeekh6u6RHgUfTso+m5+iWdL+kF+btXyvp99O/PZduXyPpDkl/Me5v2SbpXWf/jdtc4wRhc9k7gFcDLwbOAw4Dd6Tb3gQsBNYAS4G3Ascj4n8C3wVuiYjWiLhlgnP/C7ARWAH8CPhs3rY/B54DvABYArwXGJG0Lj3ur4DlwBXAztP4e14NPBe4OF3fnp5jCfA54B8kNaXb3g3cALwCWAD8BtALfBq4QVINgKRlwC+kx5udxLeYbC57K8mFfj+ApPcDT0n6dWCQJDFcGBEPAvefzokj4q7R5fS8hyUtBHIkF+PnRcSBdJfvp/vdCPxbRHw+LT+Uvor1vyOiKy+Gz+Rt+wtJfwBcBPwYeDPw3ojYnW7/8ehnSjoKvBT4OrAV+FZEPH0acViVcA3C5rJ1wD+mt3OOAO3AMHAO8PfAfcDdkn4q6U8l1Rdz0vT2zYfS2zfdwBPppmXpqwl4rMChayYoL9a+cXH8nqT29DbWEZIa0bIiPuvTwBvS5TeQfBdmp3CCsLlsH/CLEbEo79UUEQciYjAi/igiLia5FfRK4I3pcVMNcXwjcD3JrZmFwPq0XMBBoA+4YIJ4CpUD9ADz8tZXFthnLK60veG9wOuBxRGxCDiaxjDVZ30GuF7Ss4AtwFcm2M+qnBOEzWV/A/xxeu8fScslXZ8u/5ykyyTVAt0kt5xG0uOeBs6f5LzzgX6S20PzgA+OboiIEeAu4MOSzktrG8+X1EjSTvELkl4vqU7SUklXpIfuBF4jaZ6kC4HfnOJvmw8MAZ1AnaT3kbQ1jPoE8AFJG5W4XNLSNMb9JO0Xfw98KSKOT/FZVqWcIGwu+yiwDfhXSTngBySNvJD8Qr+HJDm0A9/mxK2WjwKvk3RY0v8pcN7/CzwJHAB2pefN93vAQyQX4S7gT4CaiHiKpNH4d9PyncCz0mP+EhggSU6f5uRG70LuA74GPJLG0sfJt6A+DHwR+Nf0b/w7oDlv+6eBy/DtJZuEPGGQWfWR9CKSW03rwhcBm4BrEGZVJm2MfyfwCScHm4wThFkVkbQFOAKcC3ykrMFYxfMtJjMzK8g1CDMzK2jOPEm9bNmyWL9+fbnDMDObVe6///6DEbG80LY5kyDWr1/Pjh07yh2GmdmsIunJibb5FpOZmRXkBGFmZgU5QZiZWUFOEGZmVpAThJmZFeQEYWZmBZU0QUi6VtJuSXsk3Vpg+1pJ35T0gKQHJb0ib9tt6XG7Jb28lHGamdmpSvYcRDrO/h3ANcB+YLukbRGxK2+3PwC+GBF/Leli4F5gfbq8FbiEZC7hf5O0KSKGSxWvmRUWETzd3c+D+4/w6DPHaKyrYdG8BhbPqx97XzyvgYXN9dTUaOoT2qxRygflrgL2RMReAEl3k8zClZ8gghOTnCwEfpouXw/cHRH9wOOS9qTn+88SxmtmwDPdfTy4/ygPHTjx6sz1T3mcBAubk2SxaN7J7yeSSd5yS7Ktqb52Bv4qOxOlTBCrOHkCk/2cmKxl1PtJJnN5B9BCMoXj6LH5k7DsT8tOIulm4GaAtWvXTkvQZtWkM9fPTw4cTRPCER46cJSnu5NkUCO4YHkrL9y4jMtXLeSy1QvZvHIBQ8PB4d4BjhwfTN57BzjcM5i8946WDfJ0dx+7szkO9w7QOzBx5b+pviZNJg0saq5nccvJNZNCtZUFzfXUVkFtpW9wmEM9A3QdG+BQTz+Hjg3Q1TPAwZ5+usaWB1i/dB4f3XrltH9+uYfauAH4VET8haTnA38v6dJiD46IO4E7Adra2jwsrdkkDh3rT2oEebWDjqN9QPLr//xlLbzggmVcliaDi89dQEtj4UvEwnn1p/XZ/UPDHEmTx6nJJFk+kiaW3dkcR3oHOXJ8kOGRwv9bS9DaUMf8pjrmN9WzoDl5T9ZPLC8Y9z5W3lxPS0Mt0swmmb7BYbp6BjiUXvBPLA/QlSaAQz3ptmMD9EyQWOtrxdKWRpa0NLC0tYFzFzYX3O9slTJBHADW5K2vTsvy/SZwLUBE/KekJmBZkcea2QQO9wycuEWUJoQDR05MPX3+shau2rAkSQarFnLJqoW0TpAMpkNjXS3nLKjlnAVNRR8zMhLk+odOTSY9SfLI9Q3SfXyIXN8gub4hnsn18VjnELm+IbqPDzI0QXIZVSNobTw5mUyVaOY31bMgr7y2RnT1JL/kD/UMcOhY/ynLB9Nf+l09AxzrHyoYS32tkot9SyNLWxtYt3Te2PLSloaxRLC0pZElrQ3Mb6ybkeRWygSxHdgoaQPJxX0rcOO4fZ4CXgp8Kp3IpIlkEvZtwOckfZikkXoj8F8ljNVszOGeAXbuP8IDTx1h574j9PYPsaA5uTAk78mFJHk/dX1+Ux31tTPXg/xo7yA/+enJt4n2dZ1IBuuXzuPZ6xbzphes47JVi7hk1QIWNJ1eDaAcamrEwuZ6FjbXs27p6R0bEfQNjiRJpO9EEulO33Nj70nZaKL56ZE+cv25sW0T1WCmUlcjlrY2sKSlkaUtyQV/SUsDy1qTX/3JcrJ9SUsDC5pm5oJ/ukqWICJiSNItJJOr1wJ3RcTDkm4HdkTENpLJ2z8u6V0kDdY3pVMgPizpiyQN2kPA292DyUphcHiE3dkcDzx1mAeeOsID+47w+MEeIPmFuemc+SxpaeCZXB97nhm9mAwy1XVjXkNtgURy9gmmu2+Qn4y7TfTkod6x7WuXzOPyVYv4teeu4/K0ZrCwufKTwXSTRHNDLc0NtaxYMPX+hUQEvQPDYwmlUKIZHgkWz2sY+6W/tLWyL/ina87MKNfW1hYe7tumkj3alySDfUd44KnDPHTgKH2DIwAsa23kyrWLkteaxVy+emHBe/CjF47RX56jSWNsPX+5b7DAflP/Mh2fYFoa63iqq3cseQGsXtw81l5w+apFXLpqAYvmNUzvF2ZznqT7I6Kt0LZyN1KblUz
f4DAPHTjKA08dZue+5JbRaKNsQ20Nl6xawI1XrRtLCqsWNRf1q08SLY11tDTWce7C04/rTBLM4d4BNp3Tyuues5pL03aDJS1OBlZaThB2kogg291Ha2MdrTPUEDYdIoInDvWyc196q+ipI7R3dI81VK5Z0szPrF+SJoPFbDl3Po115el/f7YJxmymOEHYSe7evo/bvvwQkPzKXtxSnzakpe/z0vfWBpbMaxhrcFvSkvRRr5uhxtnuvkF+nNYKRmsIh3sHAWhpqOVZaxbxlhefz5VrFnPF2kUsa22ckbjM5hInCDvJ/U8eZvG8et72kgvo6hmkK+2r3dUzwEOHj9DVM0B3X+GuepA8STvaLW9xS9JwN/q+ZNzykpYG5jVM/U9weCR45OncWDJ4YN8RHus8RkTSH37jilZedvFKrkhvFW1cMb8qHqIyKzUnCDtJJtvNpasWcvOLLphwn8HhEQ73DNDVmzzh2dU7MPbAz+HepA/44Z4B9nX1Jr/sewYm7JPeVF+T1ERGu/zNO1Fj6RkYZudTR/jx/iNjT+IuaWngyjWLuP5Z53Hl2sVcvmbhrOiyaTYbOUHYmKHhER55+hg3vWD9pPvV19awYkETK4p86Cki6O4bGquJdKUJZPTp0bGaSu8gjx88NvYEaV2NuOS8BfzKc1Zz5drFXLl2EWuXzJs17SJms50ThI154lAPA0MjbF45f1rPK5144GnDspaijukbHEaibA3JZuYEYXnaO3IAbF55hk8WTSOP8GlWfp5RzsZkst3U1YgLVhT3K9/M5jYnCBuT6chxwfJW39YxM8AJwvJksjk2nzu97Q9mNns5QRgAR48PcuDI8YpofzCzyuAEYQDszqYN1K5BmFnKCcIA2J3tBmCLaxBmlnKCMADaszkWzavnnAUes8jMEk4QBkCmo5vNK+f7KWUzG+MEYYyMBLuzOTdQm9lJnCCM/YeP0zMwPO1DbJjZ7OYEYbSnDdSbz3UNwsxOKGmCkHStpN2S9ki6tcD2v5S0M309IulI3rbhvG3bShlntct05JBg0zmt5Q7FzCpIyQbrk1QL3AFcA+wHtkvaFhG7RveJiHfl7f8O4Mq8UxyPiCtKFZ+dkMl2s35pS1GT95hZ9ShlDeIqYE9E7I2IAeBu4PpJ9r8B+HwJ47EJZLI5tz+Y2SlKmSBWAfvy1venZaeQtA7YAHwjr7hJ0g5JP5D06gmOuzndZ0dnZ+c0hV1degeGeOJQj3swmdkpKqWReitwT0QM55Wti4g24EbgI5JOmQMzIu6MiLaIaFu+fPlMxTqnPPJ0Mrezh9gws/FKmSAOAGvy1lenZYVsZdztpYg4kL7vBb7Fye0TNk0yHR5iw8wKK2WC2A5slLRBUgNJEjilN5KkzcBi4D/zyhZLakyXlwFXA7vGH2tnL5PN0dJQy+rFzeUOxcwqTMm6rUTEkKRbgPuAWuCuiHhY0u3AjogYTRZbgbsjIvIO3wL8raQRkiT2ofzeTzZ92ju6uWjlfGpqPMSGmZ2spP0aI+Je4N5xZe8bt/7+Asd9H7islLEZRASZbI5fuvzccodiZhWoUhqprQyy3X0cPT7IFndxNbMCnCCqWKZjdJIgN1Cb2amcIKrY6BhMF7kGYWYFOEFUsUxHjlWLmlnQVF/uUMysAjlBVLFMtpstfkDOzCbgBFGl+oeGeazTQ2yY2cScIKrUnmeOMTwSHmLDzCbkBFGlxnowuQZhZhNwgqhSmWw3jXU1rF86r9yhmFmFcoKoUplsjk3nzKeu1v8EzKwwXx2qVHuHJwkys8k5QVShzlw/B4/1+wlqM5uUE0QV2p1NGqg9BpOZTcYJogplPMSGmRXBCaIKtXfkWDG/kaWtjeUOxcwqmBNEFcpku93+YGZTcoKoMkPDIzz69DG3P5jZlJwgqszjB3sYGB7xEBtmNiUniCrTnvUQG2ZWnJImCEnXStotaY+kWwts/0tJO9PXI5KO5G17k6RH09ebShlnNcl0dFNXIy5Y3lruUMyswtWV6sSSaoE7gGuA/cB2SdsiYtfoPhHxrrz93wFcmS4vAf4QaAMCuD899nCp4q0WmWyOC1e00lDnyqOZTa6UV4mrgD0RsTciBoC7gesn2f8G4PPp8suBr0dEV5oUvg5cW8JYq0amo9tDbJhZUUqZIFYB+/LW96dlp5C0DtgAfON0jpV0s6QdknZ0dnZOS9Bz2dHeQX56tM9dXM2sKJVyn2ErcE9EDJ/OQRFxZ0S0RUTb8uXLSxTa3DH6BLVrEGZWjFImiAPAmrz11WlZIVs5cXvpdI+1ImVGx2ByDcLMilDKBLEd2Chpg6QGkiSwbfxOkjYDi4H/zCu+D3iZpMWSFgMvS8vsLGSy3SyeV8+K+R5iw8ymVrJeTBExJOkWkgt7LXBXRDws6XZgR0SMJoutwN0REXnHdkn6AEmSAbg9IrpKFWu1yGRzbF65AEnlDsXMZoGSJQiAiLgXuHdc2fvGrb9/gmPvAu4qWXBVZmQk2J3N8as/s2bqnc3MqJxGaiuxfYd76R0YdgO1mRXNCaJKtHd4iA0zOz1OEFUik+1Ggk3nuAZhZsVxgqgSmY4cG5a20NxQW+5QzGyWcIKoEskkQa49mFnxJuzFJOk1RRzfl/ZUsgrW0z/Ek129vObZq8sdipnNIpN1c/048E/AZJ3mX8S4bqxWeR55OkeEh9gws9MzWYL4l4j4jckOlvSZaY7HSsBDbJjZmZiwDSIi3jDVwcXsY+WX6eimtbGOVYuayx2Kmc0iRTdSS7pQ0mckfUnS80sZlE2v9myOi1bOp6bGQ2yYWfEma6Ruioi+vKIPAO9Nl78KXFHCuGyaRASZjm5e9azzyh2Kmc0yk9UgvirpjXnrg8B6YB1wWvM2WPl0HO2ju2/IkwSZ2WmbLEFcCyyQ9DVJLwJ+j2Qq0F8Gfm0mgrOzNzpJ0Bb3YDKz0zThLaZ0drePSfp74H8BbwP+ICIem6ng7OyNjsG0yQnCzE7TZG0QzwXeAwwAHwSOA38s6QDwgYg4MiMR2lnJZHOsXtzMgqb6codiZrPMZM9B/C3wCqAV+GREXA1slfRi4Askt5uswmU6uj2Cq5mdkcnaIIY40Sg9MFoYEd+OCCeHWaBvcJi9B3vY4jGYzOwMTFaDuBF4C0lyeOMk+1mF2vPMMYZHwjUIMzsjkzVSPwL87gzGYtNsdIgNj+JqZmdiwltMkv55qoOn2kfStZJ2S9oj6dYJ9nm9pF2SHpb0ubzyYUk709e2qWKxU2U6ummsq2H90pZyh2Jms9Bkt5h+dooLs4CLJ9wo1QJ3ANcA+4HtkrZFxK68fTYCtwFXR8RhSSvyTnE8Iq4o4m+wCWTSITZqPcSGmZ2ByRLE9UUcPzDJtquAPRGxF0DS3ek5d+Xt81vAHRFxGCAininiM61ImWw3P795xdQ7mpkVMFkbxLfP8tyrgH156/uB547bZxOApO8BtcD7I+Jr6bYmSTtIelN9KCK+Mv4DJN0M3Aywdu3aswx3bunM9XPw2IAbqM3sjE1Wg5ipz98IvARYDXxH0mXpQ3jrIuKApPOBb0
h6aPxT3BFxJ3AnQFtbW8xo5BVudIgNN1Cb2Zkq5ZzUB4A1eeur07J8+4FtETEYEY8Dj5AkDCLiQPq+F/gWcGUJY51zMukQG65BmNmZmjJBSHqVpDNJJNuBjZI2SGoAtgLjG72/QlJ7QNIykltOeyUtltSYV341J7dd2BTas92cs6CRJS0N5Q7FzGapYi78vwo8KulPJW0u9sQRMQTcAtwHtANfjIiHJd0u6bp0t/uAQ5J2Ad8E3hMRh4AtwA5JP07LP5Tf+8mmlunIufZgZmdlyjaIiHiDpAXADcCnJAXwSeDzEZGb4th7gXvHlb0vbzmAd6ev/H2+D1xW7B9hJxscHmHPM8d44aZl5Q7FzGaxom4dRUQ3cA9wN3AuyZwQP5L0jhLGZmfo8YM9DAyPsMU1CDM7C8W0QVwn6R9JGorrgasi4heBZ+GhOCpSe4d7MJnZ2Summ+trgb+MiO/kF0ZEr6TfLE1YdjYy2Rz1teL8Za3lDsXMZrFiEsT7gY7RFUnNwDkR8URE/HupArMzl+no5oLlrTTUlbIXs5nNdcVcQf4BGMlbH07LrEJlsjm2nOv2BzM7O8UkiLqIyJ8waABw5/oKdaR3gI6jfWz2HNRmdpaKSRCdec8tIOl64GDpQrKzcWIOCNcgzOzsFNMG8Vbgs5I+RjLE9z48w1zFyqQ9mLa4BmFmZ6mYB+UeA54nqTVdP1byqOyMZbI5lrQ0sHx+Y7lDMbNZrqjRXCX9EnAJyRDcAETE7SWMy85QezbH5pXzGf3vZGZ2pop5UO5vSMZjegfJLaZfAdaVOC47AyMjwSNZj8FkZtOjmEbqF0TEG4HDEfFHwPNJJ/qxyvJUVy/HB4f9BLWZTYtiEkRf+t4r6TxgkGQ8JqswY5MEuYHazKZBMW0QX5W0CPgz4EdAAB8vZVB2Zto7ctQINq5wgjCzszdpgkgnCvr3dArQL0n6Z6ApIo7ORHB2ejLZbtYva6G5obbcoZjZHDDpLaaIGAHuyFvvd3KoXJlszkN8m9m0KaYN4t8lvVbuN1nRevqHePJQr9sfzGzaFJMg3kIyOF+/pG5JOUndJY7LTtPupz3EhplNr2KepPZP0lkg05EmCNcgzGyaFPOg3IsKvYo5uaRrJe2WtEfSrRPs83pJuyQ9LOlzeeVvkvRo+npT8X9Sdcpku2ltrGP14uZyh2Jmc0Qx3Vzfk7fcBFwF3A/8/GQHSaolaeC+BtgPbJe0LSJ25e2zEbgNuDoiDktakZYvAf4QaCPpVnt/euzhov+yKpPp8BAbZja9pqxBRMSr8l7XAJcCxVyorwL2RMTedA6Ju4Hrx+3zW8Adoxf+iHgmLX858PWI6Eq3fR24trg/qfpEBO3Zbj9BbWbT6kzmpNwPbCliv1UkQ4PnH7dq3D6bgE2SvifpB5KuPY1jkXSzpB2SdnR2dhb9B8w1Pz3aR65vyGMwmdm0mvIWk6S/IrnNA0lCuYLkierp+vyNwEuA1cB3JF1W7MERcSdwJ0BbW1tMsfucNTYHhGsQZjaNimmD2JG3PAR8PiK+V8RxB4A1eeur07J8+4EfRsQg8LikR0gSxgGSpJF/7LeK+MyqNDqL3KZznCDMbPoUkyDuAfoiYhiSxmdJ8yKid4rjtgMbJW0gueBvBW4ct89XgBuAT0paRnLLaS/wGPBBSYvT/V5G0phtBbR3dLNmSTPzm+rLHYqZzSFFPUkN5PedbAb+baqDImIIuAW4D2gHvhgRD0u6PW+O6/uAQ5J2Ad8E3hMRhyKiC/gASZLZDtyellkBGc8BYWYlUEwNoil/mtGIOCZpXjEnj4h7gXvHlb0vbzmAd6ev8cfeBdxVzOdUs77BYfZ2HuMVl64sdyhmNscUU4PokfTs0RVJzwGOly4kOx17njnGSHiIDTObfsXUIH4H+AdJPyWZcnQlyRSkVgHaOzxJkJmVRjFjMW2XtBm4KC3anfY6sgqQyeZoqq9h3dKWcodiZnNMMWMxvR1oiYifRMRPgFZJ/730oVkxMtluLjpnPrU1HmLDzKZXMW0Qv5XOKAdAOvTFb5UsIitaRNDe4R5MZlYaxSSI2vzJgtJB+BpKF5IVq/NYP109Ax6DycxKophG6q8BX5D0t+n6W9IyK7MTc0C4BmFm06+YBPE/gJuBt6XrXwc+XrKIrGiZrHswmVnpFDPc90hE/E1EvC4iXgfsAv6q9KHZVDIdOVYuaGJxi+/4mdn0K6YGgaQrScZMej3wOPDlUgZlxWnP5tz+YGYlM2GCkLSJJCncABwEvgAoIn5uhmKzSQwOj7DnmRwv3rS83KGY2Rw1WQ0iA3wXeGVE7AGQ9K4ZicqmtLezh8Hh8BwQZlYyk7VBvAboAL4p6eOSXkoy1IZVgBMN1O7BZGalMWGCiIivRMRWYDPJUNy/A6yQ9NeSXjZD8dkE2jty1NeK85d7iA0zK41iejH1RMTnIuJVJDO7PUDS9dXKKJPt5sIV86mvPZNpxc3MpnZaV5eIOBwRd0bES0sVkBUn05Fji59/MLMS8s/PWehwzwDZ7j53cTWzknKCmIUyWQ+xYWal5wQxC431YHINwsxKqKQJQtK1knZL2iPp1gLbb5LUKWln+npz3rbhvPJtpYxztsl05Fja0sDy1sZyh2Jmc1hRQ22ciXRY8DuAa4D9wHZJ2yJi17hdvxARtxQ4xfGIuKJU8c1mmWw3m8+dT94o7GZm066UNYirgD0RsTciBoC7getL+HlVYXgk2P20Jwkys9IrZYJYBezLW9+flo33WkkPSrpH0pq88iZJOyT9QNKrC32ApJvTfXZ0dnZOX+QV7MlDPfQNjniIbzMruXI3Un8VWB8Rl5PMM/HpvG3rIqINuBH4iKQLxh+cPpPRFhFty5dXx6B1u9MeTFvOdQ3CzEqrlAniAJBfI1idlo2JiEMR0Z+ufgJ4Tt62A+n7XuBbwJUljHXWaM/mqBFcuKK13KGY2RxXygSxHdgoaYOkBmArcFJvJEnn5q1eB7Sn5YslNabLy4CrSSYqqnqZjm42LGuhqb623KGY2RxXsl5METEk6RbgPqAWuCsiHpZ0O7AjIrYBvy3pOmAI6AJuSg/fAvytpBGSJPahAr2fqlImm+Oy1QvLHYaZVYGSJQiAiLgXuHdc2fvylm8Dbitw3PeBy0oZ22x0rH+Ip7p6eX3b6nKHYmZVoNyN1HYadnuIDTObQU4Qs4iH2DCzmeQEMYtkOnLMb6xj1aLmcodiZlXACWIW8RAbZjaTnCBmiYgg0+EhNsxs5jhBzBIHjhwn1z/k9gczmzFOELNEpsM9mMxsZjlBzBKjPZgu8iB9ZjZDnCBmifZsjrVL5tHaWNJnG83MxjhBzBKZjm4P8W1mM8oJYhboGxzm8YM9bPYQ32Y2g5wgZoFHnz7GSMAW1yDMbAY5QcwC7WNDbLgGYWYzxwliFsh05Giur2XtknnlDsXMqogTxCyQyXazaeV8ams8xIaZzRwniAoXEbR3dLv9wcxmnBNEhevM9XO4d9BdXM1sxjlBVLj20UmC3EBtZjPMCaLCZTrSHkyuQZjZDCtpgpB0raTdk
vZIurXA9pskdUramb7enLftTZIeTV9vKmWclSyTzXHuwiYWzWsodyhmVmVKNrCPpFrgDuAaYD+wXdK2iNg1btcvRMQt445dAvwh0AYEcH967OFSxVup2j3EhpmVSSlrEFcBeyJib0QMAHcD1xd57MuBr0dEV5oUvg5cW6I4K9bA0AiPdR5z+4OZlUUpE8QqYF/e+v60bLzXSnpQ0j2S1pzOsZJulrRD0o7Ozs7pirti7D14jMHhcA3CzMqi3I3UXwXWR8TlJLWET5/OwRFxZ0S0RUTb8uXLSxJgOY1OErTFNQgzK4NSJogDwJq89dVp2ZiIOBQR/enqJ4DnFHtsNWjPdtNQW8OGZS3lDsXMqlApE8R2YKOkDZIagK3AtvwdJJ2bt3od0J4u3we8TNJiSYuBl6VlVSXTkePCFa3U15a7omdm1ahkvZgiYkjSLSQX9lrgroh4WNLtwI6I2Ab8tqTrgCGgC7gpPbZL0gdIkgzA7RHRVapYK1Um283VFy4rdxhmVqVKOn9lRNwL3Duu7H15y7cBt01w7F3AXaWMr5J19QzwdHc/W1a6/cHMysP3LipUZmwOCPdgMrPycIKoUKM9mDa7BmFmZeIEUaEy2W6WtTawfH5juUMxsyrlBFGhMtmcaw9mVlZOEBVoeCTYnc35CWozKysniAr0xKEe+odGPAaTmZWVE0QFOtFA7RqEmZWPE0QFymS7qa0RF65oLXcoZlbFnCAqUCab4/xlLTTV15Y7FDOrYk4QFSiT7eYi314yszJzgqgwub5B9nUd9xDfZlZ2ThAV5pGn3UBtZpXBCaLCtI/2YHINwszKzAmiwmSy3cxvquO8hU3lDsXMqpwTRIXJdOTYsnIBksodiplVOSeIChIRyRhMHuLbzCqAE0QF2X/4OMf6hzxIn5lVBCeICpLJjjZQuwZhZuXnBFFBMh3JLHIXneMEYWblV9IEIelaSbsl7ZF06yT7vVZSSGpL19dLOi5pZ/r6m1LGWSky2Rzrls6jpbGkU4WbmRWlZFciSbXAHcA1wH5gu6RtEbFr3H7zgXcCPxx3isci4opSxVeJ2rPdfkDOzCpGKX+qXgXsiYi9AJLuBq4Hdo3b7wPAnwDvKWEsExoeCQ4e60eCGil9gdL30bIT2xlbn86uqMcHhnniYA+vuvy8aTunmdnZKGWCWAXsy1vfDzw3fwdJzwbWRMT/kzQ+QWyQ9ADQDfxBRHy3FEEe6R3guR/89zM6Nj9pTJ1Q8ren+9ec2H9oZISRgC1uoDazClG2m92SaoAPAzcV2NwBrI2IQ5KeA3xF0iUR0T3uHDcDNwOsXbv2jOJoaazjg798GSMRRAQjASPpe7J+oiwCRkby14vYP68sIhgZOXX/4XS/521YytUXLjujv8PMbLqVMkEcANbkra9Oy0bNBy4FvpXeqlkJbJN0XUTsAPoBIuJ+SY8Bm4Ad+R8QEXcCdwK0tbXFmQTZVF/Ljc89s+RiZjaXlbIX03Zgo6QNkhqArcC20Y0RcTQilkXE+ohYD/wAuC4idkhanjZyI+l8YCOwt4SxmpnZOCWrQUTEkKRbgPuAWuCuiHhY0u3AjojYNsnhLwJulzQIjABvjYiuUsVqZmanUsQZ3ZmpOG1tbbFjx46pdzQzszGS7o+ItkLb/CS1mZkV5ARhZmYFOUGYmVlBThBmZlaQE4SZmRU0Z3oxSeoEnjyLUywDDk5TOLOdv4uT+fs4mb+PE+bCd7EuIpYX2jBnEsTZkrRjoq5e1cbfxcn8fZzM38cJc/278C0mMzMryAnCzMwKcoI44c5yB1BB/F2czN/Hyfx9nDCnvwu3QZiZWUGuQZiZWUFOEGZmVlDVJwhJ10raLWmPpFvLHU85SVoj6ZuSdkl6WNI7yx1TuUmqlfSApH8udyzlJmmRpHskZSS1S3p+uWMqJ0nvSv8/+Ymkz0tqKndM062qE0Q6KdEdwC8CFwM3SLq4vFGV1RDwuxFxMfA84O1V/n0AvBNoL3cQFeKjwNciYjPwLKr4e5G0CvhtoC0iLiWZ82ZreaOaflWdIICrgD0RsTciBoC7gevLHFPZRERHRPwoXc6RXABWlTeq8pG0Gvgl4BPljqXcJC0kmcjr7wAiYiAijpQ1qPKrA5ol1QHzgJ+WOZ5pV+0JYhWwL299P1V8QcwnaT1wJfDDModSTh8B3ksyq2G12wB0Ap9Mb7l9QlJLuYMql4g4APw58BTQARyNiH8tb1TTr9oThBUgqRX4EvA7EdFd7njKQdIrgWci4v5yx1Ih6oBnA38dEVcCPUDVttlJWkxyt2EDcB7QIukN5Y1q+lV7gjgArMlbX52WVS1J9STJ4bMR8eVyx1NGVwPXSXqC5Nbjz0v6THlDKqv9wP6IGK1R3kOSMKrVLwCPR0RnRAwCXwZeUOaYpl21J4jtwEZJGyQ1kDQybStzTGUjSST3mNsj4sPljqecIuK2iFgdEetJ/l18IyLm3C/EYkVEFtgn6aK06KXArjKGVG5PAc+TNC/9/+alzMFG+7pyB1BOETEk6RbgPpJeCHdFxMNlDqucrgZ+HXhI0s607Pcj4t7yhWQV5B3AZ9MfU3uB/1bmeMomIn4o6R7gRyS9/x5gDg674aE2zMysoGq/xWRmZhNwgjAzs4KcIMzMrCAnCDMzK8gJwszMCnKCMEtJOpa+r5d04zSf+/fHrX9/Os9vVgpOEGanWg+cVoJIB2ybzEkJIiLm3FO3Nvc4QZid6kPACyXtTMf8r5X0Z5K2S3pQ0lsAJL1E0nclbSN9qljSVyTdn84TcHNa9iGSUT93SvpsWjZaW1F67p9IekjSr+ad+1t58y98Nn1iF0kfSufseFDSn8/4t2NVo6qfpDabwK3A70XEKwHSC/3RiPgZSY3A9ySNjtz5bODSiHg8Xf+NiOiS1Axsl/SliLhV0i0RcUWBz3oNcAXJ/ArL0mO+k267EriEZBjp7wFXS2oHfhnYHBEhadH0/ulmJ7gGYTa1lwFvTIcf+SGwFNiYbvuvvOQA8NuSfgz8gGQgyI1M7meBz0fEcEQ8DXwb+Jm8c++PiBFgJ8mtr6NAH/B3kl4D9J7l32Y2IScIs6kJeEdEXJG+NuSN/d8ztpP0EpJRPp8fEc8iGZ/nbKah7M9bHgbqImKIZKKre4BXAl87i/ObTcoJwuxUOWB+3vp9wNvSodCRtGmCyXIWAocjolfSZpJpW0cNjh4/zneBX03bOZaTzNr2XxMFls7VsTAdQPFdJLemzErCbRBmp3oQGE5vFX2KZC7m9cCP0obiTuDVBY77GvDWtJ1gN8ltplF3Ag9K+lFE/Fpe+T8Czwd+DATw3ojIpgmmkPnAP0lqIqnZvPuM/kKzIng0VzMzK8i3mMzMrCAnCDMzK8gJwszMCnKCMDOzgpwgzMysICcIMzMryAnCzMwK+v9dS7Ovcb84WwAAAABJRU5ErkJggg==\n",
"text/plain": [
""
]
@@ -509,7 +507,7 @@
{
"data": {
"text/plain": [
- "0.8053976582616722"
+ "0.798340863819657"
]
},
"execution_count": 15,
@@ -782,7 +780,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 25,
"metadata": {
"scrolled": true
},
@@ -799,7 +797,9 @@
"output_type": "stream",
"text": [
":15: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
- " x = (x + torch.tensor([1.0])) / 2.0\n"
+ " x = (x + torch.tensor([1.0])) / 2.0\n",
+ "/workspace/brevitas/src/brevitas/quant_tensor/__init__.py:74: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+ " training = torch.tensor(training, dtype=torch.bool)\n"
]
}
],
@@ -843,7 +843,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -867,10 +867,10 @@
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 28,
+ "execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
diff --git a/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb b/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb
index 6ac4e52072..a0fef1ab61 100644
--- a/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb
+++ b/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb
@@ -169,7 +169,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -177,13 +177,13 @@
"output_type": "stream",
"text": [
"Input tensor name: 0\n",
- "Output tensor name: 78\n",
+ "Output tensor name: 73\n",
"Input tensor shape: [1, 600]\n",
"Output tensor shape: [1, 1]\n",
- "Input tensor datatype: DataType.BIPOLAR\n",
- "Output tensor datatype: DataType.FLOAT32\n",
+ "Input tensor datatype: BIPOLAR\n",
+ "Output tensor datatype: FLOAT32\n",
"List of node operator types in the graph: \n",
- "['Add', 'Div', 'MatMul', 'Add', 'Mul', 'Unsqueeze', 'BatchNormalization', 'Squeeze', 'MultiThreshold', 'Mul', 'MatMul', 'Add', 'Mul', 'Unsqueeze', 'BatchNormalization', 'Squeeze', 'MultiThreshold', 'Mul', 'MatMul', 'Add', 'Mul', 'Unsqueeze', 'BatchNormalization', 'Squeeze', 'MultiThreshold', 'Mul', 'MatMul', 'Add', 'Mul', 'MultiThreshold']\n"
+ "['Mul', 'Add', 'Div', 'MatMul', 'Mul', 'Add', 'BatchNormalization', 'MultiThreshold', 'Mul', 'MatMul', 'Mul', 'Add', 'BatchNormalization', 'MultiThreshold', 'Mul', 'MatMul', 'Mul', 'Add', 'BatchNormalization', 'MultiThreshold', 'Mul', 'MatMul', 'Mul', 'Add', 'MultiThreshold']\n"
]
}
],
@@ -200,8 +200,8 @@
"print(\"Output tensor shape: %s\" % str(finnonnx_model_out_shape))\n",
"finnonnx_model_in_dt = model_for_sim.get_tensor_datatype(finnonnx_in_tensor_name)\n",
"finnonnx_model_out_dt = model_for_sim.get_tensor_datatype(finnonnx_out_tensor_name)\n",
- "print(\"Input tensor datatype: %s\" % str(model_for_sim.get_tensor_datatype(finnonnx_in_tensor_name)))\n",
- "print(\"Output tensor datatype: %s\" % str(model_for_sim.get_tensor_datatype(finnonnx_out_tensor_name)))\n",
+ "print(\"Input tensor datatype: %s\" % str(finnonnx_model_in_dt.name))\n",
+ "print(\"Output tensor datatype: %s\" % str(finnonnx_model_out_dt.name))\n",
"print(\"List of node operator types in the graph: \")\n",
"print([x.op_type for x in model_for_sim.graph.node])"
]
@@ -226,7 +226,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -262,7 +262,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -286,10 +286,10 @@
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 6,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -311,7 +311,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -320,7 +320,7 @@
"torch.Size([100, 593])"
]
},
- "execution_count": 7,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -356,16 +356,16 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "IncompatibleKeys(missing_keys=[], unexpected_keys=[])"
+ ""
]
},
- "execution_count": 8,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -409,7 +409,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@@ -441,7 +441,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@@ -476,14 +476,14 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "ok 100 nok 0: 100%|██████████| 100/100 [00:47<00:00, 2.09it/s]\n"
+ "ok 100 nok 0: 100%|██████████| 100/100 [00:21<00:00, 4.72it/s]\n"
]
}
],
@@ -511,7 +511,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -560,7 +560,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.8"
+ "version": "3.8.5"
}
},
"nbformat": 4,
diff --git a/notebooks/end2end_example/cybersecurity/dataloader_quantized.py b/notebooks/end2end_example/cybersecurity/dataloader_quantized.py
index 45651faa5a..738811fa72 100644
--- a/notebooks/end2end_example/cybersecurity/dataloader_quantized.py
+++ b/notebooks/end2end_example/cybersecurity/dataloader_quantized.py
@@ -26,12 +26,12 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import torch
-import pandas as pd
+import math
import numpy as np
+import pandas as pd
+import torch
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder
-import math
# quantize the UNSW_NB15 dataset and convert it to binary vectors
# reimplementation
@@ -112,7 +112,7 @@ def my_binary_repr(number, nbits):
def round_like_matlab_number(self, n: np.float64) -> int:
"""Round the input "n" like matlab uint32(n) cast (which also rounds) e.g.
- 0.5->1; 1.5->2; 2.3->2; 2.45->2 """
+ 0.5->1; 1.5->2; 2.3->2; 2.45->2"""
if n - math.floor(n) < 0.5:
return math.floor(n)
return math.ceil(n)
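
The rounding rule above is worth a standalone illustration; a minimal sketch that mirrors the logic and reproduces the docstring's examples (`round_half_up` is a hypothetical name, not part of the codebase):

    import math

    def round_half_up(n: float) -> int:
        # fractional part below 0.5 rounds down, otherwise up,
        # matching the MATLAB uint32() cast behavior described above
        if n - math.floor(n) < 0.5:
            return math.floor(n)
        return math.ceil(n)

    assert round_half_up(0.5) == 1
    assert round_half_up(1.5) == 2
    assert round_half_up(2.3) == 2
    assert round_half_up(2.45) == 2
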
diff --git a/notebooks/end2end_example/cybersecurity/validate-unsw-nb15.py b/notebooks/end2end_example/cybersecurity/validate-unsw-nb15.py
index 622c69c8d0..0ffb525544 100644
--- a/notebooks/end2end_example/cybersecurity/validate-unsw-nb15.py
+++ b/notebooks/end2end_example/cybersecurity/validate-unsw-nb15.py
@@ -27,9 +27,9 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
+import numpy as np
from driver import io_shape_dict
from driver_base import FINNExampleOverlay
-import numpy as np
def make_unsw_nb15_test_batches(bsize, dataset_root):
diff --git a/requirements.txt b/requirements.txt
index de007ace50..da0ec0b630 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,6 +9,7 @@ onnx==1.7.0
onnxoptimizer
onnxruntime==1.4.0
pre-commit==2.6.0
+pyscaffold==3.2.1
scipy==1.5.2
setupext-janitor>=1.1.2
toposort==1.5
diff --git a/run-docker.sh b/run-docker.sh
index 6e8439810c..2abd67f067 100755
--- a/run-docker.sh
+++ b/run-docker.sh
@@ -41,24 +41,19 @@ recho () {
echo -e "${RED}$1${NC}"
}
-if [ -z "$VIVADO_PATH" ];then
- recho "Please set the VIVADO_PATH that contains the path to your Vivado installation directory."
- recho "FINN functionality depending on Vivado or Vivado HLS will not be available."
+if [ -z "$FINN_XILINX_PATH" ];then
+ recho "Please set the FINN_XILINX_PATH environment variable to the path to your Xilinx tools installation directory (e.g. /opt/Xilinx)."
+ recho "FINN functionality depending on Vivado, Vitis or HLS will not be available."
fi
-if [ -z "$PYNQ_IP" ];then
- recho "Please set the PYNQ_IP env.var. to enable PYNQ deployment tests."
+if [ -z "$FINN_XILINX_VERSION" ];then
+ recho "Please set the FINN_XILINX_VERSION environment variable to the version of the Xilinx tools to use (e.g. 2020.1)."
+ recho "FINN functionality depending on Vivado, Vitis or HLS will not be available."
fi
-if [ -z "$VITIS_PATH" ];then
- recho "Please set the VITIS_PATH that contains the path to your Vitis installation directory."
- recho "FINN functionality depending on Vitis will not be available."
-else
- if [ -z "$PLATFORM_REPO_PATHS" ];then
- recho "Please set PLATFORM_REPO_PATHS pointing to Vitis platform files (DSAs)."
- recho "This is required to be able to use Vitis."
- exit -1
- fi
+if [ -z "$PLATFORM_REPO_PATHS" ];then
+ recho "Please set PLATFORM_REPO_PATHS pointing to Vitis platform files (DSAs)."
+ recho "This is required to be able to use Alveo PCIe cards."
fi
DOCKER_GID=$(id -g)
@@ -66,21 +61,8 @@ DOCKER_GNAME=$(id -gn)
DOCKER_UNAME=$(id -un)
DOCKER_UID=$(id -u)
DOCKER_PASSWD="finn"
-# generate a random number per-run to allow multiple
-# containers from the same user
-DOCKER_RND=$(shuf -i0-32768 -n1)
-# create a separate tag when Vitis enabled, since Docker image needs
-# additional dependencies installed
-if [ ! -z "$VITIS_PATH" ];then
- DOCKER_TAG="finn_dev_vitis_${DOCKER_UNAME}"
-else
- DOCKER_TAG="finn_dev_${DOCKER_UNAME}"
-fi
-# uncomment to run multiple instances with different names
-# DOCKER_INST_NAME="finn_${DOCKER_UNAME}_${DOCKER_RND}"
DOCKER_INST_NAME="finn_dev_${DOCKER_UNAME}"
-# ensure Docker tag and inst. name are all lowercase
-DOCKER_TAG=$(echo "$DOCKER_TAG" | tr '[:upper:]' '[:lower:]')
+# ensure Docker inst. name is all lowercase
DOCKER_INST_NAME=$(echo "$DOCKER_INST_NAME" | tr '[:upper:]' '[:lower:]')
# Absolute path to this script, e.g. /home/user/bin/foo.sh
SCRIPT=$(readlink -f "$0")
@@ -97,7 +79,7 @@ SCRIPTPATH=$(dirname "$SCRIPT")
: ${PYNQ_PASSWORD="xilinx"}
: ${PYNQ_BOARD="Pynq-Z1"}
: ${PYNQ_TARGET_DIR="/home/xilinx/$DOCKER_INST_NAME"}
-: ${NUM_DEFAULT_WORKERS=1}
+: ${NUM_DEFAULT_WORKERS=4}
: ${FINN_SSH_KEY_DIR="$SCRIPTPATH/ssh_keys"}
: ${ALVEO_USERNAME="alveo_user"}
: ${ALVEO_PASSWORD=""}
@@ -106,9 +88,15 @@ SCRIPTPATH=$(dirname "$SCRIPT")
: ${PLATFORM_REPO_PATHS="/opt/xilinx/platforms"}
: ${XRT_DEB_VERSION="xrt_202010.2.7.766_18.04-amd64-xrt"}
: ${FINN_HOST_BUILD_DIR="/tmp/$DOCKER_INST_NAME"}
+: ${FINN_DOCKER_TAG="xilinx/finn:$(git describe --tags --dirty).$XRT_DEB_VERSION"}
+: ${FINN_DOCKER_PREBUILT="0"}
+: ${FINN_DOCKER_RUN_AS_ROOT="0"}
+: ${FINN_DOCKER_GPU="$(docker info | grep nvidia | wc -m)"}
+: ${FINN_DOCKER_EXTRA=""}
+: ${NVIDIA_VISIBLE_DEVICES=""}
+: ${DOCKER_BUILDKIT="1"}
DOCKER_INTERACTIVE=""
-DOCKER_EXTRA=""
if [ "$1" = "test" ]; then
gecho "Running test suite (all tests)"
@@ -123,21 +111,21 @@ elif [ "$1" = "notebook" ]; then
else
JUPYTER_PASSWD_ARG="--NotebookApp.password='$JUPYTER_PASSWD_HASH'"
fi
- DOCKER_CMD="jupyter notebook --no-browser --ip=0.0.0.0 --port $JUPYTER_PORT $JUPYTER_PASSWD_ARG notebooks"
- DOCKER_EXTRA+="-e JUPYTER_PORT=$JUPYTER_PORT "
- DOCKER_EXTRA+="-e NETRON_PORT=$NETRON_PORT "
- DOCKER_EXTRA+="-p $JUPYTER_PORT:$JUPYTER_PORT "
- DOCKER_EXTRA+="-p $NETRON_PORT:$NETRON_PORT "
+ DOCKER_CMD="jupyter notebook --allow-root --no-browser --ip=0.0.0.0 --port $JUPYTER_PORT $JUPYTER_PASSWD_ARG notebooks"
+ FINN_DOCKER_EXTRA+="-e JUPYTER_PORT=$JUPYTER_PORT "
+ FINN_DOCKER_EXTRA+="-e NETRON_PORT=$NETRON_PORT "
+ FINN_DOCKER_EXTRA+="-p $JUPYTER_PORT:$JUPYTER_PORT "
+ FINN_DOCKER_EXTRA+="-p $NETRON_PORT:$NETRON_PORT "
elif [ "$1" = "build_dataflow" ]; then
BUILD_DATAFLOW_DIR=$(readlink -f "$2")
- DOCKER_EXTRA="-v $BUILD_DATAFLOW_DIR:$BUILD_DATAFLOW_DIR"
+ FINN_DOCKER_EXTRA="-v $BUILD_DATAFLOW_DIR:$BUILD_DATAFLOW_DIR "
DOCKER_INTERACTIVE="-it"
#FINN_HOST_BUILD_DIR=$BUILD_DATAFLOW_DIR/build
gecho "Running build_dataflow for folder $BUILD_DATAFLOW_DIR"
DOCKER_CMD="build_dataflow $BUILD_DATAFLOW_DIR"
elif [ "$1" = "build_custom" ]; then
BUILD_CUSTOM_DIR=$(readlink -f "$2")
- DOCKER_EXTRA="-v $BUILD_CUSTOM_DIR:$BUILD_CUSTOM_DIR -w $BUILD_CUSTOM_DIR"
+ FINN_DOCKER_EXTRA="-v $BUILD_CUSTOM_DIR:$BUILD_CUSTOM_DIR -w $BUILD_CUSTOM_DIR "
DOCKER_INTERACTIVE="-it"
#FINN_HOST_BUILD_DIR=$BUILD_DATAFLOW_DIR/build
gecho "Running build_custom: $BUILD_CUSTOM_DIR/build.py"
@@ -148,49 +136,47 @@ else
DOCKER_INTERACTIVE="-it"
fi
+if [ "$FINN_DOCKER_GPU" != 0 ];then
+ gecho "nvidia-docker detected, enabling GPUs"
+ if [ ! -z "$NVIDIA_VISIBLE_DEVICES" ];then
+ FINN_DOCKER_EXTRA+="--runtime nvidia -e NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES "
+ else
+ FINN_DOCKER_EXTRA+="--gpus all "
+ fi
+fi
+
VIVADO_HLS_LOCAL=$VIVADO_PATH
VIVADO_IP_CACHE=$FINN_HOST_BUILD_DIR/vivado_ip_cache
-INSTALL_XRT_DEPS=0
# ensure build dir exists locally
mkdir -p $FINN_HOST_BUILD_DIR
mkdir -p $FINN_SSH_KEY_DIR
gecho "Docker container is named $DOCKER_INST_NAME"
+gecho "Docker tag is named $FINN_DOCKER_TAG"
gecho "Mounting $FINN_HOST_BUILD_DIR into $FINN_HOST_BUILD_DIR"
-gecho "Mounting $VIVADO_PATH into $VIVADO_PATH"
-if [ ! -z "$VITIS_PATH" ];then
- gecho "Mounting $VITIS_PATH into $VITIS_PATH"
- INSTALL_XRT_DEPS=1
-fi
+gecho "Mounting $FINN_XILINX_PATH into $FINN_XILINX_PATH"
gecho "Port-forwarding for Jupyter $JUPYTER_PORT:$JUPYTER_PORT"
gecho "Port-forwarding for Netron $NETRON_PORT:$NETRON_PORT"
gecho "Vivado IP cache dir is at $VIVADO_IP_CACHE"
gecho "Using default PYNQ board $PYNQ_BOARD"
# Build the FINN Docker image
-# Need to ensure this is done within the finn/ root folder:
-OLD_PWD=$(pwd)
-cd $SCRIPTPATH
-docker build -f docker/Dockerfile.finn_dev --tag=$DOCKER_TAG \
- --build-arg GID=$DOCKER_GID \
- --build-arg GNAME=$DOCKER_GNAME \
- --build-arg UNAME=$DOCKER_UNAME \
- --build-arg UID=$DOCKER_UID \
- --build-arg PASSWD=$DOCKER_PASSWD \
- --build-arg INSTALL_XRT_DEPS=$INSTALL_XRT_DEPS \
- --build-arg XRT_DEB_VERSION=$XRT_DEB_VERSION \
- .
-cd $OLD_PWD
+if [ "$FINN_DOCKER_PREBUILT" = "0" ]; then
+ # Need to ensure this is done within the finn/ root folder:
+ OLD_PWD=$(pwd)
+ cd $SCRIPTPATH
+ docker build -f docker/Dockerfile.finn --build-arg XRT_DEB_VERSION=$XRT_DEB_VERSION --tag=$FINN_DOCKER_TAG .
+ cd $OLD_PWD
+fi
# Launch container with current directory mounted
# important to pass the --init flag here for correct Vivado operation, see:
# https://stackoverflow.com/questions/55733058/vivado-synthesis-hangs-in-docker-container-spawned-by-jenkins
-DOCKER_EXEC="docker run -t --rm $DOCKER_INTERACTIVE --init "
+DOCKER_EXEC="docker run -t --rm $DOCKER_INTERACTIVE --tty --init "
DOCKER_EXEC+="--hostname $DOCKER_INST_NAME "
DOCKER_EXEC+="-e SHELL=/bin/bash "
DOCKER_EXEC+="-v $SCRIPTPATH:/workspace/finn "
DOCKER_EXEC+="-v $FINN_HOST_BUILD_DIR:$FINN_HOST_BUILD_DIR "
-DOCKER_EXEC+="-v $FINN_SSH_KEY_DIR:/home/$DOCKER_UNAME/.ssh "
DOCKER_EXEC+="-e FINN_BUILD_DIR=$FINN_HOST_BUILD_DIR "
DOCKER_EXEC+="-e FINN_ROOT="/workspace/finn" "
DOCKER_EXEC+="-e LOCALHOST_URL=$LOCALHOST_URL "
@@ -201,31 +187,42 @@ DOCKER_EXEC+="-e PYNQ_USERNAME=$PYNQ_USERNAME "
DOCKER_EXEC+="-e PYNQ_PASSWORD=$PYNQ_PASSWORD "
DOCKER_EXEC+="-e PYNQ_TARGET_DIR=$PYNQ_TARGET_DIR "
DOCKER_EXEC+="-e NUM_DEFAULT_WORKERS=$NUM_DEFAULT_WORKERS "
+if [ "$FINN_DOCKER_RUN_AS_ROOT" = "0" ];then
+ DOCKER_EXEC+="-v /etc/group:/etc/group:ro "
+ DOCKER_EXEC+="-v /etc/passwd:/etc/passwd:ro "
+ DOCKER_EXEC+="-v /etc/shadow:/etc/shadow:ro "
+ DOCKER_EXEC+="-v /etc/sudoers.d:/etc/sudoers.d:ro "
+ DOCKER_EXEC+="-v $FINN_SSH_KEY_DIR:$HOME/.ssh "
+ DOCKER_EXEC+="--user $DOCKER_UID:$DOCKER_GID "
+else
+ DOCKER_EXEC+="-v $FINN_SSH_KEY_DIR:/root/.ssh "
+fi
if [ ! -z "$IMAGENET_VAL_PATH" ];then
DOCKER_EXEC+="-v $IMAGENET_VAL_PATH:$IMAGENET_VAL_PATH "
DOCKER_EXEC+="-e IMAGENET_VAL_PATH=$IMAGENET_VAL_PATH "
fi
-if [ ! -z "$VIVADO_PATH" ];then
- DOCKER_EXEC+="-e "XILINX_VIVADO=$VIVADO_PATH" "
- DOCKER_EXEC+="-v $VIVADO_PATH:$VIVADO_PATH "
- DOCKER_EXEC+="-e VIVADO_PATH=$VIVADO_PATH "
-fi
-if [ ! -z "$VITIS_PATH" ];then
- if [ -z "$PLATFORM_REPO_PATHS" ];then
- recho "PLATFORM_REPO_PATHS must be set for Vitis/Alveo flows"
- exit -1
+if [ ! -z "$FINN_XILINX_PATH" ];then
+ VIVADO_PATH="$FINN_XILINX_PATH/Vivado/$FINN_XILINX_VERSION"
+ VITIS_PATH="$FINN_XILINX_PATH/Vitis/$FINN_XILINX_VERSION"
+ DOCKER_EXEC+="-v $FINN_XILINX_PATH:$FINN_XILINX_PATH "
+ if [ -d "$VIVADO_PATH" ];then
+ DOCKER_EXEC+="-e "XILINX_VIVADO=$VIVADO_PATH" "
+ DOCKER_EXEC+="-e VIVADO_PATH=$VIVADO_PATH "
+ fi
+ if [ -d "$VITIS_PATH" ];then
+ DOCKER_EXEC+="-e VITIS_PATH=$VITIS_PATH "
+ fi
+ if [ -d "$PLATFORM_REPO_PATHS" ];then
+ DOCKER_EXEC+="-v $PLATFORM_REPO_PATHS:$PLATFORM_REPO_PATHS "
+ DOCKER_EXEC+="-e PLATFORM_REPO_PATHS=$PLATFORM_REPO_PATHS "
+ DOCKER_EXEC+="-e ALVEO_IP=$ALVEO_IP "
+ DOCKER_EXEC+="-e ALVEO_USERNAME=$ALVEO_USERNAME "
+ DOCKER_EXEC+="-e ALVEO_PASSWORD=$ALVEO_PASSWORD "
+ DOCKER_EXEC+="-e ALVEO_BOARD=$ALVEO_BOARD "
+ DOCKER_EXEC+="-e ALVEO_TARGET_DIR=$ALVEO_TARGET_DIR "
fi
- DOCKER_EXEC+="-v $VITIS_PATH:$VITIS_PATH "
- DOCKER_EXEC+="-v $PLATFORM_REPO_PATHS:$PLATFORM_REPO_PATHS "
- DOCKER_EXEC+="-e VITIS_PATH=$VITIS_PATH "
- DOCKER_EXEC+="-e PLATFORM_REPO_PATHS=$PLATFORM_REPO_PATHS "
- DOCKER_EXEC+="-e ALVEO_IP=$ALVEO_IP "
- DOCKER_EXEC+="-e ALVEO_USERNAME=$ALVEO_USERNAME "
- DOCKER_EXEC+="-e ALVEO_PASSWORD=$ALVEO_PASSWORD "
- DOCKER_EXEC+="-e ALVEO_BOARD=$ALVEO_BOARD "
- DOCKER_EXEC+="-e ALVEO_TARGET_DIR=$ALVEO_TARGET_DIR "
fi
-DOCKER_EXEC+="$DOCKER_EXTRA "
-DOCKER_EXEC+="$DOCKER_TAG $DOCKER_CMD"
+DOCKER_EXEC+="$FINN_DOCKER_EXTRA "
+DOCKER_EXEC+="$FINN_DOCKER_TAG $DOCKER_CMD"
$DOCKER_EXEC
diff --git a/setup.cfg b/setup.cfg
index e98077ddf1..96618e0ffc 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -58,9 +58,6 @@ package_dir =
=src
# DON'T CHANGE THE FOLLOWING LINE! IT WILL BE UPDATED BY PYSCAFFOLD!
setup_requires = pyscaffold>=3.2a0,<3.3a0
-# finn-base is added specifically to be able to build on readthedocs
-install_requires =
- finn-base @ git+https://github.com/Xilinx/finn-base#egg=finn-base
# The usage of test_requires is discouraged, see `Dependency Management` docs
# tests_require = pytest; pytest-cov
# Require a specific Python version, e.g. Python 2.7 or >= 3.4
@@ -75,6 +72,19 @@ exclude =
# Add here additional requirements for extra features, to install with:
# `pip install FINN[PDF]` like:
# PDF = ReportLab; RXP
+# finn-base is needed to build the full set of docs
+docs =
+ finn-base==0.0.3
+ docutils==0.17.1
+ dataclasses-json==0.5.2
+ gspread==3.6.0
+ pytest
+ netron
+ vcdvcd
+ torchvision
+ torch
+ qonnx@git+https://github.com/fastmachinelearning/qonnx@main#egg=qonnx
+
# Add here test requirements (semicolon/line-separated)
testing =
pytest
diff --git a/setup.py b/setup.py
index d7e158b560..8fd781462c 100644
--- a/setup.py
+++ b/setup.py
@@ -35,10 +35,11 @@
PyScaffold helps you to put up the scaffold of your new Python project.
Learn more under: https://pyscaffold.org/
"""
-import sys
from pkg_resources import VersionConflict, require
from setuptools import setup
+import sys
+
try:
require("setuptools>=38.3")
except VersionConflict:
diff --git a/src/finn/analysis/fpgadataflow/floorplan_params.py b/src/finn/analysis/fpgadataflow/floorplan_params.py
index 4c8cbf53de..9ba99fb546 100644
--- a/src/finn/analysis/fpgadataflow/floorplan_params.py
+++ b/src/finn/analysis/fpgadataflow/floorplan_params.py
@@ -26,8 +26,8 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from finn.util.fpgadataflow import is_fpgadataflow_node
from finn.custom_op.registry import getCustomOp
+from finn.util.fpgadataflow import is_fpgadataflow_node
def floorplan_params(model):
diff --git a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
index 39d6332aa4..aff99efd80 100644
--- a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
+++ b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
@@ -25,8 +25,8 @@
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import warnings
import os
+import warnings
import xml.etree.ElementTree as ET
import finn.custom_op.registry as registry
diff --git a/src/finn/analysis/fpgadataflow/post_synth_res.py b/src/finn/analysis/fpgadataflow/post_synth_res.py
index 79204c54cd..4b81791094 100644
--- a/src/finn/analysis/fpgadataflow/post_synth_res.py
+++ b/src/finn/analysis/fpgadataflow/post_synth_res.py
@@ -29,9 +29,9 @@
import os
import xml.etree.ElementTree as ET
-from finn.transformation.move_reshape import _is_fpgadataflow_node
from finn.core.modelwrapper import ModelWrapper
from finn.custom_op.registry import getCustomOp
+from finn.transformation.move_reshape import _is_fpgadataflow_node
def post_synth_res(model, override_synth_report_filename=None):
diff --git a/src/finn/analysis/verify_custom_nodes.py b/src/finn/analysis/verify_custom_nodes.py
index 9af1e9a4fe..62dac2827f 100644
--- a/src/finn/analysis/verify_custom_nodes.py
+++ b/src/finn/analysis/verify_custom_nodes.py
@@ -32,7 +32,8 @@
def verify_nodes(model):
"""Checks if custom ops in graph are correctly built, with all attributes
- and inputs.
+ and inputs. Please note that many FINN CustomOps don't yet implement the
+ verify_node function required for this analysis pass to work correctly.
Returns {node op_type : info_messages}
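
As a usage sketch, the pass is applied through ModelWrapper's analysis mechanism; the filename below is a placeholder:

    # minimal sketch: run the verify_nodes analysis pass on a model
    from finn.analysis.verify_custom_nodes import verify_nodes
    from finn.core.modelwrapper import ModelWrapper

    model = ModelWrapper("model.onnx")  # placeholder filename
    info = model.analysis(verify_nodes)  # {node op_type : info_messages}
    for op_type, messages in info.items():
        print(op_type, messages)
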
diff --git a/src/finn/builder/build_dataflow.py b/src/finn/builder/build_dataflow.py
index c46bfa48df..c4664a5471 100644
--- a/src/finn/builder/build_dataflow.py
+++ b/src/finn/builder/build_dataflow.py
@@ -26,20 +26,21 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from finn.core.modelwrapper import ModelWrapper
-import os
-import json
-import time
import clize
-import sys
+import json
import logging
+import os
import pdb # NOQA
+import sys
+import time
import traceback
-from finn.builder.build_dataflow_steps import build_dataflow_step_lookup
+
from finn.builder.build_dataflow_config import (
DataflowBuildConfig,
default_build_dataflow_steps,
)
+from finn.builder.build_dataflow_steps import build_dataflow_step_lookup
+from finn.core.modelwrapper import ModelWrapper
# adapted from https://stackoverflow.com/a/39215961
@@ -61,7 +62,7 @@ def flush(self):
pass
-def resolve_build_steps(cfg: DataflowBuildConfig):
+def resolve_build_steps(cfg: DataflowBuildConfig, partial: bool = True):
steps = cfg.steps
if steps is None:
steps = default_build_dataflow_steps
@@ -75,19 +76,56 @@ def resolve_build_steps(cfg: DataflowBuildConfig):
steps_as_fxns.append(transform_step)
else:
raise Exception("Could not resolve build step: " + str(transform_step))
+ if partial:
+ step_names = list(map(lambda x: x.__name__, steps_as_fxns))
+ if cfg.start_step is None:
+ start_ind = 0
+ else:
+ start_ind = step_names.index(cfg.start_step)
+ if cfg.stop_step is None:
+ stop_ind = len(step_names) - 1
+ else:
+ stop_ind = step_names.index(cfg.stop_step)
+ steps_as_fxns = steps_as_fxns[start_ind : (stop_ind + 1)]
+
return steps_as_fxns
+def resolve_step_filename(
+ step_name: str, cfg: DataflowBuildConfig, step_delta: int = 0
+):
+ step_names = list(
+ map(lambda x: x.__name__, resolve_build_steps(cfg, partial=False))
+ )
+ assert step_name in step_names, "start_step %s not found" % step_name
+ step_no = step_names.index(step_name) + step_delta
+ assert step_no >= 0, "Invalid step+delta combination"
+ assert step_no < len(step_names), "Invalid step+delta combination"
+ filename = cfg.output_dir + "/intermediate_models/"
+ filename += "%s.onnx" % (step_names[step_no])
+ return filename
+
+
def build_dataflow_cfg(model_filename, cfg: DataflowBuildConfig):
"""Best-effort build a dataflow accelerator using the given configuration.
:param model_filename: ONNX model filename to build
:param cfg: Build configuration
"""
- model = ModelWrapper(model_filename)
+ # if start_step is specified, override the input model
+ if cfg.start_step is None:
+ print("Building dataflow accelerator from " + model_filename)
+ model = ModelWrapper(model_filename)
+ else:
+ intermediate_model_filename = resolve_step_filename(cfg.start_step, cfg, -1)
+ print(
+ "Building dataflow accelerator from intermediate checkpoint "
+ + intermediate_model_filename
+ )
+ model = ModelWrapper(intermediate_model_filename)
assert type(model) is ModelWrapper
finn_build_dir = os.environ["FINN_BUILD_DIR"]
- print("Building dataflow accelerator from " + model_filename)
+
print("Intermediate outputs will be generated in " + finn_build_dir)
print("Final outputs will be generated in " + cfg.output_dir)
print("Build log is at " + cfg.output_dir + "/build_dataflow.log")
@@ -131,7 +169,7 @@ def build_dataflow_cfg(model_filename, cfg: DataflowBuildConfig):
sys.stdout = stdout_orig
sys.stderr = stderr_orig
time_per_step[step_name] = step_end - step_start
- chkpt_name = "%d_%s.onnx" % (step_num, step_name)
+ chkpt_name = "%s.onnx" % (step_name)
if cfg.save_intermediate_models:
intermediate_model_dir = cfg.output_dir + "/intermediate_models"
if not os.path.exists(intermediate_model_dir):
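
The start_step/stop_step handling added above boils down to slicing the resolved step list by name; a self-contained sketch of the same logic, using placeholder step names and values:

    # sketch of the start/stop slicing performed in resolve_build_steps
    step_names = ["step_tidy_up", "step_streamline", "step_convert_to_hls"]
    start_step, stop_step = "step_streamline", None  # as they would come from cfg

    start_ind = 0 if start_step is None else step_names.index(start_step)
    stop_ind = len(step_names) - 1 if stop_step is None else step_names.index(stop_step)
    print(step_names[start_ind : stop_ind + 1])
    # ['step_streamline', 'step_convert_to_hls']
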
diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
index bd938f1741..807fd70686 100644
--- a/src/finn/builder/build_dataflow_config.py
+++ b/src/finn/builder/build_dataflow_config.py
@@ -26,14 +26,15 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from typing import List, Optional, Any
-from finn.util.basic import pynq_part_map, alveo_part_map
-from finn.transformation.fpgadataflow.vitis_build import VitisOptStrategy
-from enum import Enum
+import numpy as np
+import os
from dataclasses import dataclass
from dataclasses_json import dataclass_json
-import os
-import numpy as np
+from enum import Enum
+from typing import Any, List, Optional
+
+from finn.transformation.fpgadataflow.vitis_build import VitisOptStrategy
+from finn.util.basic import alveo_part_map, pynq_part_map
class ShellFlowType(str, Enum):
@@ -88,6 +89,8 @@ class LargeFIFOMemStyle(str, Enum):
class VerificationStepType(str, Enum):
"Steps at which FINN ONNX execution can be launched for verification."
+ #: verify after step_qonnx_to_finn, using Python execution
+ QONNX_TO_FINN_PYTHON = "finn_onnx_python"
#: verify after step_tidy_up, using Python execution
TIDY_UP_PYTHON = "initial_python"
#: verify after step_streamline , using Python execution
@@ -102,6 +105,7 @@ class VerificationStepType(str, Enum):
#: specified order. Use the `steps` as part of build config to restrict which
#: steps will be run.
default_build_dataflow_steps = [
+ "step_qonnx_to_finn",
"step_tidy_up",
"step_streamline",
"step_convert_to_hls",
@@ -122,6 +126,7 @@ class VerificationStepType(str, Enum):
#: List of steps to run for an estimate-only (no synthesis) dataflow build
estimate_only_dataflow_steps = [
+ "step_qonnx_to_finn",
"step_tidy_up",
"step_streamline",
"step_convert_to_hls",
@@ -171,6 +176,13 @@ class DataflowBuildConfig:
#: that will override the target_fps setting here.
target_fps: Optional[int] = None
+ #: (Optional) Use two-pass relaxation for folding, only relevant if target_fps
+ #: is set. If enabled, parallelization will internally run a second time if the
+ #: target cycles from the first pass could not be achieved, instead using the
+ #: achievable target to obtain a balanced pipeline. Disabling this can be
+ #: useful for decreasing latency (even though throughput won't increase).
+ folding_two_pass_relaxation: Optional[bool] = True
+
#: (Optional) At which steps the generated intermediate output model
#: will be verified. See documentation of VerificationStepType for
#: available options.
@@ -184,6 +196,19 @@ class DataflowBuildConfig:
#: verification. Only required if verify_steps is not empty.
verify_expected_output_npy: Optional[str] = "expected_output.npy"
+ #: (Optional) Save full execution context for each of the verify_steps.
+ #: By default, only the top-level graph output is saved.
+ verify_save_full_context: Optional[bool] = False
+
+ #: (Optional) Save .vcd waveforms from rtlsim under reports.
+ #: By default, waveforms won't be saved.
+ verify_save_rtlsim_waveforms: Optional[bool] = False
+
+ #: (Optional) Run synthesis to generate a .dcp for the stitched-IP output product.
+ #: This can make it easier to treat the stitched IP as a standalone artifact without requiring
+ #: the full list of layer IP build directories. By default, synthesis will not run.
+ stitched_ip_gen_dcp: Optional[bool] = False
+
#: (Optional) Control the maximum width of the per-PE MVAU stream while
#: exploring the parallelization attributes to reach target_fps
#: Only relevant if target_fps is specified.
@@ -263,6 +288,24 @@ class DataflowBuildConfig:
#: - functions are called with (model, DataflowBuildConfig) as args
steps: Optional[List[Any]] = None
+ #: If given, start from this step, loading the intermediate model generated
+ #: from the previous step (save_intermediate_models must be enabled)
+ start_step: Optional[str] = None
+
+ #: If given, stop at this step.
+ stop_step: Optional[str] = None
+
+ #: The optional argument `max_multithreshold_bit_width` affects which Quant nodes
+ #: of the QONNX format get converted to the MultiThreshold nodes of FINN. This
+ #: only affects Quant nodes in the activation path. Quant nodes that define a
+ #: bit width larger than `max_multithreshold_bit_width` are not converted to
+ #: MultiThreshold nodes; a warning is raised instead.
+ #: If not given, `max_multithreshold_bit_width` defaults to 8.
+ max_multithreshold_bit_width: Optional[int] = 8
+
+ #: Override the number of inputs for rtlsim performance measurement.
+ rtlsim_batch_size: Optional[int] = 1
+
def _resolve_hls_clk_period(self):
if self.hls_clk_period_ns is None:
# use same clk for synth and hls if not explicitly specified
@@ -332,4 +375,7 @@ def _resolve_verification_io_pair(self):
+ self.verify_expected_output_npy
)
verify_expected_output_npy = np.load(self.verify_expected_output_npy)
- return (verify_input_npy, verify_expected_output_npy)
+ return (
+ verify_input_npy.astype(np.float32),
+ verify_expected_output_npy.astype(np.float32),
+ )
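
Taken together, the new fields can be set from a build script. A hedged sketch follows; the output directory, clock period and output list are placeholders, and any other required DataflowBuildConfig fields still apply:

    from finn.builder.build_dataflow_config import DataflowBuildConfig

    cfg = DataflowBuildConfig(
        output_dir="build_out",           # placeholder
        synth_clk_period_ns=10.0,         # placeholder
        generate_outputs=[],              # fill in DataflowOutputType values
        start_step="step_streamline",     # resume from a saved intermediate model
        stop_step="step_convert_to_hls",  # stop the flow early
        max_multithreshold_bit_width=8,   # Quant-to-MultiThreshold conversion cutoff
        rtlsim_batch_size=100,            # inputs for rtlsim performance measurement
        verify_save_full_context=True,    # save .npz with the full execution context
    )
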
diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py
index 50cd9ed4ff..c977f15e70 100644
--- a/src/finn/builder/build_dataflow_steps.py
+++ b/src/finn/builder/build_dataflow_steps.py
@@ -26,78 +26,85 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from finn.core.modelwrapper import ModelWrapper
-import os
import json
+import numpy as np
+import os
+from copy import deepcopy
+from distutils.dir_util import copy_tree
+from qonnx.util.cleanup import cleanup_model
+from shutil import copy
+
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
import finn.transformation.streamline.absorb as absorb
+from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
+from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
+from finn.analysis.fpgadataflow.op_and_param_counts import (
+ aggregate_dict_keys,
+ op_and_param_counts,
+)
+from finn.analysis.fpgadataflow.res_estimation import (
+ res_estimation,
+ res_estimation_complete,
+)
+from finn.builder.build_dataflow_config import (
+ DataflowBuildConfig,
+ DataflowOutputType,
+ ShellFlowType,
+ VerificationStepType,
+)
+from finn.core.modelwrapper import ModelWrapper
+from finn.core.onnx_exec import execute_onnx
+from finn.core.throughput_test import throughput_test_rtlsim
+from finn.custom_op.registry import getCustomOp
from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.general import (
- ApplyConfig,
- GiveReadableTensorNames,
- GiveUniqueNodeNames,
- RemoveUnusedTensors,
- RemoveStaticGraphInputs,
+from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.create_dataflow_partition import (
+ CreateDataflowPartition,
)
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.streamline import Streamline
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
-from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
-
-from shutil import copy, copytree
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
+from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
+from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
+ ReplaceVerilogRelPaths,
+)
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.fpgadataflow.set_fifo_depths import (
InsertAndSetFIFODepths,
RemoveShallowFIFOs,
)
-from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
-from finn.transformation.fpgadataflow.vitis_build import VitisBuild
-from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
from finn.transformation.fpgadataflow.set_folding import SetFolding
-from finn.transformation.fpgadataflow.create_dataflow_partition import (
- CreateDataflowPartition,
-)
-from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
- ReplaceVerilogRelPaths,
-)
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.analysis.fpgadataflow.res_estimation import (
- res_estimation,
- res_estimation_complete,
+from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext
+from finn.transformation.fpgadataflow.vitis_build import VitisBuild
+from finn.transformation.general import (
+ ApplyConfig,
+ GiveReadableTensorNames,
+ GiveUniqueNodeNames,
+ RemoveStaticGraphInputs,
+ RemoveUnusedTensors,
)
-from finn.analysis.fpgadataflow.op_and_param_counts import (
- aggregate_dict_keys,
- op_and_param_counts,
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
+from finn.transformation.qonnx.quant_act_to_multithreshold import (
+ default_filter_function_generator,
)
-from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
-from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
+from finn.transformation.streamline import Streamline
+from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
+from finn.util.basic import get_rtlsim_trace_depth
from finn.util.config import extract_model_config_to_json
-from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext
-from finn.builder.build_dataflow_config import (
- DataflowBuildConfig,
- DataflowOutputType,
- ShellFlowType,
- VerificationStepType,
-)
-from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
-from finn.core.onnx_exec import execute_onnx
-import numpy as np
+from finn.util.pyverilator import pyverilate_get_liveness_threshold_cycles
from finn.util.test import execute_parent
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
-from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.core.throughput_test import throughput_test_rtlsim
-from copy import deepcopy
def verify_step(
@@ -115,21 +122,108 @@ def verify_step(
parent_model_fn = intermediate_models_dir + "/dataflow_parent.onnx"
child_model_fn = intermediate_models_dir + "/verify_%s.onnx" % step_name
model.save(child_model_fn)
- out_npy = execute_parent(parent_model_fn, child_model_fn, in_npy)
+ out_tensor_name = ModelWrapper(parent_model_fn).graph.output[0].name
+ out_dict = execute_parent(
+ parent_model_fn, child_model_fn, in_npy, return_full_ctx=True
+ )
+ out_npy = out_dict[out_tensor_name]
else:
inp_tensor_name = model.graph.input[0].name
out_tensor_name = model.graph.output[0].name
inp_dict = {inp_tensor_name: in_npy}
- out_dict = execute_onnx(model, inp_dict)
+ out_dict = execute_onnx(model, inp_dict, True)
out_npy = out_dict[out_tensor_name]
res = np.isclose(exp_out_npy, out_npy, atol=1e-3).all()
res_to_str = {True: "SUCCESS", False: "FAIL"}
res_str = res_to_str[res]
- verification_output_fn = verify_out_dir + "/verify_%s_%s.npy" % (step_name, res_str)
- np.save(verification_output_fn, out_npy)
+ if cfg.verify_save_full_context:
+ verification_output_fn = verify_out_dir + "/verify_%s_%s.npz" % (
+ step_name,
+ res_str,
+ )
+ np.savez(verification_output_fn, **out_dict)
+ else:
+ verification_output_fn = verify_out_dir + "/verify_%s_%s.npy" % (
+ step_name,
+ res_str,
+ )
+ np.save(verification_output_fn, out_npy)
print("Verification for %s : %s" % (step_name, res_str))
+def prepare_for_stitched_ip_rtlsim(verify_model, cfg):
+ need_restitch = False
+ # rtlsim only supports certain impl_style for some nodes
+ # StreamingFIFO must have impl_style=rtl
+ for fifo_layer in verify_model.get_nodes_by_op_type("StreamingFIFO"):
+ inst = getCustomOp(fifo_layer)
+ if inst.get_nodeattr("impl_style") != "rtl":
+ inst.set_nodeattr("impl_style", "rtl")
+ inst.set_nodeattr("code_gen_dir_ipgen", "")
+ inst.set_nodeattr("ipgen_path", "")
+ need_restitch = True
+ # StreamingDataWidthConverter must have impl_style=hls
+ for dwc_layer in verify_model.get_nodes_by_op_type(
+ "StreamingDataWidthConverter_Batch"
+ ):
+ inst = getCustomOp(dwc_layer)
+ if inst.get_nodeattr("impl_style") != "hls":
+ inst.set_nodeattr("impl_style", "hls")
+ inst.set_nodeattr("code_gen_dir_ipgen", "")
+ inst.set_nodeattr("ipgen_path", "")
+ need_restitch = True
+ # if we've made alterations to the model, need to do some re-prep
+ if need_restitch:
+ print("Need to regen/re-stitch some IP for STITCHED_IP_RTLSIM")
+ verify_model = verify_model.transform(
+ PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())
+ )
+ verify_model = verify_model.transform(HLSSynthIP())
+ verify_model = verify_model.transform(
+ CreateStitchedIP(
+ cfg._resolve_fpga_part(),
+ cfg.synth_clk_period_ns,
+ vitis=False,
+ )
+ )
+ # set top-level prop for stitched-ip rtlsim and launch
+ verify_model.set_metadata_prop("exec_mode", "rtlsim")
+ # TODO make configurable
+ # verify_model.set_metadata_prop("rtlsim_trace", "trace.vcd")
+ return verify_model
+
+
+def step_qonnx_to_finn(model: ModelWrapper, cfg: DataflowBuildConfig):
+ """
+ This step will only execute if QONNX nodes are found.
+ These include the following op_types: "Quant", "Trunc" and "BinaryQuant".
+ If such nodes are found, the step will run the tidy-up step from QONNX
+ and then convert the QONNX model to the FINN-ONNX dialect.
+ """
+ # Check if any QONNX nodes exist, i.e. BinaryQuant, Quant or Trunc
+ q_count = 0
+ for op_type in ["BinaryQuant", "Quant", "Trunc"]:
+ q_count += len(model.get_nodes_by_op_type(op_type))
+ if q_count == 0:
+ return model
+
+ # QONNX cleanup
+ model = cleanup_model(model)
+ # QONNX to FINN-ONNX
+ model = model.transform(
+ ConvertQONNXtoFINN(
+ filter_function=default_filter_function_generator(
+ max_multithreshold_bit_width=cfg.max_multithreshold_bit_width
+ )
+ )
+ )
+
+ if VerificationStepType.QONNX_TO_FINN_PYTHON in cfg._resolve_verification_steps():
+ verify_step(model, cfg, "qonnx_to_finn_python", need_parent=False)
+
+ return model
+
+
def step_tidy_up(model: ModelWrapper, cfg: DataflowBuildConfig):
"""Run the tidy-up step on given model. This includes shape and datatype
inference, constant folding, and giving nodes and tensors better names.
@@ -164,6 +258,7 @@ def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
model = model.transform(MakeMaxPoolNHWC())
model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
model = model.transform(MakeMaxPoolNHWC())
+ model = model.transform(absorb.AbsorbConsecutiveTransposes())
model = model.transform(ConvertBipolarMatMulToXnorPopcount())
model = model.transform(Streamline())
# absorb final add-mul nodes into TopK
@@ -212,7 +307,12 @@ def step_create_dataflow_partition(model: ModelWrapper, cfg: DataflowBuildConfig
nodes, which point to a separate ONNX file. Dataflow accelerator synthesis
can only be performed on those HLSCustomOp sub-graphs."""
- parent_model = model.transform(CreateDataflowPartition())
+ parent_model = model.transform(
+ CreateDataflowPartition(
+ partition_model_dir=cfg.output_dir
+ + "/intermediate_models/supported_op_partitions"
+ )
+ )
sdp_nodes = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")
assert len(sdp_nodes) == 1, "Only a single StreamingDataflowPartition supported."
sdp_node = sdp_nodes[0]
@@ -226,13 +326,32 @@ def step_create_dataflow_partition(model: ModelWrapper, cfg: DataflowBuildConfig
def step_target_fps_parallelization(model: ModelWrapper, cfg: DataflowBuildConfig):
"""If target_fps was specified, use the SetFolding transformation to determine
- parallelization attributes."""
+ parallelization attributes. The auto-generated config will be saved as
+ auto_folding_config.json in the output directory, which can serve as a basis for
+ customizing the folding factors further."""
target_cycles_per_frame = cfg._resolve_cycles_per_frame()
if target_cycles_per_frame is not None:
model = model.transform(
- SetFolding(target_cycles_per_frame, mvau_wwidth_max=cfg.mvau_wwidth_max)
+ SetFolding(
+ target_cycles_per_frame,
+ mvau_wwidth_max=cfg.mvau_wwidth_max,
+ two_pass_relaxation=cfg.folding_two_pass_relaxation,
+ )
)
+ # extract the suggested configuration and save it as json
+ hw_attrs = [
+ "PE",
+ "SIMD",
+ "ram_style",
+ "resType",
+ "mem_mode",
+ "runtime_writeable_weights",
+ ]
+ extract_model_config_to_json(
+ model, cfg.output_dir + "/auto_folding_config.json", hw_attrs
+ )
+
return model
@@ -380,25 +499,35 @@ def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig):
if DataflowOutputType.STITCHED_IP in cfg.generate_outputs:
stitched_ip_dir = cfg.output_dir + "/stitched_ip"
model = model.transform(
- CreateStitchedIP(cfg._resolve_fpga_part(), cfg.synth_clk_period_ns)
+ CreateStitchedIP(
+ cfg._resolve_fpga_part(),
+ cfg.synth_clk_period_ns,
+ vitis=cfg.stitched_ip_gen_dcp,
+ )
)
# TODO copy all ip sources into output dir? as zip?
- copytree(model.get_metadata_prop("vivado_stitch_proj"), stitched_ip_dir)
+ copy_tree(model.get_metadata_prop("vivado_stitch_proj"), stitched_ip_dir)
print("Vivado stitched IP written into " + stitched_ip_dir)
if VerificationStepType.STITCHED_IP_RTLSIM in cfg._resolve_verification_steps():
# prepare ip-stitched rtlsim
verify_model = deepcopy(model)
- # rtlsim only supports impl_style=rtl for StreamingFIFO, ensure that
- for fifo_layer in verify_model.get_nodes_by_op_type("StreamingFIFO"):
- getCustomOp(fifo_layer).set_nodeattr("impl_style", "rtl")
- # similarly for StreamingDataWidthConverter with impl_style=hls
- for dwc_layer in verify_model.get_nodes_by_op_type(
- "StreamingDataWidthConverter_Batch"
- ):
- getCustomOp(dwc_layer).set_nodeattr("impl_style", "hls")
- verify_model = verify_model.transform(PrepareRTLSim())
- verify_model.set_metadata_prop("exec_mode", "rtlsim")
+ verify_model = prepare_for_stitched_ip_rtlsim(verify_model, cfg)
+ # use critical path estimate to set rtlsim liveness threshold
+ # (very conservative)
+ verify_model = verify_model.transform(AnnotateCycles())
+ estimate_network_performance = verify_model.analysis(dataflow_performance)
+ prev_liveness = pyverilate_get_liveness_threshold_cycles()
+ os.environ["LIVENESS_THRESHOLD"] = str(
+ int(estimate_network_performance["critical_path_cycles"])
+ )
+ if cfg.verify_save_rtlsim_waveforms:
+ report_dir = cfg.output_dir + "/report"
+ os.makedirs(report_dir, exist_ok=True)
+ verify_model.set_metadata_prop(
+ "rtlsim_trace", "%s/verify_rtlsim.vcd" % (report_dir)
+ )
verify_step(verify_model, cfg, "stitched_ip_rtlsim", need_parent=True)
+ os.environ["LIVENESS_THRESHOLD"] = str(prev_liveness)
return model
@@ -411,30 +540,30 @@ def step_measure_rtlsim_performance(model: ModelWrapper, cfg: DataflowBuildConfi
assert (
DataflowOutputType.STITCHED_IP in cfg.generate_outputs
), "rtlsim_perf needs stitched IP"
+ report_dir = cfg.output_dir + "/report"
+ os.makedirs(report_dir, exist_ok=True)
# prepare ip-stitched rtlsim
rtlsim_model = deepcopy(model)
- # rtlsim only supports impl_style=rtl for StreamingFIFO, ensure that
- for fifo_layer in rtlsim_model.get_nodes_by_op_type("StreamingFIFO"):
- getCustomOp(fifo_layer).set_nodeattr("impl_style", "rtl")
- # similarly for StreamingDataWidthConverter with impl_style=hls
- for dwc_layer in rtlsim_model.get_nodes_by_op_type(
- "StreamingDataWidthConverter_Batch"
- ):
- getCustomOp(dwc_layer).set_nodeattr("impl_style", "hls")
- rtlsim_model = rtlsim_model.transform(PrepareRTLSim())
- rtlsim_model.set_metadata_prop("exec_mode", "rtlsim")
+ rtlsim_model = prepare_for_stitched_ip_rtlsim(rtlsim_model, cfg)
# run with single input to get latency
- rtlsim_perf_dict = throughput_test_rtlsim(rtlsim_model, 1)
- rtlsim_latency = rtlsim_perf_dict["cycles"]
- # run with num inputs equal to layers to fill the whole pipeline
- # to get the steady-state throughput
- rtlsim_bs = len(rtlsim_model.graph.node)
+ orig_rtlsim_trace_depth = get_rtlsim_trace_depth()
+ rtlsim_bs = int(cfg.rtlsim_batch_size)
+ assert rtlsim_bs > 0, "rtlsim batch size must be >0"
+ if cfg.verify_save_rtlsim_waveforms:
+ # set depth to 3 for layer-by-layer visibility
+ os.environ["RTLSIM_TRACE_DEPTH"] = "3"
+ rtlsim_model.set_metadata_prop(
+ "rtlsim_trace", "%s/rtlsim_perf_batch_%d.vcd" % (report_dir, rtlsim_bs)
+ )
+ rtlsim_model.set_metadata_prop("extra_verilator_args", str(["-CFLAGS", "-O3"]))
rtlsim_perf_dict = throughput_test_rtlsim(rtlsim_model, rtlsim_bs)
+ rtlsim_latency = rtlsim_perf_dict["cycles"]
rtlsim_perf_dict["latency_cycles"] = rtlsim_latency
- report_dir = cfg.output_dir + "/report"
- os.makedirs(report_dir, exist_ok=True)
with open(report_dir + "/rtlsim_performance.json", "w") as f:
json.dump(rtlsim_perf_dict, f, indent=2)
+ if cfg.verify_save_rtlsim_waveforms:
+ # restore original trace depth
+ os.environ["RTLSIM_TRACE_DEPTH"] = str(orig_rtlsim_trace_depth)
return model
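
The JSON report written here can be inspected offline; a small sketch, with the path assumed relative to the configured output_dir:

    import json

    # sketch: read back the rtlsim performance report written above
    with open("build_out/report/rtlsim_performance.json") as f:  # placeholder path
        perf = json.load(f)
    print("latency (cycles):", perf["latency_cycles"])
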
@@ -446,7 +575,7 @@ def step_make_pynq_driver(model: ModelWrapper, cfg: DataflowBuildConfig):
if DataflowOutputType.PYNQ_DRIVER in cfg.generate_outputs:
driver_dir = cfg.output_dir + "/driver"
model = model.transform(MakePYNQDriver(cfg._resolve_driver_platform()))
- copytree(model.get_metadata_prop("pynq_driver_dir"), driver_dir)
+ copy_tree(model.get_metadata_prop("pynq_driver_dir"), driver_dir)
print("PYNQ Python driver written into " + driver_dir)
return model
@@ -487,9 +616,15 @@ def step_synthesize_bitfile(model: ModelWrapper, cfg: DataflowBuildConfig):
os.makedirs(bitfile_dir, exist_ok=True)
report_dir = cfg.output_dir + "/report"
os.makedirs(report_dir, exist_ok=True)
+ partition_model_dir = cfg.output_dir + "/intermediate_models/kernel_partitions"
if cfg.shell_flow_type == ShellFlowType.VIVADO_ZYNQ:
model = model.transform(
- ZynqBuild(cfg.board, cfg.synth_clk_period_ns, cfg.enable_hw_debug)
+ ZynqBuild(
+ cfg.board,
+ cfg.synth_clk_period_ns,
+ cfg.enable_hw_debug,
+ partition_model_dir=partition_model_dir,
+ )
)
copy(model.get_metadata_prop("bitfile"), bitfile_dir + "/finn-accel.bit")
copy(model.get_metadata_prop("hw_handoff"), bitfile_dir + "/finn-accel.hwh")
@@ -513,6 +648,7 @@ def step_synthesize_bitfile(model: ModelWrapper, cfg: DataflowBuildConfig):
strategy=cfg._resolve_vitis_opt_strategy(),
enable_debug=cfg.enable_hw_debug,
floorplan_file=cfg.vitis_floorplan_file,
+ partition_model_dir=partition_model_dir,
)
)
copy(model.get_metadata_prop("bitfile"), bitfile_dir + "/finn-accel.xclbin")
@@ -535,13 +671,14 @@ def step_deployment_package(model: ModelWrapper, cfg: DataflowBuildConfig):
bitfile_dir = cfg.output_dir + "/bitfile"
driver_dir = cfg.output_dir + "/driver"
os.makedirs(deploy_dir, exist_ok=True)
- copytree(bitfile_dir, deploy_dir + "/bitfile")
- copytree(driver_dir, deploy_dir + "/driver")
+ copy_tree(bitfile_dir, deploy_dir + "/bitfile")
+ copy_tree(driver_dir, deploy_dir + "/driver")
return model
#: map step name strings to step functions
build_dataflow_step_lookup = {
+ "step_qonnx_to_finn": step_qonnx_to_finn,
"step_tidy_up": step_tidy_up,
"step_streamline": step_streamline,
"step_convert_to_hls": step_convert_to_hls,
diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py
index b20b652254..417a505898 100644
--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -26,6 +26,8 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+from finn.custom_op.fpgadataflow.addstreams_batch import AddStreams_Batch
+from finn.custom_op.fpgadataflow.channelwise_op_batch import ChannelwiseOp_Batch
from finn.custom_op.fpgadataflow.convolutioninputgenerator import (
ConvolutionInputGenerator,
)
@@ -33,28 +35,28 @@
ConvolutionInputGenerator1D,
)
from finn.custom_op.fpgadataflow.downsampler import DownSampler
-from finn.custom_op.fpgadataflow.streamingfclayer_batch import StreamingFCLayer_Batch
-from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch
-from finn.custom_op.fpgadataflow.streamingfifo import StreamingFIFO
-from finn.custom_op.fpgadataflow.tlastmarker import TLastMarker
+from finn.custom_op.fpgadataflow.duplicatestreams_batch import DuplicateStreams_Batch
+from finn.custom_op.fpgadataflow.fmpadding_batch import FMPadding_Batch
+from finn.custom_op.fpgadataflow.globalaccpool_batch import GlobalAccPool_Batch
+from finn.custom_op.fpgadataflow.iodma import IODMA
+from finn.custom_op.fpgadataflow.labelselect_batch import LabelSelect_Batch
+from finn.custom_op.fpgadataflow.lookup import Lookup
+from finn.custom_op.fpgadataflow.pool_batch import Pool_Batch
+from finn.custom_op.fpgadataflow.streamingdataflowpartition import (
+ StreamingDataflowPartition,
+)
from finn.custom_op.fpgadataflow.streamingdatawidthconverter_batch import (
StreamingDataWidthConverter_Batch,
)
-from finn.custom_op.fpgadataflow.globalaccpool_batch import GlobalAccPool_Batch
-from finn.custom_op.fpgadataflow.pool_batch import Pool_Batch
-from finn.custom_op.fpgadataflow.fmpadding_batch import FMPadding_Batch
+from finn.custom_op.fpgadataflow.streamingfclayer_batch import StreamingFCLayer_Batch
+from finn.custom_op.fpgadataflow.streamingfifo import StreamingFIFO
+from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch
from finn.custom_op.fpgadataflow.thresholding_batch import Thresholding_Batch
-from finn.custom_op.fpgadataflow.addstreams_batch import AddStreams_Batch
-from finn.custom_op.fpgadataflow.labelselect_batch import LabelSelect_Batch
-from finn.custom_op.fpgadataflow.duplicatestreams_batch import DuplicateStreams_Batch
+from finn.custom_op.fpgadataflow.tlastmarker import TLastMarker
+from finn.custom_op.fpgadataflow.upsampler import UpsampleNearestNeighbour_Batch
from finn.custom_op.fpgadataflow.vector_vector_activate_batch import (
Vector_Vector_Activate_Batch,
)
-from finn.custom_op.fpgadataflow.channelwise_op_batch import ChannelwiseOp_Batch
-from finn.custom_op.fpgadataflow.iodma import IODMA
-from finn.custom_op.fpgadataflow.streamingdataflowpartition import (
- StreamingDataflowPartition,
-)
custom_op = dict()
@@ -79,3 +81,5 @@
custom_op["ChannelwiseOp_Batch"] = ChannelwiseOp_Batch
custom_op["IODMA"] = IODMA
custom_op["StreamingDataflowPartition"] = StreamingDataflowPartition
+custom_op["UpsampleNearestNeighbour_Batch"] = UpsampleNearestNeighbour_Batch
+custom_op["Lookup"] = Lookup
diff --git a/src/finn/custom_op/fpgadataflow/addstreams_batch.py b/src/finn/custom_op/fpgadataflow/addstreams_batch.py
index 38940ccb94..fa80e47485 100644
--- a/src/finn/custom_op/fpgadataflow/addstreams_batch.py
+++ b/src/finn/custom_op/fpgadataflow/addstreams_batch.py
@@ -26,13 +26,12 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-
import numpy as np
+import os
import warnings
+
from finn.core.datatype import DataType
from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from onnx import TensorProto, helper
from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
@@ -84,19 +83,7 @@ def make_shape_compatible_op(self, model):
assert ishape == exp_ishape, "Unexpected input1 shape."
ishape = tuple(model.get_tensor_shape(self.onnx_node.input[1]))
assert ishape == exp_ishape, "Unexpected input2 shape."
- # implement tensor with correct shape
- values = np.random.randn(*oshape).astype(np.float32)
- return helper.make_node(
- "Constant",
- inputs=[],
- outputs=[self.onnx_node.output[0]],
- value=helper.make_tensor(
- name="const_tensor",
- data_type=TensorProto.FLOAT,
- dims=values.shape,
- vals=values.flatten().astype(float),
- ),
- )
+ return super().make_const_shape_op(oshape)
def infer_node_datatype(self, model):
node = self.onnx_node
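This diff repeatedly collapses a hand-rolled Constant node into super().make_const_shape_op(oshape). The helper itself is not part of this diff; a plausible sketch of it, consistent with the RandomNormal-based shape proxy introduced for LabelSelect_Batch further down, is:

from onnx import TensorProto, helper

def make_const_shape_op(self, shape):
    # shape-only stand-in node: carries the output shape for shape inference
    # without materializing any tensor data in the model
    return helper.make_node(
        "RandomNormal",
        inputs=[],
        outputs=[self.onnx_node.output[0]],
        mean=0.0,
        scale=1.0,
        dtype=TensorProto.FLOAT,
        shape=list(shape),
    )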
diff --git a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
index 097ec336ff..4961f61482 100644
--- a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
+++ b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
@@ -26,12 +26,11 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from math import ceil
-import os
-
import numpy as np
+import os
+import warnings
+from math import ceil
-from onnx import TensorProto, helper
from finn.core.datatype import DataType
from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
from finn.util.data_packing import (
@@ -39,9 +38,8 @@
numpy_to_hls_code,
rtlsim_output_to_npy,
)
-from . import templates
-import warnings
+from . import templates
# ONNX i/o tensor shape assumptions for channelwise ops:
# input 0 is the input tensor, shape (..., NumChannels)
@@ -57,10 +55,10 @@ def get_smallest_possible(vals):
for v in vals:
assert int(v) == v, "Error: float value"
- for k in DataType.__members__:
+ for k in DataType.get_accumulator_dt_cands():
dt = DataType[k]
- if dt in [DataType.BIPOLAR, DataType.TERNARY, DataType.FLOAT32]:
+ if dt in [DataType["BIPOLAR"], DataType["TERNARY"], DataType["FLOAT32"]]:
# not currently supported
continue
@@ -76,9 +74,9 @@ def get_smallest_possible(vals):
)
if (0 <= vals).all():
- return DataType.UINT64
+ return DataType["UINT64"]
else:
- return DataType.INT64
+ return DataType["INT64"]
class ChannelwiseOp_Batch(HLSCustomOp):
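Note the access-style change that recurs throughout this diff: DataType.BIPOLAR becomes DataType["BIPOLAR"]. A short sketch of why subscript access is the more general spelling (assuming DataType keeps supporting lookup by name):

from finn.core.datatype import DataType

width = 8                       # could come from a node attribute at runtime
dt = DataType["INT%d" % width]  # subscript access works with computed names
assert dt.bitwidth() == width   # DataType.INT8 would hard-code the member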
@@ -126,18 +124,7 @@ def calc_tmem(self):
def make_shape_compatible_op(self, model):
oshape = self.get_normal_output_shape()
# implement tensor with correct shape
- values = np.random.randn(*oshape).astype(np.float32)
- return helper.make_node(
- "Constant",
- inputs=[],
- outputs=[self.onnx_node.output[0]],
- value=helper.make_tensor(
- name="const_tensor",
- data_type=TensorProto.FLOAT,
- dims=values.shape,
- vals=values.flatten().astype(float),
- ),
- )
+ return super().make_const_shape_op(oshape)
def infer_node_datatype(self, model):
node = self.onnx_node
@@ -217,7 +204,7 @@ def bram_estimation(self):
return 0
def lut_estimation(self):
- """Calculates LUT cost, taking memory resource type into account """
+ """Calculates LUT cost, taking memory resource type into account"""
# TODO add in/out FIFO contributions
style = self.get_nodeattr("ram_style")
P = self.get_nodeattr("PE")
@@ -348,8 +335,8 @@ def generate_params(self, model, path):
)
# get input data type
export_idt = self.get_input_datatype()
- if self.get_input_datatype() == DataType.BIPOLAR:
- export_idt = DataType.BINARY
+ if self.get_input_datatype() == DataType["BIPOLAR"]:
+ export_idt = DataType["BINARY"]
idt_hls = export_idt.get_hls_datatype_str()
# write parameters into params.h
@@ -357,8 +344,8 @@ def generate_params(self, model, path):
pdt_hls = pdt.get_hls_datatype_str()
# use binary to export bipolar activations
export_odt = self.get_output_datatype()
- if self.get_output_datatype() == DataType.BIPOLAR:
- export_odt = DataType.BINARY
+ if self.get_output_datatype() == DataType["BIPOLAR"]:
+ export_odt = DataType["BINARY"]
odt_hls = export_odt.get_hls_datatype_str()
# get desired function
func = self.get_nodeattr("Func")
@@ -439,7 +426,7 @@ def execute_node(self, context, graph):
# load output npy file
super().npy_to_dynamic_output(context)
# reinterpret binary output as bipolar where needed
- if self.get_output_datatype() == DataType.BIPOLAR:
+ if self.get_output_datatype() == DataType["BIPOLAR"]:
out = context[node.output[0]]
out = 2 * out - 1
context[node.output[0]] = out
@@ -490,7 +477,9 @@ def defines(self, var):
numReps = numInputVectors[0]
self.code_gen_dict["$DEFINES$"] = [
"""#define NumChannels1 {}\n#define PE1 {}\n#define numReps {}""".format(
- self.get_nodeattr("NumChannels"), self.get_nodeattr("PE"), numReps,
+ self.get_nodeattr("NumChannels"),
+ self.get_nodeattr("PE"),
+ numReps,
)
]
@@ -525,24 +514,29 @@ def docompute(self):
# should ImgDim be defined or just filled in here like we do now?
ishape = self.get_folded_input_shape()
if len(ishape) == 3:
- imgdim = 1
+ imgdim_h = 1
+ imgdim_w = 1
elif len(ishape) == 5:
- imgdim = ishape[1]
+ imgdim_h = ishape[1]
+ imgdim_w = ishape[2]
else:
raise Exception("""Unexpeted input shape""")
self.code_gen_dict["$DOCOMPUTE$"] = [
- """Thresholding_Batch<{}, NumChannels1, PE1, {}, {}>
+ """Thresholding_Batch<{}, {}, NumChannels1, PE1, {}, {}>
(in0, out, threshs, numReps);""".format(
- imgdim, tmpl_args["TSrcI"], tmpl_args["TDstI"],
+ imgdim_h,
+ imgdim_w,
+ tmpl_args["TSrcI"],
+ tmpl_args["TDstI"],
)
]
def dataoutstrm(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_output_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_outstream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
index 6e77cd3da7..a401883684 100644
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
@@ -26,15 +26,13 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-
import math
import numpy as np
+import os
from finn.core.datatype import DataType
from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
from finn.custom_op.general.im2col import compute_conv_output_dim
-from onnx import TensorProto, helper
from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
# ONNX i/o tensor shape assumptions for ConvolutionInputGenerator:
@@ -149,18 +147,7 @@ def make_shape_compatible_op(self, model):
ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
assert ishape == exp_ishape, "Unexpect input shape for ConvInpGen."
# implement tensor with correct shape
- values = np.random.randn(*oshape).astype(np.float32)
- return helper.make_node(
- "Constant",
- inputs=[],
- outputs=[self.onnx_node.output[0]],
- value=helper.make_tensor(
- name="const_tensor",
- data_type=TensorProto.FLOAT,
- dims=values.shape,
- vals=values.flatten().astype(float),
- ),
- )
+ return super().make_const_shape_op(oshape)
def infer_node_datatype(self, model):
node = self.onnx_node
@@ -320,10 +307,10 @@ def execute_node(self, context, graph):
inp.shape == exp_ishape
), """Input shape doesn't
match expected shape (1, ifm_dim_h, ifm_dim_w, ifm_ch)."""
- if self.get_input_datatype() == DataType.BIPOLAR:
+ if self.get_input_datatype() == DataType["BIPOLAR"]:
# store bipolar activations as binary
inp = (inp + 1) / 2
- export_idt = DataType.BINARY
+ export_idt = DataType["BINARY"]
else:
export_idt = self.get_input_datatype()
# reshape input into folded form
@@ -371,7 +358,7 @@ def execute_node(self, context, graph):
)
)
# binary -> bipolar if needed
- if self.get_output_datatype() == DataType.BIPOLAR:
+ if self.get_output_datatype() == DataType["BIPOLAR"]:
out = context[node.output[0]]
out = 2 * out - 1
context[node.output[0]] = out
@@ -405,9 +392,9 @@ def defines(self, var):
def read_npy_data(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_input_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_instream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -466,9 +453,9 @@ def docompute(self):
def dataoutstrm(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_output_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_outstream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
index 782655b31b..e43d73b1cd 100644
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
@@ -26,15 +26,13 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-
import math
import numpy as np
+import os
from finn.core.datatype import DataType
from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
from finn.custom_op.general.im2col import compute_conv_output_dim
-from onnx import TensorProto, helper
from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
# This operation should only be used for 1D convolutions. Either the
@@ -129,8 +127,12 @@ def get_folded_output_shape(self):
ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad, dilation_h)
ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad, dilation_w)
assert ifm_ch % simd == 0, "SIMD must divide IFMChannels"
- wf = int((k_h * k_w * ifm_ch) // simd)
- folded_oshape = (1, ofm_dim_h, ofm_dim_w, wf, simd)
+ if self.use_parallel_window_output():
+ wf = int((ifm_ch) // simd)
+ folded_oshape = (1, ofm_dim_h, ofm_dim_w, wf, k_h * k_w * simd)
+ else:
+ wf = int((k_h * k_w * ifm_ch) // simd)
+ folded_oshape = (1, ofm_dim_h, ofm_dim_w, wf, simd)
return folded_oshape
def make_shape_compatible_op(self, model):
@@ -138,19 +140,7 @@ def make_shape_compatible_op(self, model):
oshape = self.get_normal_output_shape()
ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
assert ishape == exp_ishape, "Unexpect input shape for ConvInpGen."
- # implement tensor with correct shape
- values = np.random.randn(*oshape).astype(np.float32)
- return helper.make_node(
- "Constant",
- inputs=[],
- outputs=[self.onnx_node.output[0]],
- value=helper.make_tensor(
- name="const_tensor",
- data_type=TensorProto.FLOAT,
- dims=values.shape,
- vals=values.flatten().astype(float),
- ),
- )
+ return super().make_const_shape_op(oshape)
def infer_node_datatype(self, model):
node = self.onnx_node
@@ -170,8 +160,6 @@ def get_output_datatype(self):
return DataType[self.get_nodeattr("outputDataType")]
def get_instream_width(self):
- """Returns stream width, input and output stream width are equal for
- the sliding window function"""
ibits = self.get_input_datatype().bitwidth()
simd = self.get_nodeattr("SIMD")
ifm_ch = self.get_nodeattr("IFMChannels")
@@ -180,10 +168,13 @@ def get_instream_width(self):
return in_width
def get_outstream_width(self):
- """Returns stream width, input and output stream width are equal for
- the sliding window function, so the function to determine the input
- stream width can be reused."""
- return self.get_instream_width()
+ if self.use_parallel_window_output():
+ # feed all window pixels in parallel
+ k_h, k_w = self.get_nodeattr("ConvKernelDim")
+ return self.get_instream_width() * k_h * k_w
+ else:
+ # if parallel variant not in use: same width for output and input stream
+ return self.get_instream_width()
def get_number_output_values(self):
folded_oshape = self.get_folded_output_shape()
@@ -219,6 +210,22 @@ def get_1d_conv_attrs_normalized(self):
return (ifm_ch, ifm_dim, ofm_dim, k, stride, dilation)
+ def use_parallel_window_output(self):
+ # Check whether the simple "ConvolutionInputGenerator_1D_parallel" variant can be used
+ # to feed the window in parallel to the following layer, enabling full SIMD unfolding.
+ stride = self.get_nodeattr("Stride")
+ dilation = self.get_nodeattr("Dilation")
+ stride_h, stride_w = stride
+ dilation_h, dilation_w = dilation
+
+ if self.get_nodeattr("SIMD") == self.get_nodeattr("IFMChannels"):
+ if self.get_nodeattr("depthwise") == 0:
+ if stride_h == 1 and stride_w == 1:
+ if dilation_h == 1 and dilation_w == 1:
+ return True
+
+ return False
+
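The nested conditions above amount to a single conjunction; an equivalent one-expression form of the predicate, shown only for clarity:

def use_parallel_window_output(self):
    stride_h, stride_w = self.get_nodeattr("Stride")
    dilation_h, dilation_w = self.get_nodeattr("Dilation")
    return (
        self.get_nodeattr("SIMD") == self.get_nodeattr("IFMChannels")
        and self.get_nodeattr("depthwise") == 0
        and (stride_h, stride_w) == (1, 1)
        and (dilation_h, dilation_w) == (1, 1)
    )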
def get_exp_cycles(self):
simd = self.get_nodeattr("SIMD")
(
@@ -238,12 +245,15 @@ def get_exp_cycles(self):
# since mmv != 1 is not supported yet, we set mmv for now to 1
mmv = 1
# see https://github.com/Xilinx/finn-hlslib/blob/master/slidingwindow.h
- cycles_write_block = (ofm_dim_w * k_w * k_h * (ifm_ch / simd)) / mmv
- cycles_read_block = stride_w * ifm_dim_w * (ifm_ch / simd)
- max_cycles = max(cycles_write_block, cycles_read_block)
- exp_cycles = (
- ifm_dim_w * k_h * dilation_h * (ifm_ch / simd) + ofm_dim_h * max_cycles
- )
+ if self.use_parallel_window_output():
+ exp_cycles = k_w + ofm_dim_w
+ else:
+ cycles_write_block = (ofm_dim_w * k_w * k_h * (ifm_ch / simd)) / mmv
+ cycles_read_block = stride_w * ifm_dim_w * (ifm_ch / simd)
+ max_cycles = max(cycles_write_block, cycles_read_block)
+ exp_cycles = (
+ ifm_dim_w * k_h * dilation_h * (ifm_ch / simd) + ofm_dim_h * max_cycles
+ )
return int(exp_cycles)
@@ -346,10 +356,10 @@ def execute_node(self, context, graph):
inp.shape == exp_ishape
), """Input shape doesn't
match expected shape (1, ifm_dim, ifm_dim, ifm_ch)."""
- if self.get_input_datatype() == DataType.BIPOLAR:
+ if self.get_input_datatype() == DataType["BIPOLAR"]:
# store bipolar activations as binary
inp = (inp + 1) / 2
- export_idt = DataType.BINARY
+ export_idt = DataType["BINARY"]
else:
export_idt = self.get_input_datatype()
# reshape input into folded form
@@ -397,7 +407,7 @@ def execute_node(self, context, graph):
)
)
# binary -> bipolar if needed
- if self.get_output_datatype() == DataType.BIPOLAR:
+ if self.get_output_datatype() == DataType["BIPOLAR"]:
out = context[node.output[0]]
out = 2 * out - 1
context[node.output[0]] = out
@@ -503,9 +513,9 @@ def defines(self, var):
def read_npy_data(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_input_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_instream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -536,46 +546,56 @@ def docompute(self):
"ultra": "ap_resource_uram()",
}
hls_ram_style = map_to_hls_ram_style[ram_style]
- hls_call = "ConvolutionInputGenerator"
- # check which ConvolutionInputGenerator is needed
- dilation_h, dilation_w = self.get_nodeattr("Dilation")
- hls_call += "_NonSquare"
- if dilation_h > 1 or dilation_w > 1:
- hls_call += "_Dilated"
- if self.get_nodeattr("depthwise") == 1:
- hls_call += "_dws"
- self.code_gen_dict["$DOCOMPUTE$"] = [
- """{} (in0, out, numReps, {});""".format(
- hls_call, hls_ram_style
- )
- ]
- elif self.get_nodeattr("depthwise") == 1:
- hls_call += "_dws"
+ # check which ConvolutionInputGenerator is needed
+ if self.use_parallel_window_output():
+ hls_call = "ConvolutionInputGenerator_1D_parallel"
self.code_gen_dict["$DOCOMPUTE$"] = [
- """{}
+ """{}
(in0, out, numReps, {});""".format(
hls_call, hls_ram_style
)
]
else:
- self.code_gen_dict["$DOCOMPUTE$"] = [
- """{}
- (in0, out, numReps, {});""".format(
- hls_call, hls_ram_style
- )
- ]
+ hls_call = "ConvolutionInputGenerator_NonSquare"
+ dilation_h, dilation_w = self.get_nodeattr("Dilation")
+ if dilation_h > 1 or dilation_w > 1:
+ hls_call += "_Dilated"
+ if self.get_nodeattr("depthwise") == 1:
+ hls_call += "_dws"
+ self.code_gen_dict["$DOCOMPUTE$"] = [
+ """{}
+ (in0, out, numReps, {});""".format(
+ hls_call, hls_ram_style
+ )
+ ]
+ elif self.get_nodeattr("depthwise") == 1:
+ hls_call += "_dws"
+ self.code_gen_dict["$DOCOMPUTE$"] = [
+ """{} (in0, out, numReps, {});""".format(
+ hls_call, hls_ram_style
+ )
+ ]
+ else:
+ self.code_gen_dict["$DOCOMPUTE$"] = [
+ """{} (in0, out, numReps, {});""".format(
+ hls_call, hls_ram_style
+ )
+ ]
def dataoutstrm(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_output_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_outstream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -584,9 +604,16 @@ def dataoutstrm(self):
npy_out = "%s/output.npy" % code_gen_dir
oshape = self.get_folded_output_shape()
oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}")
+ if self.use_parallel_window_output():
+ # pass the number of pixels in the folded output to apintstream2npy, needed
+ # to unpack the output correctly and reverse only the inner SIMD dimension
+ k_h, k_w = self.get_nodeattr("ConvKernelDim")
+ multi_pixel_out = k_h * k_w
+ else:
+ multi_pixel_out = 1
self.code_gen_dict["$DATAOUTSTREAM$"] = [
- 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");'
+ 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", true, 1, %d);'
% (
packed_hls_type,
elem_hls_type,
@@ -594,6 +621,7 @@ def dataoutstrm(self):
npy_type,
oshape_cpp_str,
npy_out,
+ multi_pixel_out,
)
]
@@ -601,12 +629,21 @@ def save_as_npy(self):
self.code_gen_dict["$SAVEASCNPY$"] = []
def blackboxfunction(self):
- self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
- """void {}(hls::stream> &in0,
- hls::stream> &out)""".format(
- self.onnx_node.name
- )
- ]
+ if self.use_parallel_window_output():
+ self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
+ """void {}(hls::stream> &in0,
+ hls::stream>
+ &out)""".format(
+ self.onnx_node.name
+ )
+ ]
+ else:
+ self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
+ """void {}(hls::stream> &in0,
+ hls::stream> &out)""".format(
+ self.onnx_node.name
+ )
+ ]
def pragmas(self):
self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"]
diff --git a/src/finn/custom_op/fpgadataflow/downsampler.py b/src/finn/custom_op/fpgadataflow/downsampler.py
index 002f71aa30..124b3e4645 100644
--- a/src/finn/custom_op/fpgadataflow/downsampler.py
+++ b/src/finn/custom_op/fpgadataflow/downsampler.py
@@ -1,10 +1,38 @@
-import os
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
import numpy as np
-from onnx import TensorProto, helper
+import os
+import warnings
+
from finn.core.datatype import DataType
from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
-import warnings
class DownSampler(HLSCustomOp):
@@ -82,19 +110,7 @@ def make_shape_compatible_op(self, model):
oshape = self.get_normal_output_shape()
ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
assert ishape == exp_ishape, "Unexpect input shape for DownSampler."
- # implement tensor with correct shape
- values = np.random.randn(*oshape).astype(np.float32)
- return helper.make_node(
- "Constant",
- inputs=[],
- outputs=[self.onnx_node.output[0]],
- value=helper.make_tensor(
- name="const_tensor",
- data_type=TensorProto.FLOAT,
- dims=values.shape,
- vals=values.flatten().astype(float),
- ),
- )
+ return super().make_const_shape_op(oshape)
def infer_node_datatype(self, model):
node = self.onnx_node
@@ -163,9 +179,9 @@ def defines(self, var):
def read_npy_data(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_input_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_instream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -196,9 +212,9 @@ def docompute(self):
def dataoutstrm(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_output_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_outstream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
diff --git a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py
index 73da77bd3f..3b0fa55b00 100644
--- a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py
+++ b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py
@@ -26,13 +26,13 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-
import numpy as np
+import os
import warnings
+from onnx import TensorProto, helper
+
from finn.core.datatype import DataType
from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from onnx import helper, TensorProto
from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
diff --git a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
index 99f959bf59..8ac30524eb 100644
--- a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
@@ -1,10 +1,38 @@
-import os
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
import numpy as np
-from onnx import TensorProto, helper
+import os
+import warnings
+
from finn.core.datatype import DataType
from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
-import warnings
class FMPadding_Batch(HLSCustomOp):
@@ -98,19 +126,7 @@ def make_shape_compatible_op(self, model):
oshape = self.get_normal_output_shape()
ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
assert ishape == exp_ishape, "Unexpect input shape for SameResize."
- # implement tensor with correct shape
- values = np.random.randn(*oshape).astype(np.float32)
- return helper.make_node(
- "Constant",
- inputs=[],
- outputs=[self.onnx_node.output[0]],
- value=helper.make_tensor(
- name="const_tensor",
- data_type=TensorProto.FLOAT,
- dims=values.shape,
- vals=values.flatten().astype(float),
- ),
- )
+ return super().make_const_shape_op(oshape)
def infer_node_datatype(self, model):
node = self.onnx_node
@@ -209,9 +225,9 @@ def defines(self, var):
def read_npy_data(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_input_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_instream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -260,9 +276,9 @@ def docompute(self):
def dataoutstrm(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_output_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_outstream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
diff --git a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py
index 8cc71ce9eb..6d4a55ee5c 100644
--- a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py
@@ -26,13 +26,12 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-
import numpy as np
+import os
import warnings
+
from finn.core.datatype import DataType
from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from onnx import TensorProto, helper
from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
@@ -95,19 +94,7 @@ def make_shape_compatible_op(self, model):
oshape = self.get_normal_output_shape()
ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
assert ishape == exp_ishape, "Unexpected input shape."
- # implement tensor with correct shape
- values = np.random.randn(*oshape).astype(np.float32)
- return helper.make_node(
- "Constant",
- inputs=[],
- outputs=[self.onnx_node.output[0]],
- value=helper.make_tensor(
- name="const_tensor",
- data_type=TensorProto.FLOAT,
- dims=values.shape,
- vals=values.flatten(),
- ),
- )
+ return super().make_const_shape_op(oshape)
def infer_node_datatype(self, model):
node = self.onnx_node
diff --git a/src/finn/custom_op/fpgadataflow/hlscustomop.py b/src/finn/custom_op/fpgadataflow/hlscustomop.py
index c071884302..3aac7f6b45 100644
--- a/src/finn/custom_op/fpgadataflow/hlscustomop.py
+++ b/src/finn/custom_op/fpgadataflow/hlscustomop.py
@@ -27,22 +27,24 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# namespace package, extend path
-from abc import abstractmethod
import numpy as np
import os
import subprocess
+from abc import abstractmethod
+
from finn.custom_op.base import CustomOp
from finn.util.basic import (
CppBuilder,
+ get_rtlsim_trace_depth,
make_build_dir,
roundup_to_integer_multiple,
- get_rtlsim_trace_depth,
)
+from finn.util.hls import CallHLS
from finn.util.pyverilator import (
pyverilate_get_liveness_threshold_cycles,
rtlsim_multi_io,
)
-from finn.util.hls import CallHLS
+
from . import templates
try:
@@ -289,6 +291,7 @@ def code_generation_ipgen(self, model, fpgapart, clk):
self.code_gen_dict["$HWSRCDIR$"] = [code_gen_dir]
self.code_gen_dict["$FPGAPART$"] = [fpgapart]
self.code_gen_dict["$FINNHLSLIBDIR$"] = ["/workspace/finn-hlslib"]
+ self.code_gen_dict["$FINNHLSCUSTOMDIR$"] = ["/workspace/finn/custom_hls"]
self.code_gen_dict["$TOPFXN$"] = [node.name]
self.code_gen_dict["$CLKPERIOD$"] = [str(clk)]
self.code_gen_dict["$EXTRA_DIRECTIVES$"] = self.ipgen_extra_directives()
@@ -372,6 +375,7 @@ def compile_singlenode_code(self):
builder.append_includes("-I/workspace/finn/src/finn/qnn-data/cpp")
builder.append_includes("-I/workspace/cnpy/")
builder.append_includes("-I/workspace/finn-hlslib")
+ builder.append_includes("-I/workspace/finn/custom_hls")
builder.append_includes("-I{}/include".format(os.environ["VIVADO_PATH"]))
builder.append_includes("--std=c++11")
builder.append_includes("-O3")
diff --git a/src/finn/custom_op/fpgadataflow/iodma.py b/src/finn/custom_op/fpgadataflow/iodma.py
index 857496a261..802c7e7851 100644
--- a/src/finn/custom_op/fpgadataflow/iodma.py
+++ b/src/finn/custom_op/fpgadataflow/iodma.py
@@ -26,12 +26,12 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import numpy as np
import math
-from onnx import TensorProto, helper
+import numpy as np
+import warnings
+
from finn.core.datatype import DataType
from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-import warnings
# the IODMA bridges a memory-mapped AXI interface and an AXI stream
# direction "in": pulls data from AXI-MM to AXI stream
@@ -145,19 +145,7 @@ def make_shape_compatible_op(self, model):
oshape = self.get_normal_output_shape()
ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
assert ishape == exp_ishape, "Unexpected input shape."
- # implement tensor with correct shape
- values = np.random.randn(*oshape).astype(np.float32)
- return helper.make_node(
- "Constant",
- inputs=[],
- outputs=[self.onnx_node.output[0]],
- value=helper.make_tensor(
- name="const_tensor",
- data_type=TensorProto.FLOAT,
- dims=values.shape,
- vals=values.flatten().astype(float),
- ),
- )
+ return super().make_const_shape_op(oshape)
def infer_node_datatype(self, model):
node = self.onnx_node
diff --git a/src/finn/custom_op/fpgadataflow/labelselect_batch.py b/src/finn/custom_op/fpgadataflow/labelselect_batch.py
index 1640e2f27c..1eb5962fdb 100644
--- a/src/finn/custom_op/fpgadataflow/labelselect_batch.py
+++ b/src/finn/custom_op/fpgadataflow/labelselect_batch.py
@@ -26,15 +26,14 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-
import numpy as np
+import os
+from onnx import TensorProto, helper
from finn.core.datatype import DataType
from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from onnx import TensorProto, helper
-from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
from finn.util.basic import roundup_to_integer_multiple
+from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
class LabelSelect_Batch(HLSCustomOp):
@@ -103,18 +102,14 @@ def make_shape_compatible_op(self, model):
oshape = self.get_normal_output_shape()
ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
assert ishape == exp_ishape, "Unexpected input shape."
- # implement tensor with correct shape
- values = np.random.randn(*oshape).astype(np.int64)
return helper.make_node(
- "Constant",
+ "RandomNormal",
inputs=[],
outputs=[self.onnx_node.output[0]],
- value=helper.make_tensor(
- name="const_tensor",
- data_type=TensorProto.INT64,
- dims=values.shape,
- vals=values.flatten(),
- ),
+ mean=0.0,
+ scale=1.0,
+ dtype=TensorProto.INT64,
+ shape=list(oshape),
)
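Unlike the other ops, LabelSelect_Batch keeps an inline shape proxy, presumably because its output is INT64 while the shared helper fixes the dtype to FLOAT. A small sketch of why the RandomNormal form is preferable to the removed Constant: it stores only shape metadata, not a value blob (sizes below are illustrative):

import numpy as np
from onnx import TensorProto, helper

oshape = (1, 1000)
values = np.random.randn(*oshape).astype(np.float32)
as_const = helper.make_node(
    "Constant", inputs=[], outputs=["o"],
    value=helper.make_tensor("t", TensorProto.FLOAT, oshape, values.flatten()),
)
as_proxy = helper.make_node(
    "RandomNormal", inputs=[], outputs=["o"],
    mean=0.0, scale=1.0, dtype=TensorProto.FLOAT, shape=list(oshape),
)
print(as_const.ByteSize(), as_proxy.ByteSize())  # ~4 KB vs. a few dozen bytes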
def infer_node_datatype(self, model):
diff --git a/src/finn/custom_op/fpgadataflow/lookup.py b/src/finn/custom_op/fpgadataflow/lookup.py
new file mode 100644
index 0000000000..27be06bdfa
--- /dev/null
+++ b/src/finn/custom_op/fpgadataflow/lookup.py
@@ -0,0 +1,338 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+import os
+import warnings
+from math import ceil
+
+from finn.core.datatype import DataType
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
+from finn.util.data_packing import (
+ npy_to_rtlsim_input,
+ numpy_to_hls_code,
+ rtlsim_output_to_npy,
+)
+
+
+class Lookup(HLSCustomOp):
+ "Streaming elementwise HLS lookup, mapping indices to values."
+
+ def __init__(self, onnx_node):
+ super().__init__(onnx_node)
+
+ def get_nodeattr_types(self):
+ my_attrs = {
+ # Number of embeddings ("memory depth")
+ "NumEmbeddings": ("i", True, 0),
+ # Dimensionality of each embedding (part of "memory width")
+ "EmbeddingDim": ("i", True, 0),
+ # Datatype for embeddings (part of "memory width")
+ "EmbeddingType": ("s", True, ""),
+ # Datatype for inputs
+ "InputType": ("s", True, ""),
+ # Input shape
+ "InputShape": ("ints", False, [1]),
+ }
+ my_attrs.update(super().get_nodeattr_types())
+ return my_attrs
+
+ def get_exp_cycles(self):
+ n_inputs = np.prod(self.get_nodeattr("InputShape"))
+ exp_cycles = int(n_inputs)
+ return exp_cycles
+
+ def get_normal_input_shape(self):
+ return self.get_nodeattr("InputShape")
+
+ def get_normal_output_shape(self):
+ ishape = self.get_normal_input_shape()
+ oshape = list(ishape) + [self.get_nodeattr("EmbeddingDim")]
+ return tuple(oshape)
+
+ def get_folded_input_shape(self):
+ ishape = self.get_normal_input_shape()
+ folded_ishape = list(ishape) + [1]
+ return tuple(folded_ishape)
+
+ def get_folded_output_shape(self):
+ return self.get_normal_output_shape()
+
+ def make_shape_compatible_op(self, model):
+ exp_ishape = tuple(self.get_normal_input_shape())
+ oshape = tuple(self.get_normal_output_shape())
+ ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
+ assert ishape == exp_ishape, "Unexpected input shape for Lookup: %s vs %s" % (
+ str(exp_ishape),
+ str(ishape),
+ )
+ return super().make_const_shape_op(oshape)
+
+ def infer_node_datatype(self, model):
+ node = self.onnx_node
+ idt = model.get_tensor_datatype(node.input[0])
+ if idt != self.get_input_datatype():
+ warn_str = "InputType changing for %s: %s -> %s " % (
+ node.name,
+ str(self.get_input_datatype()),
+ str(idt),
+ )
+ warnings.warn(warn_str)
+ self.set_nodeattr("InputType", idt.name)
+ odt = DataType[self.get_nodeattr("EmbeddingType")]
+ model.set_tensor_datatype(node.output[0], odt)
+
+ def verify_node(self):
+ pass
+
+ def get_input_datatype(self):
+ ret = DataType[self.get_nodeattr("InputType")]
+ return ret
+
+ def get_output_datatype(self):
+ ret = DataType[self.get_nodeattr("EmbeddingType")]
+ return ret
+
+ def get_instream_width(self):
+ ibits = self.get_input_datatype().bitwidth()
+ return ibits
+
+ def get_outstream_width(self):
+ obits = self.get_output_datatype().bitwidth()
+ ofm_ch = self.get_nodeattr("EmbeddingDim")
+ return obits * ofm_ch
+
+ def get_number_output_values(self):
+ folded_oshape = self.get_folded_output_shape()
+ return np.prod(folded_oshape[:-1])
+
+ def global_includes(self):
+ global_incls = ['#include "lookup.hpp"']
+ global_incls.append('#include "embeddings.hpp"')
+ self.code_gen_dict["$GLOBALS$"] = global_incls
+
+ def defines(self, var):
+ n_inputs = np.prod(self.get_folded_input_shape()[:-1])
+ dtype = self.get_input_datatype()
+ elem_hls_type = dtype.get_hls_datatype_str()
+ emb_type = DataType[self.get_nodeattr("EmbeddingType")]
+ emb_hls_type = emb_type.get_hls_datatype_str()
+ my_defines = []
+ my_defines.append(
+ "#define NumEmbeddings %d" % self.get_nodeattr("NumEmbeddings")
+ )
+ my_defines.append("#define EmbeddingDim %d" % self.get_nodeattr("EmbeddingDim"))
+ my_defines.append("#define NumInputs %d" % n_inputs)
+ my_defines.append("#define InputType %s" % elem_hls_type)
+ my_defines.append("#define EmbeddingType %s" % emb_hls_type)
+ self.code_gen_dict["$DEFINES$"] = my_defines
+
+ def read_npy_data(self):
+ code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+ dtype = self.get_input_datatype()
+ if dtype == DataType["BIPOLAR"]:
+ # use binary for bipolar storage
+ dtype = DataType["BINARY"]
+ elem_bits = dtype.bitwidth()
+ packed_bits = self.get_instream_width()
+ packed_hls_type = "ap_uint<%d>" % packed_bits
+ elem_hls_type = dtype.get_hls_datatype_str()
+ npy_type = "int64_t"
+ npy_in = "%s/input_0.npy" % code_gen_dir
+ self.code_gen_dict["$READNPYDATA$"] = []
+ self.code_gen_dict["$READNPYDATA$"].append(
+ 'npy2apintstream<%s, %s, %d, %s>("%s", in0);'
+ % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in)
+ )
+
+ def dataoutstrm(self):
+ code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+ dtype = self.get_output_datatype()
+ if dtype == DataType["BIPOLAR"]:
+ # use binary for bipolar storage
+ dtype = DataType["BINARY"]
+ elem_bits = dtype.bitwidth()
+ packed_bits = self.get_outstream_width()
+ packed_hls_type = "ap_uint<%d>" % packed_bits
+ elem_hls_type = dtype.get_hls_datatype_str()
+ npy_type = "float"
+ npy_out = "%s/output.npy" % code_gen_dir
+ oshape = self.get_folded_output_shape()
+ oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}")
+
+ self.code_gen_dict["$DATAOUTSTREAM$"] = [
+ 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");'
+ % (
+ packed_hls_type,
+ elem_hls_type,
+ elem_bits,
+ npy_type,
+ oshape_cpp_str,
+ npy_out,
+ )
+ ]
+
+ def save_as_npy(self):
+ self.code_gen_dict["$SAVEASCNPY$"] = []
+
+ def strm_decl(self):
+ self.code_gen_dict["$STREAMDECLARATIONS$"] = []
+ self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+ 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width())
+ )
+ self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+ 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width())
+ )
+
+ def docompute(self):
+ self.code_gen_dict["$DOCOMPUTE$"] = [
+ """StreamingLookup(in0, out, embeddings);"""
+ ]
+
+ def blackboxfunction(self):
+ ibits = self.get_instream_width()
+ packed_input_hls_type = "ap_uint<%d>" % ibits
+ obits = self.get_outstream_width()
+ packed_output_hls_type = "ap_uint<%d>" % obits
+ self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
+ "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)"
+ % (self.onnx_node.name, packed_input_hls_type, packed_output_hls_type)
+ ]
+
+ def pragmas(self):
+ my_pragmas = ["#pragma HLS INTERFACE axis port=in0"]
+ my_pragmas.append("#pragma HLS INTERFACE axis port=out")
+ my_pragmas.append("#pragma HLS INTERFACE ap_ctrl_none port=return")
+ self.code_gen_dict["$PRAGMAS$"] = my_pragmas
+
+ def generate_params(self, model, path):
+ code_gen_dir = path
+ embeddings = model.get_initializer(self.onnx_node.input[1])
+ weight_filename = "{}/embeddings.hpp".format(code_gen_dir)
+ edt = DataType[self.get_nodeattr("EmbeddingType")]
+ # obits = self.get_outstream_width()
+ # packed_output_hls_type = "ap_uint<%d>" % obits
+ assert np.vectorize(edt.allowed)(
+ embeddings
+ ).all(), "Embeddings can't be expressed with type %s" % str(edt)
+ embeddings_hls_code = numpy_to_hls_code(
+ embeddings, edt, "embeddings", True, False
+ )
+ f_thresh = open(weight_filename, "w")
+ f_thresh.write(embeddings_hls_code)
+ f_thresh.close()
+
+ def execute_node(self, context, graph):
+ mode = self.get_nodeattr("exec_mode")
+ node = self.onnx_node
+ exp_ishape = tuple(self.get_normal_input_shape())
+ exp_oshape = tuple(self.get_normal_output_shape())
+ folded_ishape = tuple(self.get_folded_input_shape())
+ folded_oshape = tuple(self.get_folded_output_shape())
+
+ if mode == "cppsim":
+ code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+ elif mode == "rtlsim":
+ code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+ else:
+ raise Exception(
+ """Invalid value for attribute exec_mode! Is currently set to: {}
+ has to be set to one of the following values ("cppsim", "rtlsim")""".format(
+ mode
+ )
+ )
+
+ inp = context[node.input[0]]
+ assert inp.dtype == np.int64, "Inputs must be contained in an int64 ndarray"
+ assert inp.shape == exp_ishape, """Input shape doesn't match expected shape."""
+ export_idt = self.get_input_datatype()
+ odt = self.get_output_datatype()
+
+ reshaped_input = inp.reshape(folded_ishape)
+ np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)
+
+ if mode == "cppsim":
+ # execute the precompiled model
+ super().exec_precompiled_singlenode_model()
+ # load output npy file
+ super().npy_to_dynamic_output(context)
+ assert (
+ context[node.output[0]].shape == folded_oshape
+ ), "cppsim did not produce expected folded output shape"
+ context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape)
+ elif mode == "rtlsim":
+ sim = self.get_rtlsim()
+ nbits = self.get_instream_width()
+ rtlsim_inp = npy_to_rtlsim_input(
+ "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
+ )
+ super().reset_rtlsim(sim)
+ super().toggle_clk(sim)
+ rtlsim_output = self.rtlsim(sim, rtlsim_inp)
+ target_bits = odt.bitwidth()
+ packed_bits = self.get_outstream_width()
+ out_npy_path = "{}/output.npy".format(code_gen_dir)
+ out_shape = self.get_folded_output_shape()
+ rtlsim_output_to_npy(
+ rtlsim_output,
+ out_npy_path,
+ odt,
+ out_shape,
+ packed_bits,
+ target_bits,
+ reverse_inner=False,
+ )
+ # load and reshape output
+ output = np.load(out_npy_path)
+ output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape)
+ context[node.output[0]] = output
+ else:
+ raise Exception(
+ """Invalid value for attribute exec_mode! Is currently set to: {}
+ has to be set to one of the following values ("cppsim", "rtlsim")""".format(
+ mode
+ )
+ )
+ assert (
+ context[node.output[0]].shape == exp_oshape
+ ), """Output shape doesn't match expected shape."""
+
+ def bram_estimation(self):
+ # the current calculation assumes embeddings are always stored in BRAM_18Ks
+ width_factor = ceil(self.get_outstream_width() / 16)
+ depth_factor = ceil(self.get_nodeattr("NumEmbeddings") / 1024)
+ return width_factor * depth_factor
+
+ def bram_efficiency_estimation(self):
+ bram16_est = self.bram_estimation()
+ if bram16_est == 0:
+ return 1
+ ebits = self.get_outstream_width() * self.get_nodeattr("NumEmbeddings")
+ bram16_est_capacity = bram16_est * 18 * 1024
+ return ebits / bram16_est_capacity
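A worked instance of the two estimates above, under assumed parameters (EmbeddingDim=64, INT8 embeddings, NumEmbeddings=10000):

from math import ceil

outstream_width = 64 * 8                    # EmbeddingDim * bitwidth = 512
width_factor = ceil(outstream_width / 16)   # 32
depth_factor = ceil(10000 / 1024)           # 10
bram18s = width_factor * depth_factor       # 320 BRAM_18Ks
ebits = outstream_width * 10000             # 5,120,000 embedding bits
efficiency = ebits / (bram18s * 18 * 1024)  # ~0.87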
diff --git a/src/finn/custom_op/fpgadataflow/pool_batch.py b/src/finn/custom_op/fpgadataflow/pool_batch.py
index edba084b52..ba8a446f2c 100644
--- a/src/finn/custom_op/fpgadataflow/pool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/pool_batch.py
@@ -26,12 +26,11 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
import numpy as np
+import os
-from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
from finn.core.datatype import DataType
-from onnx import TensorProto, helper
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
@@ -163,19 +162,7 @@ def make_shape_compatible_op(self, model):
oshape = self.get_normal_output_shape()
ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
assert ishape == exp_ishape, "Unexpected input shape for Pool_Batch."
- # implement tensor with correct shape
- values = np.random.randn(*oshape).astype(np.float32)
- return helper.make_node(
- "Constant",
- inputs=[],
- outputs=[self.onnx_node.output[0]],
- value=helper.make_tensor(
- name="const_tensor",
- data_type=TensorProto.FLOAT,
- dims=values.shape,
- vals=values.flatten().astype(float),
- ),
- )
+ return super().make_const_shape_op(oshape)
def infer_node_datatype(self, model):
node = self.onnx_node
@@ -235,9 +222,9 @@ def defines(self, var):
def read_npy_data(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_input_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_instream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -296,9 +283,9 @@ def docompute(self):
def dataoutstrm(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_output_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_outstream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
diff --git a/src/finn/custom_op/fpgadataflow/streamingdataflowpartition.py b/src/finn/custom_op/fpgadataflow/streamingdataflowpartition.py
index 53446ff1f2..cf065cf156 100644
--- a/src/finn/custom_op/fpgadataflow/streamingdataflowpartition.py
+++ b/src/finn/custom_op/fpgadataflow/streamingdataflowpartition.py
@@ -47,6 +47,7 @@ def get_nodeattr_types(self):
"partition_id": ("i", False, 0),
"device_id": ("i", False, 0),
"mem_port": ("s", False, ""),
+ "instance_name": ("s", False, ""),
}
def make_shape_compatible_op(self, model):
diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
index 4d84b74dce..1791706afa 100644
--- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
@@ -26,13 +26,13 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-import numpy as np
import math
+import numpy as np
+import os
import warnings
-from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
+
from finn.core.datatype import DataType
-from onnx import TensorProto, helper
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
# does not do anything at the ONNX node-by-node level, and input-output
@@ -164,19 +164,7 @@ def make_shape_compatible_op(self, model):
oshape = self.get_normal_output_shape()
ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
assert ishape == tuple(exp_ishape), "Unexpected input shape for StreamingDWC."
- # implement tensor with correct shape
- values = np.random.randn(*oshape).astype(np.float32)
- return helper.make_node(
- "Constant",
- inputs=[],
- outputs=[self.onnx_node.output[0]],
- value=helper.make_tensor(
- name="const_tensor",
- data_type=TensorProto.FLOAT,
- dims=values.shape,
- vals=values.flatten().astype(float),
- ),
- )
+ return super().make_const_shape_op(oshape)
def infer_node_datatype(self, model):
node = self.onnx_node
@@ -227,9 +215,9 @@ def defines(self, var):
def read_npy_data(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_input_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_instream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -261,9 +249,9 @@ def docompute(self):
def dataoutstrm(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_output_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_outstream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -330,10 +318,10 @@ def execute_node(self, context, graph):
exp_shape
), "Input shape does not match expected shape."
- if self.get_input_datatype() == DataType.BIPOLAR:
+ if self.get_input_datatype() == DataType["BIPOLAR"]:
# store bipolar activations as binary
inp = (inp + 1) / 2
- export_idt = DataType.BINARY
+ export_idt = DataType["BINARY"]
else:
export_idt = self.get_input_datatype()
# reshape input into folded shape
@@ -376,7 +364,7 @@ def execute_node(self, context, graph):
)
)
# binary -> bipolar if needed
- if self.get_output_datatype() == DataType.BIPOLAR:
+ if self.get_output_datatype() == DataType["BIPOLAR"]:
out = context[node.output[0]]
out = 2 * out - 1
context[node.output[0]] = out
diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index 48b40f105c..68cd1ff9ea 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -26,27 +26,27 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import warnings
import math
-import os
import numpy as np
+import os
+import textwrap
+import warnings
-from onnx import TensorProto, helper
from finn.core.datatype import DataType
from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
from finn.util.basic import (
+ calculate_matvec_accumulator_range,
interleave_matrix_outer_dim_from_partitions,
roundup_to_integer_multiple,
- calculate_matvec_accumulator_range,
)
from finn.util.data_packing import (
npy_to_rtlsim_input,
numpy_to_hls_code,
- rtlsim_output_to_npy,
pack_innermost_dim_as_hex_string,
+ rtlsim_output_to_npy,
)
+
from . import templates
-import textwrap
# ONNX i/o tensor shape assumptions for StreamingFCLayer:
# input 0 is the input tensor, shape (.., i_size) = (..., MW)
@@ -104,6 +104,16 @@ def get_nodeattr_types(self):
"auto",
{"auto", "block", "distributed", "ultra"},
),
+ # FPGA resource type for threshold memories (if noActivation is False)
+ # auto -- let Vivado decide
+ # block -- use BRAM
+ # distributed -- use LUTRAM
+ "ram_style_thresholds": (
+ "s",
+ False,
+ "auto",
+ {"auto", "block", "distributed"},
+ ),
# (mem_mode = decoupled only) whether weights will be writable through
# an AXI-lite interface during runtime
# 1 for enabled, 0 for disabled.
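A hedged usage sketch for the new ram_style_thresholds attribute, assuming the usual getCustomOp/set_nodeattr flow (node is an ONNX NodeProto for a StreamingFCLayer_Batch instance):

from finn.custom_op.registry import getCustomOp

def pin_thresholds_to_bram(node):
    inst = getCustomOp(node)
    # only relevant when the layer has an activation (noActivation == 0)
    inst.set_nodeattr("ram_style_thresholds", "block")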
@@ -140,19 +150,7 @@ def calc_tmem(self):
def make_shape_compatible_op(self, model):
oshape = self.get_normal_output_shape()
- # implement tensor with correct shape
- values = np.random.randn(*oshape).astype(np.float32)
- return helper.make_node(
- "Constant",
- inputs=[],
- outputs=[self.onnx_node.output[0]],
- value=helper.make_tensor(
- name="const_tensor",
- data_type=TensorProto.FLOAT,
- dims=values.shape,
- vals=values.flatten().astype(float),
- ),
- )
+ return super().make_const_shape_op(oshape)
def infer_node_datatype(self, model):
node = self.onnx_node
@@ -238,9 +236,10 @@ def uram_estimation(self):
mem_width = Q * W * P
mmode = self.get_nodeattr("mem_mode")
mstyle = self.get_nodeattr("ram_style")
- if (mmode == "decoupled" and mstyle != "ultra") or (
- mmode == "const" and self.calc_wmem() <= 128) or (
- mmode == "external"
+ if (
+ (mmode == "decoupled" and mstyle != "ultra")
+ or (mmode == "const" and self.calc_wmem() <= 128)
+ or (mmode == "external")
):
return 0
width_multiplier = math.ceil(mem_width / 72)
@@ -266,9 +265,10 @@ def bram_estimation(self):
mem_width = Q * W * P
mmode = self.get_nodeattr("mem_mode")
mstyle = self.get_nodeattr("ram_style")
- if (mmode == "decoupled" and mstyle in ["distributed", "ultra"]) or (
- mmode == "const" and self.calc_wmem() <= 128) or (
- mmode == "external"
+ if (
+ (mmode == "decoupled" and mstyle in ["distributed", "ultra"])
+ or (mmode == "const" and self.calc_wmem() <= 128)
+ or (mmode == "external")
):
return 0
# assuming SDP mode RAMB18s (see UG573 Table 1-10)
@@ -496,15 +496,15 @@ def get_template_param_values(self):
ret = dict()
inp_hls_str = self.get_input_datatype().get_hls_datatype_str()
out_hls_str = self.get_output_datatype().get_hls_datatype_str()
- inp_is_binary = self.get_input_datatype() == DataType.BINARY
- # out_is_binary = self.get_output_datatype() == DataType.BINARY
- wt_is_binary = self.get_weight_datatype() == DataType.BINARY
+ inp_is_binary = self.get_input_datatype() == DataType["BINARY"]
+ # out_is_binary = self.get_output_datatype() == DataType["BINARY"]
+ wt_is_binary = self.get_weight_datatype() == DataType["BINARY"]
bin_xnor_mode = self.get_nodeattr("binaryXnorMode") == 1
if (inp_is_binary or wt_is_binary) and (not bin_xnor_mode):
raise Exception("True binary (non-bipolar) inputs not yet supported")
- inp_is_bipolar = self.get_input_datatype() == DataType.BIPOLAR
- # out_is_bipolar = self.get_output_datatype() == DataType.BIPOLAR
- wt_is_bipolar = self.get_weight_datatype() == DataType.BIPOLAR
+ inp_is_bipolar = self.get_input_datatype() == DataType["BIPOLAR"]
+ # out_is_bipolar = self.get_output_datatype() == DataType["BIPOLAR"]
+ wt_is_bipolar = self.get_weight_datatype() == DataType["BIPOLAR"]
# reinterpret inp/wt as bipolar if bin_xnor_mode is set
inp_is_bipolar = inp_is_bipolar or (inp_is_binary and bin_xnor_mode)
wt_is_bipolar = wt_is_bipolar or (wt_is_binary and bin_xnor_mode)
@@ -554,7 +554,7 @@ def get_hls_compatible_weight_tensor(self, orig_weight_matrix):
# ONNX uses (in_features, out_features) and matmul(x, W)
# finn-hlslib uses (out_features, in_features) and matmul(W, x)
ret = orig_weight_matrix.T
- if self.get_weight_datatype() == DataType.BIPOLAR:
+ if self.get_weight_datatype() == DataType["BIPOLAR"]:
# convert bipolar to binary
ret = (ret + 1) / 2
# interleave rows between PEs and reshape
@@ -601,12 +601,14 @@ def minimize_accumulator_width(self, model):
if abs(tdt_min) > tdt_max:
tdt = DataType.get_smallest_possible(tdt_min)
else:
- tdt = DataType.get_smallest_possible(0 - tdt_max)
+ tdt = DataType.get_smallest_possible(-tdt_max - 1)
else:
tdt = DataType.get_smallest_possible(tdt_max)
- assert np.vectorize(tdt.allowed)(threshold_tensor).all(), (
- "Thresholds in %s can't be expressed with type %s"
- % (self.onnx_node.name, str(tdt))
+ assert np.vectorize(tdt.allowed)(
+ threshold_tensor
+ ).all(), "Thresholds in %s can't be expressed with type %s" % (
+ self.onnx_node.name,
+ str(tdt),
)
self.set_nodeattr("accDataType", tdt.name)
else:
@@ -614,7 +616,7 @@ def minimize_accumulator_width(self, model):
if abs(acc_min) > acc_max:
adt = DataType.get_smallest_possible(acc_min)
else:
- adt = DataType.get_smallest_possible(0 - acc_max)
+ adt = DataType.get_smallest_possible(-acc_max - 1)
else:
adt = DataType.get_smallest_possible(acc_max)
# ensure a datatype divisible by 8-bits in case this is the last node
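The change from `0 - tdt_max` to `-tdt_max - 1` (and likewise for `acc_max`) fixes an off-by-one at power-of-two magnitudes: a signed n-bit type covers [-2^(n-1), 2^(n-1)-1], so querying with -x can return a type whose positive bound is only x-1. A small check of the corrected query, assuming the DataType API as used in these files:

```python
from finn.core.datatype import DataType

tdt_max = 128
assert not DataType["INT8"].allowed(tdt_max)   # INT8 tops out at 127
tdt = DataType.get_smallest_possible(-tdt_max - 1)
# the corrected query yields a type wide enough for both bounds
assert tdt.allowed(tdt_max) and tdt.allowed(-tdt_max)
```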
@@ -643,11 +645,11 @@ def get_hls_compatible_threshold_tensor(self, orig_thres_matrix):
), """Threshold matrix dimension is
not as expected (2)."""
n_thres_steps = orig_thres_matrix.shape[1]
- inp_is_bipolar = self.get_input_datatype() == DataType.BIPOLAR
- wt_is_bipolar = self.get_weight_datatype() == DataType.BIPOLAR
+ inp_is_bipolar = self.get_input_datatype() == DataType["BIPOLAR"]
+ wt_is_bipolar = self.get_weight_datatype() == DataType["BIPOLAR"]
 # reinterpret inp/wt as bipolar if bin_xnor_mode is set
- inp_is_binary = self.get_input_datatype() == DataType.BINARY
- wt_is_binary = self.get_weight_datatype() == DataType.BINARY
+ inp_is_binary = self.get_input_datatype() == DataType["BINARY"]
+ wt_is_binary = self.get_weight_datatype() == DataType["BINARY"]
bin_xnor_mode = self.get_nodeattr("binaryXnorMode") == 1
inp_is_bipolar = inp_is_bipolar or (inp_is_binary and bin_xnor_mode)
wt_is_bipolar = wt_is_bipolar or (wt_is_binary and bin_xnor_mode)
@@ -658,7 +660,7 @@ def get_hls_compatible_threshold_tensor(self, orig_thres_matrix):
assert (orig_thres_matrix.astype(np.int32) == orig_thres_matrix).all()
ret = orig_thres_matrix
# workaround for vivado_hls threshold bug
- if ret[0][0] == 0:
+ if ret[0][0] == 0 and n_thres_steps == 1:
ret = np.copy(ret)
ret[0][0] = 1
warnings.warn(
@@ -702,8 +704,8 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name):
export_wdt = self.get_weight_datatype()
# we have converted bipolar weights to binary for export,
# so use it as such for weight generation
- if self.get_weight_datatype() == DataType.BIPOLAR:
- export_wdt = DataType.BINARY
+ if self.get_weight_datatype() == DataType["BIPOLAR"]:
+ export_wdt = DataType["BINARY"]
if weight_file_mode == "hls_header":
weight_hls_code = numpy_to_hls_code(
weight_tensor, export_wdt, "weights", True, True
@@ -832,20 +834,22 @@ def generate_params(self, model, path):
if thresholds is not None:
threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
# use UINT32 threshold export for bipolar times bipolar
- inp_is_bipolar = self.get_input_datatype() == DataType.BIPOLAR
- wt_is_bipolar = self.get_weight_datatype() == DataType.BIPOLAR
+ inp_is_bipolar = self.get_input_datatype() == DataType["BIPOLAR"]
+ wt_is_bipolar = self.get_weight_datatype() == DataType["BIPOLAR"]
 # reinterpret inp/wt as bipolar if bin_xnor_mode is set
- inp_is_binary = self.get_input_datatype() == DataType.BINARY
- wt_is_binary = self.get_weight_datatype() == DataType.BINARY
+ inp_is_binary = self.get_input_datatype() == DataType["BINARY"]
+ wt_is_binary = self.get_weight_datatype() == DataType["BINARY"]
bin_xnor_mode = self.get_nodeattr("binaryXnorMode") == 1
inp_is_bipolar = inp_is_bipolar or (inp_is_binary and bin_xnor_mode)
wt_is_bipolar = wt_is_bipolar or (wt_is_binary and bin_xnor_mode)
# get computed threshold datatype from attribute
tdt = DataType[self.get_nodeattr("accDataType")]
- assert np.vectorize(tdt.allowed)(threshold_tensor).all(), (
- "Thresholds in %s can't be expressed with type %s"
- % (self.onnx_node.name, str(tdt))
+ assert np.vectorize(tdt.allowed)(
+ threshold_tensor
+ ).all(), "Thresholds in %s can't be expressed with type %s" % (
+ self.onnx_node.name,
+ str(tdt),
)
thresholds_hls_code = numpy_to_hls_code(
threshold_tensor, tdt, "thresholds", False, True
@@ -855,8 +859,8 @@ def generate_params(self, model, path):
tdt_hls = tdt.get_hls_datatype_str()
# use binary to export bipolar activations
export_odt = self.get_output_datatype()
- if self.get_output_datatype() == DataType.BIPOLAR:
- export_odt = DataType.BINARY
+ if self.get_output_datatype() == DataType["BIPOLAR"]:
+ export_odt = DataType["BINARY"]
odt_hls = export_odt.get_hls_datatype_str()
f_thresh.write(
"static ThresholdsActivation<{},{},{},{},{},{},{}> threshs \
@@ -904,10 +908,10 @@ def execute_node(self, context, graph):
not float32 as expected."""
expected_inp_shape = self.get_folded_input_shape()
reshaped_input = context[inputs].reshape(expected_inp_shape)
- if self.get_input_datatype() == DataType.BIPOLAR:
+ if self.get_input_datatype() == DataType["BIPOLAR"]:
# store bipolar activations as binary
reshaped_input = (reshaped_input + 1) / 2
- export_idt = DataType.BINARY
+ export_idt = DataType["BINARY"]
else:
export_idt = self.get_input_datatype()
# make copy before saving the array
@@ -926,7 +930,7 @@ def execute_node(self, context, graph):
# load output npy file
super().npy_to_dynamic_output(context)
# reinterpret binary output as bipolar where needed
- if self.get_output_datatype() == DataType.BIPOLAR:
+ if self.get_output_datatype() == DataType["BIPOLAR"]:
out = context[node.output[0]]
out = 2 * out - 1
context[node.output[0]] = out
@@ -949,8 +953,8 @@ def execute_node(self, context, graph):
export_wdt = self.get_weight_datatype()
# we have converted bipolar weights to binary for export,
# so use it as such for weight generation
- if self.get_weight_datatype() == DataType.BIPOLAR:
- export_wdt = DataType.BINARY
+ if self.get_weight_datatype() == DataType["BIPOLAR"]:
+ export_wdt = DataType["BINARY"]
wei = npy_to_rtlsim_input(
"{}/weights.npy".format(code_gen_dir), export_wdt, wnbits
)
@@ -1005,6 +1009,17 @@ def global_includes(self):
self.code_gen_dict["$GLOBALS$"] += ['#include "thresh.h"']
def defines(self, var):
+ # Only in ipgen mode: make sure that the SIMD parameter satisfies the minimum requirement.
+ if var == "ipgen":
+ SIMD = self.get_nodeattr("SIMD")
+ MW = self.get_nodeattr("MW")
+ condition = SIMD >= (MW / 1024)
+ msg = (
+ f"HLS synthesis of StreamingFCLayer_Batch requires: "
+ f"SIMD >= MW / 1024. This is not fulfilled with: SIMD={SIMD} "
+ f"and MW={MW} for node: {self.onnx_node.name}."
+ )
+ assert condition, msg
mem_mode = self.get_nodeattr("mem_mode")
numInputVectors = list(self.get_nodeattr("numInputVectors"))
numReps = np.prod(numInputVectors)
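Given the new ipgen-mode guard, the smallest legal SIMD for a given MW can be computed up front. A hedged helper sketch (note that FINN additionally requires MW % SIMD == 0 for a valid folding):

```python
import math

def min_simd(MW):
    # smallest SIMD satisfying the ipgen check SIMD >= MW / 1024
    return max(1, math.ceil(MW / 1024))

assert min_simd(600) == 1
assert min_simd(4096) == 4
```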
@@ -1030,9 +1045,9 @@ def defines(self, var):
def read_npy_data(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_input_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_instream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -1106,8 +1121,8 @@ def docompute(self):
]
elif mem_mode == "decoupled" or mem_mode == "external":
wdt = self.get_weight_datatype()
- if wdt == DataType.BIPOLAR:
- export_wdt = DataType.BINARY
+ if wdt == DataType["BIPOLAR"]:
+ export_wdt = DataType["BINARY"]
else:
export_wdt = wdt
wdtype_hls_str = export_wdt.get_hls_datatype_str()
@@ -1132,9 +1147,9 @@ def docompute(self):
def dataoutstrm(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_output_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_outstream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -1194,6 +1209,7 @@ def blackboxfunction(self):
def pragmas(self):
mem_mode = self.get_nodeattr("mem_mode")
+ ram_style_thresholds = self.get_nodeattr("ram_style_thresholds")
self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"]
self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out")
in_fifo_depth = self.get_nodeattr("inFIFODepth")
@@ -1252,6 +1268,28 @@ def pragmas(self):
"complete dim=3"
)
)
+ # add resource pragma for thresholds if set
+ if ram_style_thresholds == "distributed":
+ self.code_gen_dict["$PRAGMAS$"].append(
+ (
+ "#pragma HLS RESOURCE variable=threshs.m_thresholds "
+ "core=ROM_2P_LUTRAM"
+ )
+ )
+ elif ram_style_thresholds == "block":
+ self.code_gen_dict["$PRAGMAS$"].append(
+ (
+ "#pragma HLS RESOURCE variable=threshs.m_thresholds "
+ "core=ROM_2P_BRAM"
+ )
+ )
+ elif ram_style_thresholds == "auto":
+ # no pragma needed
+ pass
+ else:
+ raise Exception(
+ "Unrecognized ram_style_thresholds value:" + ram_style_thresholds
+ )
def code_generation_ipi(self):
cmd = []
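The `ram_style_thresholds` handling above is a straight attribute-to-core mapping; sketched as a standalone helper for clarity (names exactly as in the pragmas emitted above):

```python
# attribute value -> HLS RESOURCE core for threshs.m_thresholds
RAM_STYLE_CORES = {
    "distributed": "ROM_2P_LUTRAM",
    "block": "ROM_2P_BRAM",
    "auto": None,  # no pragma: let Vivado HLS decide
}

def thresholds_pragmas(ram_style):
    if ram_style not in RAM_STYLE_CORES:
        raise Exception("Unrecognized ram_style_thresholds value:" + ram_style)
    core = RAM_STYLE_CORES[ram_style]
    if core is None:
        return []
    return ["#pragma HLS RESOURCE variable=threshs.m_thresholds core=" + core]
```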
diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py
index 133a869b28..91f6ed5b8d 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfifo.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py
@@ -25,16 +25,15 @@
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
+import math
import numpy as np
-from shutil import copy
+import os
import subprocess
-import math
import warnings
+from shutil import copy
-from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
from finn.core.datatype import DataType
-from onnx import TensorProto, helper
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
from . import templates
@@ -78,19 +77,7 @@ def make_shape_compatible_op(self, model):
oshape = self.get_normal_output_shape()
ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
 assert ishape == tuple(exp_ishape), "Unexpected input shape for StreamingFIFO."
- # implement tensor with correct shape
- values = np.random.randn(*oshape).astype(np.float32)
- return helper.make_node(
- "Constant",
- inputs=[],
- outputs=[self.onnx_node.output[0]],
- value=helper.make_tensor(
- name="const_tensor",
- data_type=TensorProto.FLOAT,
- dims=values.shape,
- vals=values.flatten().astype(float),
- ),
- )
+ return super().make_const_shape_op(oshape)
def infer_node_datatype(self, model):
node = self.onnx_node
@@ -261,10 +248,10 @@ def execute_node(self, context, graph):
not float32 as expected."""
expected_inp_shape = self.get_folded_input_shape()
reshaped_input = inp.reshape(expected_inp_shape)
- if DataType[self.get_nodeattr("dataType")] == DataType.BIPOLAR:
+ if DataType[self.get_nodeattr("dataType")] == DataType["BIPOLAR"]:
# store bipolar activations as binary
reshaped_input = (reshaped_input + 1) / 2
- export_idt = DataType.BINARY
+ export_idt = DataType["BINARY"]
else:
export_idt = DataType[self.get_nodeattr("dataType")]
# make copy before saving the array
diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
index 07e1197af5..1e66a5c204 100644
--- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
@@ -26,13 +26,13 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
import numpy as np
+import os
import warnings
+
+from finn.core.datatype import DataType
from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
from finn.custom_op.general.im2col import compute_conv_output_dim
-from finn.core.datatype import DataType
-from onnx import TensorProto, helper
from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
@@ -41,8 +41,8 @@ class StreamingMaxPool_Batch(HLSCustomOp):
def get_nodeattr_types(self):
my_attrs = {
- "ImgDim": ("i", True, 0),
- "PoolDim": ("i", True, 0),
+ "ImgDim": ("ints", True, []), # [H, W] = [Y, X]
+ "PoolDim": ("ints", True, []), # [H, W] = [Y, X]
"NumChannels": ("i", True, 0),
# FINN DataTypes for inputs/outputs
"dataType": ("s", True, ""),
@@ -58,10 +58,27 @@ def get_output_datatype(self):
"""Returns FINN DataType of output."""
return DataType[self.get_nodeattr("dataType")]
- def get_normal_input_shape(self):
+ def get_1d_attrs_normalized(self):
+ # support both (1, D) and (D, 1) cases transparently:
+ # assume the dummy ('1') dimension is the Y-dimension, i.e.
+ # images and kernels (and their attributes) of dimension
+ # [H, W] = [Y, X] = [D, 1] or [1, D] are always mapped to [1, D]
ifm_dim = self.get_nodeattr("ImgDim")
+ k = self.get_nodeattr("PoolDim")
+ ifm_ch = self.get_nodeattr("NumChannels")
+ if ifm_dim[1] == 1:
+ ifm_dim = ifm_dim[::-1]
+ k = k[::-1]
+ return (ifm_dim, k, ifm_ch)
+
+ def is_1d(self):
+ ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized()
+ return (ifm_dim[0] == 1) and (k[0] == 1)
+
+ def get_normal_input_shape(self):
+ ifm_dim_h, ifm_dim_w = self.get_nodeattr("ImgDim")
ifm_ch = self.get_nodeattr("NumChannels")
- ishape = (1, ifm_dim, ifm_dim, ifm_ch)
+ ishape = (1, ifm_dim_h, ifm_dim_w, ifm_ch)
return ishape
def get_folded_input_shape(self):
@@ -73,14 +90,17 @@ def get_folded_input_shape(self):
return tuple(ret)
def get_normal_output_shape(self):
- k = self.get_nodeattr("PoolDim")
- ifm_dim = self.get_nodeattr("ImgDim")
+ ifm_dim_h, ifm_dim_w = self.get_nodeattr("ImgDim")
+ k_h, k_w = tuple(self.get_nodeattr("PoolDim"))
ifm_ch = self.get_nodeattr("NumChannels")
- stride = k
+ stride_h = k_h
+ stride_w = k_w
pad = 0
- assert ifm_dim % k == 0, "StreamingMaxPool needs ImgDim % PoolDim == 0"
- ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad)
- oshape = (1, ofm_dim, ofm_dim, ifm_ch)
+ assert ifm_dim_h % k_h == 0, "StreamingMaxPool needs ImgDim_h % PoolDim_h == 0"
+ assert ifm_dim_w % k_w == 0, "StreamingMaxPool needs ImgDim_w % PoolDim_w == 0"
+ ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad)
+ ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad)
+ oshape = (1, ofm_dim_h, ofm_dim_w, ifm_ch)
return oshape
def get_folded_output_shape(self):
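The 1D-support changes hinge on `get_1d_attrs_normalized`: any [D, 1] image or kernel is flipped to [1, D] so downstream code only has to handle one orientation. A standalone sketch of the flip:

```python
def normalize_1d_attrs(ifm_dim, k):
    # map [H, W] = [D, 1] to [1, D]; [1, D] inputs pass through unchanged
    if ifm_dim[1] == 1:
        ifm_dim, k = ifm_dim[::-1], k[::-1]
    return ifm_dim, k

assert normalize_1d_attrs([4, 1], [2, 1]) == ([1, 4], [1, 2])
assert normalize_1d_attrs([1, 4], [1, 2]) == ([1, 4], [1, 2])
```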
@@ -97,9 +117,12 @@ def get_number_output_values(self):
def get_exp_cycles(self):
# derived from StreamingMaxPool_Batch loop nest
- k = self.get_nodeattr("PoolDim")
- ifm_dim = self.get_nodeattr("ImgDim")
- return int(ifm_dim * (ifm_dim + (ifm_dim / k)))
+ ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized()
+ if self.is_1d():
+ return int(ifm_dim[1] + k[1])
+ else:
+ # TODO: adjust inaccurate formula
+ return int(ifm_dim[1] * (ifm_dim[1] + (ifm_dim[1] / k[1])))
def get_instream_width(self):
dt_bits = self.get_input_datatype().bitwidth()
@@ -116,19 +139,7 @@ def make_shape_compatible_op(self, model):
oshape = self.get_normal_output_shape()
ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
 assert ishape == exp_ishape, "Unexpected input shape for StreamingMaxPool."
- # implement tensor with correct shape
- values = np.random.randn(*oshape).astype(np.float32)
- return helper.make_node(
- "Constant",
- inputs=[],
- outputs=[self.onnx_node.output[0]],
- value=helper.make_tensor(
- name="const_tensor",
- data_type=TensorProto.FLOAT,
- dims=values.shape,
- vals=values.flatten().astype(float),
- ),
- )
+ return super().make_const_shape_op(oshape)
def infer_node_datatype(self, model):
node = self.onnx_node
@@ -166,11 +177,13 @@ def global_includes(self):
def defines(self, var):
numReps = 2
+ ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized()
+
self.code_gen_dict["$DEFINES$"] = [
"""#define ImgDim {}\n #define PoolDim {}\n
#define NumChannels {}\n #define numReps {}""".format(
- self.get_nodeattr("ImgDim"),
- self.get_nodeattr("PoolDim"),
+ ifm_dim[1],
+ k[1],
self.get_nodeattr("NumChannels"),
numReps,
)
@@ -179,9 +192,9 @@ def defines(self, var):
def read_npy_data(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_input_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_instream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -206,12 +219,18 @@ def strm_decl(self):
def docompute(self):
dtype = self.get_input_datatype()
if dtype.bitwidth() == 1:
- op = "StreamingMaxPool_Batch"
+ if self.is_1d():
+ raise Exception("Binary 1d MaxPool not implemented on HLS backend")
+ else:
+ op = "StreamingMaxPool_Batch"
self.code_gen_dict["$DOCOMPUTE$"] = [
"%s(in0, out, numReps);" % (op)
]
else:
- op = "StreamingMaxPool_Precision_Batch"
+ if self.is_1d():
+ op = "StreamingMaxPool_Precision_Batch_1d"
+ else:
+ op = "StreamingMaxPool_Precision_Batch"
dtype = self.get_input_datatype()
dtype_hls = dtype.get_hls_datatype_str()
minval_str = str(int(dtype.min()))
@@ -223,9 +242,9 @@ def docompute(self):
def dataoutstrm(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_output_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_outstream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -291,10 +310,10 @@ def execute_node(self, context, graph):
inp.shape == exp_ishape
), """Input shape doesn't
match expected shape (1, ifm_dim, ifm_dim, ifm_ch)."""
- if self.get_input_datatype() == DataType.BIPOLAR:
+ if self.get_input_datatype() == DataType["BIPOLAR"]:
# store bipolar activations as binary
inp = (inp + 1) / 2
- export_idt = DataType.BINARY
+ export_idt = DataType["BINARY"]
else:
export_idt = self.get_input_datatype()
# no reshaping for input since assuming no folding on input
@@ -341,7 +360,7 @@ def execute_node(self, context, graph):
)
)
# binary -> bipolar if needed
- if self.get_output_datatype() == DataType.BIPOLAR:
+ if self.get_output_datatype() == DataType["BIPOLAR"]:
out = context[node.output[0]]
out = 2 * out - 1
context[node.output[0]] = out
diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py
index 40221ce3b3..e253348598 100644
--- a/src/finn/custom_op/fpgadataflow/templates.py
+++ b/src/finn/custom_op/fpgadataflow/templates.py
@@ -88,12 +88,13 @@
set config_proj_part "$FPGAPART$"
set config_bnnlibdir "$FINNHLSLIBDIR$"
+set config_customhlsdir "$FINNHLSCUSTOMDIR$"
set config_toplevelfxn "$TOPFXN$"
set config_clkperiod $CLKPERIOD$
open_project $config_proj_name
-add_files $config_hwsrcdir/top_$TOPFXN$.cpp -cflags "-std=c++0x -I$config_bnnlibdir"
+add_files $config_hwsrcdir/top_$TOPFXN$.cpp -cflags "-std=c++0x -I$config_bnnlibdir -I$config_customhlsdir"
set_top $config_toplevelfxn
open_solution sol1
@@ -355,56 +356,3 @@
endmodule
"""
-
-decoupled_thresholding_template = """
-template <
- unsigned ImgDim, unsigned NumChannels, unsigned PE,
- typename TSrcI = Identity, typename TDstI = Identity,
- int ActVal=0, typename TT, unsigned int NumSteps,
- typename TI, typename TO>
-void Thresholding_Stream_Batch(hls::stream<TI> &in,
- hls::stream<TO> &out,
- hls::stream<ap_uint<PE*NumSteps*TT::width>> &weight,
- int const reps)
-{
-
- // how many different rows each neuron will compute
- // alternatively: number of vertical matrix chunks
- unsigned const NF = NumChannels / PE;
-
- ThresholdsActivation<1, PE, NumSteps, TT, TO, ActVal, comp::less_equal<TT>> internal_thr;
- #pragma HLS ARRAY_PARTITION variable=internal_thr.m_thresholds complete dim=0
-
- // everything merged into a common iteration space (one "big" loop instead
- // of smaller nested loops) to get the pipelining the way we want
- for (unsigned i = 0; i < reps * ImgDim * ImgDim * NF; i++)
- {
- #pragma HLS PIPELINE II=1
-
- ap_uint<PE*NumSteps*TT::width> packed_thr;
- packed_thr = weight.read();
- // slicer to get 1 PE's worth of thresholds
- auto const pe_slicer = Slice<ap_uint<NumSteps*TT::width>>()(packed_thr);
-
- TI inElem;
- inElem = in.read();
- auto outElem = TDstI().template operator()<TO>();
-
- for (unsigned pe = 0; pe < PE; pe++)
- {
-#pragma HLS UNROLL
- // slicer to get individual thresholds
- auto const thr_slicer = Slice<TT>()(pe_slicer(pe, 0));
- for (unsigned nt = 0; nt < NumSteps; nt++)
- {
- #pragma HLS UNROLL
- internal_thr.m_thresholds[pe][0][nt] = thr_slicer(nt, 0);
- }
-
- auto const act = TSrcI()(inElem);
- outElem(pe,0,1) = internal_thr.activate(0, pe, act(pe,0));
- }
- out.write(outElem);
- }
-}
-"""
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
index 0b248c1503..610139f44e 100644
--- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
@@ -26,13 +26,12 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from math import ceil, log2
-import textwrap
+import numpy as np
import os
+import textwrap
import warnings
-import numpy as np
+from math import ceil, log2
-from onnx import TensorProto, helper
from finn.core.datatype import DataType
from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
from finn.util.basic import (
@@ -42,9 +41,10 @@
from finn.util.data_packing import (
npy_to_rtlsim_input,
numpy_to_hls_code,
- rtlsim_output_to_npy,
pack_innermost_dim_as_hex_string,
+ rtlsim_output_to_npy,
)
+
from . import templates
# ONNX i/o tensor shape assumptions for Thresholding:
@@ -111,19 +111,7 @@ def calc_tmem(self):
def make_shape_compatible_op(self, model):
oshape = self.get_normal_output_shape()
- # implement tensor with correct shape
- values = np.random.randn(*oshape).astype(np.float32)
- return helper.make_node(
- "Constant",
- inputs=[],
- outputs=[self.onnx_node.output[0]],
- value=helper.make_tensor(
- name="const_tensor",
- data_type=TensorProto.FLOAT,
- dims=values.shape,
- vals=values.flatten().astype(float),
- ),
- )
+ return super().make_const_shape_op(oshape)
def infer_node_datatype(self, model):
node = self.onnx_node
@@ -131,8 +119,8 @@ def infer_node_datatype(self, model):
if idt != self.get_input_datatype():
warn_str = "inputDataType changing for %s: %s -> %s " % (
node.name,
- str(self.get_input_datatype()),
- str(idt),
+ str(self.get_input_datatype().name),
+ str(idt.name),
)
warnings.warn(warn_str)
self.set_nodeattr("inputDataType", idt.name)
@@ -180,7 +168,7 @@ def bram_estimation(self):
return 0
def lut_estimation(self):
- """Calculates LUT cost, taking memory resource type into account """
+ """Calculates LUT cost, taking memory resource type into account"""
# TODO add in/out FIFO contributions
style = self.get_nodeattr("ram_style")
P = self.get_nodeattr("PE")
@@ -224,7 +212,7 @@ def minimize_accumulator_width(self, model):
if abs(tdt_min) > tdt_max:
tdt = DataType.get_smallest_possible(tdt_min)
else:
- tdt = DataType.get_smallest_possible(0 - tdt_max - 1)
+ tdt = DataType.get_smallest_possible(-tdt_max - 1)
else:
tdt = DataType.get_smallest_possible(tdt_max)
assert np.vectorize(tdt.allowed)(
@@ -335,7 +323,7 @@ def get_hls_compatible_threshold_tensor(self, orig_thres_matrix):
).all(), "Need int threshold tensor"
ret = orig_thres_matrix
# workaround for vivado_hls threshold bug
- if ret[0][0] == 0:
+ if ret[0][0] == 0 and n_thres_steps == 1:
ret = np.copy(ret)
ret[0][0] = 1
warnings.warn(
@@ -389,8 +377,8 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name):
tdt_hls = tdt.get_hls_datatype_str()
# use binary to export bipolar activations
export_odt = self.get_output_datatype()
- if self.get_output_datatype() == DataType.BIPOLAR:
- export_odt = DataType.BINARY
+ if self.get_output_datatype() == DataType["BIPOLAR"]:
+ export_odt = DataType["BINARY"]
odt_hls = export_odt.get_hls_datatype_str()
f_thresh.write(
"static ThresholdsActivation<{},{},{},{},{},{},{}> threshs \
@@ -514,10 +502,10 @@ def execute_node(self, context, graph):
not float32 as expected."""
expected_inp_shape = self.get_folded_input_shape()
reshaped_input = context[inputs].reshape(expected_inp_shape)
- if self.get_input_datatype() == DataType.BIPOLAR:
+ if self.get_input_datatype() == DataType["BIPOLAR"]:
# store bipolar activations as binary
reshaped_input = (reshaped_input + 1) / 2
- export_idt = DataType.BINARY
+ export_idt = DataType["BINARY"]
else:
export_idt = self.get_input_datatype()
# make copy before saving the array
@@ -536,7 +524,7 @@ def execute_node(self, context, graph):
# load output npy file
super().npy_to_dynamic_output(context)
# reinterpret binary output as bipolar where needed
- if self.get_output_datatype() == DataType.BIPOLAR:
+ if self.get_output_datatype() == DataType["BIPOLAR"]:
out = context[node.output[0]]
out = 2 * out - 1
context[node.output[0]] = out
@@ -604,7 +592,9 @@ def defines(self, var):
numReps = numInputVectors[0]
self.code_gen_dict["$DEFINES$"] = [
"""#define NumChannels1 {}\n #define PE1 {}\n #define numReps {}""".format(
- self.get_nodeattr("NumChannels"), self.get_nodeattr("PE"), numReps,
+ self.get_nodeattr("NumChannels"),
+ self.get_nodeattr("PE"),
+ numReps,
)
]
if self.get_nodeattr("mem_mode") == "decoupled":
@@ -618,10 +608,6 @@ def defines(self, var):
self.code_gen_dict["$DEFINES$"].append(
"#define NumSteps1 %d" % self.get_nodeattr("numSteps")
)
- # TODO remove once Thresholding_Stream_Batch is in hlslib:
- self.code_gen_dict["$DEFINES$"].append(
- templates.decoupled_thresholding_template
- )
def read_npy_data(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
@@ -676,25 +662,32 @@ def docompute(self):
node = self.onnx_node
ishape = self.get_folded_input_shape()
if len(ishape) == 3:
- imgdim = 1
+ imgdimh = 1
+ imgdimw = 1
elif len(ishape) == 5:
- imgdim = ishape[1]
+ imgdimh = ishape[1]
+ imgdimw = ishape[2]
else:
- raise Exception("""Unexpeted input shape""")
+ raise Exception("""Unexpected input shape""")
mem_mode = self.get_nodeattr("mem_mode")
if mem_mode == "const":
self.code_gen_dict["$DOCOMPUTE$"] = [
- """{}<{}, NumChannels1, PE1, {}, {}>
+ """{}<{}, {}, NumChannels1, PE1, {}, {}>
(in0, out, threshs, numReps);""".format(
- node.op_type, imgdim, tmpl_args["TSrcI"], tmpl_args["TDstI"],
+ node.op_type,
+ imgdimh,
+ imgdimw,
+ tmpl_args["TSrcI"],
+ tmpl_args["TDstI"],
)
]
elif mem_mode == "decoupled":
self.code_gen_dict["$DOCOMPUTE$"] = [
- """{}<{}, NumChannels1, PE1, {}, {}, ActVal1, ThresType1, NumSteps1>
+ """{}<{}, {}, NumChannels1, PE1, {}, {}, ActVal1, ThresType1, NumSteps1>
(in0, out, weights, numReps);""".format(
"Thresholding_Stream_Batch",
- imgdim,
+ imgdimh,
+ imgdimw,
tmpl_args["TSrcI"],
tmpl_args["TDstI"],
)
@@ -705,9 +698,9 @@ def docompute(self):
def dataoutstrm(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
dtype = self.get_output_datatype()
- if dtype == DataType.BIPOLAR:
+ if dtype == DataType["BIPOLAR"]:
# use binary for bipolar storage
- dtype = DataType.BINARY
+ dtype = DataType["BINARY"]
elem_bits = dtype.bitwidth()
packed_bits = self.get_outstream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
diff --git a/src/finn/custom_op/fpgadataflow/upsampler.py b/src/finn/custom_op/fpgadataflow/upsampler.py
new file mode 100644
index 0000000000..7114cd83ed
--- /dev/null
+++ b/src/finn/custom_op/fpgadataflow/upsampler.py
@@ -0,0 +1,311 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+import os
+import warnings
+
+from finn.core.datatype import DataType
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
+from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
+
+
+class UpsampleNearestNeighbour_Batch(HLSCustomOp):
+ """
+ Corresponds to finn-hlslib UpsampleNearestNeighbour_Batch function.
+ Upsampling is done with the Nearest Neighbour algorithm.
+ The layer expects square feature maps for the input and output.
+ """
+
+ def __init__(self, onnx_node):
+ super().__init__(onnx_node)
+
+ def get_nodeattr_types(self):
+ my_attrs = {
+ # Size of the output feature map
+ "OFMDim": ("i", True, 0),
+ # Size of the input feature map
+ "IFMDim": ("i", True, 0),
+ # Amount of channels of the input feature map
+ "NumChannels": ("i", True, 0),
+ # FINN input datatype
+ "inputDataType": ("s", True, ""),
+ # Batch size
+ "numInputVectors": ("i", False, 1),
+ }
+ my_attrs.update(super().get_nodeattr_types())
+ return my_attrs
+
+ def get_exp_cycles(self):
+ OFMDim = self.get_nodeattr("OFMDim")
+ batch_size = self.get_nodeattr("numInputVectors")
+ exp_cycles = OFMDim * OFMDim * batch_size
+ return int(exp_cycles)
+
+ def get_normal_input_shape(self):
+ IFMDim = self.get_nodeattr("IFMDim")
+ num_ch = self.get_nodeattr("NumChannels")
+ batch = self.get_nodeattr("numInputVectors")
+ ishape = (batch, IFMDim, IFMDim, num_ch)
+ return ishape
+
+ def get_normal_output_shape(self):
+ OFMDim = self.get_nodeattr("OFMDim")
+ num_ch = self.get_nodeattr("NumChannels")
+ batch = self.get_nodeattr("numInputVectors")
+ oshape = (batch, OFMDim, OFMDim, num_ch)
+ return oshape
+
+ def get_folded_input_shape(self):
+ normal_ishape = list(self.get_normal_input_shape())
+ return tuple(normal_ishape)
+
+ def get_folded_output_shape(self):
+ normal_oshape = list(self.get_normal_output_shape())
+ return tuple(normal_oshape)
+
+ def make_shape_compatible_op(self, model):
+ exp_ishape = self.get_normal_input_shape()
+ oshape = self.get_normal_output_shape()
+ ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
+ assert (
+ ishape == exp_ishape
+ ), "Unexpect input shape for UpsampleNearestNeighbour_Batch."
+ return super().make_const_shape_op(oshape)
+
+ def infer_node_datatype(self, model):
+ node = self.onnx_node
+ # data type stays the same
+ idt = model.get_tensor_datatype(node.input[0])
+ if idt != self.get_input_datatype():
+ warn_str = "inputDataType changing for %s: %s -> %s " % (
+ node.name,
+ str(self.get_input_datatype()),
+ str(idt),
+ )
+ warnings.warn(warn_str)
+ self.set_nodeattr("inputDataType", idt.name)
+ model.set_tensor_datatype(node.output[0], idt)
+
+ def verify_node(self):
+ pass
+
+ def get_input_datatype(self):
+ """Returns FINN DataType of input."""
+ ret = DataType[self.get_nodeattr("inputDataType")]
+ return ret
+
+ def get_output_datatype(self):
+ """Returns FINN DataType of output. (Same as input datatype)"""
+ return self.get_input_datatype()
+
+ def get_instream_width(self):
+ ibits = self.get_input_datatype().bitwidth()
+ ifm_ch = self.get_nodeattr("NumChannels")
+ return ibits * ifm_ch
+
+ def get_outstream_width(self):
+ obits = self.get_output_datatype().bitwidth()
+ ifm_ch = self.get_nodeattr("NumChannels")
+ return obits * ifm_ch
+
+ def get_number_output_values(self):
+ folded_oshape = self.get_folded_output_shape()
+ return np.prod(folded_oshape[:-1])
+
+ def global_includes(self):
+ self.code_gen_dict["$GLOBALS$"] = ['#include "upsample.hpp"']
+
+ def defines(self, var):
+ self.code_gen_dict["$DEFINES$"] = []
+
+ ifm_ch = self.get_nodeattr("NumChannels")
+ self.code_gen_dict["$DEFINES$"] += ["#define IFMChannels {}".format(ifm_ch)]
+
+ ibits = self.get_input_datatype().bitwidth()
+ self.code_gen_dict["$DEFINES$"] += ["#define Input_precision {}".format(ibits)]
+
+ idim = self.get_nodeattr("IFMDim")
+ self.code_gen_dict["$DEFINES$"] += ["#define IFMDim {}".format(idim)]
+
+ odim = self.get_nodeattr("OFMDim")
+ self.code_gen_dict["$DEFINES$"] += ["#define OFMDim {}".format(odim)]
+
+ batch_size = self.get_nodeattr("numInputVectors")
+ self.code_gen_dict["$DEFINES$"] += ["#define numReps {}".format(batch_size)]
+
+ def read_npy_data(self):
+ code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+ dtype = self.get_input_datatype()
+ if dtype == DataType["BIPOLAR"]:
+ # use binary for bipolar storage
+ dtype = DataType["BINARY"]
+ elem_bits = dtype.bitwidth()
+ packed_bits = self.get_instream_width()
+ packed_hls_type = "ap_uint<%d>" % packed_bits
+ elem_hls_type = dtype.get_hls_datatype_str()
+ npy_type = "float"
+ npy_in = "%s/input_0.npy" % code_gen_dir
+ self.code_gen_dict["$READNPYDATA$"] = []
+ self.code_gen_dict["$READNPYDATA$"].append(
+ 'npy2apintstream<%s, %s, %d, %s>("%s", in0);'
+ % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in)
+ )
+
+ def strm_decl(self):
+ self.code_gen_dict["$STREAMDECLARATIONS$"] = []
+ self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+ 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width())
+ )
+ self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+ 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width())
+ )
+
+ def docompute(self):
+ self.code_gen_dict["$DOCOMPUTE$"] = [
+ """UpsampleNearestNeighbour_Batch > (in0, out, numReps);"""
+ ]
+
+ def dataoutstrm(self):
+ code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+ dtype = self.get_output_datatype()
+ if dtype == DataType["BIPOLAR"]:
+ # use binary for bipolar storage
+ dtype = DataType["BINARY"]
+ elem_bits = dtype.bitwidth()
+ packed_bits = self.get_outstream_width()
+ packed_hls_type = "ap_uint<%d>" % packed_bits
+ elem_hls_type = dtype.get_hls_datatype_str()
+ npy_type = "float"
+ npy_out = "%s/output.npy" % code_gen_dir
+ oshape = self.get_folded_output_shape()
+ oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}")
+
+ self.code_gen_dict["$DATAOUTSTREAM$"] = [
+ 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");'
+ % (
+ packed_hls_type,
+ elem_hls_type,
+ elem_bits,
+ npy_type,
+ oshape_cpp_str,
+ npy_out,
+ )
+ ]
+
+ def save_as_npy(self):
+ self.code_gen_dict["$SAVEASCNPY$"] = []
+
+ def blackboxfunction(self):
+ packed_bits = self.get_instream_width()
+ packed_hls_type = "ap_uint<%d>" % packed_bits
+ self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
+ "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)"
+ % (self.onnx_node.name, packed_hls_type, packed_hls_type)
+ ]
+
+ def pragmas(self):
+ self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"]
+ self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out")
+ self.code_gen_dict["$PRAGMAS$"].append(
+ "#pragma HLS INTERFACE ap_ctrl_none port=return"
+ )
+
+ def execute_node(self, context, graph):
+ mode = self.get_nodeattr("exec_mode")
+ node = self.onnx_node
+ exp_ishape = self.get_normal_input_shape()
+ exp_oshape = self.get_normal_output_shape()
+ folded_ishape = self.get_folded_input_shape()
+ folded_oshape = self.get_folded_output_shape()
+
+ if mode == "cppsim":
+ code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+ elif mode == "rtlsim":
+ code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+ else:
+ raise Exception(
+ """Invalid value for attribute exec_mode! Is currently set to: {}
+ has to be set to one of the following values ("cppsim", "rtlsim")""".format(
+ mode
+ )
+ )
+
+ inp = context[node.input[0]]
+ assert str(inp.dtype) == "float32", "Input datatype is not float32"
+ assert (
+ inp.shape == exp_ishape
+ ), """Input shape doesn't
+ match expected shape (numInputVectors, ImgDim, ImgDim, NumChannels)."""
+ export_idt = self.get_input_datatype()
+
+ reshaped_input = inp.reshape(folded_ishape)
+ np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)
+
+ if mode == "cppsim":
+ # execute the precompiled model
+ super().exec_precompiled_singlenode_model()
+ # load output npy file
+ super().npy_to_dynamic_output(context)
+ assert (
+ context[node.output[0]].shape == folded_oshape
+ ), "cppsim did not produce expected folded output shape"
+ context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape)
+ elif mode == "rtlsim":
+ sim = self.get_rtlsim()
+ nbits = self.get_instream_width()
+ rtlsim_inp = npy_to_rtlsim_input(
+ "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
+ )
+ super().reset_rtlsim(sim)
+ super().toggle_clk(sim)
+ rtlsim_output = self.rtlsim(sim, rtlsim_inp)
+ odt = export_idt
+ target_bits = odt.bitwidth()
+ packed_bits = self.get_outstream_width()
+ out_npy_path = "{}/output.npy".format(code_gen_dir)
+ out_shape = self.get_folded_output_shape()
+ rtlsim_output_to_npy(
+ rtlsim_output, out_npy_path, odt, out_shape, packed_bits, target_bits
+ )
+ # load and reshape output
+ output = np.load(out_npy_path)
+ output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape)
+ context[node.output[0]] = output
+ else:
+ raise Exception(
+ """Invalid value for attribute exec_mode! Is currently set to: {}
+ has to be set to one of the following values ("cppsim", "rtlsim")""".format(
+ mode
+ )
+ )
+ assert (
+ context[node.output[0]].shape == exp_oshape
+ ), """Output shape doesn't match expected shape
+ (1, OutputDim, OutputDim, NumChannels)."""
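Per `get_exp_cycles` above, the upsampler's latency scales with the output feature map, which suggests one output pixel (all channels packed into a single stream word) per cycle. A quick worked example:

```python
# expected cycles for a 16x16 -> 32x32 nearest-neighbour upsample, batch 1
OFMDim, batch_size = 32, 1
exp_cycles = OFMDim * OFMDim * batch_size   # = 1024
```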
diff --git a/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py b/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
index fead30650c..e0f789a888 100644
--- a/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
+++ b/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
@@ -1,20 +1,48 @@
-import os
-import numpy as np
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
import math
-from onnx import TensorProto, helper
+import numpy as np
+import os
+import warnings
+
from finn.core.datatype import DataType
from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
from finn.util.basic import (
+ calculate_matvec_accumulator_range,
interleave_matrix_outer_dim_from_partitions,
roundup_to_integer_multiple,
- calculate_matvec_accumulator_range,
)
from finn.util.data_packing import (
npy_to_rtlsim_input,
numpy_to_hls_code,
rtlsim_output_to_npy,
)
-import warnings
class Vector_Vector_Activate_Batch(HLSCustomOp):
@@ -82,7 +110,7 @@ def minimize_accumulator_width(self, model):
if abs(tdt_min) > tdt_max:
tdt = DataType.get_smallest_possible(tdt_min)
else:
- tdt = DataType.get_smallest_possible(0 - tdt_max)
+ tdt = DataType.get_smallest_possible(-tdt_max - 1)
else:
tdt = DataType.get_smallest_possible(tdt_max)
assert np.vectorize(tdt.allowed)(
@@ -97,7 +125,7 @@ def minimize_accumulator_width(self, model):
if abs(acc_min) > acc_max:
adt = DataType.get_smallest_possible(acc_min)
else:
- adt = DataType.get_smallest_possible(0 - acc_max)
+ adt = DataType.get_smallest_possible(-acc_max - 1)
else:
adt = DataType.get_smallest_possible(acc_max)
# ensure a datatype divisible by 8-bits in case this is the last node
@@ -128,19 +156,7 @@ def calc_tmem(self):
def make_shape_compatible_op(self, model):
oshape = self.get_normal_output_shape()
- # implement tensor with correct shape
- values = np.random.randn(*oshape).astype(np.float32)
- return helper.make_node(
- "Constant",
- inputs=[],
- outputs=[self.onnx_node.output[0]],
- value=helper.make_tensor(
- name="const_tensor",
- data_type=TensorProto.FLOAT,
- dims=values.shape,
- vals=values.flatten().astype(float),
- ),
- )
+ return super().make_const_shape_op(oshape)
def infer_node_datatype(self, model):
node = self.onnx_node
@@ -235,8 +251,8 @@ def get_template_param_values(self):
ret = dict()
inp_hls_str = self.get_input_datatype().get_hls_datatype_str()
out_hls_str = self.get_output_datatype().get_hls_datatype_str()
- inp_is_bipolar = self.get_input_datatype() == DataType.BIPOLAR
- wt_is_bipolar = self.get_weight_datatype() == DataType.BIPOLAR
+ inp_is_bipolar = self.get_input_datatype() == DataType["BIPOLAR"]
+ wt_is_bipolar = self.get_weight_datatype() == DataType["BIPOLAR"]
# fill in TSrcI and TWeightI
# TODO handle bipolar inputs
if inp_is_bipolar or wt_is_bipolar:
diff --git a/src/finn/qnn-data/build_dataflow/dataflow_build_config.json b/src/finn/qnn-data/build_dataflow/dataflow_build_config.json
index 5e4cf2d302..27ec38f6a4 100644
--- a/src/finn/qnn-data/build_dataflow/dataflow_build_config.json
+++ b/src/finn/qnn-data/build_dataflow/dataflow_build_config.json
@@ -6,6 +6,7 @@
"board": "Pynq-Z1",
"standalone_thresholds": true,
"shell_flow_type": "vivado_zynq",
+ "verify_save_rtlsim_waveforms": true,
"verify_steps": [
"initial_python",
"streamlined_python",
diff --git a/src/finn/qnn-data/cpp/npy2apintstream.hpp b/src/finn/qnn-data/cpp/npy2apintstream.hpp
index f3afbc5bfb..6aade3a2bb 100644
--- a/src/finn/qnn-data/cpp/npy2apintstream.hpp
+++ b/src/finn/qnn-data/cpp/npy2apintstream.hpp
@@ -3,6 +3,7 @@
#include "hls_stream.h"
#include "ap_int.h"
 #include <vector>
+#include
#ifdef DEBUG
#define DEBUG_NPY2APINTSTREAM(x) std::cout << "[npy2apintstream] " << x << std::endl;
@@ -34,7 +35,7 @@ void npy2apintstream(const char * npy_path, hls::stream<PackedT> & out_stream, b
NpyT loaded_elem_npyt = *loaded_data;
ElemT loaded_elem = (ElemT) loaded_elem_npyt;
DEBUG_NPY2APINTSTREAM("NpyT " << loaded_elem_npyt << " elem " << loaded_elem)
- packed_elem((i+1)*ElemBits-1, i*ElemBits) = loaded_elem;
+ packed_elem((i+1)*ElemBits-1, i*ElemBits) = *reinterpret_cast<ap_uint<ElemBits>*>(&loaded_elem);
loaded_data++;
}
DEBUG_NPY2APINTSTREAM("packed hls elem " << std::hex << packed_elem << std::dec)
@@ -44,25 +45,34 @@ void npy2apintstream(const char * npy_path, hls::stream & out_stream, b
}
 template <typename PackedT, typename ElemT, int ElemBits, typename NpyT>
-void apintstream2npy(hls::stream<PackedT> & in_stream, const std::vector<size_t> & shape, const char * npy_path, bool reverse_inner = true, size_t numReps = 1) {
+void apintstream2npy(hls::stream<PackedT> & in_stream, const std::vector<size_t> & shape, const char * npy_path, bool reverse_inner = true, size_t numReps = 1, size_t multi_pixel_out = 1) {
for(size_t rep = 0; rep < numReps; rep++) {
std::vector data_to_save;
size_t outer_dim_elems = 1;
for(size_t dim = 0; dim < shape.size()-1; dim++) {
outer_dim_elems *= shape[dim];
}
- size_t inner_dim_elems = shape[shape.size()-1];
- DEBUG_APINTSTREAM2NPY("n_outer " << outer_dim_elems << " n_inner " << inner_dim_elems)
+ size_t inner_dim_elems = shape[shape.size()-1] / multi_pixel_out;
+ DEBUG_APINTSTREAM2NPY("n_outer " << outer_dim_elems << " n_inner " << inner_dim_elems << " n_multi_pixel_out " << multi_pixel_out)
for(size_t outer_elem = 0; outer_elem < outer_dim_elems; outer_elem++) {
PackedT packed_elem;
in_stream >> packed_elem;
DEBUG_APINTSTREAM2NPY("packed hls elem " << std::hex << packed_elem << std::dec)
- for(size_t ii = 0; ii < inner_dim_elems; ii++) {
- size_t i = reverse_inner ? inner_dim_elems-ii-1 : ii;
- ElemT elem = packed_elem((i+1)*ElemBits-1, i*ElemBits);
- NpyT npyt = (NpyT) elem;
- DEBUG_APINTSTREAM2NPY("elem " << elem << " NpyT " << npyt)
- data_to_save.push_back(npyt);
+ for(size_t ii_multi_pixel_out = 0; ii_multi_pixel_out < multi_pixel_out; ii_multi_pixel_out++) {
+ // loop over multi_pixel_out blocks of inner_dim_elems separately,
+ // so that reverse_inner is not applied across multiple pixels
+ for(size_t ii = 0; ii < inner_dim_elems; ii++) {
+ size_t i = ii_multi_pixel_out*inner_dim_elems;
+ i += reverse_inner ? inner_dim_elems-ii-1 : ii;
+ ap_uint<ElemBits> tmp_elem = packed_elem((i+1)*ElemBits-1, i*ElemBits);
+ // important: don't init elem = reinterpret_cast.. directly here
+ // this causes weird behavior for conversion to NpyT afterwards
+ ElemT elem;
+ elem = reinterpret_cast<ElemT&>(tmp_elem);
+ NpyT npyt = (NpyT) elem;
+ DEBUG_APINTSTREAM2NPY("elem " << elem << " NpyT " << npyt)
+ data_to_save.push_back(npyt);
+ }
}
}
cnpy::npy_save(npy_path, &data_to_save[0], shape, "w");
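The `multi_pixel_out` extension changes only the index order: `reverse_inner` is now applied within each pixel's block of `inner_dim_elems` rather than across the whole packed word. A Python sketch of the resulting element order:

```python
def unpack_order(inner_dim_elems, multi_pixel_out, reverse_inner=True):
    order = []
    for block in range(multi_pixel_out):
        for ii in range(inner_dim_elems):
            i = block * inner_dim_elems
            i += inner_dim_elems - ii - 1 if reverse_inner else ii
            order.append(i)
    return order

# two pixels of three elements each: each pixel is reversed independently
assert unpack_order(3, 2) == [2, 1, 0, 5, 4, 3]
```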
diff --git a/src/finn/qnn-data/cybsec-mlp/validate-unsw-nb15.py b/src/finn/qnn-data/cybsec-mlp/validate-unsw-nb15.py
index 2fabc716a6..be09abad9c 100644
--- a/src/finn/qnn-data/cybsec-mlp/validate-unsw-nb15.py
+++ b/src/finn/qnn-data/cybsec-mlp/validate-unsw-nb15.py
@@ -27,9 +27,9 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
+import numpy as np
from driver import io_shape_dict
from driver_base import FINNExampleOverlay
-import numpy as np
def make_unsw_nb15_test_batches(bsize, dataset_root, limit_batches):
diff --git a/src/finn/qnn-data/templates/driver/driver_base.py b/src/finn/qnn-data/templates/driver/driver_base.py
index df3c988137..b6dd835080 100644
--- a/src/finn/qnn-data/templates/driver/driver_base.py
+++ b/src/finn/qnn-data/templates/driver/driver_base.py
@@ -27,19 +27,18 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import numpy as np
-import time
import os
+import time
from pynq import Overlay, allocate
from pynq.ps import Clocks
+from finn.core.datatype import DataType
+from finn.util.basic import gen_finn_dt_tensor
from finn.util.data_packing import (
finnpy_to_packed_bytearray,
packed_bytearray_to_finnpy,
)
-from finn.util.basic import gen_finn_dt_tensor
-from finn.core.datatype import DataType
-
# Driver base class for FINN-generated dataflow accelerators.
# The particulars of the generated accelerator are specified via the
# io_shape_dict (generated by the MakePYNQDriver transformation).
@@ -86,24 +85,27 @@ def __init__(
self.platform = platform
self.batch_size = batch_size
self.fclk_mhz = fclk_mhz
- if self.platform == "alveo":
- if "input_dma_name" in io_shape_dict.keys():
- self.idma = getattr(self, io_shape_dict["input_dma_name"])
- else:
- self.idma = self.idma0
- self.odma = self.odma0
- self.odma_handle = None
- elif self.platform == "zynq-iodma":
- if "input_dma_name" in io_shape_dict.keys():
- self.idma = getattr(self, io_shape_dict["input_dma_name"])
- else:
- self.idma = self.idma0
- self.odma = self.odma0
+ self.idma = []
+ self.odma = []
+ self.odma_handle = []
+ if "input_dma_name" in io_shape_dict.keys():
+ for idma_name in io_shape_dict["input_dma_name"]:
+ self.idma.append(getattr(self, idma_name))
+ else:
+ self.idma = [self.idma0]
+ if "output_dma_name" in io_shape_dict.keys():
+ for odma_name in io_shape_dict["output_dma_name"]:
+ self.odma.append(getattr(self, odma_name))
+ if self.platform == "alveo":
+ self.odma_handle.append(None)
+ else:
+ self.odma = [self.odma0]
+ if self.platform == "alveo":
+ self.odma_handle.append(None)
+ if self.platform == "zynq-iodma":
# set the clock frequency as specified by user during transformations
if self.fclk_mhz > 0:
Clocks.fclk0_mhz = self.fclk_mhz
- else:
- raise ValueError("Supported platforms are zynq-iodma alveo")
# load any external + runtime weights
self.load_external_weights()
self.load_runtime_weights()
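The constructor now discovers DMAs purely from `io_shape_dict`. A hypothetical excerpt for a two-input, one-output accelerator (field names follow the lookups above; values are illustrative only, and the per-interface shape lists are omitted):

```python
from finn.core.datatype import DataType

io_shape_dict = {
    "num_inputs": 2,
    "num_outputs": 1,
    "input_dma_name": ["idma0", "idma1"],
    "output_dma_name": ["odma0"],
    "idt": [DataType["UINT8"], DataType["UINT8"]],  # per-input datatypes
    "odt": [DataType["INT8"]],                      # per-output datatype
}
```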
@@ -205,50 +207,50 @@ def load_runtime_weights(self, flush_accel=True, verify=True):
# run accelerator to flush any stale weights from weight streamer FIFOs
self.execute_on_buffers()
- @property
- def idt(self):
- return self._io_shape_dict["idt"]
+ def idt(self, ind=0):
+ return self._io_shape_dict["idt"][ind]
- @property
- def odt(self):
- return self._io_shape_dict["odt"]
+ def odt(self, ind=0):
+ return self._io_shape_dict["odt"][ind]
- @property
- def ishape_normal(self):
- ret = list(self._io_shape_dict["ishape_normal"])
+ def ishape_normal(self, ind=0):
+ ret = list(self._io_shape_dict["ishape_normal"][ind])
ret[0] = self.batch_size
return tuple(ret)
- @property
- def oshape_normal(self):
- ret = list(self._io_shape_dict["oshape_normal"])
+ def oshape_normal(self, ind=0):
+ ret = list(self._io_shape_dict["oshape_normal"][ind])
ret[0] = self.batch_size
return tuple(ret)
- @property
- def ishape_folded(self):
- ret = list(self._io_shape_dict["ishape_folded"])
+ def ishape_folded(self, ind=0):
+ ret = list(self._io_shape_dict["ishape_folded"][ind])
ret[0] = self.batch_size
return tuple(ret)
- @property
- def oshape_folded(self):
- ret = list(self._io_shape_dict["oshape_folded"])
+ def oshape_folded(self, ind=0):
+ ret = list(self._io_shape_dict["oshape_folded"][ind])
ret[0] = self.batch_size
return tuple(ret)
- @property
- def ishape_packed(self):
- ret = list(self._io_shape_dict["ishape_packed"])
+ def ishape_packed(self, ind=0):
+ ret = list(self._io_shape_dict["ishape_packed"][ind])
ret[0] = self.batch_size
return tuple(ret)
- @property
- def oshape_packed(self):
- ret = list(self._io_shape_dict["oshape_packed"])
+ def oshape_packed(self, ind=0):
+ ret = list(self._io_shape_dict["oshape_packed"][ind])
ret[0] = self.batch_size
return tuple(ret)
+ @property
+ def num_inputs(self):
+ return self._io_shape_dict["num_inputs"]
+
+ @property
+ def num_outputs(self):
+ return self._io_shape_dict["num_outputs"]
+
@property
def batch_size(self):
return self._batch_size
@@ -262,68 +264,72 @@ def batch_size(self, value):
self.ibuf_packed_device = None
if self.obuf_packed_device is not None:
self.obuf_packed_device = None
- if self.platform == "alveo":
- self.ibuf_packed_device = allocate(shape=self.ishape_packed, dtype=np.uint8)
- self.obuf_packed_device = allocate(shape=self.oshape_packed, dtype=np.uint8)
- else:
- self.ibuf_packed_device = allocate(
- shape=self.ishape_packed, dtype=np.uint8, cacheable=True
+ cacheable = {"alveo": False, "zynq-iodma": True}[self.platform]
+ self.ibuf_packed_device = []
+ self.obuf_packed_device = []
+ self.obuf_packed = []
+ for i in range(self.num_inputs):
+ new_packed_ibuf = allocate(
+ shape=self.ishape_packed(i), dtype=np.uint8, cacheable=cacheable
)
- self.obuf_packed_device = allocate(
- shape=self.oshape_packed, dtype=np.uint8, cacheable=True
+ self.ibuf_packed_device.append(new_packed_ibuf)
+ for o in range(self.num_outputs):
+ new_packed_obuf = allocate(
+ shape=self.oshape_packed(o), dtype=np.uint8, cacheable=cacheable
)
- self.obuf_packed = np.empty_like(self.obuf_packed_device)
+ self.obuf_packed_device.append(new_packed_obuf)
+ self.obuf_packed.append(np.empty_like(new_packed_obuf))
- def fold_input(self, ibuf_normal):
+ def fold_input(self, ibuf_normal, ind=0):
"""Reshapes input in desired shape.
Gets input data (ibuf_normal), checks if data is in expected normal shape.
Returns folded input."""
# ensure that shape is as expected
- assert ibuf_normal.shape == self.ishape_normal
+ assert ibuf_normal.shape == self.ishape_normal(ind)
# convert to folded form
- ibuf_folded = ibuf_normal.reshape(self.ishape_folded)
+ ibuf_folded = ibuf_normal.reshape(self.ishape_folded(ind))
return ibuf_folded
- def pack_input(self, ibuf_folded):
+ def pack_input(self, ibuf_folded, ind=0):
"""Packs folded input and reverses both SIMD dim and endianness.
Gets input data in folded shape and returns packed input data."""
ibuf_packed = finnpy_to_packed_bytearray(
ibuf_folded,
- self.idt,
+ self.idt(ind),
reverse_endian=True,
reverse_inner=True,
fast_mode=True,
)
return ibuf_packed
- def unpack_output(self, obuf_packed):
+ def unpack_output(self, obuf_packed, ind=0):
"""Unpacks the packed output buffer from accelerator.
Gets packed output and returns output data in folded shape."""
obuf_folded = packed_bytearray_to_finnpy(
obuf_packed,
- self.odt,
- self.oshape_folded,
+ self.odt(ind),
+ self.oshape_folded(ind),
reverse_endian=True,
reverse_inner=True,
fast_mode=True,
)
return obuf_folded
- def unfold_output(self, obuf_folded):
+ def unfold_output(self, obuf_folded, ind=0):
"""Unfolds output data to normal shape.
Gets folded output data and returns output data in normal shape."""
- obuf_normal = obuf_folded.reshape(self.oshape_normal)
+ obuf_normal = obuf_folded.reshape(self.oshape_normal(ind))
return obuf_normal
- def copy_input_data_to_device(self, data):
+ def copy_input_data_to_device(self, data, ind=0):
"""Copies given input data to PYNQ buffer."""
- np.copyto(self.ibuf_packed_device, data)
- self.ibuf_packed_device.flush()
+ np.copyto(self.ibuf_packed_device[ind], data)
+ self.ibuf_packed_device[ind].flush()
- def copy_output_data_from_device(self, data):
+ def copy_output_data_from_device(self, data, ind=0):
"""Copies PYNQ output buffer from device."""
- self.obuf_packed_device.invalidate()
- np.copyto(data, self.obuf_packed_device)
+ self.obuf_packed_device[ind].invalidate()
+ np.copyto(data, self.obuf_packed_device[ind])
def execute_on_buffers(self, asynch=False, batch_size=None):
"""Executes accelerator by setting up the DMA(s) on pre-allocated buffers.
@@ -339,24 +345,36 @@ def execute_on_buffers(self, asynch=False, batch_size=None):
batch_size = self.batch_size
assert batch_size <= self.batch_size, "Specified batch_size is too large."
if self.platform == "zynq-iodma":
- assert self.odma.read(0x00) & 0x4 != 0, "Output DMA is not idle"
+ for o in range(self.num_outputs):
+ assert (
+ self.odma[o].read(0x00) & 0x4 != 0
+ ), "Output DMA %d is not idle" % (o)
# manually launch IODMAs since signatures are missing
for iwdma, iwbuf, iwdma_name in self.external_weights:
iwdma.write(0x10, iwbuf.device_address)
iwdma.write(0x1C, batch_size)
iwdma.write(0x00, 1)
- self.idma.write(0x10, self.ibuf_packed_device.device_address)
- self.idma.write(0x1C, batch_size)
- self.odma.write(0x10, self.obuf_packed_device.device_address)
- self.odma.write(0x1C, batch_size)
- self.idma.write(0x00, 1)
- self.odma.write(0x00, 1)
+ for o in range(self.num_outputs):
+ self.odma[o].write(0x10, self.obuf_packed_device[o].device_address)
+ self.odma[o].write(0x1C, batch_size)
+ self.odma[o].write(0x00, 1)
+ for i in range(self.num_inputs):
+ self.idma[i].write(0x10, self.ibuf_packed_device[i].device_address)
+ self.idma[i].write(0x1C, batch_size)
+ self.idma[i].write(0x00, 1)
elif self.platform == "alveo":
- assert self.odma_handle is None, "Output DMA is already running"
- self.idma.start(self.ibuf_packed_device, batch_size)
+ for o in range(self.num_outputs):
+ assert self.odma_handle[o] is None, (
+ "Output DMA %d is already running" % o
+ )
+ for i in range(self.num_inputs):
+ self.idma[i].start(self.ibuf_packed_device[i], batch_size)
for iwdma, iwbuf, iwdma_name in self.external_weights:
iwdma.start(iwbuf, batch_size)
- self.odma_handle = self.odma.start(self.obuf_packed_device, batch_size)
+ for o in range(self.num_outputs):
+ self.odma_handle[o] = self.odma[o].start(
+ self.obuf_packed_device[o], batch_size
+ )
else:
raise Exception("Unrecognized platform: %s" % self.platform)
# blocking behavior depends on asynch parameter
@@ -364,31 +382,48 @@ def execute_on_buffers(self, asynch=False, batch_size=None):
self.wait_until_finished()
def wait_until_finished(self):
- "Block until the output DMA has finished writing."
+ "Block until all output DMAs have finished writing."
if self.platform == "zynq-iodma":
# check if output IODMA is finished via register reads
- status = self.odma.read(0x00)
- while status & 0x2 == 0:
- status = self.odma.read(0x00)
+ for o in range(self.num_outputs):
+ status = self.odma[o].read(0x00)
+ while status & 0x2 == 0:
+ status = self.odma[o].read(0x00)
elif self.platform == "alveo":
- assert self.odma_handle is not None, "No odma_handle to wait on"
- self.odma_handle.wait()
- self.odma_handle = None
+ assert all(
+ [x is not None for x in self.odma_handle]
+ ), "No odma_handle to wait on"
+ for o in range(self.num_outputs):
+ self.odma_handle[o].wait()
+ self.odma_handle[o] = None
else:
raise Exception("Unrecognized platform: %s" % self.platform)
def execute(self, input_npy):
- """Given input numpy array, first perform necessary packing and copying
- to device buffers, execute on accelerator, then unpack output and return
- output numpy array from accelerator."""
- ibuf_folded = self.fold_input(input_npy)
- ibuf_packed = self.pack_input(ibuf_folded)
- self.copy_input_data_to_device(ibuf_packed)
+ """Given a single or a list of input numpy array, first perform necessary
+ packing and copying to device buffers, execute on accelerator, then unpack
+ output and return output numpy array from accelerator."""
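+ # e.g. for a hypothetical two-input model:
+ # outs = accel.execute([in0_npy, in1_npy])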
+ # if single input, convert to list to normalize how we process the input
+ if not isinstance(input_npy, list):
+ input_npy = [input_npy]
+ assert self.num_inputs == len(
+ input_npy
+ ), "Not all accelerator inputs are specified."
+ for i in range(self.num_inputs):
+ ibuf_folded = self.fold_input(input_npy[i], ind=i)
+ ibuf_packed = self.pack_input(ibuf_folded, ind=i)
+ self.copy_input_data_to_device(ibuf_packed, ind=i)
self.execute_on_buffers()
- self.copy_output_data_from_device(self.obuf_packed)
- obuf_folded = self.unpack_output(self.obuf_packed)
- obuf_normal = self.unfold_output(obuf_folded)
- return obuf_normal
+ outputs = []
+ for o in range(self.num_outputs):
+ self.copy_output_data_from_device(self.obuf_packed[o], ind=o)
+ obuf_folded = self.unpack_output(self.obuf_packed[o], ind=o)
+ obuf_normal = self.unfold_output(obuf_folded, ind=o)
+ outputs.append(obuf_normal)
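+ # single-output accelerators return a bare array to keep the old interface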
+ if self.num_outputs == 1:
+ return outputs[0]
+ else:
+ return outputs
def throughput_test(self):
"""Run accelerator with empty inputs to measure throughput and other metrics.
@@ -401,12 +436,14 @@ def throughput_test(self):
runtime = end - start
res["runtime[ms]"] = runtime * 1000
res["throughput[images/s]"] = self.batch_size / runtime
- res["DRAM_in_bandwidth[Mb/s]"] = (
- np.prod(self.ishape_packed) * 0.000001 / runtime
- )
- res["DRAM_out_bandwidth[Mb/s]"] = (
- np.prod(self.oshape_packed) * 0.000001 / runtime
- )
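+ # sum packed tensor sizes over all I/O streams for the bandwidth figures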
+ total_in = 0
+ for i in range(self.num_inputs):
+ total_in += np.prod(self.ishape_packed(i))
+ res["DRAM_in_bandwidth[Mb/s]"] = total_in * 0.000001 / runtime
+ total_out = 0
+ for o in range(self.num_outputs):
+ total_out += np.prod(self.oshape_packed(o))
+ res["DRAM_out_bandwidth[Mb/s]"] = total_out * 0.000001 / runtime
for iwdma, iwbuf, iwdma_name in self.external_weights:
res["DRAM_extw_%s_bandwidth[Mb/s]" % iwdma_name] = (
self.batch_size * np.prod(iwbuf.shape) * 0.000001 / runtime
@@ -417,11 +454,11 @@ def throughput_test(self):
res["fclk[mhz]"] = self.clock_dict["clock0"]["frequency"]
res["batch_size"] = self.batch_size
# also benchmark driver-related overheads
- input_npy = gen_finn_dt_tensor(self.idt, self.ishape_normal)
+ input_npy = gen_finn_dt_tensor(self.idt(), self.ishape_normal())
# provide as int8/uint8 to support fast packing path where possible
- if self.idt == DataType.UINT8:
+ if self.idt() == DataType["UINT8"]:
input_npy = input_npy.astype(np.uint8)
- elif self.idt == DataType.INT8:
+ elif self.idt() == DataType["INT8"]:
input_npy = input_npy.astype(np.int8)
start = time.time()
ibuf_folded = self.fold_input(input_npy)
@@ -442,13 +479,13 @@ def throughput_test(self):
res["copy_input_data_to_device[ms]"] = runtime * 1000
start = time.time()
- self.copy_output_data_from_device(self.obuf_packed)
+ self.copy_output_data_from_device(self.obuf_packed[0])
end = time.time()
runtime = end - start
res["copy_output_data_from_device[ms]"] = runtime * 1000
start = time.time()
- obuf_folded = self.unpack_output(self.obuf_packed)
+ obuf_folded = self.unpack_output(self.obuf_packed[0])
end = time.time()
runtime = end - start
res["unpack_output[ms]"] = runtime * 1000
diff --git a/src/finn/qnn-data/templates/driver/validate.py b/src/finn/qnn-data/templates/driver/validate.py
index 4aa7d67aa1..1b29d4342c 100644
--- a/src/finn/qnn-data/templates/driver/validate.py
+++ b/src/finn/qnn-data/templates/driver/validate.py
@@ -27,9 +27,9 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
+import numpy as np
from driver import io_shape_dict
from driver_base import FINNExampleOverlay
-import numpy as np
if __name__ == "__main__":
parser = argparse.ArgumentParser(
@@ -94,11 +94,11 @@
test_labels = test_labels.reshape(n_batches, bsize)
for i in range(n_batches):
- ibuf_normal = test_imgs[i].reshape(driver.ibuf_packed_device.shape)
+ ibuf_normal = test_imgs[i].reshape(driver.ibuf_packed_device[0].shape)
exp = test_labels[i]
driver.copy_input_data_to_device(ibuf_normal)
driver.execute_on_buffers()
- obuf_normal = np.empty_like(driver.obuf_packed_device)
+ obuf_normal = np.empty_like(driver.obuf_packed_device[0])
driver.copy_output_data_from_device(obuf_normal)
ret = np.bincount(obuf_normal.flatten() == exp.flatten())
nok += ret[0]
diff --git a/src/finn/transformation/fpgadataflow/annotate_cycles.py b/src/finn/transformation/fpgadataflow/annotate_cycles.py
index 2c547203df..5ab491dd10 100644
--- a/src/finn/transformation/fpgadataflow/annotate_cycles.py
+++ b/src/finn/transformation/fpgadataflow/annotate_cycles.py
@@ -27,10 +27,10 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import finn.custom_op.registry as registry
-from finn.transformation.base import Transformation
-from finn.transformation.move_reshape import _is_fpgadataflow_node
from finn.core.modelwrapper import ModelWrapper
from finn.custom_op.registry import getCustomOp
+from finn.transformation.base import Transformation
+from finn.transformation.move_reshape import _is_fpgadataflow_node
class AnnotateCycles(Transformation):
diff --git a/src/finn/transformation/fpgadataflow/annotate_resources.py b/src/finn/transformation/fpgadataflow/annotate_resources.py
index 4e50151011..d9089cbeba 100644
--- a/src/finn/transformation/fpgadataflow/annotate_resources.py
+++ b/src/finn/transformation/fpgadataflow/annotate_resources.py
@@ -27,13 +27,13 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import finn.custom_op.registry as registry
-from finn.transformation.base import Transformation
-from finn.transformation.move_reshape import _is_fpgadataflow_node
-from finn.analysis.fpgadataflow.res_estimation import res_estimation
from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
from finn.analysis.fpgadataflow.post_synth_res import post_synth_res
+from finn.analysis.fpgadataflow.res_estimation import res_estimation
from finn.core.modelwrapper import ModelWrapper
from finn.custom_op.registry import getCustomOp
+from finn.transformation.base import Transformation
+from finn.transformation.move_reshape import _is_fpgadataflow_node
class AnnotateResources(Transformation):
diff --git a/src/finn/transformation/fpgadataflow/cleanup.py b/src/finn/transformation/fpgadataflow/cleanup.py
index 5dbe5f0517..f59f4bdeab 100644
--- a/src/finn/transformation/fpgadataflow/cleanup.py
+++ b/src/finn/transformation/fpgadataflow/cleanup.py
@@ -30,8 +30,8 @@
import shutil
import finn.custom_op.registry as registry
-from finn.util.fpgadataflow import is_fpgadataflow_node
from finn.transformation.base import Transformation
+from finn.util.fpgadataflow import is_fpgadataflow_node
class CleanUp(Transformation):
diff --git a/src/finn/transformation/fpgadataflow/compile_cppsim.py b/src/finn/transformation/fpgadataflow/compile_cppsim.py
index 6321b33359..5f7c534b45 100644
--- a/src/finn/transformation/fpgadataflow/compile_cppsim.py
+++ b/src/finn/transformation/fpgadataflow/compile_cppsim.py
@@ -27,8 +27,8 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import finn.custom_op.registry as registry
-from finn.util.fpgadataflow import is_fpgadataflow_node
from finn.transformation.base import NodeLocalTransformation
+from finn.util.fpgadataflow import is_fpgadataflow_node
class CompileCppSim(NodeLocalTransformation):
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index 1f3d40e929..113ccb93b8 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -27,22 +27,22 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from onnx import helper, TensorProto
import numpy as np
import warnings
+from onnx import TensorProto, helper
+import finn.core.data_layout as DataLayout
from finn.core.datatype import DataType
-from finn.transformation.base import Transformation
from finn.custom_op.registry import getCustomOp
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.general import SortGraph
-import finn.core.data_layout as DataLayout
-from finn.util.onnx import nchw_to_nhwc
-from finn.util.basic import get_by_name
+from finn.transformation.base import Transformation
from finn.transformation.fpgadataflow.minimize_accumulator_width import (
MinimizeAccumulatorWidth,
)
+from finn.transformation.general import SortGraph
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import get_by_name
+from finn.util.onnx import nchw_to_nhwc
class InferConvInpGen(Transformation):
@@ -61,7 +61,9 @@ def apply(self, model):
i2c_out_shape = model.get_tensor_shape(i2c_output)
dt = model.get_tensor_datatype(i2c_input)
if not dt.is_integer():
- warnings.warn("Input is not int. Can't infer ConvInpGen")
+ warnings.warn(
+ "%s : Input is not int. Can't infer ConvInpGen." % n.name
+ )
continue
i2c_inst = getCustomOp(n)
stride_h, stride_w = i2c_inst.get_nodeattr("stride")
@@ -89,9 +91,10 @@ def apply(self, model):
# if padding enabled, ensure pad_val supported by DataType
# assert dt.allowed(pad_val),"""FMPadding_Batch DataType
# must support pad_val"""
- assert (
- pad_val == 0
- ), "FMPadding_Batch doesn't currently support pad_val!= 0"
+ assert pad_val == 0, (
+ "%s : FMPadding_Batch doesn't currently support pad_val!= 0"
+ % n.name
+ )
odim_padding_h = ifm_dim_h + pad_h
odim_padding_w = ifm_dim_w + pad_w
@@ -121,6 +124,7 @@ def apply(self, model):
NumChannels=ifm_ch,
inputDataType=dt.name,
SIMD=ifm_ch,
+ name="FMPadding_Batch_" + n.name,
)
graph.node.insert(node_ind, padding_node)
@@ -134,11 +138,15 @@ def apply(self, model):
)
if (stride_h > 1 or stride_w > 1) and is_kernel_pointwise:
- assert (
- is_square_image
- ), "DownSampler currently only supports square input images."
- assert is_equal_stride, """DownSampler currently only supports equal stride value
+ assert is_square_image, (
+ "%s : DownSampler currently only supports square input images."
+ % n.name
+ )
+ assert is_equal_stride, (
+ """%s : DownSampler currently only supports equal stride value
along different axes."""
+ % n.name
+ )
ConvInpGen_idim = ConvInpGen_idim_h
stride = stride_h
# create DownSampler node
@@ -153,6 +161,7 @@ def apply(self, model):
SIMD=ifm_ch,
Stride=stride,
inputDataType=dt.name,
+ name="DownSampler_" + n.name,
)
graph.node.insert(ConvInpGen_node_idx, ConvInpGen_node)
else:
@@ -160,12 +169,16 @@ def apply(self, model):
if (
is_square_image and is_square_kernel
): # square images and square kernels
- assert is_equal_stride, """Non-equal strides along different axes is not supported
+ assert is_equal_stride, (
+ """%s: Non-equal strides along different axes is not supported
for (non-)square convolutions"""
- assert (
- dilation_h == 1 and dilation_w == 1
- ), """Dilation value != 1 is not supported
+ % n.name
+ )
+ assert dilation_h == 1 and dilation_w == 1, (
+ """%s: Dilation value != 1 is not supported
for square convolutions"""
+ % n.name
+ )
ConvInpGen_node = helper.make_node(
"ConvolutionInputGenerator",
[ConvInpGen_input],
@@ -182,16 +195,19 @@ def apply(self, model):
inputDataType=dt.name,
outputDataType=dt.name,
depthwise=depthwise,
+ name="ConvolutionInputGenerator_" + n.name,
)
else: # non-square images and/or kernels
- assert (
- is_1d_convolution
- ), "ConvultionInputGenerator1D works only for 1D convolutions"
+ assert is_1d_convolution, (
+ "%s: ConvolutionInputGenerator1D works only for 1D convs"
+ % n.name
+ )
if dilation_h > 1 or dilation_w > 1:
- assert (
- stride_h == 1 and stride_w == 1
- ), """Stride value of greater than 1 is not supported for convolutions
+ assert stride_h == 1 and stride_w == 1, (
+ """%s: Stride value of greater than 1 is not supported for convolutions
with dilation value greater than 1"""
+ % n.name
+ )
ConvInpGen_node = helper.make_node(
"ConvolutionInputGenerator1D",
[ConvInpGen_input],
@@ -208,6 +224,7 @@ def apply(self, model):
inputDataType=dt.name,
outputDataType=dt.name,
depthwise=depthwise,
+ name="ConvolutionInputGenerator1D_" + n.name,
)
graph.node.insert(ConvInpGen_node_idx, ConvInpGen_node)
# remove old nodes
@@ -219,6 +236,102 @@ def apply(self, model):
return (model, graph_modified)
+class InferUpsample(Transformation):
+ """
+ Convert Upsample and Resize nodes to UpsampleNearestNeighbour_Batch nodes.
+ """
+
+ def apply(self, model):
+ graph = model.graph
+ node_ind = 0
+ graph_modified = False
+ for n in graph.node:
+ node_ind += 1
+ if n.op_type == "Upsample" or n.op_type == "Resize":
+ # Extract mode and scales and input shape
+ mode = get_by_name(n.attribute, "mode").s.decode("ascii")
+ if n.op_type == "Upsample":
+ scales = model.get_initializer(n.input[1])
+ else:
+ scales = model.get_initializer(n.input[2])
+ in_shape = model.get_tensor_shape(n.input[0])
+
+ dt = model.get_tensor_datatype(n.input[0])
+ if not dt.is_integer():
+ warnings.warn(
+ "%s: Input not int. Can't infer UpsampleNearestNeighbour."
+ % n.name
+ )
+ continue
+
+ if model.get_tensor_layout(n.input[0]) != DataLayout.NHWC:
+ warnings.warn(
+ "%s: Input not NHWC. Can't infer UpsampleNearestNeighbour."
+ % n.name
+ )
+ continue
+
+ # Check that the parameters are okay
+ assert mode == "nearest", (
+ "%s: Upsampling is only supported for the mode nearest." % n.name
+ )
+ assert len(in_shape) == 4, "Upsampling is only supported for 4D inputs."
+ assert scales.shape == (4,), (
+ "%s: Upsampling is only supported for 4D scales." % n.name
+ )
+ assert (scales >= 1).all(), (
+ n.name + ": Upsampling is only supported for scales "
+ "which are larger or equal 1 in all dimensions."
+ )
+
+ # Assumes nhwc layout for scales and input
+ assert scales[1] == scales[2], (
+ "%s: Upsampling is only supported for quadratic scales." % n.name
+ )
+ assert scales[0] == scales[3] == 1, (
+ n.name + ": Upsampling is only supported for scales with "
+ "the first and last dimensions being 1."
+ )
+ spatial_scale = scales[1]
+ assert spatial_scale == int(spatial_scale), (
+ "%s: Upsampling is only supported for integer scales." % n.name
+ )
+
+ assert in_shape[1] == in_shape[2], (
+ "%s: Upsampling is only supported for quadratic input shapes."
+ % n.name
+ )
+
+ # Extract information for HLS node
+ IFMDim = in_shape[1]
+ OFMDim = int(round(in_shape[1] * spatial_scale))
+ NumChannels = in_shape[-1]
+ numInputVectors = in_shape[0]
+ inputDataType = dt.name
+
+ # Insert the HLSCustomOp node
+ Upsample_HLS_node = helper.make_node(
+ "UpsampleNearestNeighbour_Batch",
+ [n.input[0]],
+ [n.output[0]],
+ domain="finn.custom_op.fpgadataflow",
+ backend="fpgadataflow",
+ OFMDim=OFMDim,
+ IFMDim=IFMDim,
+ NumChannels=NumChannels,
+ inputDataType=inputDataType,
+ numInputVectors=numInputVectors,
+ name="UpsampleNearestNeighbour_Batch_" + n.name,
+ )
+
+ # Remove the old node
+ graph.node.insert(node_ind, Upsample_HLS_node)
+ # remove old nodes
+ graph.node.remove(n)
+ graph_modified = True
+ return (model, graph_modified)
+
+
class InferStreamingMaxPool(Transformation):
"""Convert MaxPoolNHWC layers to StreamingMaxPool layers."""
@@ -235,25 +348,23 @@ def apply(self, model):
# mp_out_shape = model.get_tensor_shape(mp_output)
dt = model.get_tensor_datatype(mp_input)
mp_inst = getCustomOp(n)
- # stride = mp_inst.get_nodeattr("strides")[0]
- k = mp_inst.get_nodeattr("kernel_shape")[0]
- # pad = mp_inst.get_nodeattr("pads")[0]
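+ # track kernel and image dims separately in h and w to support
+ # rectangular kernels and images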
+ k_h, k_w = mp_inst.get_nodeattr("kernel_shape")
ifm_ch = mp_in_shape[-1]
- ifm_dim = mp_in_shape[1]
- # ofm_dim = mp_out_shape[1]
- if ifm_dim % k == 0:
+ ifm_dim_h = mp_in_shape[1]
+ ifm_dim_w = mp_in_shape[2]
+ if ifm_dim_h % k_h == 0 and ifm_dim_w % k_w == 0:
# create equivalent StreamingMaxPool_Batch node
- # TODO support non-k strides
new_node = helper.make_node(
"StreamingMaxPool_Batch",
[mp_input],
[mp_output],
domain="finn.custom_op.fpgadataflow",
backend="fpgadataflow",
- PoolDim=k,
+ PoolDim=(k_h, k_w),
NumChannels=ifm_ch,
- ImgDim=ifm_dim,
+ ImgDim=(ifm_dim_h, ifm_dim_w),
dataType=dt.name,
+ name="StreamingMaxPool_Batch_" + n.name,
)
graph.node.insert(node_ind, new_node)
# remove old nodes
@@ -276,7 +387,7 @@ def apply(self, model):
graph_modified = False
for n in graph.node:
node_ind += 1
- if n.op_type in ["MaxPool", "QuantAvgPool2d"]:
+ if n.op_type in ["MaxPool", "QuantAvgPool2d", "MaxPoolNHWC"]:
# extract pool parameters
if n.op_type == "MaxPool":
@@ -289,6 +400,15 @@ def apply(self, model):
k = inst.get_nodeattr("kernel")
stride = inst.get_nodeattr("stride")
dlayout = inst.get_nodeattr("data_layout")
+ elif n.op_type == "MaxPoolNHWC":
+ inst = getCustomOp(n)
+ k_shape = inst.get_nodeattr("kernel_shape")
+ strides = inst.get_nodeattr("strides")
+ assert k_shape[0] == k_shape[1]
+ assert strides[0] == strides[1]
+ k = k_shape[0]
+ stride = strides[0]
+ dlayout = "NHWC"
try:
pad = get_by_name(n.attribute, "pads").ints[-1]
except AttributeError:
@@ -305,7 +425,8 @@ def apply(self, model):
continue
elif k == stride:
warnings.warn(
- """Inferring Pool_Batch node for k == stride.
+ n.name
+ + """: Inferring Pool_Batch node for k == stride.
This case can be optimized.
For example, for MaxPool run InferStreamingMaxPool before
InferPool_Batch """
@@ -366,7 +487,7 @@ def apply(self, model):
accum_bits = 0
pool_size_param = k
pad_value = 0
- if n.op_type == "MaxPool":
+ if n.op_type in ["MaxPool", "MaxPoolNHWC"]:
pool_fxn = "MaxPool"
odt = idt
pad_value = idt.min()
@@ -396,6 +517,7 @@ def apply(self, model):
pad_value=pad_value,
depthwise=1,
input_shape="(1,{},{},{})".format(ifm_dim, ifm_dim, ifm_ch),
+ name="Im2Col_" + n.name,
)
# Warning PE has to be equal to ifm_ch until Im2Col is replaced by
@@ -418,6 +540,7 @@ def apply(self, model):
AccumBits=accum_bits,
Size=pool_size_param,
BatchSize=1,
+ name="Pool_Batch_" + n.name,
)
if dlayout == "NCHW":
@@ -466,16 +589,18 @@ def apply(self, model):
mm_output = n.output[0]
mm_in_shape = model.get_tensor_shape(mm_input)
mm_out_shape = model.get_tensor_shape(mm_output)
- assert (
- model.get_tensor_datatype(mm_input) == DataType.BINARY
- ), """First
+ assert model.get_tensor_datatype(mm_input) == DataType["BINARY"], (
+ n.name
+ + """: First
input for xnorpopcount is not set to FINN DataType BINARY."""
- assert (
- model.get_tensor_datatype(mm_weight) == DataType.BINARY
- ), """Second
+ )
+ assert model.get_tensor_datatype(mm_weight) == DataType["BINARY"], (
+ n.name
+ + """: Second
input (weights) for xnorpopcount is not set to FINN DataType BINARY."""
- idt = DataType.BINARY
- wdt = DataType.BINARY
+ )
+ idt = DataType["BINARY"]
+ wdt = DataType["BINARY"]
mm_output = n.output[0]
W = model.get_initializer(mm_weight)
# extract weight shape, note that ONNX and finn-hlslib
@@ -487,13 +612,12 @@ def apply(self, model):
# create node with no parallelization first
pe = 1
simd = 1
- assert mh % pe == 0, "Requirement MH divisable by PE is violated."
- assert mw % simd == 0, "Requirement MW divisable by SIMD is violated."
wmem = mw * mh // (pe * simd)
- assert (
- mw * mh == wmem * pe * simd
- ), """Requirement (MW * MH) divisiable by
+ assert mw * mh == wmem * pe * simd, (
+ n.name
+ + """: Requirement (MW * MH) divisiable by
(WMEM * PE * SIMD) is violated."""
+ )
# see if we have any following thresholds
consumer = model.find_consumer(mm_output)
if consumer is not None and consumer.op_type == "MultiThreshold":
@@ -503,10 +627,11 @@ def apply(self, model):
mt_out_shape = model.get_tensor_shape(mt_output)
mt_thres = consumer.input[1]
T = model.get_initializer(mt_thres)
- assert (
- T.shape[0] == 1 or T.shape[0] == mh
- ), """First dimension of
+ assert T.shape[0] == 1 or T.shape[0] == mh, (
+ consumer.name
+ + """: First dimension of
thresholds neither 1 nor MH."""
+ )
odt = model.get_tensor_datatype(mt_output)
if odt.bitwidth() == 1:
# covers both bipolar and binary
@@ -534,6 +659,7 @@ def apply(self, model):
noActivation=0,
numInputVectors=list(mm_in_shape[:-1]),
mem_mode=self.mem_mode,
+ name=n.name,
)
graph.node.insert(node_ind, new_node)
# remove old nodes
@@ -564,6 +690,7 @@ def apply(self, model):
noActivation=1,
numInputVectors=list(mm_in_shape[:-1]),
mem_mode=self.mem_mode,
+ name=n.name,
)
graph.node.insert(node_ind, new_node)
# remove old node
@@ -611,15 +738,12 @@ def apply(self, model):
# create node with no parallelization first
pe = 1
simd = 1
- assert mh % pe == 0, "Requirement MH divisable by PE is violated."
- assert (
- mw % simd == 0
- ), "Requirement MW divisable by SIMD is violated."
wmem = mw * mh // (pe * simd)
- assert (
- mw * mh == wmem * pe * simd
- ), """Requirement (MW * MH) divisible by
+ assert mw * mh == wmem * pe * simd, (
+ n.name
+ + """: Requirement (MW * MH) divisible by
(WMEM * PE * SIMD) is violated."""
+ )
# see if we have any following thresholds
consumer = model.find_consumer(mm_output)
if consumer is not None and consumer.op_type == "MultiThreshold":
@@ -629,27 +753,30 @@ def apply(self, model):
mt_out_shape = model.get_tensor_shape(mt_output)
mt_thres = consumer.input[1]
T = model.get_initializer(mt_thres)
- assert (
- T.shape[0] == 1 or T.shape[0] == mh
- ), """First dimension of
+ assert T.shape[0] == 1 or T.shape[0] == mh, (
+ consumer.name
+ + """: First dimension of
thresholds neither 1 nor MH."""
+ )
odt = model.get_tensor_datatype(mt_output)
scale = getCustomOp(consumer).get_nodeattr("out_scale")
actval = getCustomOp(consumer).get_nodeattr("out_bias")
- assert (
- int(actval) == actval
- ), "out_bias must be integer for HLS conversion."
+ assert int(actval) == actval, (
+ consumer.name
+ + ": out_bias must be integer for HLS conversion."
+ )
actval = int(actval)
- odt_is_bipolar = odt == DataType.BIPOLAR
+ odt_is_bipolar = odt == DataType["BIPOLAR"]
bipolar_ok = (
odt_is_bipolar and (scale == 2.0) and (actval == -1)
)
- assert (
- scale == 1.0 or bipolar_ok
- ), "out_scale = 1.0 or bipolar output needed for conversion."
- assert (not odt.signed()) or (
- actval < 0
- ), "Signed output requres actval < 0"
+ assert scale == 1.0 or bipolar_ok, (
+ consumer.name
+ + ": out_scale=1 or bipolar output needed for conversion."
+ )
+ assert (not odt.signed()) or (actval < 0), (
+ consumer.name + ": Signed output requres actval < 0"
+ )
model.set_tensor_shape(mm_input, mm_in_shape)
model.set_tensor_shape(mt_output, mt_out_shape)
if bipolar_ok:
@@ -675,6 +802,7 @@ def apply(self, model):
noActivation=0,
numInputVectors=list(mm_in_shape[:-1]),
mem_mode=self.mem_mode,
+ name="StreamingFCLayer_Batch_" + n.name,
)
graph.node.insert(node_ind, new_node)
# remove old nodes
@@ -705,6 +833,7 @@ def apply(self, model):
noActivation=1,
numInputVectors=list(mm_in_shape[:-1]),
mem_mode=self.mem_mode,
+ name="StreamingFCLayer_Batch_" + n.name,
)
graph.node.insert(node_ind, new_node)
# remove old node
@@ -739,7 +868,8 @@ def apply(self, model):
k_h, k_w = sparsity["dw"]["kernel_shape"]
except KeyError:
raise Exception(
- """Sparsity doesn't indicate that MatMul
+ n.name
+ + """: sparsity annotation doesn't indicate that MatMul
belongs to a depthwise convolution."""
)
@@ -775,9 +905,6 @@ def apply(self, model):
model.set_tensor_shape(mm_weight, (channels, 1, k_h, k_w))
# create node with pe=channels as default
pe = channels
- assert (
- channels % pe == 0
- ), "Requirement Channels divisable by PE is violated."
# see if we have any following thresholds
consumer = model.find_consumer(mm_output)
if consumer is not None and consumer.op_type == "MultiThreshold":
@@ -786,23 +913,26 @@ def apply(self, model):
mt_out_shape = model.get_tensor_shape(mt_output)
mt_thres = consumer.input[1]
T = model.get_initializer(mt_thres)
- assert (
- T.shape[0] == 1 or T.shape[0] == channels
- ), """First dimension of
+ assert T.shape[0] == 1 or T.shape[0] == channels, (
+ consumer.name
+ + """: First dimension of
thresholds neither 1 nor Channels."""
+ )
odt = model.get_tensor_datatype(mt_output)
scale = getCustomOp(consumer).get_nodeattr("out_scale")
- assert (
- scale == 1.0
- ), "out_scale must be equal to 1.0 for HLS conversion."
+ assert scale == 1.0, (
+ consumer.name
+ + ": out_scale must be equal to 1.0 for HLS conversion."
+ )
actval = getCustomOp(consumer).get_nodeattr("out_bias")
- assert (
- int(actval) == actval
- ), "out_bias must be integer for HLS conversion."
+ assert int(actval) == actval, (
+ consumer.name
+ + ": out_bias must be integer for HLS conversion."
+ )
actval = int(actval)
- assert (not odt.signed()) or (
- actval < 0
- ), "Signed output requres actval < 0"
+ assert (not odt.signed()) or (actval < 0), (
+ consumer.name + ": Signed output requres actval < 0"
+ )
model.set_tensor_shape(mm_input, mm_in_shape)
model.set_tensor_shape(mt_output, mt_out_shape)
# create and insert new Vector_Vector_Activate_Batch node
@@ -822,6 +952,7 @@ def apply(self, model):
outputDataType=odt.name,
ActVal=actval,
noActivation=0,
+ name="Vector_Vector_Activate_Batch_" + n.name,
)
graph.node.insert(node_ind, new_node)
# remove old nodes
@@ -850,6 +981,7 @@ def apply(self, model):
outputDataType=odt.name,
ActVal=0,
noActivation=1,
+ name="Vector_Vector_Activate_Batch_" + n.name,
)
graph.node.insert(node_ind, new_node)
# remove old node
@@ -907,21 +1039,22 @@ def apply(self, model):
ifc = int(thl_in_shape[-1])
# create node with no parallelization first
pe = 1
- assert ifc % pe == 0, "Requirement IFC divisable by PE is violated."
odt = model.get_tensor_datatype(thl_output)
scale = getCustomOp(node).get_nodeattr("out_scale")
- assert (
- scale == 1.0
- ), "MultiThreshold out_scale must be equal to 1.0 for HLS conversion."
+ assert scale == 1.0, (
+ node.name
+ + ": MultiThreshold out_scale must be 1 for HLS conversion."
+ )
actval = getCustomOp(node).get_nodeattr("out_bias")
- assert (
- int(actval) == actval
- ), "MultiThreshold out_bias must be integer for HLS conversion."
+ assert int(actval) == actval, (
+ node.name
+ + ": MultiThreshold out_bias must be integer for HLS conversion."
+ )
actval = int(actval)
- assert (not odt.signed()) or (
- actval < 0
- ), "Signed output requres actval < 0"
+ assert (not odt.signed()) or (actval < 0), (
+ node.name + ": Signed output requres actval < 0"
+ )
# create and insert new Thresholding_Batch node
new_node = helper.make_node(
"Thresholding_Batch",
@@ -938,6 +1071,7 @@ def apply(self, model):
numInputVectors=list(thl_in_shape[:-1]),
ActVal=actval,
mem_mode=self.mem_mode,
+ name="Thresholding_Batch_" + node.name,
)
graph.node.insert(insert_point, new_node)
# remove old node
@@ -1011,9 +1145,6 @@ def apply(self, model):
num_channels = int(in0_shape[-1])
# create node with no parallelization first
pe = 1
- assert (
- num_channels % pe == 0
- ), "Requirement Channels divisable by PE is violated."
# create and insert new StreamingFCLayer node
new_node = helper.make_node(
@@ -1026,6 +1157,7 @@ def apply(self, model):
PE=pe,
inputDataType=idt.name,
numInputVectors=in0_shape[:-1],
+ name="AddStreams_Batch_" + node.name,
)
graph.node.insert(insert_point, new_node)
# remove old node
@@ -1039,7 +1171,7 @@ def apply(self, model):
class InferDuplicateStreamsLayer(Transformation):
- """Insert a DuplicateStreams HLS layer for any tensor with fanout == 2 """
+ """Insert a DuplicateStreams HLS layer for any tensor with fanout == 2"""
def apply(self, model):
graph = model.graph
@@ -1072,9 +1204,6 @@ def apply(self, model):
# create node with no parallelization first
pe = 1
- assert (
- num_ch % pe == 0
- ), "Requirement channels divisable by PE is violated."
dup_node = helper.make_node(
"DuplicateStreams_Batch",
@@ -1086,6 +1215,7 @@ def apply(self, model):
PE=pe,
inputDataType=dt.name,
numInputVectors=vecs,
+ name="DuplicateStreams_Batch_" + node.name,
)
graph.node.insert(node_ind, dup_node)
@@ -1121,10 +1251,10 @@ def get_smallest_possible(self, vals):
for v in vals:
assert int(v) == v, "Error float value"
- for k in DataType.__members__:
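+ # only consider datatypes that are valid accumulator candidates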
+ for k in DataType.get_accumulator_dt_cands():
dt = DataType[k]
- if dt in [DataType.BIPOLAR, DataType.TERNARY, DataType.FLOAT32]:
+ if dt in [DataType["BIPOLAR"], DataType["TERNARY"], DataType["FLOAT32"]]:
# not currently supported
continue
@@ -1140,9 +1270,9 @@ def get_smallest_possible(self, vals):
)
if (0 <= vals).all():
- return DataType.UINT64
+ return DataType["UINT64"]
else:
- return DataType.INT64
+ return DataType["INT64"]
def apply(self, model):
graph = model.graph
@@ -1254,6 +1384,7 @@ def apply(self, model):
paramDataType=pdt.name,
outputDataType=odt.name,
numInputVectors=list(ll_in_shape[:-1]),
+ name="ChannelwiseOp_Batch_" + node.name,
)
graph.node.insert(insert_point, new_node)
# remove old node
@@ -1296,9 +1427,6 @@ def apply(self, model):
num_inp_vecs = list(fc_in_shape[:-1])
# create node with no parallelization first
pe = 1
- assert (
- num_labels % pe == 0
- ), "Requirement Labels divisable by PE is violated."
k = model.get_initializer(k_input)[0]
@@ -1314,6 +1442,7 @@ def apply(self, model):
K=k,
inputDataType=idt.name,
numInputVectors=num_inp_vecs,
+ name="LabelSelect_Batch_" + node.name,
)
graph.node.insert(node_ind, new_node)
# remove old node
@@ -1367,9 +1496,6 @@ def apply(self, model):
vecs = in0_shape[:-1]
# create node with no parallelization first
pe = 1
- assert (
- num_ch % pe == 0
- ), "Requirement Labels divisable by PE is violated."
# create an additional tensor of the same shape and layout as result
out_shape = model.get_tensor_shape(result)
@@ -1390,6 +1516,7 @@ def apply(self, model):
PE=pe,
inputDataType=idt.name,
numInputVectors=vecs,
+ name="GlobalAccPool_Batch_" + node.name,
)
mul_value = helper.make_tensor_value_info(
@@ -1413,3 +1540,56 @@ def apply(self, model):
model = model.transform(InferShapes())
model = model.transform(InferDataTypes())
return (model, graph_modified)
+
+
+class InferLookupLayer(Transformation):
+ """Convert Gather nodes with constant op0 into Lookup HLS layers."""
+
+ def apply(self, model):
+ graph = model.graph
+ node_ind = 0
+ graph_modified = False
+ for node in graph.node:
+ node_ind += 1
+ if node.op_type == "Gather":
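+ # a Gather along axis 0 with constant data input acts as an
+ # embedding table lookup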
+ emb_name = node.input[0]
+ embs = model.get_initializer(emb_name)
+ axis = get_by_name(node.attribute, "axis")
+ # skip conversion if input0 is not constant
+ if embs is None:
+ continue
+ # skip conversion if axis != 0
+ if axis is not None and axis.i != 0:
+ continue
+ ind_name = node.input[1]
+ ind_dtype = model.get_tensor_datatype(ind_name)
+ emb_dtype = model.get_tensor_datatype(emb_name)
+ # skip conversion if the indices are not unsigned integers
+ if (not ind_dtype.is_integer()) or ind_dtype.signed():
+ continue
+ num_embs, emb_dim = embs.shape
+ out_name = node.output[0]
+ ishape = model.get_tensor_shape(node.input[1])
+ # create and insert new Lookup node
+ new_node = helper.make_node(
+ "Lookup",
+ [ind_name, emb_name],
+ [out_name],
+ domain="finn.custom_op.fpgadataflow",
+ backend="fpgadataflow",
+ name="Lookup_" + node.name,
+ NumEmbeddings=num_embs,
+ EmbeddingDim=emb_dim,
+ EmbeddingType=emb_dtype.name,
+ InputType=ind_dtype.name,
+ InputShape=list(ishape),
+ )
+ graph.node.insert(node_ind, new_node)
+ # remove old node
+ graph.node.remove(node)
+ graph_modified = True
+
+ if graph_modified:
+ model = model.transform(InferShapes())
+ model = model.transform(InferDataTypes())
+ return (model, graph_modified)
diff --git a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
index 419a6d8c49..9b2577bc2b 100644
--- a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
+++ b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
@@ -26,11 +26,11 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import copy
-
-from onnx import helper
+from finn.core.modelwrapper import ModelWrapper
from finn.custom_op.registry import getCustomOp
from finn.transformation.base import Transformation
+from finn.transformation.create_generic_partitions import PartitionFromLambda
+from finn.transformation.fpgadataflow.externalize_params import ExternalizeParams
from finn.util.basic import get_by_name, make_build_dir
@@ -41,120 +41,76 @@ class CreateDataflowPartition(Transformation):
that indicates the filename for the second graph that only contains
dataflow nodes. No action is taken if there are no dataflow nodes."""
- def __init__(self):
+ def __init__(self, partition_model_dir=None):
super().__init__()
+ if partition_model_dir is None:
+ self.partition_model_dir = make_build_dir("dataflow_partition_")
+ else:
+ self.partition_model_dir = partition_model_dir
def apply(self, model):
- target_partition_id = 0
- # we currently assume that all dataflow nodes belonging to the same partition
- # are connected to each other and there is a single input/output to/from each.
- # NOTE: all dataflow nodes with no partition_id set are moved to partition 0
- # TODO: check the assumption and/or improve this.
- while True:
- all_nodes = list(model.graph.node)
- df_nodes = filter(
- lambda x: get_by_name(x.attribute, "backend") is not None, all_nodes
- )
- df_nodes = filter(
- lambda x: get_by_name(x.attribute, "backend").s.decode("UTF-8")
- == "fpgadataflow"
- and (
- get_by_name(x.attribute, "partition_id") is None
- or get_by_name(x.attribute, "partition_id").i == target_partition_id
- )
- and x.op_type != "StreamingDataflowPartition",
- df_nodes,
- )
- df_nodes = list(df_nodes)
- non_df_nodes = filter(lambda x: x not in df_nodes, all_nodes)
- non_df_nodes = list(non_df_nodes)
-
- if len(df_nodes) == 0:
- # no changes if no dataflow nodes are present
- break
- else:
- # partition the model into two models
- df_model = copy.deepcopy(model)
- non_df_model = model
- # remove all non-dataflow nodes from the dataflow model
- for node_to_remove in non_df_nodes:
- df_model.graph.node.remove(node_to_remove)
- # identify the entry and exit points for the dataflow part
- df_in = df_model.graph.node[0].input[0]
- df_out = df_model.graph.node[-1].output[0]
- df_in_vi = df_model.get_tensor_valueinfo(df_in)
- df_out_vi = df_model.get_tensor_valueinfo(df_out)
- # set df graph in/out to be df_in/df_out
- df_model.graph.input.remove(df_model.graph.input[0])
- df_model.graph.input.insert(0, df_in_vi)
- df_model.graph.output.remove(df_model.graph.output[0])
- df_model.graph.output.insert(0, df_out_vi)
- # parse StreamingFCLayers looking for external weight memories
- fc_extw_nodes = filter(
- lambda x: x.op_type == "StreamingFCLayer_Batch"
- and get_by_name(x.attribute, "mem_mode") is not None
- and get_by_name(x.attribute, "mem_mode").s.decode("UTF-8")
- == "external",
- df_nodes,
- )
- fc_extw_nodes = list(fc_extw_nodes)
- extra_df_inputs = []
+ def filter_fc_extw(x):
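+ # external-weight streamers are IODMA nodes created with burstMode="wrap"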
+ if x.op_type == "IODMA":
+ burst_mode = get_by_name(x.attribute, "burstMode")
+ if burst_mode is not None:
+ burst_mode = burst_mode.s.decode("UTF-8")
+ return burst_mode == "wrap"
- for i in range(len(fc_extw_nodes)):
- fc_weight_vi = df_model.get_tensor_valueinfo(
- fc_extw_nodes[i].input[1]
- )
- df_model.graph.input.insert(i + 1, fc_weight_vi)
- extra_df_inputs.append(fc_extw_nodes[i].input[1])
+ extw_dma_nodes = list(filter(filter_fc_extw, model.graph.node))
+ if len(extw_dma_nodes) > 0:
+ model = model.transform(ExternalizeParams())
- # save model
- df_model_dir = make_build_dir(
- "dataflow_partition" + str(target_partition_id) + "_"
- )
- df_model_filename = df_model_dir + "/df_model.onnx"
- df_model.cleanup()
- df_model.save(df_model_filename)
- # remove all dataflow nodes from the non-dataflow model
- # keep track of where the dataflow part starts
- df_start_ind = all_nodes.index(df_nodes[0])
-
- # get and check floorplan
- inst = getCustomOp(df_nodes[0])
- slr = inst.get_nodeattr("slr")
- for node in df_nodes[1:]:
- inst = getCustomOp(node)
- assert slr == inst.get_nodeattr(
- "slr"
- ), """all nodes with
- same partition_id must have the same slr id"""
-
- # check that there is only one non-null mem_port per partition
- nmemports = 0
- mem_port = ""
- for node in df_nodes:
- inst = getCustomOp(node)
- port = inst.get_nodeattr("mem_port")
- if port is not None and port != "":
- nmemports += 1
- mem_port = port
- assert nmemports <= 1, """too many memory ports per partition"""
+ def assign_partition_id(node):
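+ # -1 keeps a node in the parent graph; fpgadataflow nodes get their
+ # partition_id attribute, defaulting to partition 0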
+ if node.op_type in ["GenericPartition", "StreamingDataflowPartition"]:
+ return -1
+ else:
+ backend = get_by_name(node.attribute, "backend")
+ if backend is not None and backend.s.decode("UTF-8") == "fpgadataflow":
+ assigned_partition = get_by_name(node.attribute, "partition_id")
+ if assigned_partition is not None:
+ return assigned_partition.i
+ else:
+ return 0
+ else:
+ return -1
- for node_to_remove in df_nodes:
- non_df_model.graph.node.remove(node_to_remove)
- # create StreamingDataflow node with df_in/df_out io
- df_node = helper.make_node(
- "StreamingDataflowPartition",
- [df_in] + extra_df_inputs,
- [df_out],
- # use the model attribute to mark the df model
- model=df_model_filename,
- domain="finn.custom_op.fpgadataflow",
- partition_id=target_partition_id,
- slr=slr,
- mem_port=mem_port,
- )
- non_df_model.graph.node.insert(df_start_ind, df_node)
- model = non_df_model
- target_partition_id += 1
+ # first, use the generic partitioning functionality to split up the graph
+ parent_model = model.transform(
+ PartitionFromLambda(
+ partitioning=assign_partition_id, partition_dir=self.partition_model_dir
+ )
+ )
+ # change node types to StreamingDataflowPartition
+ p_nodes = parent_model.get_nodes_by_op_type("GenericPartition")
+ for partition_ind, p_node in enumerate(p_nodes):
+ # go into partition to extract some info
+ p_node_inst = getCustomOp(p_node)
+ node_model_filename = p_node_inst.get_nodeattr("model")
+ p_model = ModelWrapper(node_model_filename)
+ # check floorplan (SLR assignment per node)
+ inst = getCustomOp(p_model.graph.node[0])
+ slr = inst.get_nodeattr("slr")
+ for node in p_model.graph.node:
+ inst = getCustomOp(node)
+ assert slr == inst.get_nodeattr(
+ "slr"
+ ), """all nodes with same partition_id must have the same slr id"""
+ # check that there is only one non-null mem_port per partition
+ nmemports = 0
+ mem_port = ""
+ for node in p_model.graph.node:
+ inst = getCustomOp(node)
+ port = inst.get_nodeattr("mem_port")
+ if port is not None and port != "":
+ nmemports += 1
+ mem_port = port
+ assert nmemports <= 1, """Too many memory ports per partition"""
+ # done, change node type and add info in parent graph
+ p_node.op_type = "StreamingDataflowPartition"
+ p_node.domain = "finn.custom_op.fpgadataflow"
+ new_p_node_inst = getCustomOp(p_node)
+ new_p_node_inst.set_nodeattr("partition_id", partition_ind)
+ new_p_node_inst.set_nodeattr("slr", slr)
+ new_p_node_inst.set_nodeattr("mem_port", mem_port)
- return (model, False)
+ return (parent_model, False)
diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py
index 738f2000a1..327c7867fe 100644
--- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py
+++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py
@@ -26,19 +26,19 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import json
+import multiprocessing as mp
import os
-import warnings
import subprocess
-import json
+import warnings
-from finn.transformation.base import Transformation
-from finn.util.basic import make_build_dir, get_num_default_workers
-from finn.util.fpgadataflow import is_fpgadataflow_node
from finn.custom_op.registry import getCustomOp
-import multiprocessing as mp
+from finn.transformation.base import Transformation
from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
ReplaceVerilogRelPaths,
)
+from finn.util.basic import get_num_default_workers, make_build_dir
+from finn.util.fpgadataflow import is_fpgadataflow_node
def is_external_input(model, node, i):
@@ -86,11 +86,6 @@ def __init__(self, fpgapart, clk_ns, ip_name="finn_design", vitis=False):
self.clk_ns = clk_ns
self.ip_name = ip_name
self.vitis = vitis
- if float(clk_ns) not in [5.0, 10.0, 20.0]:
- warnings.warn(
- """The chosen frequency may lead to failure due to clock divider
- constraints."""
- )
self.has_aximm = False
self.has_m_axis = False
self.m_axis_idx = 0
@@ -221,6 +216,13 @@ def apply(self, model):
ip_dirs = ["list"]
# add RTL streamer IP
ip_dirs.append("/workspace/finn/finn-rtllib/memstream")
+ if model.graph.node[0].op_type not in ["StreamingFIFO", "IODMA"]:
+ warnings.warn(
+ """First node is not StreamingFIFO or IODMA.
+ You may experience incorrect stitched-IP rtlsim or hardware
+ behavior. It is strongly recommended to insert FIFOs prior to
+ calling CreateStitchedIP."""
+ )
# ensure that all nodes are fpgadataflow, and that IPs are generated
for node in model.graph.node:
assert is_fpgadataflow_node(
@@ -330,12 +332,13 @@ def apply(self, model):
)
tcl.append("set_property core_revision 2 [ipx::find_open_core %s]" % block_vlnv)
tcl.append("ipx::create_xgui_files [ipx::find_open_core %s]" % block_vlnv)
+ # mark bus interface params as user-resolvable to avoid FREQ_MHZ mismatches
+ tcl.append(
+ "set_property value_resolve_type user [ipx::get_bus_parameters "
+ "-of [ipx::get_bus_interfaces -of [ipx::current_core ]]]"
+ )
# if targeting Vitis, add some properties to the IP
if self.vitis:
- tcl.append(
- "ipx::remove_bus_parameter FREQ_HZ "
- "[ipx::get_bus_interfaces CLK.AP_CLK -of_objects [ipx::current_core]]"
- )
# replace source code with dcp
tcl.append(
"set_property sdx_kernel true [ipx::find_open_core %s]" % block_vlnv
diff --git a/src/finn/transformation/fpgadataflow/externalize_params.py b/src/finn/transformation/fpgadataflow/externalize_params.py
new file mode 100644
index 0000000000..dcb66a8538
--- /dev/null
+++ b/src/finn/transformation/fpgadataflow/externalize_params.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+from finn.transformation.base import Transformation
+from finn.util.basic import get_by_name
+
+
+class ExternalizeParams(Transformation):
+ """Create top-level graph inputs for IODMAs serving layers where weights are
+ marked as external using mem_mode="external"."""
+
+ def __init__(self):
+ super().__init__()
+
+ def apply(self, model):
+ graph_modified = False
+
+ def filter_fc_extw(x):
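+ # weight-streaming IODMAs are identified by their burstMode="wrap" attribute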
+ if x.op_type == "IODMA":
+ burst_mode = get_by_name(x.attribute, "burstMode")
+ if burst_mode is not None:
+ burst_mode = burst_mode.s.decode("UTF-8")
+ return burst_mode == "wrap"
+
+ dma_extw_nodes = list(filter(filter_fc_extw, model.graph.node))
+
+ for dma_extw in dma_extw_nodes:
+ extw_tensor_name = dma_extw.input[0]
+ extw_tensor_name_out = dma_extw.output[0]
+ if extw_tensor_name in [x.name for x in model.graph.input]:
+ continue
+ else:
+ extw_vi = model.get_tensor_valueinfo(extw_tensor_name)
+ assert extw_vi is not None
+ model.graph.value_info.remove(extw_vi)
+ model.graph.input.append(extw_vi)
+ iodma_init = model.get_initializer(extw_vi.name)
+ assert iodma_init is not None
+ # remove output-side initializer to get correct dataflow partitioning
+ model.graph.initializer.remove(
+ [
+ x
+ for x in model.graph.initializer
+ if x.name == extw_tensor_name_out
+ ][0]
+ )
+ graph_modified = True
+
+ return (model, graph_modified)
diff --git a/src/finn/transformation/fpgadataflow/floorplan.py b/src/finn/transformation/fpgadataflow/floorplan.py
index 3434183b14..2bda788313 100644
--- a/src/finn/transformation/fpgadataflow/floorplan.py
+++ b/src/finn/transformation/fpgadataflow/floorplan.py
@@ -26,14 +26,14 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import json
+import warnings
+
+from finn.analysis.fpgadataflow.floorplan_params import floorplan_params
from finn.custom_op.registry import getCustomOp
from finn.transformation.base import Transformation
-from finn.util.basic import get_by_name
-from finn.analysis.fpgadataflow.floorplan_params import floorplan_params
-from finn.util.basic import make_build_dir
from finn.transformation.general import ApplyConfig
-import warnings
-import json
+from finn.util.basic import get_by_name, make_build_dir
class Floorplan(Transformation):
@@ -70,7 +70,7 @@ def apply(self, model):
try:
default_slr = self.user_floorplan["Defaults"]["slr"][0]
- except:
+ except Exception:
default_slr = -1
# perform DWC and FIFO specific adjustments
@@ -107,7 +107,8 @@ def apply(self, model):
warnings.warn(
str(unassigned_nodes)
+ " nodes have no entry in the provided floorplan,"
- + " SLR was set to " + str(default_slr)
+ + " SLR was set to "
+ + str(default_slr)
)
# partition id generation
diff --git a/src/finn/transformation/fpgadataflow/hlssynth_ip.py b/src/finn/transformation/fpgadataflow/hlssynth_ip.py
index bbd012a715..2a7d9e9066 100644
--- a/src/finn/transformation/fpgadataflow/hlssynth_ip.py
+++ b/src/finn/transformation/fpgadataflow/hlssynth_ip.py
@@ -27,10 +27,11 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
+import warnings
+
import finn.custom_op.registry as registry
-from finn.util.fpgadataflow import is_fpgadataflow_node
from finn.transformation.base import NodeLocalTransformation
-import warnings
+from finn.util.fpgadataflow import is_fpgadataflow_node
class HLSSynthIP(NodeLocalTransformation):
diff --git a/src/finn/transformation/fpgadataflow/insert_dwc.py b/src/finn/transformation/fpgadataflow/insert_dwc.py
index c8df80659d..58efe65eb5 100644
--- a/src/finn/transformation/fpgadataflow/insert_dwc.py
+++ b/src/finn/transformation/fpgadataflow/insert_dwc.py
@@ -1,10 +1,10 @@
+import warnings
from onnx import TensorProto
from onnx import helper as oh
from finn.custom_op.registry import getCustomOp
from finn.transformation.base import Transformation
from finn.util.fpgadataflow import is_fpgadataflow_node
-import warnings
def _is_dwc_node(node):
diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py
index 1ce936cd79..c8bb716922 100644
--- a/src/finn/transformation/fpgadataflow/insert_fifo.py
+++ b/src/finn/transformation/fpgadataflow/insert_fifo.py
@@ -1,11 +1,11 @@
+import numpy as np
+import warnings
from onnx import TensorProto
from onnx import helper as oh
from finn.custom_op.registry import getCustomOp
from finn.transformation.base import Transformation
from finn.util.fpgadataflow import is_fpgadataflow_node
-import warnings
-import numpy as np
def _is_fifo_node(node):
@@ -180,49 +180,50 @@ def apply(self, model):
n.input[0] = fifo_output_tensor.name
# insert FIFO as last node, except when last node is DMA
- if (
- graph.node[-1].op_type != "StreamingFIFO"
- and graph.node[-1].op_type != "IODMA"
- ):
- n = graph.node[-1]
- assert (
- n.op_type != "TLastMarker"
- ), """Insert tlast marker should be done
- after inserting the FIFOs"""
- graph_out_name = graph.output[0].name
- n0 = getCustomOp(n)
- # determine fifo node attributes
- fld_shape = n0.get_folded_output_shape()
- dtype = n0.get_output_datatype()
- fifo_depth = n0.get_nodeattr("outFIFODepth")
-
- if fifo_depth <= 2:
- warnings.warn("Overriding output FIFO depth to 32")
- fifo_depth = 32
-
- # create fifo node
- fifo_input_tensor = oh.make_tensor_value_info(
- model.make_new_valueinfo_name(),
- TensorProto.FLOAT,
- n0.get_normal_output_shape(),
- )
- graph.value_info.append(fifo_input_tensor)
- model.set_tensor_datatype(fifo_input_tensor.name, dtype)
-
- fifo_node = oh.make_node(
- "StreamingFIFO",
- [fifo_input_tensor.name],
- [graph_out_name],
- domain="finn.custom_op.fpgadataflow",
- backend="fpgadataflow",
- depth=fifo_depth,
- folded_shape=fld_shape,
- dataType=str(dtype.name),
- )
- # insert fifo
- graph.node.append(fifo_node)
-
- # set fifo output tensor as new input tensor of second node
- n.output[0] = fifo_input_tensor.name
+ graph_out_names = [x.name for x in model.graph.output]
+ for graph_out_name in graph_out_names:
+ final_node = model.find_producer(graph_out_name)
+ if (
+ final_node.op_type != "StreamingFIFO"
+ and final_node.op_type != "IODMA"
+ ):
+ assert (
+ final_node.op_type != "TLastMarker"
+ ), """Insert tlast marker should be done
+ after inserting the FIFOs"""
+ n0 = getCustomOp(final_node)
+ # determine fifo node attributes
+ fld_shape = n0.get_folded_output_shape()
+ dtype = n0.get_output_datatype()
+ fifo_depth = n0.get_nodeattr("outFIFODepth")
+
+ if fifo_depth <= 2:
+ warnings.warn("Overriding output FIFO depth to 32")
+ fifo_depth = 32
+
+ # create fifo node
+ fifo_input_tensor = oh.make_tensor_value_info(
+ model.make_new_valueinfo_name(),
+ TensorProto.FLOAT,
+ n0.get_normal_output_shape(),
+ )
+ graph.value_info.append(fifo_input_tensor)
+ model.set_tensor_datatype(fifo_input_tensor.name, dtype)
+
+ fifo_node = oh.make_node(
+ "StreamingFIFO",
+ [fifo_input_tensor.name],
+ [graph_out_name],
+ domain="finn.custom_op.fpgadataflow",
+ backend="fpgadataflow",
+ depth=fifo_depth,
+ folded_shape=fld_shape,
+ dataType=str(dtype.name),
+ )
+ # insert fifo
+ graph.node.append(fifo_node)
+
+ # reroute the final node's output into the FIFO's input tensor
+ final_node.output[0] = fifo_input_tensor.name
return (model, graph_modified)
diff --git a/src/finn/transformation/fpgadataflow/insert_iodma.py b/src/finn/transformation/fpgadataflow/insert_iodma.py
index 27055a4fd2..d0ef270816 100644
--- a/src/finn/transformation/fpgadataflow/insert_iodma.py
+++ b/src/finn/transformation/fpgadataflow/insert_iodma.py
@@ -26,15 +26,15 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import math
+import numpy as np
from onnx import TensorProto
from onnx import helper as oh
-from finn.util.basic import get_by_name
from finn.custom_op.registry import getCustomOp
from finn.transformation.base import Transformation
from finn.transformation.general import SortGraph
-import math
-import numpy as np
+from finn.util.basic import get_by_name
class InsertIODMA(Transformation):
@@ -87,6 +87,7 @@ def get_mem_init(self, weights, pe, simd):
return reshaped_w
def apply(self, model):
+ modified = False
# only makes sense for a pure fpgadataflow graph -- so we check!
all_nodes = list(model.graph.node)
assert all(
@@ -102,59 +103,14 @@ def apply(self, model):
all_nodes,
)
)
- graph_in_name = model.graph.input[0].name
- first_node = model.find_consumer(graph_in_name)
- graph_out_name = model.graph.output[0].name
- final_node = model.find_producer(graph_out_name)
- if (
- final_node.op_type == "IODMA"
- and first_node.op_type == "IODMA"
- and len(fc_extw_nodes) == 0
- ):
- # TODO maybe check the correctness of properties
- return (model, False)
- else:
- if final_node.op_type != "IODMA":
- out_shape = model.get_tensor_shape(graph_out_name)
- out_dtype = model.get_tensor_datatype(graph_out_name)
- final_node_inst = getCustomOp(final_node)
- out_folded_shape = final_node_inst.get_folded_output_shape()
- # take advantage of AXI stream width padding for DMA alignment
- # (AXI streams are always padded to 8 bits)
- # this is the width of stream input to DMA
- padded_outstream_width = final_node_inst.get_outstream_width_padded()
- padded_outstream_bytes = padded_outstream_width // 8
- # determine the feasible interface width
- transfer_bits = padded_outstream_width * np.prod(out_folded_shape[:-1])
- intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
- assert (
- intfwidth % 8 == 0
- ), "No feasible interface width for transfer size"
- # make new buffer
- final_node_out = oh.make_tensor_value_info(
- model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape
- )
- model.graph.value_info.append(final_node_out)
- model.set_tensor_datatype(final_node_out.name, out_dtype)
- # reroute final node output to final_node_out_name
- final_node.output[0] = final_node_out.name
- # FIXME: currently always using 8-bit dtypes to work around the
- # padding problems for i/o DMA
- dma_node = oh.make_node(
- "IODMA",
- [final_node_out.name],
- [graph_out_name],
- numInputVectors=out_folded_shape[:-1],
- NumChannels=padded_outstream_bytes,
- dataType="UINT8",
- intfWidth=intfwidth,
- streamWidth=padded_outstream_width,
- direction="out",
- domain="finn.custom_op.fpgadataflow",
- backend="fpgadataflow",
- )
- model.graph.node.append(dma_node)
- if first_node.op_type != "IODMA":
+ # insert IODMAs for graph inputs
+ graph_in_names = [x.name for x in model.graph.input]
+ for graph_in_name in graph_in_names:
+ first_node = model.find_consumer(graph_in_name)
+ if first_node.op_type == "IODMA":
+ # IODMA already inserted for this input
+ continue
+ else:
in_shape = model.get_tensor_shape(graph_in_name)
in_dtype = model.get_tensor_datatype(graph_in_name)
first_node_inst = getCustomOp(first_node)
@@ -194,47 +150,96 @@ def apply(self, model):
backend="fpgadataflow",
)
model.graph.node.insert(0, dma_node)
- for fc_node in fc_extw_nodes:
- fc_inst = getCustomOp(fc_node)
- fc_w_name = fc_node.input[1]
- w_shape = model.get_tensor_shape(fc_w_name)
- w_dtype = model.get_tensor_datatype(fc_w_name)
+ modified = True
+ # insert IODMAs for graph outputs
+ graph_out_names = [x.name for x in model.graph.output]
+ for graph_out_name in graph_out_names:
+ final_node = model.find_producer(graph_out_name)
+ if final_node.op_type == "IODMA":
+ continue
+ else:
+ out_shape = model.get_tensor_shape(graph_out_name)
+ out_dtype = model.get_tensor_datatype(graph_out_name)
+ final_node_inst = getCustomOp(final_node)
+ out_folded_shape = final_node_inst.get_folded_output_shape()
+ # take advantage of AXI stream width padding for DMA alignment
+ # (AXI streams are always padded to 8 bits)
+ # this is the width of stream input to DMA
+ padded_outstream_width = final_node_inst.get_outstream_width_padded()
+ padded_outstream_bytes = padded_outstream_width // 8
# determine the feasible interface width
- transfer_bits = np.prod(w_shape) * w_dtype.bitwidth()
+ transfer_bits = padded_outstream_width * np.prod(out_folded_shape[:-1])
intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
assert (
intfwidth % 8 == 0
), "No feasible interface width for transfer size"
- # calculate width of stream output from DMA
- pe = get_by_name(fc_node.attribute, "PE").i
- simd = get_by_name(fc_node.attribute, "SIMD").i
- streamWidth = fc_inst.get_weightstream_width_padded()
# make new buffer
- W = model.get_initializer(fc_w_name)
- iodma_mem = self.get_mem_init(W, pe, simd)
- model.set_initializer(fc_w_name, iodma_mem)
-
- fc_node_in = oh.make_tensor_value_info(
- model.make_new_valueinfo_name(), TensorProto.FLOAT, iodma_mem.shape
+ final_node_out = oh.make_tensor_value_info(
+ model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape
)
- model.graph.value_info.append(fc_node_in)
- model.set_tensor_datatype(fc_node_in.name, w_dtype)
- model.set_initializer(fc_node_in.name, W)
+ model.graph.value_info.append(final_node_out)
+ model.set_tensor_datatype(final_node_out.name, out_dtype)
+ # reroute final node output to final_node_out_name
+ final_node.output[0] = final_node_out.name
+ # FIXME: currently always using 8-bit dtypes to work around the
+ # padding problems for i/o DMA
dma_node = oh.make_node(
"IODMA",
- [fc_w_name],
- [fc_node_in.name],
- numInputVectors=[iodma_mem.shape[0]],
- NumChannels=pe * simd,
- dataType=str(w_dtype.name),
+ [final_node_out.name],
+ [graph_out_name],
+ numInputVectors=out_folded_shape[:-1],
+ NumChannels=padded_outstream_bytes,
+ dataType="UINT8",
intfWidth=intfwidth,
- streamWidth=streamWidth,
- direction="in",
- burstMode="wrap",
+ streamWidth=padded_outstream_width,
+ direction="out",
domain="finn.custom_op.fpgadataflow",
backend="fpgadataflow",
)
- fc_node.input[1] = fc_node_in.name
- model.graph.node.insert(0, dma_node)
+ model.graph.node.append(dma_node)
+ modified = True
+
+ for fc_node in fc_extw_nodes:
+ fc_inst = getCustomOp(fc_node)
+ fc_w_name = fc_node.input[1]
+ w_shape = model.get_tensor_shape(fc_w_name)
+ w_dtype = model.get_tensor_datatype(fc_w_name)
+ # determine the feasible interface width
+ transfer_bits = np.prod(w_shape) * w_dtype.bitwidth()
+ intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
+ assert intfwidth % 8 == 0, "No feasible interface width for transfer size"
+ # calculate width of stream output from DMA
+ pe = get_by_name(fc_node.attribute, "PE").i
+ simd = get_by_name(fc_node.attribute, "SIMD").i
+ streamWidth = fc_inst.get_weightstream_width_padded()
+ # make new buffer
+ W = model.get_initializer(fc_w_name)
+ iodma_mem = self.get_mem_init(W, pe, simd)
+ model.set_initializer(fc_w_name, iodma_mem)
+
+ fc_node_in = oh.make_tensor_value_info(
+ model.make_new_valueinfo_name(), TensorProto.FLOAT, iodma_mem.shape
+ )
+ model.graph.value_info.append(fc_node_in)
+ model.set_tensor_datatype(fc_node_in.name, w_dtype)
+ model.set_initializer(fc_node_in.name, W)
+ dma_node = oh.make_node(
+ "IODMA",
+ [fc_w_name],
+ [fc_node_in.name],
+ numInputVectors=[iodma_mem.shape[0]],
+ NumChannels=pe * simd,
+ dataType=str(w_dtype.name),
+ intfWidth=intfwidth,
+ streamWidth=streamWidth,
+ direction="in",
+ burstMode="wrap",
+ domain="finn.custom_op.fpgadataflow",
+ backend="fpgadataflow",
+ )
+ fc_node.input[1] = fc_node_in.name
+ model.graph.node.insert(0, dma_node)
+ modified = True
+ if modified:
model = model.transform(SortGraph())
- return (model, True)
+ return (model, modified)
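
A quick illustration of the feasible-interface-width rule used in this pass: the DMA interface width must divide the total transfer size in bits while not exceeding the configured maximum, which is exactly what the `math.gcd` call computes. A minimal sketch with illustrative numbers (the real values come from the final node's padded stream width and folded output shape):

```python
import math

max_intfwidth = 64                  # illustrative maximum interface width
padded_outstream_width = 24         # AXI stream width, padded to a byte multiple
num_transfers = 10                  # stands in for np.prod(out_folded_shape[:-1])
transfer_bits = padded_outstream_width * num_transfers

intfwidth = math.gcd(transfer_bits, max_intfwidth)
assert intfwidth % 8 == 0, "No feasible interface width for transfer size"
print(intfwidth)  # 16, since gcd(240, 64) = 16
```
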
diff --git a/src/finn/transformation/fpgadataflow/insert_tlastmarker.py b/src/finn/transformation/fpgadataflow/insert_tlastmarker.py
index 3ce9824b14..34cb61346d 100644
--- a/src/finn/transformation/fpgadataflow/insert_tlastmarker.py
+++ b/src/finn/transformation/fpgadataflow/insert_tlastmarker.py
@@ -26,6 +26,7 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import numpy as np
from onnx import TensorProto
from onnx import helper as oh
@@ -33,8 +34,6 @@
from finn.transformation.base import Transformation
from finn.util.basic import get_by_name
-import numpy as np
-
class InsertTLastMarker(Transformation):
"""Ensure that the graph is started/terminated with a TLastMarker node, inserting
diff --git a/src/finn/transformation/fpgadataflow/make_deployment.py b/src/finn/transformation/fpgadataflow/make_deployment.py
index 84d3f4cd94..d43d81716a 100644
--- a/src/finn/transformation/fpgadataflow/make_deployment.py
+++ b/src/finn/transformation/fpgadataflow/make_deployment.py
@@ -31,9 +31,9 @@
from distutils.dir_util import copy_tree
from shutil import copy
+import finn.transformation.fpgadataflow.templates as templates
from finn.transformation.base import Transformation
from finn.util.basic import make_build_dir
-import finn.transformation.fpgadataflow.templates as templates
class DeployToPYNQ(Transformation):
diff --git a/src/finn/transformation/fpgadataflow/make_pynq_driver.py b/src/finn/transformation/fpgadataflow/make_pynq_driver.py
index 6ab12548ab..2c3bd7ee59 100644
--- a/src/finn/transformation/fpgadataflow/make_pynq_driver.py
+++ b/src/finn/transformation/fpgadataflow/make_pynq_driver.py
@@ -27,24 +27,29 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import shutil
-from finn.transformation.base import Transformation
-from finn.util.basic import gen_finn_dt_tensor, make_build_dir
-import finn.util.data_packing as dpk
-import finn.core.datatype as dtp
-from finn.custom_op.registry import getCustomOp
-import os
-import warnings
import pkg_resources as pk
-from . import template_driver
-from finn.core.modelwrapper import ModelWrapper
+
import numpy as np
+import os
+import shutil
+import warnings
+import finn.core.datatype as dtp
+import finn.util.data_packing as dpk
+from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.base import Transformation
+from finn.util.basic import (
+ gen_finn_dt_tensor,
+ make_build_dir,
+ roundup_to_integer_multiple,
+)
from finn.util.data_packing import (
- pack_innermost_dim_as_hex_string,
hexstring2npbytearray,
+ pack_innermost_dim_as_hex_string,
)
-from finn.util.basic import roundup_to_integer_multiple
+
+from . import template_driver
def to_external_tensor(init, w_dtype):
@@ -85,6 +90,7 @@ def __init__(self, platform):
self.platform = platform
def apply(self, model):
+
# create a temporary folder for the generated driver
pynq_driver_dir = make_build_dir(prefix="pynq_driver_")
model.set_metadata_prop("pynq_driver_dir", pynq_driver_dir)
@@ -95,59 +101,100 @@ def apply(self, model):
)
driver_base_py = pynq_driver_dir + "/driver_base.py"
shutil.copy(driver_base_template, driver_base_py)
-
# extract input-output shapes from the graph
# TODO convert this to an analysis pass?
- i_tensor_name = model.graph.input[0].name
- o_tensor_name = model.graph.output[0].name
- i_tensor_shape_normal = tuple(model.get_tensor_shape(i_tensor_name))
- o_tensor_shape_normal = tuple(model.get_tensor_shape(o_tensor_name))
- i_tensor_dt = model.get_tensor_datatype(i_tensor_name)
- o_tensor_dt = model.get_tensor_datatype(o_tensor_name)
-
- first_node = model.find_consumer(i_tensor_name)
- last_node = model.find_producer(o_tensor_name)
- if first_node.op_type == "StreamingDataflowPartition":
- # IODMAs and dataflow partitions have already been created
- # extract folded i/o shapes from IODMA consumer/producer
- first_df_model = ModelWrapper(getCustomOp(first_node).get_nodeattr("model"))
+ idt = []
+ idma_names = []
+ ishape_normal = []
+ ishape_folded = []
+ ishape_packed = []
+ for idma_ind, graph_in in enumerate(model.graph.input):
+ i_tensor_name = graph_in.name
+ # get inp tensor properties
+ i_tensor_dt = model.get_tensor_datatype(i_tensor_name)
+ i_tensor_shape_normal = tuple(model.get_tensor_shape(i_tensor_name))
+ # go down into dataflow partition to get folded shape info etc
+ # TODO consider setting these as attributes during dataflow partitioning
+ i_consumer = model.find_consumer(i_tensor_name)
+ assert (
+ i_consumer.op_type == "StreamingDataflowPartition"
+ ), """
+            Ensure CreateDataflowPartition is called before driver creation."""
+ first_df_model = ModelWrapper(getCustomOp(i_consumer).get_nodeattr("model"))
assert (
first_df_model.graph.node[0].op_type == "IODMA"
), "First partition must hold input IODMA"
- successors = model.find_direct_successors(first_node)
+ successors = model.find_direct_successors(i_consumer)
+ successor_input_num = list(successors[0].input).index(i_consumer.output[0])
successor_sdp = getCustomOp(successors[0])
successor_df_model = ModelWrapper(successor_sdp.get_nodeattr("model"))
first_node = successor_df_model.find_consumer(
- successor_df_model.graph.input[0].name
+ successor_df_model.graph.input[successor_input_num].name
)
-
- last_df_model = ModelWrapper(getCustomOp(last_node).get_nodeattr("model"))
+ i_tensor_shape_folded = tuple(
+ getCustomOp(first_node).get_folded_input_shape()
+ )
+ # generate dummy folded i/o tensors and their packed versions
+ i_tensor_dummy_folded = gen_finn_dt_tensor(
+ i_tensor_dt, i_tensor_shape_folded
+ )
+ i_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
+ i_tensor_dummy_folded, i_tensor_dt
+ )
+ i_tensor_shape_packed = i_tensor_dummy_packed.shape
+ # append all input tensor info to relevant lists
+ idt.append("DataType['%s']" % i_tensor_dt.name)
+ ishape_normal.append(i_tensor_shape_normal)
+ ishape_folded.append(i_tensor_shape_folded)
+ ishape_packed.append(i_tensor_shape_packed)
+ idma_names.append(getCustomOp(i_consumer).get_nodeattr("instance_name"))
+
+ odt = []
+ odma_names = []
+ oshape_normal = []
+ oshape_folded = []
+ oshape_packed = []
+ for odma_ind, graph_out in enumerate(model.graph.output):
+ o_tensor_name = graph_out.name
+            # get output tensor properties
+ o_tensor_dt = model.get_tensor_datatype(o_tensor_name)
+ o_tensor_shape_normal = tuple(model.get_tensor_shape(o_tensor_name))
+            # go down into dataflow partition to get folded shape info etc
+ # TODO consider setting these as attributes during dataflow partitioning
+ o_producer = model.find_producer(o_tensor_name)
+ assert (
+ o_producer.op_type == "StreamingDataflowPartition"
+ ), """
+            Ensure CreateDataflowPartition is called before driver creation."""
+ df_model = ModelWrapper(getCustomOp(o_producer).get_nodeattr("model"))
assert (
- last_df_model.graph.node[0].op_type == "IODMA"
- ), "Last partition must hold output IODMA"
- predecessors = model.find_direct_predecessors(last_node)
+ df_model.graph.node[-1].op_type == "IODMA"
+ ), "Partition must hold output IODMA"
+ predecessors = model.find_direct_predecessors(o_producer)
+ predecessor_output_num = list(predecessors[0].output).index(
+ o_producer.input[0]
+ )
predecessor_sdp = getCustomOp(predecessors[0])
predecessor_df_model = ModelWrapper(predecessor_sdp.get_nodeattr("model"))
last_node = predecessor_df_model.find_producer(
- predecessor_df_model.graph.output[0].name
+ predecessor_df_model.graph.output[predecessor_output_num].name
)
-
- # else: transformation called before IODMA/SDP creation (legacy flow)
- # can access folded i/o shapes directly
- i_tensor_shape_folded = tuple(getCustomOp(first_node).get_folded_input_shape())
- o_tensor_shape_folded = tuple(getCustomOp(last_node).get_folded_output_shape())
-
- # generate dummy folded i/o tensors and their packed versions
- i_tensor_dummy_folded = gen_finn_dt_tensor(i_tensor_dt, i_tensor_shape_folded)
- o_tensor_dummy_folded = gen_finn_dt_tensor(o_tensor_dt, o_tensor_shape_folded)
- i_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
- i_tensor_dummy_folded, i_tensor_dt
- )
- o_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
- o_tensor_dummy_folded, o_tensor_dt
- )
- i_tensor_shape_packed = i_tensor_dummy_packed.shape
- o_tensor_shape_packed = o_tensor_dummy_packed.shape
+ o_tensor_shape_folded = tuple(
+ getCustomOp(last_node).get_folded_output_shape()
+ )
+ o_tensor_dummy_folded = gen_finn_dt_tensor(
+ o_tensor_dt, o_tensor_shape_folded
+ )
+ o_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
+ o_tensor_dummy_folded, o_tensor_dt
+ )
+ o_tensor_shape_packed = o_tensor_dummy_packed.shape
+ # append all output tensor info to relevant lists
+ odt.append("DataType['%s']" % o_tensor_dt.name)
+ oshape_normal.append(o_tensor_shape_normal)
+ oshape_folded.append(o_tensor_shape_folded)
+ oshape_packed.append(o_tensor_shape_packed)
+ odma_names.append(getCustomOp(o_producer).get_nodeattr("instance_name"))
# generate external weights npy files
weights_dir = pynq_driver_dir + "/runtime_weights"
@@ -161,47 +208,50 @@ def apply(self, model):
node.op_type == "StreamingDataflowPartition"
), "CreateDataflowPartition needs to be applied before driver generation"
- producer = model.find_producer(node.input[0])
- init_tensor = model.get_initializer(node.input[0])
+ if len(node.input) > 0:
+ producer = model.find_producer(node.input[0])
+ init_tensor = model.get_initializer(node.input[0])
+ else:
+ producer = None
+ init_tensor = None
if producer is None: # input dma?
- idma_name = "idma" + str(idma_idx)
- if init_tensor is not None: # input weights dma?
+ sdp_inst = getCustomOp(node)
+ idma_name = sdp_inst.get_nodeattr("instance_name")
+ df_model = ModelWrapper(sdp_inst.get_nodeattr("model"))
+ assert df_model.graph.node[0].op_type == "IODMA"
+ iodma_node = getCustomOp(df_model.graph.node[0])
+ if iodma_node.get_nodeattr("burstMode") == "wrap": # input weights dma?
+ init_tensor = df_model.get_initializer(
+ iodma_node.onnx_node.input[0]
+ )
ext_weight_dma_cnt += 1
- w_dtype = model.get_tensor_datatype(node.input[0])
+ w_dtype = df_model.get_tensor_datatype(
+ iodma_node.onnx_node.input[0]
+ )
init_external_tensor = to_external_tensor(init_tensor, w_dtype)
np.save(
weights_dir + "/" + idma_name + ".npy", init_external_tensor
)
- else:
- net_input_name = idma_name
-
idma_idx += 1
# fill in the driver template
driver_py = pynq_driver_dir + "/driver.py"
driver = template_driver.pynq_driver_template
- def mss(x, batch_var_name="1"):
- # "make shape string"
- # for a shape like (1, ...) emit a string (N, ...)
- # where N is the default value for batch_var_name
- # this lets the driver work with a batch of samples at once
- ret = str(x)
- ret = ret.replace("(1,", "(%s," % batch_var_name)
- ret = ret.replace("[1,", "[%s," % batch_var_name)
- return ret
-
driver = driver.replace("$PLATFORM$", self.platform)
- driver = driver.replace("$INPUT_FINN_DATATYPE$", str(i_tensor_dt))
- driver = driver.replace("$INPUT_SHAPE_NORMAL$", mss(i_tensor_shape_normal))
- driver = driver.replace("$INPUT_SHAPE_FOLDED$", mss(i_tensor_shape_folded))
- driver = driver.replace("$INPUT_SHAPE_PACKED$", mss(i_tensor_shape_packed))
- driver = driver.replace("$OUTPUT_FINN_DATATYPE$", str(o_tensor_dt))
- driver = driver.replace("$OUTPUT_SHAPE_NORMAL$", mss(o_tensor_shape_normal))
- driver = driver.replace("$OUTPUT_SHAPE_FOLDED$", mss(o_tensor_shape_folded))
- driver = driver.replace("$OUTPUT_SHAPE_PACKED$", mss(o_tensor_shape_packed))
- driver = driver.replace("$INPUT_DMA_NAME$", "'%s'" % net_input_name)
+ driver = driver.replace("$INPUT_FINN_DATATYPE$", str(idt).replace('"', ""))
+ driver = driver.replace("$INPUT_SHAPE_NORMAL$", str(ishape_normal))
+ driver = driver.replace("$INPUT_SHAPE_FOLDED$", str(ishape_folded))
+ driver = driver.replace("$INPUT_SHAPE_PACKED$", str(ishape_packed))
+ driver = driver.replace("$OUTPUT_FINN_DATATYPE$", str(odt).replace('"', ""))
+ driver = driver.replace("$OUTPUT_SHAPE_NORMAL$", str(oshape_normal))
+ driver = driver.replace("$OUTPUT_SHAPE_FOLDED$", str(oshape_folded))
+ driver = driver.replace("$OUTPUT_SHAPE_PACKED$", str(oshape_packed))
+ driver = driver.replace("$INPUT_DMA_NAME$", "%s" % str(idma_names))
+ driver = driver.replace("$OUTPUT_DMA_NAME$", "%s" % str(odma_names))
+ driver = driver.replace("$NUM_INPUTS$", str(len(idma_names)))
+ driver = driver.replace("$NUM_OUTPUTS$", str(len(odma_names)))
driver = driver.replace("$EXT_WEIGHT_NUM$", str(ext_weight_dma_cnt))
with open(driver_py, "w") as f:
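
The packed shapes recorded in the driver are obtained by packing a dummy folded tensor, exactly as in the loops above. A self-contained sketch of that derivation (the datatype and folded shape are illustrative):

```python
import finn.util.data_packing as dpk
from finn.core.datatype import DataType
from finn.util.basic import gen_finn_dt_tensor

i_tensor_dt = DataType["UINT4"]    # illustrative FINN datatype
i_tensor_shape_folded = (1, 4, 2)  # illustrative folded shape

i_tensor_dummy_folded = gen_finn_dt_tensor(i_tensor_dt, i_tensor_shape_folded)
i_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
    i_tensor_dummy_folded, i_tensor_dt
)
print(i_tensor_dummy_packed.shape)  # packed byte shape stored in ishape_packed
```
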
diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
index f2f172139e..84d587b6ce 100644
--- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py
+++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
@@ -28,27 +28,24 @@
import os
import subprocess
+from shutil import copy
+from finn.core.modelwrapper import ModelWrapper
from finn.custom_op.registry import getCustomOp
from finn.transformation.base import Transformation
-from finn.core.modelwrapper import ModelWrapper
-from finn.util.basic import get_by_name, make_build_dir
-from finn.util.basic import pynq_part_map
-
from finn.transformation.fpgadataflow.create_dataflow_partition import (
CreateDataflowPartition,
)
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.floorplan import Floorplan
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
-from finn.transformation.fpgadataflow.floorplan import Floorplan
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
from finn.transformation.infer_data_layouts import InferDataLayouts
-from shutil import copy
-from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
+from finn.util.basic import make_build_dir, pynq_part_map
from . import templates
@@ -56,19 +53,22 @@
def collect_ip_dirs(model, ipstitch_path):
# collect list of all IP dirs
ip_dirs = []
+ need_memstreamer = False
for node in model.graph.node:
- ip_dir_attribute = get_by_name(node.attribute, "ip_path")
- assert (
- ip_dir_attribute is not None
- ), """Node attribute "ip_path" is
- empty. Please run transformation HLSSynth_ipgen first."""
- ip_dir_value = ip_dir_attribute.s.decode("UTF-8")
+ node_inst = getCustomOp(node)
+ ip_dir_value = node_inst.get_nodeattr("ip_path")
assert os.path.isdir(
ip_dir_value
), """The directory that should
contain the generated ip blocks doesn't exist."""
ip_dirs += [ip_dir_value]
+ if node.op_type in ["StreamingFCLayer_Batch", "Thresholding_Batch"]:
+ if node_inst.get_nodeattr("mem_mode") == "decoupled":
+ need_memstreamer = True
ip_dirs += [ipstitch_path + "/ip"]
+ if need_memstreamer:
+ # add RTL streamer IP
+ ip_dirs.append("/workspace/finn/finn-rtllib/memstream")
return ip_dirs
@@ -144,16 +144,21 @@ def apply(self, model):
# assume only one connection from each ip to the next
# all aximm allocated to DDR[0]
# all kernels allocated to SLR0
- producer = model.find_producer(node.input[0])
+ if len(node.input) == 0:
+ producer = None
+ else:
+ producer = model.find_producer(node.input[0])
consumer = model.find_consumers(node.output[0])
# define kernel instances
# name kernels connected to graph inputs as idmaxx
- # name kernels connected to graph inputs as odmaxx
+ # name kernels connected to graph outputs as odmaxx
if producer is None or consumer is None:
if producer is None:
instance_names[node.name] = "idma" + str(idma_idx)
+ idma_idx += 1
elif consumer is None:
instance_names[node.name] = "odma" + str(odma_idx)
+ odma_idx += 1
config.append(
"create_bd_cell -type ip -vlnv %s %s"
% (vivado_stitch_vlnv, instance_names[node.name])
@@ -178,7 +183,7 @@ def apply(self, model):
"assign_axi_addr_proc %s/%s"
% (instance_names[node.name], axilite_intf_name)
)
- idma_idx += 1
+
aximm_idx += 1
axilite_idx += 1
else:
@@ -199,6 +204,7 @@ def apply(self, model):
% (instance_names[node.name], axilite_intf_name)
)
axilite_idx += 1
+ sdp_node.set_nodeattr("instance_name", instance_names[node.name])
config.append(
"connect_bd_net [get_bd_pins %s/ap_clk] "
@@ -297,12 +303,19 @@ class ZynqBuild(Transformation):
"""
- def __init__(self, platform, period_ns, enable_debug=False):
+ def __init__(
+ self,
+ platform,
+ period_ns,
+ enable_debug=False,
+ partition_model_dir=None,
+ ):
super().__init__()
self.fpga_part = pynq_part_map[platform]
self.period_ns = period_ns
self.platform = platform
self.enable_debug = enable_debug
+ self.partition_model_dir = partition_model_dir
def apply(self, model):
# first infer layouts
@@ -312,7 +325,7 @@ def apply(self, model):
InsertIODMA(64),
InsertDWC(),
Floorplan(),
- CreateDataflowPartition(),
+ CreateDataflowPartition(partition_model_dir=self.partition_model_dir),
]
for trn in prep_transforms:
model = model.transform(trn)
@@ -334,7 +347,7 @@ def apply(self, model):
kernel_model = kernel_model.transform(HLSSynthIP())
kernel_model = kernel_model.transform(
CreateStitchedIP(
- self.fpga_part, self.period_ns, sdp_node.onnx_node.name, True
+ self.fpga_part, self.period_ns, sdp_node.onnx_node.name, False
)
)
kernel_model.set_metadata_prop("platform", "zynq-iodma")
@@ -347,6 +360,4 @@ def apply(self, model):
# set platform attribute for correct remote execution
model.set_metadata_prop("platform", "zynq-iodma")
- # create driver
- model = model.transform(MakePYNQDriver(platform="zynq-iodma"))
return (model, False)
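
For reference, the idma/odma naming convention applied while assembling the block design: a partition with no upstream producer becomes `idma<N>`, one with no downstream consumer becomes `odma<N>`, and (with the fix above) the respective index is incremented right away. A standalone sketch of just that logic, with hand-made example triples:

```python
# (node_name, has_producer, has_consumer) triples, illustrative only
nodes = [("sdp0", False, True), ("sdp1", True, True), ("sdp2", True, False)]

instance_names = {}
idma_idx = 0
odma_idx = 0
for name, has_producer, has_consumer in nodes:
    if not has_producer:
        instance_names[name] = "idma" + str(idma_idx)
        idma_idx += 1
    elif not has_consumer:
        instance_names[name] = "odma" + str(odma_idx)
        odma_idx += 1
    else:
        instance_names[name] = name

print(instance_names)  # {'sdp0': 'idma0', 'sdp1': 'sdp1', 'sdp2': 'odma0'}
```
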
diff --git a/src/finn/transformation/fpgadataflow/prepare_cppsim.py b/src/finn/transformation/fpgadataflow/prepare_cppsim.py
index 653ec02ff3..8b332972ca 100644
--- a/src/finn/transformation/fpgadataflow/prepare_cppsim.py
+++ b/src/finn/transformation/fpgadataflow/prepare_cppsim.py
@@ -26,15 +26,14 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import copy
+import multiprocessing as mp
import os
import finn.custom_op.registry as registry
-from finn.util.basic import make_build_dir
-from finn.util.fpgadataflow import is_fpgadataflow_node
from finn.transformation.base import Transformation
-from finn.util.basic import get_num_default_workers
-import multiprocessing as mp
-import copy
+from finn.util.basic import get_num_default_workers, make_build_dir
+from finn.util.fpgadataflow import is_fpgadataflow_node
def _codegen_single_node(node, model):
diff --git a/src/finn/transformation/fpgadataflow/prepare_ip.py b/src/finn/transformation/fpgadataflow/prepare_ip.py
index 4ed5e80aa7..4fdcf3939f 100644
--- a/src/finn/transformation/fpgadataflow/prepare_ip.py
+++ b/src/finn/transformation/fpgadataflow/prepare_ip.py
@@ -27,11 +27,12 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
+import warnings
+
import finn.custom_op.registry as registry
from finn.transformation.base import Transformation
from finn.util.basic import make_build_dir
from finn.util.fpgadataflow import is_fpgadataflow_node
-import warnings
def _codegen_single_node(node, model, fpgapart, clk):
diff --git a/src/finn/transformation/fpgadataflow/prepare_rtlsim.py b/src/finn/transformation/fpgadataflow/prepare_rtlsim.py
index eaa85b9102..66799ff429 100644
--- a/src/finn/transformation/fpgadataflow/prepare_rtlsim.py
+++ b/src/finn/transformation/fpgadataflow/prepare_rtlsim.py
@@ -27,11 +27,11 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import finn.custom_op.registry as registry
-from finn.util.fpgadataflow import is_fpgadataflow_node
+from finn.transformation.base import NodeLocalTransformation
from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
ReplaceVerilogRelPaths,
)
-from finn.transformation.base import NodeLocalTransformation
+from finn.util.fpgadataflow import is_fpgadataflow_node
try:
from pyverilator import PyVerilator
diff --git a/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py b/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py
index cc7c305b3e..7850d37423 100644
--- a/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py
+++ b/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py
@@ -29,8 +29,8 @@
import os
import finn.custom_op.registry as registry
-from finn.util.fpgadataflow import is_fpgadataflow_node
from finn.transformation.base import Transformation
+from finn.util.fpgadataflow import is_fpgadataflow_node
class ReplaceVerilogRelPaths(Transformation):
diff --git a/src/finn/transformation/fpgadataflow/set_exec_mode.py b/src/finn/transformation/fpgadataflow/set_exec_mode.py
index 4677e59f7b..caf891bc44 100644
--- a/src/finn/transformation/fpgadataflow/set_exec_mode.py
+++ b/src/finn/transformation/fpgadataflow/set_exec_mode.py
@@ -27,8 +27,8 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import finn.custom_op.registry as registry
-from finn.util.fpgadataflow import is_fpgadataflow_node
from finn.transformation.base import Transformation
+from finn.util.fpgadataflow import is_fpgadataflow_node
class SetExecMode(Transformation):
diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py
index ea27eee04d..39eb049565 100644
--- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py
+++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py
@@ -29,16 +29,17 @@
import math
import numpy as np
import warnings
+
+from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
from finn.custom_op.registry import getCustomOp
from finn.transformation.base import Transformation
from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
-from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
-from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
from finn.util.fpgadataflow import is_fpgadataflow_node
from finn.util.pyverilator import pyverilate_stitched_ip, reset_rtlsim, toggle_clk
@@ -86,9 +87,14 @@ def __init__(self, shallow_threshold=2):
def apply(self, model):
shallow_fifos = []
for node in model.graph.node:
+ if len(node.input) > 0:
+ is_first_node = model.find_producer(node.input[0]) is None
+ else:
+ is_first_node = True
if (
node.op_type == "StreamingFIFO"
and getCustomOp(node).get_nodeattr("depth") <= self.shallow_threshold
+ and (not is_first_node)
):
# bypass shallow fifos
shallow_fifos.append(node)
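
The guard added here keeps a shallow FIFO alive when it sits directly on a graph input (no producer), so the input stream keeps its buffering. A minimal sketch of the decision, using the default threshold from the constructor above:

```python
shallow_threshold = 2  # default from the constructor above

def bypass_fifo(op_type, depth, has_producer):
    # a StreamingFIFO is bypassed only if it is shallow and not the first node
    is_first_node = not has_producer
    return (
        op_type == "StreamingFIFO"
        and depth <= shallow_threshold
        and not is_first_node
    )

print(bypass_fifo("StreamingFIFO", 2, has_producer=True))   # True: bypassed
print(bypass_fifo("StreamingFIFO", 2, has_producer=False))  # False: kept at input
```
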
diff --git a/src/finn/transformation/fpgadataflow/set_folding.py b/src/finn/transformation/fpgadataflow/set_folding.py
index bb4e0e1db5..64d7a08072 100644
--- a/src/finn/transformation/fpgadataflow/set_folding.py
+++ b/src/finn/transformation/fpgadataflow/set_folding.py
@@ -26,13 +26,15 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import numpy as np
+import warnings
+
+from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
from finn.custom_op.registry import getCustomOp
from finn.transformation.base import Transformation
-from finn.util.fpgadataflow import is_fpgadataflow_node
-from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
from finn.transformation.general import GiveUniqueNodeNames
-import warnings
+from finn.util.fpgadataflow import is_fpgadataflow_node
def divisors(num):
@@ -153,9 +155,16 @@ def apply(self, model):
pe = node_inst.get_nodeattr("PE")
swu_node_inst.set_nodeattr("SIMD", pe)
else:
- raise Exception(
- "Expected SWU on DW op input, found " + swu_node.op_type
- )
+ if op_type == "Vector_Vector_Activate_Batch":
+ ksize = np.prod(node_inst.get_nodeattr("Kernel"))
+ elif op_type == "Pool_Batch":
+ ksize = node_inst.get_nodeattr("KernelSize")
+ else:
+ raise Exception("Undefined edge case for %s" % op_type)
+ if ksize != 1: # pointwise vvau/pool lack a SWU
+ raise Exception(
+ "Expected SWU on DW op input, found " + swu_node.op_type
+ )
elif op_type in simd_ops:
if op_type == "ConvolutionInputGenerator":
depthwise = node_inst.get_nodeattr("depthwise")
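
The new branch above covers depthwise ops with a pointwise (1x1) kernel, which legitimately have no sliding-window unit (SWU) in front of them. A sketch of the decision with the attribute values passed in directly (the real pass reads them via get_nodeattr):

```python
import numpy as np

def check_dw_input(op_type, kernel_attr, swu_found):
    # mirrors the edge-case handling above; attribute plumbing is omitted
    if op_type == "Vector_Vector_Activate_Batch":
        ksize = np.prod(kernel_attr)  # e.g. (1, 1) -> 1
    elif op_type == "Pool_Batch":
        ksize = kernel_attr           # scalar KernelSize
    else:
        raise Exception("Undefined edge case for %s" % op_type)
    if ksize != 1 and not swu_found:  # pointwise vvau/pool lack a SWU
        raise Exception("Expected SWU on DW op input")

check_dw_input("Vector_Vector_Activate_Batch", (1, 1), swu_found=False)  # passes
```
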
diff --git a/src/finn/transformation/fpgadataflow/synth_ooc.py b/src/finn/transformation/fpgadataflow/synth_ooc.py
index acc20e4ad0..49cd6c82bc 100644
--- a/src/finn/transformation/fpgadataflow/synth_ooc.py
+++ b/src/finn/transformation/fpgadataflow/synth_ooc.py
@@ -30,8 +30,8 @@
from shutil import copy2
from finn.transformation.base import Transformation
-from finn.util.vivado import out_of_context_synth
from finn.util.basic import make_build_dir
+from finn.util.vivado import out_of_context_synth
class SynthOutOfContext(Transformation):
@@ -52,10 +52,11 @@ def file_to_basename(x):
top_module_name = model.get_metadata_prop("wrapper_filename")
top_module_name = file_to_basename(top_module_name).strip(".v")
build_dir = make_build_dir("synth_out_of_context_")
+ verilog_extensions = [".v", ".vh"]
with open(vivado_stitch_proj_dir + "/all_verilog_srcs.txt", "r") as f:
all_verilog_srcs = f.read().split()
for file in all_verilog_srcs:
- if file.endswith(".v"):
+ if any([file.endswith(x) for x in verilog_extensions]):
copy2(file, build_dir)
ret = out_of_context_synth(
build_dir, top_module_name, self.part, self.clk_name, self.clk_period_ns
diff --git a/src/finn/transformation/fpgadataflow/template_driver.py b/src/finn/transformation/fpgadataflow/template_driver.py
index 5265835dd2..31dd22573e 100644
--- a/src/finn/transformation/fpgadataflow/template_driver.py
+++ b/src/finn/transformation/fpgadataflow/template_driver.py
@@ -79,7 +79,10 @@
"ishape_packed" : $INPUT_SHAPE_PACKED$,
"oshape_packed" : $OUTPUT_SHAPE_PACKED$,
"input_dma_name" : $INPUT_DMA_NAME$,
- "number_of_external_weights": $EXT_WEIGHT_NUM$
+ "output_dma_name" : $OUTPUT_DMA_NAME$,
+ "number_of_external_weights": $EXT_WEIGHT_NUM$,
+ "num_inputs" : $NUM_INPUTS$,
+ "num_outputs" : $NUM_OUTPUTS$,
}
if __name__ == "__main__":
@@ -88,8 +91,8 @@
parser.add_argument('--platform', help='Target platform: zynq-iodma alveo', default="$PLATFORM$")
parser.add_argument('--batchsize', help='number of samples for inference', type=int, default=1)
parser.add_argument('--bitfile', help='name of bitfile (i.e. "resizer.bit")', default="resizer.bit")
- parser.add_argument('--inputfile', help='name of input npy file (i.e. "input.npy")', default="input.npy")
- parser.add_argument('--outputfile', help='name of output npy file (i.e. "output.npy")', default="output.npy")
+    parser.add_argument('--inputfile', help='name(s) of input npy file(s) (e.g. "input.npy")', nargs="*", type=str, default=["input.npy"])
+    parser.add_argument('--outputfile', help='name(s) of output npy file(s) (e.g. "output.npy")', nargs="*", type=str, default=["output.npy"])
parser.add_argument('--runtime_weight_dir', help='path to folder containing runtime-writable .dat weights', default="runtime_weights/")
# parse arguments
args = parser.parse_args()
@@ -111,16 +114,15 @@
# for the remote execution the data from the input npy file has to be loaded,
# packed and copied to the PYNQ buffer
if exec_mode == "execute":
- # remove old output file to prevent reusing old output
- # in case execution fails
- try:
- os.remove(outputfile)
- except FileNotFoundError:
- pass
- # load desired input .npy file
- ibuf_normal = np.load(inputfile)
+ # load desired input .npy file(s)
+ ibuf_normal = []
+ for ifn in inputfile:
+ ibuf_normal.append(np.load(ifn))
obuf_normal = accel.execute(ibuf_normal)
- np.save(outputfile, obuf_normal)
+ if not isinstance(obuf_normal, list):
+ obuf_normal = [obuf_normal]
+ for o, obuf in enumerate(obuf_normal):
+ np.save(outputfile[o], obuf)
elif exec_mode == "throughput_test":
# remove old metrics file
try:
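
The template now loads every listed input file and saves each output buffer under the matching output name. A self-contained sketch of that multi-I/O logic, with a stand-in for accel.execute and illustrative file names:

```python
import numpy as np

inputfile = ["input0.npy", "input1.npy"]    # illustrative names
outputfile = ["output0.npy", "output1.npy"]
for ifn in inputfile:  # create dummy inputs so the sketch runs standalone
    np.save(ifn, np.zeros((1, 8), dtype=np.uint8))

def accel_execute(bufs):  # stand-in for accel.execute(...) on the board
    return bufs

ibuf_normal = [np.load(ifn) for ifn in inputfile]
obuf_normal = accel_execute(ibuf_normal)
if not isinstance(obuf_normal, list):
    obuf_normal = [obuf_normal]
for o, obuf in enumerate(obuf_normal):
    np.save(outputfile[o], obuf)
```
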
diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py
index bd5de66cf5..a12f359c7d 100644
--- a/src/finn/transformation/fpgadataflow/templates.py
+++ b/src/finn/transformation/fpgadataflow/templates.py
@@ -103,8 +103,8 @@
# set board part repo paths to find PYNQ-Z1/Z2
set paths_prop [get_property BOARD_PART_REPO_PATHS [current_project]]
set paths_param [get_param board.repoPaths]
-lappend paths_prop /workspace/finn/board_files
-lappend paths_param /workspace/finn/board_files
+lappend paths_prop /workspace/board_files
+lappend paths_param /workspace/board_files
set_property BOARD_PART_REPO_PATHS $paths_prop [current_project]
set_param board.repoPaths $paths_param
@@ -119,6 +119,7 @@
set ZYNQ_TYPE "zynq_us+"
} elseif {$BOARD == "Pynq-Z2"} {
set ZYNQ_TYPE "zynq_7000"
+ set_property board_part tul.com.tw:pynq-z2:part0:1.0 [current_project]
} elseif {$BOARD == "Pynq-Z1"} {
set ZYNQ_TYPE "zynq_7000"
set_property board_part www.digilentinc.com:pynq-z1:part0:1.0 [current_project]
@@ -134,6 +135,7 @@
set_property -dict [list CONFIG.PSU__USE__S_AXI_GP2 {1}] [get_bd_cells zynq_ps]
set_property -dict [list CONFIG.PSU__USE__M_AXI_GP1 {0}] [get_bd_cells zynq_ps]
#set frequency of PS clock (this can't always be exactly met)
+ set_property -dict [list CONFIG.PSU__OVERRIDE__BASIC_CLOCK {0}] [get_bd_cells zynq_ps]
set_property -dict [list CONFIG.PSU__CRL_APB__PL0_REF_CTRL__FREQMHZ [expr int($FREQ_MHZ)]] [get_bd_cells zynq_ps]
} elseif {$ZYNQ_TYPE == "zynq_7000"} {
create_bd_cell -type ip -vlnv xilinx.com:ip:processing_system7:5.5 zynq_ps
diff --git a/src/finn/transformation/fpgadataflow/vitis_build.py b/src/finn/transformation/fpgadataflow/vitis_build.py
index c52dfcf0cd..a286532141 100644
--- a/src/finn/transformation/fpgadataflow/vitis_build.py
+++ b/src/finn/transformation/fpgadataflow/vitis_build.py
@@ -26,34 +26,33 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import json
import os
import subprocess
-import json
+from enum import Enum
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.base import Transformation
from finn.custom_op.registry import getCustomOp
-
+from finn.transformation.base import Transformation
from finn.transformation.fpgadataflow.create_dataflow_partition import (
CreateDataflowPartition,
)
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.floorplan import Floorplan
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
-from finn.transformation.fpgadataflow.floorplan import Floorplan
-from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
from finn.transformation.general import (
GiveReadableTensorNames,
GiveUniqueNodeNames,
RemoveUnusedTensors,
)
-from finn.util.basic import make_build_dir
from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.util.basic import make_build_dir
+
from . import templates
-from enum import Enum
def _check_vitis_envvars():
@@ -207,7 +206,10 @@ def apply(self, model):
# has axis, aximm and axilite
# everything else is axis-only
# assume only one connection from each ip to the next
- producer = model.find_producer(node.input[0])
+ if len(node.input) == 0:
+ producer = None
+ else:
+ producer = model.find_producer(node.input[0])
consumer = model.find_consumers(node.output[0])
# define kernel instances
# name kernels connected to graph inputs as idmaxx
@@ -223,6 +225,7 @@ def apply(self, model):
else:
instance_names[node.name] = node.name
config.append("nk=%s:1:%s" % (node.name, instance_names[node.name]))
+ sdp_node.set_nodeattr("instance_name", instance_names[node.name])
# explicitly assign SLRs if the slr attribute is not -1
node_slr = sdp_node.get_nodeattr("slr")
if node_slr != -1:
@@ -231,7 +234,7 @@ def apply(self, model):
if producer is None or consumer is None:
node_mem_port = sdp_node.get_nodeattr("mem_port")
if node_mem_port == "":
- #configure good defaults based on board
+ # configure good defaults based on board
if "u50" in self.platform or "u280" in self.platform:
# Use HBM where available (also U50 does not have DDR)
mem_type = "HBM"
@@ -251,7 +254,9 @@ def apply(self, model):
mem_type = "DDR"
mem_idx = 1
node_mem_port = "%s[%d]" % (mem_type, mem_idx)
- config.append("sp=%s.m_axi_gmem0:%s" % (instance_names[node.name], node_mem_port))
+ config.append(
+ "sp=%s.m_axi_gmem0:%s" % (instance_names[node.name], node_mem_port)
+ )
# connect streams
if producer is not None:
for i in range(len(node.input)):
@@ -373,6 +378,7 @@ def __init__(
enable_debug=False,
floorplan_file=None,
enable_link=True,
+ partition_model_dir=None,
):
super().__init__()
self.fpga_part = fpga_part
@@ -382,6 +388,7 @@ def __init__(
self.enable_debug = enable_debug
self.floorplan_file = floorplan_file
self.enable_link = enable_link
+ self.partition_model_dir = partition_model_dir
def apply(self, model):
_check_vitis_envvars()
@@ -396,7 +403,9 @@ def apply(self, model):
model = model.transform(Floorplan(floorplan=self.floorplan_file))
- model = model.transform(CreateDataflowPartition())
+ model = model.transform(
+ CreateDataflowPartition(partition_model_dir=self.partition_model_dir)
+ )
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
@@ -437,6 +446,4 @@ def apply(self, model):
# set platform attribute for correct remote execution
model.set_metadata_prop("platform", "alveo")
- # create driver
- model = model.transform(MakePYNQDriver(platform="alveo"))
return (model, False)
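
For context on the memory-port defaults touched above: HBM is preferred where available (the U50 has no DDR), otherwise a DDR bank is used. A hedged sketch of that selection; the HBM bank index is an assumption, since it is not shown in this hunk:

```python
def default_mem_port(platform):
    # use HBM where available (also U50 does not have DDR)
    if "u50" in platform or "u280" in platform:
        mem_type, mem_idx = "HBM", 0  # assumed HBM bank index
    else:
        mem_type, mem_idx = "DDR", 1  # DDR fallback, as in the hunk above
    return "%s[%d]" % (mem_type, mem_idx)

print(default_mem_port("xilinx_u280_xdma_201920_3"))  # HBM[0]
```
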
diff --git a/src/finn/transformation/move_reshape.py b/src/finn/transformation/move_reshape.py
index 990b858ad6..6c9a297337 100644
--- a/src/finn/transformation/move_reshape.py
+++ b/src/finn/transformation/move_reshape.py
@@ -1,7 +1,8 @@
+import warnings
+
+from finn.custom_op.registry import getCustomOp
from finn.transformation.base import Transformation
from finn.util.basic import get_by_name, is_finn_op
-from finn.custom_op.registry import getCustomOp
-import warnings
def _is_fpgadataflow_node(node):
diff --git a/src/finn/transformation/qonnx/convert_qonnx_to_finn.py b/src/finn/transformation/qonnx/convert_qonnx_to_finn.py
new file mode 100644
index 0000000000..70656e4d09
--- /dev/null
+++ b/src/finn/transformation/qonnx/convert_qonnx_to_finn.py
@@ -0,0 +1,99 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from qonnx.transformation.quant_constant_folding import FoldTransposeIntoQuantInit
+
+from finn.transformation.base import Transformation
+from finn.transformation.extract_conv_bias import ExtractBiasFromConv
+from finn.transformation.gemm_to_matmul import GemmToMatMul
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.qonnx.fold_quant_weights import FoldQuantWeights
+from finn.transformation.qonnx.infer_quant_avg_pool_2d import (
+ AvgPoolAndTruncToQuantAvgPool,
+)
+from finn.transformation.qonnx.quant_act_to_multithreshold import (
+ ConvertQuantActToMultiThreshold,
+ default_filter_function_generator,
+)
+from finn.transformation.remove import RemoveIdentityOps
+
+
+class ConvertQONNXtoFINN(Transformation):
+ """Converts QONNX dialect to FINN ONNX dialect.
+ First the weights are converted using the FoldQuantWeights transformation,
+ then the ConvertQuantActToMultiThreshold transformation is used to convert
+ the activations.
+    If incompatibilities are found, a ValueError or RuntimeError is raised.
+
+ The optional keyword argument `filter_function`
+ presents a way to control which Quant and BipolarQuant nodes in the activation path
+ are converted to MultiThreshold nodes. A warning will be emitted when a Quant node
+ is not converted to a MultiThreshold node.
+
+    :param filter_function: Each candidate Quant and BipolarQuant node is first evaluated
+        by this function. If the function returns False,
+        then the node is not converted to a MultiThreshold node.
+        The function is given the model and candidate node as parameters.
+        By default, a filter function is used which disables the conversion of
+        Quant nodes with a bit width larger than 8.
+ Defaults to: default_filter_function_generator(max_multithreshold_bit_width=8)
+ """
+
+ def __init__(
+ self,
+ filter_function=default_filter_function_generator(
+ max_multithreshold_bit_width=8
+ ),
+ ):
+ super().__init__()
+ self._filter_function = filter_function
+
+ def apply(self, model):
+ # Extract the bias from Conv node
+ model = model.transform(ExtractBiasFromConv())
+ # Gemm operations are not supported by FINN, so we convert them to MatMul
+ model = model.transform(GemmToMatMul())
+ model = model.transform(FoldTransposeIntoQuantInit())
+ # Make sure the datatypes exist, these are required for folding the weights
+ model = model.transform(InferDataTypes())
+ # Fold weights
+ model = model.transform(FoldQuantWeights())
+ # Convert activations
+ model = model.transform(
+ ConvertQuantActToMultiThreshold(
+ filter_function=self._filter_function,
+ )
+ )
+ # Recompute datatypes
+ model = model.transform(InferDataTypes())
+ # Convert AvgPool -> Mul -> Trunc structure to QuantAvgPool2d
+ model = model.transform(AvgPoolAndTruncToQuantAvgPool())
+ # Remove empty padding if it exists
+ model = model.transform(RemoveIdentityOps())
+
+ return model, False
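
A minimal usage sketch for the new conversion entry point; the model file names are illustrative, and the filter function shown is just the documented default spelled out:

```python
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from finn.transformation.qonnx.quant_act_to_multithreshold import (
    default_filter_function_generator,
)

model = ModelWrapper("model_qonnx.onnx")  # illustrative input file
model = model.transform(
    ConvertQONNXtoFINN(
        filter_function=default_filter_function_generator(
            max_multithreshold_bit_width=8
        )
    )
)
model.save("model_finn.onnx")  # illustrative output file
```
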
diff --git a/src/finn/transformation/qonnx/fold_quant_weights.py b/src/finn/transformation/qonnx/fold_quant_weights.py
new file mode 100644
index 0000000000..12c854d3ba
--- /dev/null
+++ b/src/finn/transformation/qonnx/fold_quant_weights.py
@@ -0,0 +1,205 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+from onnx import TensorProto, helper
+from qonnx.transformation.quant_constant_folding import FoldTransposeIntoQuantInit
+
+import finn.core.onnx_exec as oxe
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.base import Transformation
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.remove import remove_node_and_rewire
+
+
+class FoldQuantWeights(Transformation):
+ """Merges Quant nodes, which are used as weights into the initializer
+ of the weight tensor.
+ """
+
+ def apply(self, model):
+ graph = model.graph
+ node_ind = 0
+ graph_modified = False
+ execution_context = model.make_empty_exec_context()
+ for n in graph.node:
+ node_ind += 1
+ if n.op_type == "Quant" or n.op_type == "BipolarQuant":
+ node_inp_inits = list(map(lambda x: model.get_initializer(x), n.input))
+ node_inp_dyn = list(filter(lambda x: x is None, node_inp_inits))
+ node_out = n.output[0]
+ is_all_constant_inputs = len(node_inp_dyn) == 0
+ ishape = model.get_tensor_shape(n.input[0])
+ is_const_shape = (n.op_type == "Shape") and (ishape is not None)
+ if is_all_constant_inputs or is_const_shape:
+ # Check node validity
+ if (
+ n.op_type == "Quant"
+ and not model.get_initializer(n.input[2]) == 0
+ ):
+ raise ValueError(
+ "Only Quant nodes with zero-point == 0 "
+ "are currently supported."
+ )
+ if model.is_fork_node(n):
+ raise ValueError(
+ "Weights quantized with the Quant node are not "
+ "allowed to be fork nodes node."
+ )
+ target_node = model.find_direct_successors(n)
+ if target_node is None:
+ raise RuntimeError(
+ "Weights quantized with the Quant node must have "
+ "a successor node."
+ )
+ else:
+ target_node = target_node[0]
+                # If there is a DebugMarker in the weight path, it needs to
+                # be removed before any further action is taken, because it
+                # interferes with how the constant folding determines how to
+                # apply scale factors, and in any case the DebugMarker would
+                # not return useful information after folding.
+ if target_node.op_type == "DebugMarker":
+ remove_node_and_rewire(model, target_node)
+ model = model.transform(FoldTransposeIntoQuantInit())
+ return model, True
+
+ # Continue with constant folding the quant node
+ scale = model.get_initializer(n.input[1])
+ unity_scale = (scale.flatten() == 1.0).all()
+ # this node has no dynamic inputs, only constant ones -- so we can
+ # do constant folding.
+ oxe.execute_node(n, execution_context, graph)
+ q_node_output = execution_context[node_out]
+ # Check we can directly constant fold
+ if unity_scale:
+ # use the execution result as an initializer
+ model.set_initializer(node_out, q_node_output)
+ else:
+ # Check next operator type
+ mul_like_nodes = ["Mul", "Div", "Conv", "MatMul"]
+ add_like_nodes = ["Add", "Sub"]
+ all_supported_ops = mul_like_nodes.copy()
+ all_supported_ops.extend(add_like_nodes)
+
+ if target_node.op_type not in all_supported_ops:
+ raise ValueError(
+ f"Can't constant fold Quant weight node "
+ f"into node type {target_node.op_type} "
+ f"at node: {target_node}."
+ )
+
+                        # For both Mul and Add:
+                        # move the scale factor behind the next operator
+ scale = model.get_initializer(n.input[1])
+ new_initializer = q_node_output / scale
+                        # round to correct for floating-point errors
+ new_initializer = np.round(new_initializer)
+ model.set_initializer(node_out, new_initializer)
+ q_inst = getCustomOp(n)
+ new_dtype = q_inst.get_integer_datatype(model)
+ model.set_tensor_datatype(node_out, new_dtype)
+
+ # Reshape scale for Conv if required
+ if target_node.op_type == "Conv" and len(scale.shape) > 0:
+ bias_shape = [1] * len(scale.shape)
+ bias_shape[1] = -1
+ scale = scale.reshape(bias_shape)
+
+ if scale.shape == (1,):
+ scale = scale[0]
+ mul_shape = tuple()
+ else:
+ mul_shape = scale.shape
+ mul_tensor = helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(),
+ TensorProto.FLOAT,
+ mul_shape,
+ )
+ graph.value_info.append(mul_tensor)
+ model.set_initializer(mul_tensor.name, scale)
+
+ successor = model.find_consumers(node_out)
+ if successor is None:
+ raise RuntimeError(
+ "Can only constant fold scaled Quant weights "
+ "if a successor exists."
+ )
+ successor = successor[0]
+ succ_output_name = successor.output[0]
+
+ output_shape = model.get_tensor_shape(successor.output[0])
+ act_mul_tensor = helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(),
+ TensorProto.FLOAT,
+ output_shape,
+ )
+ graph.value_info.append(act_mul_tensor)
+ successor.output[0] = act_mul_tensor.name
+
+ mul_node = helper.make_node(
+ "Mul",
+ [act_mul_tensor.name, mul_tensor.name],
+ [succ_output_name],
+ )
+ graph.node.insert(node_ind, mul_node)
+
+ if target_node.op_type in add_like_nodes:
+                            # for Add-like nodes the scale factor must also
+                            # be applied in front of the next operator
+ div_tensor = helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(),
+ TensorProto.FLOAT,
+ mul_shape,
+ )
+ graph.value_info.append(div_tensor)
+ model.set_initializer(div_tensor.name, scale)
+
+ succ_input_name = successor.input[0]
+ act_mul_tensor = helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(),
+ TensorProto.FLOAT,
+ output_shape,
+ )
+ graph.value_info.append(act_mul_tensor)
+ successor.input[0] = act_mul_tensor.name
+
+ div_node = helper.make_node(
+ "Div",
+ [succ_input_name, div_tensor.name],
+ [act_mul_tensor.name],
+ )
+ graph.node.insert(node_ind, div_node)
+
+ # remove old node
+ graph.node.remove(n)
+ graph_modified = True
+ model = model.transform(InferShapes())
+ return (model, graph_modified)
+ return (model, graph_modified)
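
The non-unity-scale branch above stores round(q / s) as the new integer initializer and re-applies the scale s via a Mul inserted behind the consumer (plus a Div in front of it for Add-like consumers). A numeric sanity check of the Mul-like case, with illustrative values:

```python
import numpy as np

W_q = np.array([0.5, -1.0, 1.5])  # q_node_output: dequantized Quant output
scale = 0.5
W_int = np.round(W_q / scale)     # new integer initializer: [1., -2., 3.]

x = np.array([2.0, 3.0, 4.0])
before = x * W_q                  # original graph: consumer Mul sees W_q
after = (x * W_int) * scale       # rewritten: Mul by W_int, then Mul by scale
assert np.allclose(before, after)
```
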
diff --git a/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py b/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py
new file mode 100644
index 0000000000..faad31fa06
--- /dev/null
+++ b/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py
@@ -0,0 +1,315 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import math
+from onnx import TensorProto, helper
+
+from finn.core.datatype import DataType
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.base import Transformation
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import get_by_name
+
+
+def _get_signed_from_upstream(model, trunc_node):
+ """
+    Find out what the sign of the input to the Trunc node is
+    by looking at the upstream nodes.
+ """
+ node = trunc_node
+ # Check if the input of this node already has a FINN datatype
+ signed = None
+ inp_dt = model.get_tensor_datatype(node.input[0])
+ if inp_dt is not None and inp_dt is not DataType["FLOAT32"]:
+ signed = inp_dt.signed()
+    # Go further up the graph; since datatype inference works top-down,
+    # these nodes should either be sign-preserving ops or already have a
+    # datatype defined for the output tensor.
+ curr_node = node
+ if signed is None:
+ while curr_node is not None:
+ if model.is_join_node(curr_node):
+ raise RuntimeError(
+ "Datatype Inference for the Trunc node only supports "
+ "linear nodes in the upstream path."
+ )
+ next_node = model.find_direct_predecessors(curr_node)
+ if next_node is None:
+ raise RuntimeError(
+ "Could not infere the Datatype for the Trunc node due to "
+ "missing upstream ndoes."
+ )
+ next_node = next_node[0]
+ out_dt = model.get_tensor_datatype(next_node.output[0])
+ if out_dt is not None and out_dt is not DataType["FLOAT32"]:
+ signed = out_dt.signed()
+ break
+ # Special cases where the node has an internal or intrinsic datatype.
+ if next_node.op_type == "MultiThreshold":
+ mt_inst = getCustomOp(next_node)
+ out_dt = DataType[mt_inst.get_nodeattr("out_dtype")]
+ if out_dt is not None and out_dt is not DataType["FLOAT32"]:
+ signed = out_dt.signed()
+ break
+ if next_node.op_type == "BipolarQuant":
+ signed = True
+ break
+ if next_node.op_type == "Quant":
+ q_inst = getCustomOp(next_node)
+ out_dt = q_inst.get_integer_datatype(model)
+ if out_dt is not None and out_dt is not DataType["FLOAT32"]:
+ signed = out_dt.signed()
+ break
+
+ # Check if we are allowed to move on to the next op
+ sign_preserving_ops = ["Add", "Mul", "AveragePool", "Pad"]
+ if next_node.op_type not in sign_preserving_ops:
+ raise RuntimeError(
+ f"Could not infere the Datatype for the Trunc node, "
+ f"because the sign of the input datatype could not be infered "
+ f"from upstream nodes. And traversal further up the graph was "
+ f"disallowed, since the next node type {next_node.op_type} "
+ f"is not in the list of "
+ f"sign preserving ops {sign_preserving_ops}."
+ )
+ curr_node = next_node
+
+ if signed is None:
+ raise RuntimeError(
+ "Could not infere the Datatype for the Trunc node, "
+ "because the sign of the input datatype could not be infered "
+ "from upstream nodes."
+ )
+
+ return signed
+
+
+class AvgPoolAndTruncToQuantAvgPool(Transformation):
+ """
+ Convert a section of nodes of the pattern:
+ AveragePool -> Mul (scalar) -> Trunc
+    to the FINN op: QuantAvgPool2d
+ """
+
+ def apply(self, model):
+ graph = model.graph
+ node_ind = 0
+ for n in graph.node:
+ node_ind += 1
+ if n.op_type == "AveragePool":
+ mul_node = model.find_direct_successors(n)
+ if (
+ mul_node is not None
+ and len(mul_node) == 1
+ and mul_node[0].op_type == "Mul"
+ ):
+ mul_node = mul_node[0]
+ t_node = model.find_direct_successors(mul_node)
+ if (
+ t_node is not None
+ and len(t_node) == 1
+ and t_node[0].op_type == "Trunc"
+ ):
+ t_node = t_node[0]
+ running_node_index = node_ind
+ # Check node for compatibility
+ # Avg pooling node
+ k_s = get_by_name(n.attribute, "kernel_shape")
+ if k_s is None or len(k_s.ints) != 2 or len(set(k_s.ints)) != 1:
+ raise ValueError(
+ "FINN only supports average pooling with "
+ "2D square kernels."
+ )
+ k_s = k_s.ints[0]
+
+ pads = get_by_name(n.attribute, "pads")
+ if (
+ pads is None
+ or len(set(pads.ints)) != 1
+ or pads.ints[0] != 0
+ ):
+ raise ValueError(
+ "FINN dosn't support padding for average pooling."
+ )
+
+ stride = get_by_name(n.attribute, "strides")
+ if (
+ stride is None
+ or len(stride.ints) != 2
+ or len(set(stride.ints)) != 1
+ ):
+ raise ValueError(
+ "FINN only supports 2D strides with equal values in "
+ "each direction."
+ )
+ stride = stride.ints[0]
+
+ # Mul node
+ mul_val = model.get_initializer(mul_node.input[1])
+ if (
+ mul_val is None
+ or len(mul_val.shape) != 0
+ or mul_val != k_s * k_s
+ ):
+ raise ValueError(
+ f"The Mul node after the AveragePool node must have "
+ f"static initialization at the second input, "
+ f"further the initialization must be of zero dimension "
+ f"and the value of the initialization must be "
+ f"the quadratic value of the kernel size, "
+ f"in this case {k_s * k_s}."
+ )
+
+ # Trunc node
+ rounding_mode = get_by_name(t_node.attribute, "rounding_mode")
+ if rounding_mode is None or rounding_mode.s != b"FLOOR":
+ raise ValueError(
+ "The Trunc node must have the rounding_mode "
+ "set to 'FLOOR'."
+ )
+ for inp in t_node.input[1:]:
+ if model.get_initializer(inp) is None:
+ raise ValueError(
+ f"All inputs of the Trunc node, "
+ f"except the first, must be statically "
+ f"initialized. However, {inp} is not."
+ )
+ zero_pt = model.get_initializer(t_node.input[2])
+ if len(zero_pt.shape) != 0 or zero_pt != 0:
+ raise ValueError(
+ f"Finn only supports 0 as the zero point for "
+ f"the Trunc node, it currently is {zero_pt}."
+ )
+ trunc_in_bits = model.get_initializer(t_node.input[3]).flatten()
+ trunc_out_bits = model.get_initializer(
+ t_node.input[4]
+ ).flatten()
+ if (
+ len(trunc_in_bits.shape) != 1
+ or len(trunc_out_bits.shape) != 1
+ ):
+ raise ValueError(
+ f"Finn only supports scalar bit widths "
+ f"for the Trunc node. The input bit width "
+ f"currently is: {trunc_in_bits}, "
+ f"while the output bit width is: {trunc_out_bits}."
+ )
+ trunc_in_bits = int(trunc_in_bits[0])
+ trunc_out_bits = int(trunc_out_bits[0])
+
+ # Calculate parameters for the QuantAvgPool2d node.
+ # The input bit width is derived by inverting the computation in:
+ # https://github.com/Xilinx/finn-base/blob/
+ # 7c2603a95e90e4de2575020e575c24eab6a15889/src/finn/custom_op/
+ # general/quantavgpool2d.py#L94
+ ibits = math.floor(
+ math.log(2 ** trunc_in_bits / (k_s * k_s), 2)
+ )
+ # Get sign
+ signed = _get_signed_from_upstream(model, t_node)
+ # TODO: Change this to NHWC,
+ # once the channels-last layout is supported.
+ data_layout = "NCHW"
+
+ # Insert scale nodes, QuantAvgPool2d node and required tensors
+ scale = model.get_initializer(t_node.input[1])
+ scale_div_tensor = helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(),
+ TensorProto.FLOAT,
+ None,
+ )
+ graph.value_info.append(scale_div_tensor)
+ model.set_initializer(scale_div_tensor.name, scale)
+
+ act_scale_div_tensor = helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(),
+ TensorProto.FLOAT,
+ None,
+ )
+ graph.value_info.append(act_scale_div_tensor)
+
+ scale_div_node = helper.make_node(
+ "Div",
+ [n.input[0], scale_div_tensor.name],
+ [act_scale_div_tensor.name],
+ )
+ graph.node.insert(running_node_index, scale_div_node)
+ running_node_index += 1
+
+ act_scale_mul_tensor = helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(),
+ TensorProto.FLOAT,
+ None,
+ )
+ graph.value_info.append(act_scale_mul_tensor)
+
+ QuantAvgPool2d_node = helper.make_node(
+ "QuantAvgPool2d",
+ [act_scale_div_tensor.name],
+ [act_scale_mul_tensor.name],
+ domain="finn.custom_op.general",
+ stride=stride,
+ kernel=k_s,
+ ibits=ibits,
+ obits=trunc_out_bits,
+ signed=int(signed),
+ data_layout=data_layout,
+ )
+ graph.node.insert(running_node_index, QuantAvgPool2d_node)
+ running_node_index += 1
+
+ scale_mul_tensor = helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(),
+ TensorProto.FLOAT,
+ None,
+ )
+ graph.value_info.append(scale_mul_tensor)
+ model.set_initializer(scale_mul_tensor.name, scale)
+
+ scale_mul_node = helper.make_node(
+ "Mul",
+ [act_scale_mul_tensor.name, scale_mul_tensor.name],
+ [t_node.output[0]],
+ )
+ graph.node.insert(running_node_index, scale_mul_node)
+ running_node_index += 1
+
+ # Remove old nodes
+ graph.node.remove(n)
+ graph.node.remove(mul_node)
+ graph.node.remove(t_node)
+
+ # Recompute shapes and datatypes
+ model = model.transform(InferShapes())
+ model = model.transform(InferDataTypes())
+
+ return model, True
+
+ return model, False
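To make the bit-width bookkeeping above concrete, here is a minimal standalone sketch (not part of the patch; the values for `trunc_in_bits` and `k_s` are illustrative) of how the QuantAvgPool2d input bit width is recovered from the Trunc parameters:

```python
import math

def derive_ibits(trunc_in_bits: int, k_s: int) -> int:
    # The accumulator of a k_s x k_s average pool over ibits-wide inputs
    # needs roughly ibits + log2(k_s * k_s) bits, so the input bit width
    # is recovered by inverting that relation.
    return math.floor(math.log(2 ** trunc_in_bits / (k_s * k_s), 2))

# Example: a 2x2 pool truncating from 16 bits implies 14-bit inputs.
assert derive_ibits(16, 2) == 14
```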
diff --git a/src/finn/transformation/qonnx/qonnx_activation_handlers.py b/src/finn/transformation/qonnx/qonnx_activation_handlers.py
new file mode 100644
index 0000000000..3336b1eee7
--- /dev/null
+++ b/src/finn/transformation/qonnx/qonnx_activation_handlers.py
@@ -0,0 +1,524 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+from abc import ABC, abstractmethod
+from onnx import TensorProto, helper
+
+from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.registry import getCustomOp
+
+
+class QuantActBaseHandler(ABC):
+ """Base class for converting quantized activation expressed in the QONNX dialect
+ to the FINN ONNX dialect.
+ :param model: The model on which this handler should operate.
+ :type model: class: `finn.core.modelwrapper.ModelWrapper`
+ :param quant_node: The Quant node which a given handler should replace.
+ :param quant_node_index: The index of the Quant node in the given model.
+ :type quant_node_index: `int`
+ """
+
+ def __init__(self, model: ModelWrapper, quant_node, quant_node_index: int):
+ """Base class constructor"""
+ super().__init__()
+ self._model = model
+ self._q_node = quant_node
+ self._q_index = quant_node_index
+
+ @property
+ @classmethod
+ @abstractmethod
+ def valid_predecessor_op_types(self):
+ """Defines which op types the preceding node is allowed to have for
+ this type of activation.
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def _check_compatibility(self):
+ """Check for compatibility with FINN.
+ QONNX allows many more combinations of settings than are supported by FINN.
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def _calculate_act_bias(self):
+ """Calculate the activation bias,
+ which is introduced as an Add node behind the MultiThreshold node.
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def _calculate_thresholds(self):
+ """Calculate the threshold array for the MultiThreshold node."""
+ raise NotImplementedError()
+
+ @abstractmethod
+ def _calculate_act_scale(self):
+ """Calculate the activation scale,
+ which is introduced as a Mul node behind the Add node
+ for the activation bias.
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def _remove_activation_node(self, multi_threshold_node):
+ """Remove the activation node in front of the Quant node."""
+ raise NotImplementedError()
+
+ def _extract_output_datatype(self):
+ """Get the output datatype for the MultiThreshold node."""
+ q_inst = getCustomOp(self._q_node)
+ dtype = q_inst.get_integer_datatype(self._model)
+ dtype = dtype.name
+ return dtype
+
+ def calculate_node_parameters(self):
+ """Calculate all parameters required for replacing the QONNX style activation
+ with a FINN style one.
+ """
+ return {
+ "out_dtype": self._extract_output_datatype(),
+ "thresholds": self._calculate_thresholds(),
+ "adder_bias": self._calculate_act_bias(),
+ "mul_scale": self._calculate_act_scale(),
+ }
+
+ def replace_quant_node(self):
+ """Replace the given QONNX style activation with a FINN style one."""
+
+ # Check that we actually support what the user is trying to do
+ self._check_compatibility()
+
+ # Shorten instance variables
+ model = self._model
+ graph = model.graph
+ n = self._q_node
+ running_node_index = self._q_index
+
+ # Calculate insertion parameters
+ parameter_dict = self.calculate_node_parameters()
+ thresholds = parameter_dict["thresholds"]
+ out_dtype = parameter_dict["out_dtype"]
+ adder_bias = parameter_dict["adder_bias"]
+ mul_scale = parameter_dict["mul_scale"]
+
+ # Modify graph
+ # Insert threshold tensor
+ thresh_tensor = helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(),
+ TensorProto.FLOAT,
+ thresholds.shape,
+ )
+ graph.value_info.append(thresh_tensor)
+ model.set_initializer(thresh_tensor.name, thresholds)
+
+ # Insert MultiThreshold node
+ outp_trans_node = helper.make_node(
+ "MultiThreshold",
+ [n.input[0], thresh_tensor.name],
+ [n.output[0]],
+ out_dtype="FLOAT32",
+ domain="finn.custom_op.general",
+ )
+ graph.node.insert(running_node_index, outp_trans_node)
+ running_node_index += 1
+
+ # Get the MultiThreshold node instance to work with
+ mt_node = graph.node[running_node_index - 1]
+ mt_inst = getCustomOp(mt_node)
+
+ # Set scale and bias
+ # If these values are scalar then they can be set as attributes
+ # of the MultiThreshold node; if not, they are inserted as Add and Mul
+ # nodes behind the MultiThreshold node.
+ bias_scalar = adder_bias.shape == (1,) or len(adder_bias.shape) == 0
+ scale_scalar = mul_scale.shape == (1,) or len(mul_scale.shape) == 0
+ if scale_scalar and bias_scalar and self._q_node.op_type == "BipolarQuant":
+ # Get Quant parameters
+ mul_scale = np.atleast_1d(mul_scale)
+ # ONNX only accepts 64-bit floats as attributes
+ mul_scale = mul_scale.astype(dtype=np.float64)
+ adder_bias = np.atleast_1d(adder_bias)
+ adder_bias = adder_bias.astype(dtype=np.float64)
+
+ # Set Bias and scale
+ mt_inst.set_nodeattr("out_scale", mul_scale[0])
+ # FINN applies scale first then bias,
+ # which is the other way around in Brevitas,
+ # we thus need to adjust the bias in the MultiThreshold node
+ finn_bias = adder_bias[0] * mul_scale[0]
+ mt_inst.set_nodeattr("out_bias", finn_bias)
+
+ # Set the output data type
+ mt_inst.set_nodeattr("out_dtype", out_dtype)
+ else:
+ # Set datatype
+ mt_inst.set_nodeattr("out_dtype", out_dtype)
+
+ # Insertion parameters
+ up_stream_node = mt_node
+
+ # Set bias
+ zero_bias = False
+ if bias_scalar:
+ adder_bias = np.atleast_1d(adder_bias)
+ # ONNX only accepts 64-bit floats as attributes
+ adder_bias = adder_bias.astype(dtype=np.float64)[0]
+ add_shape = tuple()
+ if adder_bias == 0.0:
+ zero_bias = True
+ else:
+ add_shape = adder_bias.shape
+
+ if not zero_bias:
+ # Insert Add node
+ add_tensor = helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(),
+ TensorProto.FLOAT,
+ add_shape,
+ )
+ graph.value_info.append(add_tensor)
+ model.set_initializer(add_tensor.name, adder_bias)
+
+ output_shape = model.get_tensor_shape(n.output[0])
+ act_add_tensor = helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(),
+ TensorProto.FLOAT,
+ output_shape,
+ )
+ graph.value_info.append(act_add_tensor)
+
+ add_node = helper.make_node(
+ "Add",
+ [act_add_tensor.name, add_tensor.name],
+ [n.output[0]],
+ )
+ graph.node.insert(running_node_index, add_node)
+ running_node_index += 1
+ add_node = graph.node[running_node_index - 1]
+
+ # Re-point the upstream node
+ up_stream_node.output[0] = act_add_tensor.name
+ up_stream_node = add_node
+
+ # Set scale
+ # Insert Mul node
+ unity_scale = False
+ if scale_scalar:
+ mul_scale = np.atleast_1d(mul_scale)
+ mul_scale = mul_scale.astype(dtype=np.float64)[0]
+ mul_shape = tuple()
+ if mul_scale == 1.0:
+ unity_scale = True
+ else:
+ mul_shape = mul_scale.shape
+
+ if not unity_scale:
+ mul_tensor = helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(),
+ TensorProto.FLOAT,
+ mul_shape,
+ )
+ graph.value_info.append(mul_tensor)
+ model.set_initializer(mul_tensor.name, mul_scale)
+
+ output_shape = model.get_tensor_shape(n.output[0])
+ act_mul_tensor = helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(),
+ TensorProto.FLOAT,
+ output_shape,
+ )
+ graph.value_info.append(act_mul_tensor)
+
+ mul_node = helper.make_node(
+ "Mul",
+ [act_mul_tensor.name, mul_tensor.name],
+ [n.output[0]],
+ )
+ graph.node.insert(running_node_index, mul_node)
+ running_node_index += 1
+ mul_node = graph.node[running_node_index - 1]
+
+ # Re-point the upstream node
+ up_stream_node.output[0] = act_mul_tensor.name
+ up_stream_node = mul_node
+
+ # Remove activation node
+ self._remove_activation_node(mt_node)
+
+ # Remove the Quant node
+ graph.node.remove(n)
+
+ # return the internal model representation
+ return self._model
+
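A quick numeric check of the bias adjustment performed above (a standalone sketch, not part of the patch): Brevitas computes `scale * (x + bias)`, while the MultiThreshold attributes apply `out_scale * x + out_bias`, so the bias must be pre-multiplied by the scale.

```python
import numpy as np

x = np.arange(4, dtype=np.float64)  # stand-in MultiThreshold outputs
scale, bias = 0.25, -0.5

brevitas_order = scale * (x + bias)  # bias first, then scale
finn_bias = bias * scale             # adjustment done in the handler
finn_order = scale * x + finn_bias   # scale first, then bias

assert np.allclose(brevitas_order, finn_order)
```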
+
+class QuantReluHandler(QuantActBaseHandler):
+ """Class for converting a quantized relu operation expressed in the QONNX
+ dialect to the FINN ONNX dialect."""
+
+ valid_predecessor_op_types = [
+ "Relu",
+ ]
+
+ def _check_compatibility(self):
+ if self._q_node.op_type == "Quant":
+ q_inst = getCustomOp(self._q_node)
+ narrow = q_inst.get_nodeattr("narrow")
+ signed = q_inst.get_nodeattr("signed")
+ if signed or narrow:
+ raise ValueError(
+ "FINN only supports unsigned and non-narrow Quant nodes "
+ "for Relu activations."
+ )
+ if not self._model.get_initializer(self._q_node.input[2]) == 0:
+ raise ValueError(
+ "Only Quant nodes with zero-point == 0 "
+ "are currently supported for ReLu activations."
+ )
+ elif self._q_node.op_type == "BipolarQuant":
+ return
+ else:
+ raise RuntimeError("Got an unexpected quantizer node type")
+
+ def _calculate_act_bias(self):
+ # No bias allowed for Relu activations, see: https://github.com/Xilinx/
+ # brevitas/blob/a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/
+ # export/onnx/finn/handler/act.py#L48
+ bias = np.array([0.0])
+ return bias
+
+ def _calculate_thresholds(self):
+ # Gather parameters
+ if self._q_node.op_type == "Quant":
+ bit_width = self._model.get_initializer(self._q_node.input[3])
+ elif self._q_node.op_type == "BipolarQuant":
+ bit_width = 1.0
+ else:
+ raise RuntimeError("Got an unexpected quantizer node type")
+ quant_scale = self._model.get_initializer(self._q_node.input[1]).astype(
+ np.float32
+ )
+
+ # Calculate thresholds, see: https://github.com/Xilinx/brevitas/blob/
+ # a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/export/
+ # onnx/finn/handler/act.py#L21
+ num_distinct_values = 2 ** bit_width
+ num_thresholds = int(num_distinct_values - 1)
+ flat_scale = quant_scale.flatten().astype(np.float32)
+ num_scale_channels = flat_scale.shape[0]
+ step = np.abs(flat_scale).astype(np.float32)
+ min_threshold = step / 2
+ thresholds = np.empty((num_scale_channels, num_thresholds)).astype(np.float32)
+ for c in range(num_scale_channels):
+ for t in range(num_thresholds):
+ thresholds[c][t] = min_threshold[c] + step[c] * t
+
+ # TODO: The index 1 needs to be changed to -1 for the channels-last format
+ num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[1]
+ final_shape = (num_output_channels, num_thresholds)
+ if thresholds.shape != final_shape:
+ thresholds = np.broadcast_to(thresholds, final_shape)
+
+ return thresholds
+
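For intuition, a small worked example of the ReLU threshold grid computed above (illustrative values, not part of the patch): with `bit_width = 2` and a per-tensor scale of 1.0 there are 2^2 - 1 = 3 thresholds, placed at step/2 + step*t.

```python
import numpy as np

bit_width, scale = 2, 1.0
num_thresholds = 2 ** bit_width - 1
step = abs(scale)
thresholds = step / 2 + step * np.arange(num_thresholds)
# Crossing each threshold bumps the output level by one:
# x < 0.5 -> 0, 0.5 <= x < 1.5 -> 1, 1.5 <= x < 2.5 -> 2, x >= 2.5 -> 3
assert np.allclose(thresholds, [0.5, 1.5, 2.5])
```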
+ def _calculate_act_scale(self):
+ # Gather parameters
+ quant_scale = self._model.get_initializer(self._q_node.input[1])
+ # Calculate scale, see: https://github.com/Xilinx/brevitas/blob/
+ # a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/export/
+ # onnx/finn/handler/act.py#L40
+ scale = quant_scale
+ return scale
+
+ def _remove_activation_node(self, multi_threshold_node):
+ # Find the activation node
+ act_node = self._model.find_direct_predecessors(self._q_node)
+ if act_node is None:
+ raise RuntimeError(
+ "For handling of Relu activations a predecesor to "
+ "the Quant node must exist."
+ )
+ act_node = act_node[0]
+ if act_node.op_type != "Relu":
+ raise RuntimeError(
+ "The predecessor of the Quant node must be a Relu node "
+ "for handling of ReLU activations."
+ )
+
+ # Reroute upstream tensor
+ multi_threshold_node.input[0] = act_node.input[0]
+
+ # Remove the activation node
+ self._model.graph.node.remove(act_node)
+
+
+class QuantIdentityHandler(QuantActBaseHandler):
+ """Class for converting a quantized identity operation expressed in the QONNX
+ dialect to the FINN ONNX dialect.
+ This handler also takes care of quantized HardTanh activations, because
+ these are equivalent to quantized identity activations.
+ """
+
+ valid_predecessor_op_types = [
+ "BatchNormalization",
+ "Sub",
+ "Add",
+ "Mul",
+ "Div",
+ "DebugMarker",
+ None,
+ ]
+
+ def _check_compatibility(self):
+ # Gather parameters to check
+ if self._q_node.op_type == "Quant":
+ q_inst = getCustomOp(self._q_node)
+ signed = q_inst.get_nodeattr("signed")
+ if not signed:
+ raise ValueError(
+ "FINN only supports signed Quant nodes for identity activations."
+ )
+ if not self._model.get_initializer(self._q_node.input[2]) == 0:
+ raise ValueError(
+ "Only Quant nodes with zero-point == 0 "
+ "are currently supported for identity activations."
+ )
+ elif self._q_node.op_type == "BipolarQuant":
+ quant_scale = self._model.get_initializer(self._q_node.input[1])
+ if (quant_scale.flatten().shape[0] != 1) or quant_scale.flatten()[0] != 1.0:
+ raise ValueError(
+ "FINN only supports Bipolar identity activations "
+ "with out per channel scaling and the scaling must be 1. "
+ )
+ else:
+ raise RuntimeError("Got an unexpected quantizer node type")
+
+ def _calculate_act_bias(self):
+ # Gather parameters
+ q_inst = getCustomOp(self._q_node)
+ if self._q_node.op_type == "Quant":
+ bit_width = self._model.get_initializer(self._q_node.input[3])
+ narrow = q_inst.get_nodeattr("narrow")
+ elif self._q_node.op_type == "BipolarQuant":
+ bit_width = 1.0
+ else:
+ raise RuntimeError("Got an unexpected quantizer node type")
+ # Calculate bias, see: https://github.com/Xilinx/brevitas/blob/
+ # a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/export/
+ # onnx/finn/handler/act.py#L64
+ if bit_width == 1.0:
+ bias = np.array([-0.5])
+ else:
+ if narrow:
+ min_non_scaled_val = -(2 ** (bit_width - 1) - 1)
+ else:
+ min_non_scaled_val = -(2 ** (bit_width - 1))
+ bias = np.array([min_non_scaled_val])
+ return bias
+
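As a sanity check on the rule above (illustrative only, not part of the patch): for a signed quantizer the bias is the smallest unscaled level, so a 4-bit quantizer yields -8, or -7 in narrow-range mode, while the 1-bit bipolar case yields -0.5.

```python
def identity_act_bias(bit_width: float, narrow: bool) -> float:
    # Mirrors the bias rule of the identity handler for a quick check.
    if bit_width == 1.0:
        return -0.5
    return -(2 ** (bit_width - 1) - 1) if narrow else -(2 ** (bit_width - 1))

assert identity_act_bias(4, narrow=False) == -8
assert identity_act_bias(4, narrow=True) == -7
```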
+ def _calculate_thresholds(self):
+ # Gather parameters
+ quant_scale = self._model.get_initializer(self._q_node.input[1])
+ q_inst = getCustomOp(self._q_node)
+ if self._q_node.op_type == "Quant":
+ bit_width = self._model.get_initializer(self._q_node.input[3])
+ narrow = q_inst.get_nodeattr("narrow")
+ elif self._q_node.op_type == "BipolarQuant":
+ bit_width = 1.0
+ else:
+ raise RuntimeError("Got an unexpected quantizer node type")
+
+ # Calculate thresholds, see: https://github.com/Xilinx/brevitas/
+ # blob/a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/
+ # export/onnx/finn/handler/act.py#L76
+ if bit_width == 1.0:
+ thresholds = np.empty([1, 1])
+ thresholds[0] = 0
+ return thresholds
+ else:
+ if narrow:
+ num_distinct_values = 2 ** bit_width - 1
+ else:
+ num_distinct_values = 2 ** bit_width
+
+ num_thresholds = int(num_distinct_values - 1)
+ flat_scale = quant_scale.flatten()
+ num_scale_channels = flat_scale.shape[0]
+ step = np.abs(flat_scale)
+ half_step = step / 2.0
+ thresholds = np.empty((num_scale_channels, num_thresholds))
+ # compute the value of the smallest threshold; all generated
+ # thresholds are offset downwards by this amount
+ min_threshold = -half_step - step * ((num_thresholds // 2) - 1)
+ if not narrow:
+ min_threshold -= step
+ for c in range(num_scale_channels):
+ for t in range(num_thresholds):
+ thresholds[c][t] = min_threshold[c] + step[c] * t
+
+ # TODO: The index 1 needs to be changed to -1 for the channels-last format
+ num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[
+ 1
+ ]
+ final_shape = (num_output_channels, num_thresholds)
+ if thresholds.shape != final_shape:
+ thresholds = np.broadcast_to(thresholds, final_shape)
+
+ return thresholds
+
+ def _calculate_act_scale(self):
+ # Gather parameters
+ if self._q_node.op_type == "Quant":
+ bit_width = self._model.get_initializer(self._q_node.input[3])
+ elif self._q_node.op_type == "BipolarQuant":
+ bit_width = 1.0
+ else:
+ raise RuntimeError("Got an unexpected quantizer node type")
+ quant_scale = self._model.get_initializer(self._q_node.input[1])
+ # Calculate scale, see: https://github.com/Xilinx/brevitas/
+ # blob/a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/
+ # export/onnx/finn/handler/act.py#L111
+ if bit_width != 1:
+ scale = quant_scale
+ else:
+ assert (
+ quant_scale.flatten().shape[0] == 1
+ ), "Unsupported BIPOLAR per channel scale"
+ assert quant_scale.flatten()[0] == 1.0, "Unsupported BIPOLAR scale != 1"
+ scale = quant_scale * 2
+ return scale
+
+ def _remove_activation_node(self, multi_threshold_node):
+ # The Quant identity activation by definition has no explicit activation node
+ return
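New activation types can be supported by subclassing QuantActBaseHandler. The skeleton below is a hypothetical illustration (the class name and "Sigmoid" predecessor type are made up, not part of the patch) showing which pieces a handler must supply:

```python
# Hypothetical handler skeleton; "Sigmoid" is only an example predecessor.
class QuantSigmoidHandler(QuantActBaseHandler):
    valid_predecessor_op_types = ["Sigmoid"]

    def _check_compatibility(self):
        ...  # reject QONNX settings FINN cannot express

    def _calculate_act_bias(self):
        ...  # bias applied after the MultiThreshold node

    def _calculate_thresholds(self):
        ...  # threshold array, shape (channels, num_thresholds)

    def _calculate_act_scale(self):
        ...  # scale applied after the bias

    def _remove_activation_node(self, multi_threshold_node):
        ...  # drop the original activation node and rewire its input
```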
diff --git a/src/finn/transformation/qonnx/quant_act_to_multithreshold.py b/src/finn/transformation/qonnx/quant_act_to_multithreshold.py
new file mode 100644
index 0000000000..29ba93dfcf
--- /dev/null
+++ b/src/finn/transformation/qonnx/quant_act_to_multithreshold.py
@@ -0,0 +1,160 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import warnings
+
+from finn.transformation.base import Transformation
+from finn.transformation.qonnx.qonnx_activation_handlers import QuantActBaseHandler
+
+
+def default_filter_function_generator(max_multithreshold_bit_width=8):
+ """
+ This function generates the default filter function for the
+ ConvertQuantActToMultiThreshold transformation. By default the returned
+ function disables the conversion of Quant nodes with a bit width above 8 bits.
+
+ This function generator can be used as a template to write custom
+ filter functions.
+ """
+
+ def filter_function(model, q_node):
+ if q_node.op_type == "Quant":
+ bit_width = model.get_initializer(q_node.input[3])
+ elif q_node.op_type == "BipolarQuant":
+ bit_width = 1.0
+ else:
+ raise RuntimeError("Got an unexpected quantizer node type")
+ if bit_width is None:
+ raise ValueError("Quant nodes must have a static bit width.")
+ if bit_width > max_multithreshold_bit_width:
+ warnings.warn(
+ f'The Quant node with name: "{q_node.name}" was not converted to a '
+ f"MultiThreshold node, because its bit width of {bit_width} is "
+ f"higher than the configured maximum bit width of "
+ f"{max_multithreshold_bit_width}."
+ )
+ return False
+ return True
+
+ return filter_function
+
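A custom filter can use any property of the model or the candidate node. For instance, a sketch (the prefix is an illustrative assumption) that only converts Quant nodes whose name carries a given prefix:

```python
def make_name_prefix_filter(prefix="Quant_act"):
    # Only convert Quant/BipolarQuant nodes whose name starts with prefix.
    def filter_function(model, q_node):
        return q_node.name.startswith(prefix)

    return filter_function
```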
+
+class ConvertQuantActToMultiThreshold(Transformation):
+ """
+ Converts Quant nodes in the activation path to MultiThreshold nodes.
+
+ The optional keyword argument `filter_function`
+ presents a way to control which Quant and BipolarQuant nodes in the activation path
+ are converted to MultiThreshold nodes. A warning will be emitted when a Quant node
+ is not converted to a MultiThreshold node.
+
+ :param filter_function: Each candidate Quant and BipolarQuant node is first
+ evaluated by this function. If the function returns False,
+ then the node is not converted to a MultiThreshold node.
+ The function is given the model and the candidate node as parameters.
+ By default a filter function is used which disables the conversion of
+ Quant nodes with a bit width larger than 8.
+ Defaults to: default_filter_function_generator(max_multithreshold_bit_width=8)
+ """
+
+ def __init__(
+ self,
+ filter_function=default_filter_function_generator(
+ max_multithreshold_bit_width=8
+ ),
+ ):
+ super().__init__()
+ self._filter_function = filter_function
+
+ def apply(self, model):
+ graph = model.graph
+ node_ind = 0
+ graph_modified = False
+
+ for n in graph.node:
+ node_ind += 1
+ if n.op_type == "Quant" or n.op_type == "BipolarQuant":
+ # Check that the node is in the activation path
+ inp = model.get_initializer(n.input[0])
+ out = model.get_initializer(n.output[0])
+ if not (inp is None and out is None):
+ continue
+ predecessor = model.find_direct_predecessors(n)
+ if predecessor is not None:
+ predecessor_op_type = predecessor[0].op_type
+ else:
+ predecessor_op_type = predecessor
+ if model.is_fork_node(n):
+ raise ValueError(
+ "Forking Quant/BipolarQuant nodes are currently "
+ "not supported by FINN."
+ )
+ if n.op_type == "Quant" and not model.get_initializer(n.input[2]) == 0:
+ raise ValueError(
+ "Only Quant nodes with zero-point == 0 are currently supported."
+ )
+
+ # Check that this node passes the user filter
+ if not self._filter_function(model, n):
+ warnings.warn(
+ f'The Quant node with name: "{n.name}" was not converted to a '
+ f"MultiThreshold node, because the filtering function "
+ f"returned False for this node."
+ )
+ continue
+
+ # Check for possible ambiguity in handler selection
+ valid_predecessors = []
+ for cls in QuantActBaseHandler.__subclasses__():
+ valid_predecessors.extend(cls.valid_predecessor_op_types)
+ if len(valid_predecessors) != len(set(valid_predecessors)):
+ raise RuntimeError(
+ "Two or more activation handlers declare the same "
+ "type of valid predecessor node. "
+ "This leads to ambiguity in the handler selection "
+ "and must thus be avoided."
+ )
+
+ # Try to find a fitting handler for this Quant activation node
+ for handler_cls in QuantActBaseHandler.__subclasses__():
+ if predecessor_op_type in handler_cls.valid_predecessor_op_types:
+ handler = handler_cls(model, n, node_ind)
+ break
+ else:
+ raise ValueError(
+ f"Quant nodes in the activation path and with predecessor "
+ f"nodes of type {predecessor_op_type} are currently not "
+ f"supported by FINN and can not be converted to "
+ f"MultiThreshold nodes."
+ )
+ model = handler.replace_quant_node()
+ graph_modified = True
+ return (model, graph_modified)
+
+ return (model, graph_modified)
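Typical usage (a sketch; the file name is a placeholder) combines the transformation with the default filter generator from above:

```python
from finn.core.modelwrapper import ModelWrapper

model = ModelWrapper("qonnx_model.onnx")  # placeholder path
model = model.transform(
    ConvertQuantActToMultiThreshold(
        filter_function=default_filter_function_generator(
            max_multithreshold_bit_width=4
        )
    )
)
```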
diff --git a/src/finn/transformation/streamline/__init__.py b/src/finn/transformation/streamline/__init__.py
index 97cd957ce1..d0ec26a4d1 100644
--- a/src/finn/transformation/streamline/__init__.py
+++ b/src/finn/transformation/streamline/__init__.py
@@ -31,42 +31,38 @@
__path__ = extend_path(__path__, __name__)
from finn.transformation.base import Transformation
-from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.batchnorm_to_affine import BatchNormToAffine
from finn.transformation.general import (
- ConvertSubToAdd,
ConvertDivToMul,
+ ConvertSubToAdd,
GiveReadableTensorNames,
GiveUniqueNodeNames,
)
-
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.remove import RemoveIdentityOps
from finn.transformation.streamline.absorb import (
+ Absorb1BitMulIntoConv,
+ Absorb1BitMulIntoMatMul,
AbsorbAddIntoMultiThreshold,
AbsorbMulIntoMultiThreshold,
- FactorOutMulSignMagnitude,
- Absorb1BitMulIntoMatMul,
- Absorb1BitMulIntoConv,
AbsorbSignBiasIntoMultiThreshold,
+ FactorOutMulSignMagnitude,
)
-
from finn.transformation.streamline.collapse_repeated import (
CollapseRepeatedAdd,
CollapseRepeatedMul,
)
-
from finn.transformation.streamline.reorder import (
- MoveAddPastMul,
- MoveScalarMulPastMatMul,
- MoveScalarAddPastMatMul,
MoveAddPastConv,
- MoveScalarMulPastConv,
+ MoveAddPastMul,
MoveMulPastMaxPool,
+ MoveScalarAddPastMatMul,
MoveScalarLinearPastInvariants,
+ MoveScalarMulPastConv,
+ MoveScalarMulPastMatMul,
)
-
from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
from finn.transformation.streamline.sign_to_thres import ConvertSignToThres
-from finn.transformation.batchnorm_to_affine import BatchNormToAffine
-from finn.transformation.streamline.remove import RemoveIdentityOps
class Streamline(Transformation):
diff --git a/src/finn/transformation/streamline/absorb.py b/src/finn/transformation/streamline/absorb.py
index 8237d8bf2f..97ae3b51a8 100644
--- a/src/finn/transformation/streamline/absorb.py
+++ b/src/finn/transformation/streamline/absorb.py
@@ -27,16 +27,16 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import numpy as np
-from onnx import helper as oh
import warnings
+from onnx import helper as oh
-from finn.core.datatype import DataType
import finn.core.data_layout as DataLayout
-from finn.transformation.base import Transformation
-from finn.util.basic import get_by_name
+from finn.core.datatype import DataType
from finn.custom_op.registry import getCustomOp
-from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.base import Transformation
from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import get_by_name
class AbsorbSignBiasIntoMultiThreshold(Transformation):
@@ -205,7 +205,7 @@ def apply(self, model):
actual_ndims = len(tuple(filter(lambda x: x > 1, A.shape)))
is_1d = actual_ndims == 1
is_not_bipolar = (
- model.get_tensor_datatype(mul_weight_name) != DataType.BIPOLAR
+ model.get_tensor_datatype(mul_weight_name) != DataType["BIPOLAR"]
)
is_signed = (A < 0).any()
if is_signed and (is_scalar or is_1d) and is_not_bipolar:
@@ -217,7 +217,7 @@ def apply(self, model):
# create new mul node with sign(A) as the operand
sgn = np.sign(A)
model.set_initializer(sign_mul_param_name, sgn)
- model.set_tensor_datatype(sign_mul_param_name, DataType.BIPOLAR)
+ model.set_tensor_datatype(sign_mul_param_name, DataType["BIPOLAR"])
# replace original mul weight by magnitudes
model.set_initializer(mul_weight_name, np.abs(A))
new_mul = oh.make_node(
@@ -308,56 +308,61 @@ def apply(self, model):
class AbsorbTransposeIntoMultiThreshold(Transformation):
- """Change (NCHWTranspose -> MultiThreshold -> NHWCTranspose) to (MultiThreshold)
- with NHWC mode. For (NCHWTranspose -> MultiThreshold) move Transpose past MT."""
+ """For (NCHWTranspose -> MultiThreshold) move Transpose past MultiThreshold
+ and set its data_layout mode to NHWC."""
def apply(self, model):
graph = model.graph
node_ind = 0
graph_modified = False
- for n in graph.node:
+ nodes = [n for n in model.graph.node]
+ for n in nodes:
node_ind += 1
if n.op_type == "Transpose" and not model.is_fork_node(n):
perms = list(get_by_name(n.attribute, "perm").ints)
if perms == [0, 3, 1, 2]:
mt_cand = model.find_consumer(n.output[0])
- if mt_cand.op_type == "MultiThreshold" and not model.is_fork_node(
- mt_cand
+ if (
+ mt_cand is not None
+ and mt_cand.op_type == "MultiThreshold"
+ # and not model.is_fork_node(mt_cand)
):
- final_t_cand = model.find_consumer(mt_cand.output[0])
- if final_t_cand.op_type == "Transpose":
- perms = list(
- get_by_name(final_t_cand.attribute, "perm").ints
- )
- if perms == [0, 2, 3, 1]:
- mt = getCustomOp(mt_cand)
- mt.set_nodeattr("data_layout", "NHWC")
- # get rid of tranpose nodes, wire MT directly
- mt_cand.input[0] = n.input[0]
- mt_cand.output[0] = final_t_cand.output[0]
- graph.node.remove(n)
- graph.node.remove(final_t_cand)
- graph_modified = True
- else:
- mt = getCustomOp(mt_cand)
- mt.set_nodeattr("data_layout", "NHWC")
- # get rid of first tranpose node
- mt_cand.input[0] = n.input[0]
- graph.node.remove(n)
- # fix output shape for MultiThreshold
- mt_ishape = model.get_tensor_shape(mt_cand.input[0])
- model.set_tensor_shape(mt_cand.output[0], mt_ishape)
- # re-insert Transpose behind MultiThreshold
- transpose_output = model.make_new_valueinfo_name()
- new_transpose = oh.make_node(
- "Transpose",
- [mt_cand.output[0]],
- [transpose_output],
- perm=[0, 3, 1, 2],
- )
- graph.node.insert(node_ind + 1, new_transpose)
- final_t_cand.input[0] = transpose_output
- graph_modified = True
+ mt_cand_orig_output = mt_cand.output[0]
+ mt = getCustomOp(mt_cand)
+ mt.set_nodeattr("data_layout", "NHWC")
+ # Rewire input of MultiThreshold node
+ mt_cand.input[0] = n.input[0]
+ # Make new intermediate tensor
+ intermediate_tensor_name = model.make_new_valueinfo_name()
+ intermediate_tensor_shape = model.get_tensor_shape(n.input[0])
+ intermediate_tensor_finn_dtype = model.get_tensor_datatype(
+ mt_cand.output[0]
+ )
+ # Create a new ValueInfoProto and set the shape
+ model.set_tensor_shape(
+ intermediate_tensor_name, intermediate_tensor_shape
+ )
+ # Set the tensor layout
+ model.set_tensor_layout(
+ intermediate_tensor_name, DataLayout.NHWC
+ )
+ # Set the tensor FINN datatype
+ model.set_tensor_datatype(
+ intermediate_tensor_name, intermediate_tensor_finn_dtype
+ )
+ # Rewire output of MT node
+ mt_cand.output[0] = intermediate_tensor_name
+ # Get rid of first transpose node
+ graph.node.remove(n)
+ # Create new Transpose node
+ new_transpose = oh.make_node(
+ "Transpose",
+ [intermediate_tensor_name],
+ [mt_cand_orig_output],
+ perm=[0, 3, 1, 2],
+ )
+ graph.node.insert(node_ind + 1, new_transpose)
+ graph_modified = True
if graph_modified:
model = model.transform(InferDataTypes())
return (model, graph_modified)
@@ -457,7 +462,7 @@ def apply(self, model):
graph.node.remove(prod)
n.input[0] = prod_input
# to avoid error the dataype is set to float32
- model.set_tensor_datatype(n.input[0], DataType.FLOAT32)
+ model.set_tensor_datatype(n.input[0], DataType["FLOAT32"])
graph_modified = True
if graph_modified:
model = model.transform(InferShapes())
@@ -531,11 +536,20 @@ def apply(self, model):
# TODO implement this to allow for forks as producers
consumers = model.find_direct_successors(next_node)
prod = model.find_producer(n.input[0])
- for cons in consumers:
- for cons_in in cons.input:
- if cons_in == next_node.output[0]:
- prod.output[0] = cons_in
- break
+ if prod is not None:
+ for cons in consumers:
+ for cons_in in cons.input:
+ if cons_in == next_node.output[0]:
+ prod.output[0] = cons_in
+ break
+ else:
+ # n.input[0] is top-level graph input
+ # wire consumers directly to that
+ for cons in consumers:
+ for i, iname in enumerate(cons.input):
+ if iname == next_node.output[0]:
+ cons.input[i] = n.input[0]
+
# remove both transposes
graph.node.remove(n)
graph.node.remove(next_node)
@@ -544,3 +558,81 @@ def apply(self, model):
if graph_modified:
model = model.transform(InferDataTypes())
return (model, graph_modified)
+
+
+class AbsorbTransposeIntoResize(Transformation):
+ """For (NCHWTranspose -> Resize) move Transpose past Resize and
+ change the Resize node's attributes accordingly."""
+
+ def apply(self, model):
+ graph = model.graph
+ node_ind = 0
+ graph_modified = False
+ for node in graph.node:
+ node_ind += 1
+ if node.op_type == "Transpose" and not model.is_fork_node(node):
+ perms = list(get_by_name(node.attribute, "perm").ints)
+ if perms == [0, 3, 1, 2]:
+ mt_cand = model.find_consumer(node.output[0])
+ if mt_cand is not None and mt_cand.op_type == "Resize":
+ mode = get_by_name(mt_cand.attribute, "mode").s.decode("ascii")
+ # skip if mode is not nearest
+ if mode != "nearest":
+ continue
+ # if sizes specified, turn into scales
+ if len(mt_cand.input) > 3:
+ sizes = model.get_initializer(mt_cand.input[3])
+ else:
+ sizes = None
+ if sizes is not None:
+ ishape = model.get_tensor_shape(mt_cand.input[0])
+ ns, cs, hs, ws = sizes / np.asarray(ishape)
+ model.set_initializer(
+ mt_cand.input[2], np.asarray([ns, cs, hs, ws])
+ )
+ mt_cand.input.remove(mt_cand.input[3])
+ # scales already specified, transpose indices to NHWC
+ scales = model.get_initializer(mt_cand.input[2])
+ assert scales is not None
+ ns, cs, hs, ws = scales
+ model.set_initializer(
+ mt_cand.input[2], np.asarray([ns, hs, ws, cs])
+ )
+ # get rid of first transpose node
+ mt_cand.input[0] = node.input[0]
+ graph.node.remove(node)
+ is_last_node = mt_cand.output[0] in [
+ x.name for x in model.graph.output
+ ]
+
+ new_tensor_name = model.make_new_valueinfo_name()
+ if is_last_node:
+ trans_input = new_tensor_name
+ trans_output = mt_cand.output[0]
+ else:
+ trans_input = mt_cand.output[0]
+ trans_output = new_tensor_name
+ # fix tensor shapes for Resize and Transpose
+ n, c, hx, wx = model.get_tensor_shape(mt_cand.output[0])
+ model.set_tensor_shape(trans_input, (n, hx, wx, c))
+ model.set_tensor_shape(trans_output, (n, c, hx, wx))
+ # re-insert Transpose behind Resize
+ new_transpose = oh.make_node(
+ "Transpose",
+ [trans_input],
+ [trans_output],
+ perm=[0, 3, 1, 2],
+ )
+ graph.node.insert(node_ind + 1, new_transpose)
+ # rewire nodes
+ final_t_cands = model.find_consumers(mt_cand.output[0])
+ if final_t_cands is not None:
+ # rewire next nodes' inputs
+ for final_t_cand in final_t_cands:
+ final_t_cand.input[0] = trans_output
+ mt_cand.output[0] = trans_input
+ graph_modified = True
+ if graph_modified:
+ model = model.transform(InferDataTypes())
+ return (model, graph_modified)
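The scales reordering above is a plain permutation of the Resize scales from NCHW to NHWC order; a one-line check with illustrative values (not part of the patch):

```python
import numpy as np

scales_nchw = np.asarray([1.0, 1.0, 2.0, 2.0])  # [n, c, h, w]
ns, cs, hs, ws = scales_nchw
scales_nhwc = np.asarray([ns, hs, ws, cs])      # [n, h, w, c]
assert list(scales_nhwc) == [1.0, 2.0, 2.0, 1.0]
```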
diff --git a/src/finn/transformation/streamline/collapse_repeated.py b/src/finn/transformation/streamline/collapse_repeated.py
index 19f1ec3e83..92c48c84ff 100644
--- a/src/finn/transformation/streamline/collapse_repeated.py
+++ b/src/finn/transformation/streamline/collapse_repeated.py
@@ -28,15 +28,15 @@
from onnx import helper as oh
+from finn.core.datatype import DataType
from finn.transformation.base import Transformation
from finn.transformation.infer_shapes import InferShapes
-from finn.core.datatype import DataType
class CollapseRepeatedOp(Transformation):
"""Collapse repeated consecutive operations with constant parameters into
a single operation. make_collapsed_param_fxn must take two tensors and
- return a tensor which gives the equivalent result using a single op. """
+ return a tensor which gives the equivalent result using a single op."""
def __init__(self, op_name, make_collapsed_param_fxn):
super().__init__()
@@ -85,8 +85,8 @@ def apply(self, model):
# replace parameter value
model.set_initializer(new_node_param_name, new_param)
# be conservative with param/output DataTypes
- model.set_tensor_datatype(new_node_param_name, DataType.FLOAT32)
- model.set_tensor_datatype(end_name, DataType.FLOAT32)
+ model.set_tensor_datatype(new_node_param_name, DataType["FLOAT32"])
+ model.set_tensor_datatype(end_name, DataType["FLOAT32"])
# remove old nodes
graph.node.remove(n)
graph.node.remove(consumer)
diff --git a/src/finn/transformation/streamline/remove.py b/src/finn/transformation/streamline/remove.py
deleted file mode 100644
index 0abcf441f9..0000000000
--- a/src/finn/transformation/streamline/remove.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# Copyright (c) 2020, Xilinx
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# * Redistributions of source code must retain the above copyright notice, this
-# list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# * Neither the name of FINN nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-from finn.transformation.base import Transformation
-from finn.transformation.infer_shapes import InferShapes
-import numpy as np
-
-
-def _remove_node_and_rewire(model, node):
- producer = model.find_producer(node.input[0])
- if producer is not None:
- # wire output tensor to
- # output of producer node
- producer.output[0] = node.output[0]
- else:
- # node is first in graph
- consumer = model.find_consumer(node.output[0])
- assert consumer is not None, "Whole graph is identity"
- assert consumer.input[0] == node.output[0]
- # rewire consumer's input directly to graph input
- consumer.input[0] = node.input[0]
- # remove node
- model.graph.node.remove(node)
-
-
-class RemoveIdentityOps(Transformation):
- """Remove identity ops like Add/Sub with zero or Mul/Div with one. A tolerance
- value (defaults to 1e-05) can be specified during init for the comparison
- to zero/one."""
-
- def __init__(self, atol=1e-05):
- super().__init__()
- self.atol = atol
-
- def apply(self, model):
- graph = model.graph
- node_ind = 0
- graph_modified = False
- for n in graph.node:
- node_ind += 1
- if (
- n.op_type in ["Add", "Sub"]
- and not model.is_fork_node(n)
- and not model.is_join_node(n)
- ):
- A = model.get_initializer(n.input[1])
- if (
- A is not None
- and np.isclose(A, np.zeros_like(A), atol=self.atol).all()
- ):
- _remove_node_and_rewire(model, n)
-
- elif (
- n.op_type in ["Mul", "Div"]
- and not model.is_fork_node(n)
- and not model.is_join_node(n)
- ):
- A = model.get_initializer(n.input[1])
- if (
- A is not None
- and np.isclose(A, np.ones_like(A), atol=self.atol).all()
- ):
- _remove_node_and_rewire(model, n)
- model = model.transform(InferShapes())
- return (model, graph_modified)
diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py
index 4049d7bc8b..0cdd6651d9 100644
--- a/src/finn/transformation/streamline/reorder.py
+++ b/src/finn/transformation/streamline/reorder.py
@@ -28,19 +28,19 @@
import numpy as np
import warnings
-from onnx import helper as oh
from onnx import TensorProto
+from onnx import helper as oh
-from finn.transformation.base import Transformation
import finn.core.data_layout as DataLayout
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_data_layouts import InferDataLayouts
from finn.core.datatype import DataType
from finn.core.onnx_exec import execute_node
-from finn.util.basic import get_by_name
from finn.custom_op.registry import getCustomOp
+from finn.transformation.base import Transformation
from finn.transformation.general import SortGraph
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import get_by_name
class MoveAddPastMul(Transformation):
@@ -408,16 +408,16 @@ def apply(self, model):
# rewire mul input to be conv input
conv_node.input[0] = start_name
model.set_tensor_shape(start_name, conv_in_shape)
- model.set_tensor_datatype(start_name, DataType.FLOAT32)
+ model.set_tensor_datatype(start_name, DataType["FLOAT32"])
# use old conv input tensor as conv output
conv_node.output[0] = conv_in_name
model.set_tensor_shape(conv_in_name, conv_out_shape)
- model.set_tensor_datatype(conv_in_name, DataType.FLOAT32)
+ model.set_tensor_datatype(conv_in_name, DataType["FLOAT32"])
# use new conv output as new mul node input
mul_node.input[0] = conv_in_name
# use old conv output as new mul node output
mul_node.output[0] = conv_out_name
- model.set_tensor_datatype(conv_out_name, DataType.FLOAT32)
+ model.set_tensor_datatype(conv_out_name, DataType["FLOAT32"])
# move mul node past conv node
graph.node.remove(mul_node)
graph.node.insert(node_ind, mul_node)
@@ -482,16 +482,16 @@ def apply(self, model):
# rewire mul input to be maxpool input
maxpool_node.input[0] = start_name
model.set_tensor_shape(start_name, maxpool_in_shape)
- model.set_tensor_datatype(start_name, DataType.FLOAT32)
+ model.set_tensor_datatype(start_name, DataType["FLOAT32"])
# use old maxpool input tensor as maxpool output
maxpool_node.output[0] = maxpool_in_name
model.set_tensor_shape(maxpool_in_name, maxpool_out_shape)
- model.set_tensor_datatype(maxpool_in_name, DataType.FLOAT32)
+ model.set_tensor_datatype(maxpool_in_name, DataType["FLOAT32"])
# use new maxpool output as new mul node input
mul_node.input[0] = maxpool_in_name
# use old maxpool output as new mul node output
mul_node.output[0] = maxpool_out_name
- model.set_tensor_datatype(maxpool_out_name, DataType.FLOAT32)
+ model.set_tensor_datatype(maxpool_out_name, DataType["FLOAT32"])
# move mul node past maxpool node
graph.node.remove(mul_node)
graph.node.insert(node_ind, mul_node)
@@ -594,11 +594,17 @@ def apply(self, model):
nodes = [n for n in graph.node]
for n in nodes:
node_ind += 1
+ is_nearest_neighbor_resample = False
+ if n.op_type == "Upsample" or n.op_type == "Resize":
+ # Extract mode and scales and input shape
+ mode = get_by_name(n.attribute, "mode").s.decode("ascii")
+ is_nearest_neighbor_resample = mode == "nearest"
if (
n.op_type == "GlobalAveragePool"
or n.op_type == "Reshape"
or n.op_type == "Transpose"
or n.op_type == "Flatten"
+ or is_nearest_neighbor_resample
):
in0 = n.input[0]
if in0 is None:
@@ -617,6 +623,10 @@ def apply(self, model):
# if initializer is not scalar, skip
if np.prod(init0.shape) != 1:
continue
+ # Flatten input if required
+ if len(init0.shape) > 0:
+ init0 = init0.flatten()[0]
+ model.set_initializer(prod0.input[1], init0)
# move prod0 from input to output,
old_prod0_in = prod0.input[0]
old_prod0_out = prod0.output[0]
@@ -632,7 +642,7 @@ def apply(self, model):
model.set_tensor_shape(n.output[0], out_shape)
model.set_tensor_shape(prod0.output[0], out_shape)
model.set_tensor_datatype(prod0.output[0], scalar_op_odt)
- model.set_tensor_datatype(n.output[0], DataType.FLOAT32)
+ model.set_tensor_datatype(n.output[0], DataType["FLOAT32"])
graph.node.remove(prod0)
graph.node.insert(node_ind - 1, prod0)
graph_modified = True
diff --git a/src/finn/transformation/streamline/sign_to_thres.py b/src/finn/transformation/streamline/sign_to_thres.py
index 13f2e8524a..61d7eb3543 100644
--- a/src/finn/transformation/streamline/sign_to_thres.py
+++ b/src/finn/transformation/streamline/sign_to_thres.py
@@ -69,6 +69,6 @@ def apply(self, model):
graph.node.insert(node_ind, mt_node)
graph.node.remove(n)
# add quantization annotations
- model.set_tensor_datatype(sign_out_name, DataType.BIPOLAR)
+ model.set_tensor_datatype(sign_out_name, DataType["BIPOLAR"])
graph_modified = True
return (model, graph_modified)
diff --git a/src/finn/util/create.py b/src/finn/util/create.py
index d9c5d7b1b5..62229a69b6 100644
--- a/src/finn/util/create.py
+++ b/src/finn/util/create.py
@@ -49,10 +49,10 @@ def hls_random_mlp_maker(layer_spec):
# no activation, produce accumulators
T = None
tdt = None
- if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
- odt = DataType.UINT32
+ if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+ odt = DataType["UINT32"]
else:
- odt = DataType.INT32
+ odt = DataType["INT32"]
else:
odt = act
(min, max) = calculate_signed_dot_prod_range(idt, wdt, mw)
@@ -61,13 +61,13 @@ def hls_random_mlp_maker(layer_spec):
# provide non-decreasing thresholds
T = np.sort(T, axis=1)
# generate thresholds for activation
- if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
- tdt = DataType.UINT32
+ if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+ tdt = DataType["UINT32"]
# bias thresholds to be positive
T = np.ceil((T + mw) / 2)
assert (T >= 0).all()
else:
- tdt = DataType.INT32
+ tdt = DataType["INT32"]
lyr["T"] = T
lyr["tdt"] = tdt
lyr["odt"] = odt
@@ -120,11 +120,11 @@ def hls_mlp_maker(layer_spec):
# StreamingFC:
# - specify their datatypes as such
# - specify their datatypes as BINARY as use binaryXnorMode
- if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
+ if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
# we'll internally convert weights/inputs to binary and specify the
# datatypes as such, and also set the binaryXnorMode attribute to 1
- export_wdt = DataType.BINARY
- export_idt = DataType.BINARY
+ export_wdt = DataType["BINARY"]
+ export_idt = DataType["BINARY"]
binary_xnor_mode = 1
else:
export_wdt = wdt
@@ -134,7 +134,7 @@ def hls_mlp_maker(layer_spec):
if T is not None:
no_act = 0
node_inp_list = [current_in_name, current_W_name, current_T_name]
- if odt == DataType.BIPOLAR:
+ if odt == DataType["BIPOLAR"]:
actval = 0
else:
actval = odt.min()
diff --git a/src/finn/util/imagenet.py b/src/finn/util/imagenet.py
index 71ed9d9d26..abd412e8d9 100644
--- a/src/finn/util/imagenet.py
+++ b/src/finn/util/imagenet.py
@@ -26,11 +26,12 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
import numpy as np
+import os
from PIL import Image
+
from finn.core.data_layout import NCHW, NHWC
-from finn.util.test import resize_smaller_side, crop_center
+from finn.util.test import crop_center, resize_smaller_side
def get_val_images(n_images=100, interleave_classes=False):
diff --git a/src/finn/util/pytorch.py b/src/finn/util/pytorch.py
index f174c24601..18010083f7 100644
--- a/src/finn/util/pytorch.py
+++ b/src/finn/util/pytorch.py
@@ -26,7 +26,6 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import torch
-
from torch.nn import Module, Sequential
diff --git a/src/finn/util/test.py b/src/finn/util/test.py
index 0a34751786..9c5462ae7f 100644
--- a/src/finn/util/test.py
+++ b/src/finn/util/test.py
@@ -26,22 +26,25 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import onnx
-import onnx.numpy_helper as nph
import pkg_resources as pk
-from pkgutil import get_data
-from brevitas_examples import bnn_pynq, imagenet_classification
-import numpy as np
+
import pytest
+
+import numpy as np
+import onnx
+import onnx.numpy_helper as nph
+import os
+import torchvision.transforms.functional as torchvision_util
import warnings
+from brevitas_examples import bnn_pynq, imagenet_classification
+from pkgutil import get_data
+
from finn.core.modelwrapper import ModelWrapper
-import os
-from finn.util.basic import pynq_part_map, alveo_part_map, alveo_default_platform
+from finn.core.onnx_exec import execute_onnx
+from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
from finn.transformation.fpgadataflow.vitis_build import VitisBuild, VitisOptStrategy
-from finn.custom_op.registry import getCustomOp
-from finn.core.onnx_exec import execute_onnx
-import torchvision.transforms.functional as torchvision_util
+from finn.util.basic import alveo_default_platform, alveo_part_map, pynq_part_map
# map of (wbits,abits) -> model
example_map = {
diff --git a/src/finn/util/vcd.py b/src/finn/util/vcd.py
index a4400f7bd7..6a5a68f099 100644
--- a/src/finn/util/vcd.py
+++ b/src/finn/util/vcd.py
@@ -26,9 +26,10 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import multiprocessing as mp
from vcdvcd import VCDVCD
+
from finn.util.basic import get_num_default_workers
-import multiprocessing as mp
# string patterns to search for to find particular interfaces
# streaming interfaces
@@ -162,7 +163,9 @@ def _get_stats(x):
return (x[0], get_stream_if_stats(x[1], x[0]))
-def get_all_stream_if_stats(vcd_file, stream_ifs=None, sort_by="{'V': 1, 'R': 0}", num_workers=None):
+def get_all_stream_if_stats(
+ vcd_file, stream_ifs=None, sort_by="{'V': 1, 'R': 0}", num_workers=None
+):
"""Return a list of streaming interface stats, sorted by the percentage
for the given sort_by key. If stream_ifs is None, all streaming interface
stats will be returned, otherwise treated as a list of interface names to
diff --git a/src/finn/util/visualization.py b/src/finn/util/visualization.py
index d8547a32e0..397bebb64c 100644
--- a/src/finn/util/visualization.py
+++ b/src/finn/util/visualization.py
@@ -27,8 +27,8 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import inspect
-import os
import netron
+import os
from IPython.display import IFrame
@@ -36,7 +36,27 @@ def showSrc(what):
print("".join(inspect.getsourcelines(what)[0]))
-def showInNetron(model_filename):
- netron.start(model_filename, address=("0.0.0.0", 8081))
- localhost_url = os.getenv("LOCALHOST_URL", default="localhost")
- return IFrame(src="http://%s:8081/" % localhost_url, width="100%", height=400)
+def showInNetron(model_filename: str, localhost_url: str = None, port: int = None):
+ """Shows a ONNX model file in the Jupyter Notebook using Netron.
+
+ :param model_filename: The path to the ONNX model file.
+ :type model_filename: str
+
+ :param localhost_url: The IP address used by the Jupyter IFrame to show the model.
+ Defaults to localhost.
+ :type localhost_url: str, optional
+
+ :param port: The port number used by Netron and the Jupyter IFrame to show
+ the ONNX model. Defaults to 8081.
+ :type port: int, optional
+
+ :return: The IFrame displaying the ONNX model.
+ :rtype: IPython.lib.display.IFrame
+ """
+ try:
+ port = port or int(os.getenv("NETRON_PORT", default="8081"))
+ except ValueError:
+ port = 8081
+ localhost_url = localhost_url or os.getenv("LOCALHOST_URL", default="localhost")
+ netron.start(model_filename, address=("0.0.0.0", port), browse=False)
+ return IFrame(src=f"http://{localhost_url}:{port}/", width="100%", height=400)
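From a notebook the updated helper can then be called with explicit overrides (a sketch; the file name, URL, and port are illustrative):

```python
from finn.util.visualization import showInNetron

# Defaults come from the NETRON_PORT / LOCALHOST_URL environment variables.
showInNetron("model.onnx", localhost_url="127.0.0.1", port=8082)
```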
diff --git a/tests/brevitas/test_brevitas_avg_pool_export.py b/tests/brevitas/test_brevitas_avg_pool_export.py
index 4b88b0f787..1b38914a83 100644
--- a/tests/brevitas/test_brevitas_avg_pool_export.py
+++ b/tests/brevitas/test_brevitas_avg_pool_export.py
@@ -25,26 +25,29 @@
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-
-import torch
-import numpy as np
import pytest
-import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.core.datatype import DataType
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.util.basic import gen_finn_dt_tensor
+import numpy as np
+import os
+import torch
from brevitas.export import FINNManager
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
from brevitas.nn import QuantAvgPool2d
from brevitas.quant_tensor import QuantTensor
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
+import finn.core.onnx_exec as oxe
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
+from finn.util.basic import gen_finn_dt_tensor
-export_onnx_path = "test_brevitas_avg_pool_export.onnx"
+base_export_onnx_path = "test_brevitas_avg_pool_export.onnx"
+@pytest.mark.parametrize("QONNX_export", [False, True])
@pytest.mark.parametrize("kernel_size", [2, 3])
@pytest.mark.parametrize("stride", [1, 2])
@pytest.mark.parametrize("signed", [True, False])
@@ -53,11 +56,23 @@
@pytest.mark.parametrize("channels", [2, 4])
@pytest.mark.parametrize("idim", [7, 8])
def test_brevitas_avg_pool_export(
- kernel_size, stride, signed, bit_width, input_bit_width, channels, idim
+ kernel_size,
+ stride,
+ signed,
+ bit_width,
+ input_bit_width,
+ channels,
+ idim,
+ QONNX_export,
):
-
+ export_onnx_path = base_export_onnx_path.replace(
+ ".onnx", f"test_QONNX-{QONNX_export}.onnx"
+ )
quant_avgpool = QuantAvgPool2d(
- kernel_size=kernel_size, stride=stride, bit_width=bit_width
+ kernel_size=kernel_size,
+ stride=stride,
+ bit_width=bit_width,
+ return_quant_tensor=False,
)
quant_avgpool.eval()
@@ -70,31 +85,57 @@ def test_brevitas_avg_pool_export(
# Brevitas QuantAvgPool layers need QuantTensors to export correctly
# which requires setting up a QuantTensor instance with the scale
# factor, zero point, bitwidth and signedness
- scale_array = np.random.uniform(low=0, high=1, size=(1, channels, 1, 1)).astype(
- np.float32
- )
+ scale_array = np.ones((1, channels, 1, 1)).astype(np.float32)
+ scale_array *= 0.5
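+ # a fixed per-channel scale of 0.5 replaces the previous random scales,
+ # keeping the comparison against the Brevitas reference deterministic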
input_tensor = torch.from_numpy(input_array * scale_array).float()
scale_tensor = torch.from_numpy(scale_array).float()
zp = torch.tensor(0.0)
input_quant_tensor = QuantTensor(
- input_tensor, scale_tensor, zp, input_bit_width, signed
+ input_tensor, scale_tensor, zp, input_bit_width, signed, training=False
)
# export
- FINNManager.export(
- quant_avgpool, export_path=export_onnx_path, input_t=input_quant_tensor
- )
+ if QONNX_export:
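+ # QONNX flow: export a generic QONNX model, clean it up, then convert it
+ # to the FINN dialect before running the usual execution checks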
+ BrevitasONNXManager.export(
+ quant_avgpool,
+ export_path=export_onnx_path,
+ input_t=input_quant_tensor,
+ )
+ model = ModelWrapper(export_onnx_path)
+
+ # Statically set the additional inputs generated by the BrevitasONNXManager
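+ # (inputs "1", "2" and "3" are the Quant scale, zero-point and bit-width
+ # tensors; setting initializers turns them into graph constants)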
+ model.graph.input.remove(model.graph.input[3])
+ model.graph.input.remove(model.graph.input[2])
+ model.graph.input.remove(model.graph.input[1])
+ model.set_initializer("1", scale_array)
+ model.set_initializer("2", np.array(0.0).astype(np.float32))
+ model.set_initializer("3", np.array(input_bit_width).astype(np.float32))
+ model.save(export_onnx_path)
+
+ qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
+ model = ModelWrapper(export_onnx_path)
+ model = model.transform(ConvertQONNXtoFINN())
+ model.save(export_onnx_path)
+ else:
+ FINNManager.export(
+ quant_avgpool, export_path=export_onnx_path, input_t=input_quant_tensor
+ )
model = ModelWrapper(export_onnx_path)
model = model.transform(InferShapes())
model = model.transform(InferDataTypes())
# reference brevitas output
- ref_output_array = quant_avgpool(input_quant_tensor).tensor.detach().numpy()
+ ref_output_array = quant_avgpool(input_quant_tensor).detach().numpy()
# finn output
- idict = {model.graph.input[0].name: input_array}
+ if QONNX_export:
+ # Manually apply the Quant tensor scaling for QONNX
+ idict = {model.graph.input[0].name: input_array * scale_array}
+ else:
+ idict = {model.graph.input[0].name: input_array}
odict = oxe.execute_onnx(model, idict, True)
finn_output = odict[model.graph.output[0].name]
# compare outputs
assert np.isclose(ref_output_array, finn_output).all()
# cleanup
os.remove(export_onnx_path)
diff --git a/tests/brevitas/test_brevitas_cnv.py b/tests/brevitas/test_brevitas_cnv.py
index 4b072535bd..78ca361366 100644
--- a/tests/brevitas/test_brevitas_cnv.py
+++ b/tests/brevitas/test_brevitas_cnv.py
@@ -26,19 +26,23 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
import pkg_resources as pk
+
import pytest
import brevitas.onnx as bo
import numpy as np
+import os
import torch
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.infer_shapes import InferShapes
from finn.transformation.general import GiveUniqueNodeNames, RemoveStaticGraphInputs
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from finn.util.test import get_test_model_trained
export_onnx_path = "test_brevitas_cnv.onnx"
@@ -46,11 +50,20 @@
@pytest.mark.parametrize("abits", [1, 2])
@pytest.mark.parametrize("wbits", [1, 2])
-def test_brevitas_cnv_export_exec(wbits, abits):
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_cnv_export_exec(wbits, abits, QONNX_export):
if wbits > abits:
pytest.skip("No wbits > abits cases at the moment")
cnv = get_test_model_trained("CNV", wbits, abits)
- bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path)
+ ishape = (1, 3, 32, 32)
+ if QONNX_export:
+ BrevitasONNXManager.export(cnv, ishape, export_onnx_path)
+ qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
+ model = ModelWrapper(export_onnx_path)
+ model = model.transform(ConvertQONNXtoFINN())
+ model.save(export_onnx_path)
+ else:
+ bo.export_finn_onnx(cnv, ishape, export_onnx_path)
model = ModelWrapper(export_onnx_path)
model = model.transform(GiveUniqueNodeNames())
model = model.transform(InferShapes())
diff --git a/tests/brevitas/test_brevitas_debug.py b/tests/brevitas/test_brevitas_debug.py
index 9115352796..e42b93babe 100644
--- a/tests/brevitas/test_brevitas_debug.py
+++ b/tests/brevitas/test_brevitas_debug.py
@@ -26,39 +26,71 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from pkgutil import get_data
+import pytest
-import os
import brevitas.onnx as bo
import numpy as np
import onnx
import onnx.numpy_helper as nph
+import os
import torch
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
+from pkgutil import get_data
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import RemoveStaticGraphInputs
from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from finn.util.test import get_test_model_trained
-def test_brevitas_debug():
+@pytest.mark.parametrize("QONNX_export", [False, True])
+@pytest.mark.parametrize("QONNX_FINN_conversion", [False, True])
+def test_brevitas_debug(QONNX_export, QONNX_FINN_conversion):
+ if (not QONNX_export) and QONNX_FINN_conversion:
+ pytest.skip("This test configuration is not valid and is thus skipped.")
finn_onnx = "test_brevitas_debug.onnx"
fc = get_test_model_trained("TFC", 2, 2)
- dbg_hook = bo.enable_debug(fc)
- bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx)
+ ishape = (1, 1, 28, 28)
+ if QONNX_export:
+ dbg_hook = bo.enable_debug(fc, proxy_level=True)
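+ # proxy_level=True places the debug hooks at the quantization-proxy level;
+ # the two export flows expose markers at different points (see the
+ # differing marker counts asserted below)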
+ BrevitasONNXManager.export(fc, ishape, finn_onnx)
+ # DebugMarkers have the brevitas.onnx domain, so that needs adjusting
+ model = ModelWrapper(finn_onnx)
+ dbg_nodes = model.get_nodes_by_op_type("DebugMarker")
+ for dbg_node in dbg_nodes:
+ dbg_node.domain = "finn.custom_op.general"
+ model.save(finn_onnx)
+ qonnx_cleanup(finn_onnx, out_file=finn_onnx)
+ if QONNX_FINN_conversion:
+ model = ModelWrapper(finn_onnx)
+ model = model.transform(ConvertQONNXtoFINN())
+ model.save(finn_onnx)
+ else:
+ dbg_hook = bo.enable_debug(fc)
+ bo.export_finn_onnx(fc, ishape, finn_onnx)
+ model = ModelWrapper(finn_onnx)
+ # DebugMarkers have the brevitas.onnx domain, so that needs adjusting
+ # ToDo: We should probably have a transformation pass that does this
+ # domain conversion for us.
+ dbg_nodes = model.get_nodes_by_op_type("DebugMarker")
+ for dbg_node in dbg_nodes:
+ dbg_node.domain = "finn.custom_op.general"
+ model = model.transform(InferShapes())
+ model = model.transform(FoldConstants())
+ model = model.transform(RemoveStaticGraphInputs())
+ model.save(finn_onnx)
model = ModelWrapper(finn_onnx)
- model = model.transform(InferShapes())
- model = model.transform(FoldConstants())
- model = model.transform(RemoveStaticGraphInputs())
assert len(model.graph.input) == 1
assert len(model.graph.output) == 1
# load one of the test vectors
raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
input_tensor = onnx.load_tensor_from_string(raw_i)
# run using FINN-based execution
- input_dict = {"0": nph.to_array(input_tensor)}
+ input_dict = {model.graph.input[0].name: nph.to_array(input_tensor)}
output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True)
produced = output_dict[model.graph.output[0].name]
# run using PyTorch/Brevitas
@@ -71,9 +103,19 @@ def test_brevitas_debug():
names_brevitas = set(dbg_hook.values.keys())
names_finn = set(output_dict.keys())
names_common = names_brevitas.intersection(names_finn)
- assert len(names_common) == 16
+ # The different exports return debug markers in different numbers and places
+ if QONNX_export and not QONNX_FINN_conversion:
+ assert len(names_common) == 12
+ elif QONNX_export and QONNX_FINN_conversion:
+ assert len(names_common) == 8
+ else:
+ assert len(names_common) == 16
for dbg_name in names_common:
- tensor_pytorch = dbg_hook.values[dbg_name].detach().numpy()
+ if QONNX_export:
+ tensor_pytorch = dbg_hook.values[dbg_name].value.detach().numpy()
+ else:
+ tensor_pytorch = dbg_hook.values[dbg_name].detach().numpy()
tensor_finn = output_dict[dbg_name]
assert np.isclose(tensor_finn, tensor_pytorch, atol=1e-5).all()
os.remove(finn_onnx)
diff --git a/tests/brevitas/test_brevitas_fc.py b/tests/brevitas/test_brevitas_fc.py
index 24a4530075..8e1e3de8d0 100644
--- a/tests/brevitas/test_brevitas_fc.py
+++ b/tests/brevitas/test_brevitas_fc.py
@@ -26,8 +26,6 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from pkgutil import get_data
-
import pytest
import brevitas.onnx as bo
@@ -35,32 +33,47 @@
import onnx
import onnx.numpy_helper as nph
import torch
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
+from pkgutil import get_data
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import RemoveStaticGraphInputs
from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from finn.util.basic import make_build_dir
from finn.util.test import get_test_model_trained
export_onnx_path = make_build_dir("test_brevitas_fc_")
+
# act bits
@pytest.mark.parametrize("abits", [1, 2])
# weight bits
@pytest.mark.parametrize("wbits", [1, 2])
# network topology / size
@pytest.mark.parametrize("size", ["TFC", "SFC", "LFC"])
-def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits):
+# QONNX export
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits, QONNX_export):
if size == "LFC" and wbits == 2 and abits == 2:
pytest.skip("No LFC-w2a2 present at the moment")
if wbits > abits:
pytest.skip("No wbits > abits cases at the moment")
- nname = "%s_%dW%dA" % (size, wbits, abits)
+ nname = "%s_%dW%dA_QONNX-%d" % (size, wbits, abits, QONNX_export)
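+ # e.g. TFC_1W1A_QONNX-1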
finn_onnx = export_onnx_path + "/%s.onnx" % nname
fc = get_test_model_trained(size, wbits, abits)
- bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx)
+ ishape = (1, 1, 28, 28)
+ if QONNX_export:
+ BrevitasONNXManager.export(fc, ishape, finn_onnx)
+ qonnx_cleanup(finn_onnx, out_file=finn_onnx)
+ model = ModelWrapper(finn_onnx)
+ model = model.transform(ConvertQONNXtoFINN())
+ model.save(finn_onnx)
+ else:
+ bo.export_finn_onnx(fc, ishape, finn_onnx)
model = ModelWrapper(finn_onnx)
model = model.transform(InferShapes())
model = model.transform(FoldConstants())
@@ -71,7 +84,7 @@ def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits):
raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
input_tensor = onnx.load_tensor_from_string(raw_i)
# run using FINN-based execution
- input_dict = {"0": nph.to_array(input_tensor)}
+ input_dict = {model.graph.input[0].name: nph.to_array(input_tensor)}
output_dict = oxe.execute_onnx(model, input_dict)
produced = output_dict[list(output_dict.keys())[0]]
# run using PyTorch/Brevitas
diff --git a/tests/brevitas/test_brevitas_mobilenet.py b/tests/brevitas/test_brevitas_mobilenet.py
index 98a18403e7..108c97c2e8 100644
--- a/tests/brevitas/test_brevitas_mobilenet.py
+++ b/tests/brevitas/test_brevitas_mobilenet.py
@@ -26,29 +26,31 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from PIL import Image
-import numpy as np
-import brevitas.onnx as bo
import pytest
+
+import brevitas.onnx as bo
+import numpy as np
import torch
-from finn.util.basic import make_build_dir
-from finn.util.pytorch import NormalizePreProc
-from finn.util.test import get_test_model_trained, resize_smaller_side, crop_center
-from finn.core.modelwrapper import ModelWrapper
+from PIL import Image
+
+import finn.core.onnx_exec as oxe
+import finn.transformation.streamline.absorb as absorb
from finn.core.datatype import DataType
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.general import (
GiveReadableTensorNames,
GiveUniqueNodeNames,
GiveUniqueParameterTensors,
)
-from finn.transformation.merge_onnx_models import MergeONNXModels
-import finn.transformation.streamline.absorb as absorb
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
from finn.transformation.insert_topk import InsertTopK
-import finn.core.onnx_exec as oxe
+from finn.transformation.merge_onnx_models import MergeONNXModels
+from finn.util.basic import make_build_dir
+from finn.util.pytorch import NormalizePreProc
+from finn.util.test import crop_center, get_test_model_trained, resize_smaller_side
@pytest.mark.xfail
@@ -76,7 +78,9 @@ def test_brevitas_mobilenet():
bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
preproc_model = ModelWrapper(preproc_onnx)
# set input finn datatype to UINT8
- preproc_model.set_tensor_datatype(preproc_model.graph.input[0].name, DataType.UINT8)
+ preproc_model.set_tensor_datatype(
+ preproc_model.graph.input[0].name, DataType["UINT8"]
+ )
preproc_model = preproc_model.transform(InferShapes())
preproc_model = preproc_model.transform(GiveUniqueNodeNames())
preproc_model = preproc_model.transform(GiveUniqueParameterTensors())
diff --git a/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py b/tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py
similarity index 82%
rename from tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py
rename to tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py
index 37ea12ac0f..b530b4bd84 100644
--- a/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py
+++ b/tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py
@@ -26,19 +26,24 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-import onnx # noqa
+import pytest
+
+import brevitas.onnx as bo
import numpy as np
+import onnx # noqa
+import os
import torch
-import brevitas.onnx as bo
-from brevitas.nn import QuantHardTanh
+from brevitas.core.quant import QuantType
from brevitas.core.restrict_val import RestrictValueType
from brevitas.core.scaling import ScalingImplType
-import pytest
-from finn.core.modelwrapper import ModelWrapper
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
+from brevitas.nn import QuantHardTanh
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
+
import finn.core.onnx_exec as oxe
+from finn.core.modelwrapper import ModelWrapper
from finn.transformation.infer_shapes import InferShapes
-from brevitas.core.quant import QuantType
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
export_onnx_path = "test_brevitas_non_scaled_QuantHardTanh_export.onnx"
@@ -46,7 +51,10 @@
@pytest.mark.parametrize("abits", [1, 2, 4, 8])
@pytest.mark.parametrize("narrow_range", [False, True])
@pytest.mark.parametrize("max_val", [1.0, 1 - 2 ** (-7)])
-def test_brevitas_act_export_qhardtanh_nonscaled(abits, narrow_range, max_val):
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_act_export_qhardtanh_nonscaled(
+ abits, narrow_range, max_val, QONNX_export
+):
def get_quant_type(bit_width):
if bit_width is None:
return QuantType.FP
@@ -67,7 +75,15 @@ def get_quant_type(bit_width):
scaling_impl_type=ScalingImplType.CONST,
narrow_range=narrow_range,
)
- bo.export_finn_onnx(b_act, ishape, export_onnx_path)
+ if QONNX_export:
+ m_path = export_onnx_path
+ BrevitasONNXManager.export(b_act, ishape, m_path)
+ qonnx_cleanup(m_path, out_file=m_path)
+ model = ModelWrapper(m_path)
+ model = model.transform(ConvertQONNXtoFINN())
+ model.save(m_path)
+ else:
+ bo.export_finn_onnx(b_act, ishape, export_onnx_path)
model = ModelWrapper(export_onnx_path)
model = model.transform(InferShapes())
inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
diff --git a/tests/brevitas/test_brevitas_QConv2d.py b/tests/brevitas/test_brevitas_qconv2d.py
similarity index 83%
rename from tests/brevitas/test_brevitas_QConv2d.py
rename to tests/brevitas/test_brevitas_qconv2d.py
index 5f124690d7..beaea4e51e 100644
--- a/tests/brevitas/test_brevitas_QConv2d.py
+++ b/tests/brevitas/test_brevitas_qconv2d.py
@@ -27,20 +27,24 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import pytest
-import os
+
+import brevitas.onnx as bo
import numpy as np
+import os
import torch
-import brevitas.onnx as bo
-from brevitas.nn import QuantConv2d
-from brevitas.core.restrict_val import RestrictValueType
from brevitas.core.quant import QuantType
+from brevitas.core.restrict_val import RestrictValueType
from brevitas.core.scaling import ScalingImplType
from brevitas.core.stats import StatsOp
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
+from brevitas.nn import QuantConv2d
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
-from finn.core.modelwrapper import ModelWrapper
-from finn.core.datatype import DataType
import finn.core.onnx_exec as oxe
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from finn.util.basic import gen_finn_dt_tensor
export_onnx_path = "test_brevitas_conv.onnx"
@@ -49,7 +53,8 @@
@pytest.mark.parametrize("dw", [False, True])
@pytest.mark.parametrize("bias", [True, False])
@pytest.mark.parametrize("in_channels", [32])
-def test_brevitas_QConv2d(dw, bias, in_channels):
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_QConv2d(dw, bias, in_channels, QONNX_export):
ishape = (1, 32, 111, 111)
if dw is True:
groups = in_channels
@@ -85,10 +90,18 @@ def test_brevitas_QConv2d(dw, bias, in_channels):
weight_narrow_range=True,
weight_scaling_min_val=2e-16,
)
- weight_tensor = gen_finn_dt_tensor(DataType.INT4, w_shape)
+ weight_tensor = gen_finn_dt_tensor(DataType["INT4"], w_shape)
b_conv.weight = torch.nn.Parameter(torch.from_numpy(weight_tensor).float())
b_conv.eval()
- bo.export_finn_onnx(b_conv, ishape, export_onnx_path)
+ if QONNX_export:
+ m_path = export_onnx_path
+ BrevitasONNXManager.export(b_conv, ishape, m_path)
+ qonnx_cleanup(m_path, out_file=m_path)
+ model = ModelWrapper(m_path)
+ model = model.transform(ConvertQONNXtoFINN())
+ model.save(m_path)
+ else:
+ bo.export_finn_onnx(b_conv, ishape, export_onnx_path)
model = ModelWrapper(export_onnx_path)
model = model.transform(InferShapes())
inp_tensor = np.random.uniform(low=-1.0, high=1.0, size=ishape).astype(np.float32)
diff --git a/tests/brevitas/test_brevitas_qlinear.py b/tests/brevitas/test_brevitas_qlinear.py
index 62ed358dc9..1099d3ec83 100644
--- a/tests/brevitas/test_brevitas_qlinear.py
+++ b/tests/brevitas/test_brevitas_qlinear.py
@@ -27,16 +27,21 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import pytest
-import os
+
+import brevitas.onnx as bo
import numpy as np
+import os
import torch
-import brevitas.onnx as bo
-from brevitas.nn import QuantLinear
from brevitas.core.quant import QuantType
-from finn.core.modelwrapper import ModelWrapper
-from finn.core.datatype import DataType
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
+from brevitas.nn import QuantLinear
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
+
import finn.core.onnx_exec as oxe
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from finn.util.basic import gen_finn_dt_tensor
export_onnx_path = "test_brevitas_qlinear.onnx"
@@ -46,8 +51,11 @@
@pytest.mark.parametrize("out_features", [4])
@pytest.mark.parametrize("in_features", [3])
@pytest.mark.parametrize("w_bits", [4])
-@pytest.mark.parametrize("i_dtype", [DataType.UINT4])
-def test_brevitas_qlinear(bias, out_features, in_features, w_bits, i_dtype):
+@pytest.mark.parametrize("i_dtype", [DataType["UINT4"]])
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_qlinear(
+ bias, out_features, in_features, w_bits, i_dtype, QONNX_export
+):
i_shape = (1, in_features)
w_shape = (out_features, in_features)
b_linear = QuantLinear(
@@ -64,7 +72,15 @@ def test_brevitas_qlinear(bias, out_features, in_features, w_bits, i_dtype):
)
b_linear.weight.data = torch.from_numpy(weight_tensor_fp)
b_linear.eval()
- bo.export_finn_onnx(b_linear, i_shape, export_onnx_path)
+ if QONNX_export:
+ m_path = export_onnx_path
+ BrevitasONNXManager.export(b_linear, i_shape, m_path)
+ qonnx_cleanup(m_path, out_file=m_path)
+ model = ModelWrapper(m_path)
+ model = model.transform(ConvertQONNXtoFINN())
+ model.save(m_path)
+ else:
+ bo.export_finn_onnx(b_linear, i_shape, export_onnx_path)
model = ModelWrapper(export_onnx_path)
model = model.transform(InferShapes())
inp_tensor = gen_finn_dt_tensor(i_dtype, i_shape)
diff --git a/tests/brevitas/test_brevitas_relu_act_export.py b/tests/brevitas/test_brevitas_relu_act_export.py
index 278f05a4a9..57ead3b6c0 100644
--- a/tests/brevitas/test_brevitas_relu_act_export.py
+++ b/tests/brevitas/test_brevitas_relu_act_export.py
@@ -26,19 +26,24 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-import onnx # noqa
+import pytest
+
+import brevitas.onnx as bo
import numpy as np
+import onnx # noqa
+import os
import torch
-import brevitas.onnx as bo
-from brevitas.nn import QuantReLU
from brevitas.core.quant import QuantType
from brevitas.core.restrict_val import RestrictValueType
from brevitas.core.scaling import ScalingImplType
-import pytest
-from finn.core.modelwrapper import ModelWrapper
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
+from brevitas.nn import QuantReLU
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
+
import finn.core.onnx_exec as oxe
+from finn.core.modelwrapper import ModelWrapper
from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
export_onnx_path = "test_brevitas_relu_act_export.onnx"
@@ -48,7 +53,8 @@
@pytest.mark.parametrize(
"scaling_impl_type", [ScalingImplType.CONST, ScalingImplType.PARAMETER]
)
-def test_brevitas_act_export_relu(abits, max_val, scaling_impl_type):
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_act_export_relu(abits, max_val, scaling_impl_type, QONNX_export):
min_val = -1.0
ishape = (1, 15)
@@ -69,8 +75,15 @@ def test_brevitas_act_export_relu(abits, max_val, scaling_impl_type):
)
}
b_act.load_state_dict(checkpoint)
-
- bo.export_finn_onnx(b_act, ishape, export_onnx_path)
+ if QONNX_export:
+ m_path = export_onnx_path
+ BrevitasONNXManager.export(b_act, ishape, m_path)
+ qonnx_cleanup(m_path, out_file=m_path)
+ model = ModelWrapper(m_path)
+ model = model.transform(ConvertQONNXtoFINN())
+ model.save(m_path)
+ else:
+ bo.export_finn_onnx(b_act, ishape, export_onnx_path)
model = ModelWrapper(export_onnx_path)
model = model.transform(InferShapes())
inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
@@ -101,7 +114,10 @@ def test_brevitas_act_export_relu(abits, max_val, scaling_impl_type):
@pytest.mark.parametrize("abits", [2, 4, 8])
@pytest.mark.parametrize("max_val", [1.0, 1.5, 1 - 2 ** (-7)])
@pytest.mark.parametrize("scaling_per_channel", [True, False])
-def test_brevitas_act_export_relu_imagenet(abits, max_val, scaling_per_channel):
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_act_export_relu_imagenet(
+ abits, max_val, scaling_per_channel, QONNX_export
+):
out_channels = 32
ishape = (1, out_channels, 1, 1)
min_val = -1.0
@@ -113,7 +129,7 @@ def test_brevitas_act_export_relu_imagenet(abits, max_val, scaling_per_channel):
restrict_scaling_type=RestrictValueType.LOG_FP,
scaling_min_val=2e-16,
max_val=6.0,
- return_quant_tensor=True,
+ return_quant_tensor=False,
per_channel_broadcastable_shape=(1, out_channels, 1, 1),
)
if scaling_per_channel is True:
@@ -127,7 +143,15 @@ def test_brevitas_act_export_relu_imagenet(abits, max_val, scaling_per_channel):
)
}
b_act.load_state_dict(checkpoint)
- bo.export_finn_onnx(b_act, ishape, export_onnx_path)
+ if QONNX_export:
+ m_path = export_onnx_path
+ BrevitasONNXManager.export(b_act, ishape, m_path)
+ qonnx_cleanup(m_path, out_file=m_path)
+ model = ModelWrapper(m_path)
+ model = model.transform(ConvertQONNXtoFINN())
+ model.save(m_path)
+ else:
+ bo.export_finn_onnx(b_act, ishape, export_onnx_path)
model = ModelWrapper(export_onnx_path)
model = model.transform(InferShapes())
inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
@@ -138,7 +162,7 @@ def test_brevitas_act_export_relu_imagenet(abits, max_val, scaling_per_channel):
produced = odict[model.graph.output[0].name]
inp_tensor = torch.from_numpy(inp_tensor).float()
b_act.eval()
- expected = b_act.forward(inp_tensor).tensor.detach().numpy()
+ expected = b_act.forward(inp_tensor).detach().numpy()
if not np.isclose(produced, expected, atol=1e-3).all():
print(abits, max_val)
print("scale: ", b_act.quant_act_scale().type(torch.FloatTensor).detach())
diff --git a/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py b/tests/brevitas/test_brevitas_scaled_qhardtanh_export.py
similarity index 87%
rename from tests/brevitas/test_brevitas_scaled_QHardTanh_export.py
rename to tests/brevitas/test_brevitas_scaled_qhardtanh_export.py
index b1652c1cdc..c6da2e2e97 100644
--- a/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py
+++ b/tests/brevitas/test_brevitas_scaled_qhardtanh_export.py
@@ -26,19 +26,24 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import pytest
+
+import brevitas.onnx as bo
+import numpy as np
import onnx # noqa
import os
-import numpy as np
import torch
-import brevitas.onnx as bo
-from brevitas.nn import QuantHardTanh
-from brevitas.core.restrict_val import RestrictValueType
from brevitas.core.quant import QuantType
+from brevitas.core.restrict_val import RestrictValueType
from brevitas.core.scaling import ScalingImplType
-import pytest
-from finn.core.modelwrapper import ModelWrapper
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
+from brevitas.nn import QuantHardTanh
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
+
import finn.core.onnx_exec as oxe
+from finn.core.modelwrapper import ModelWrapper
from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
export_onnx_path = "test_brevitas_scaled_QHardTanh_export.onnx"
@@ -50,8 +55,9 @@
@pytest.mark.parametrize(
"scaling_impl_type", [ScalingImplType.CONST, ScalingImplType.PARAMETER]
)
+@pytest.mark.parametrize("QONNX_export", [False, True])
def test_brevitas_act_export_qhardtanh_scaled(
- abits, narrow_range, min_val, max_val, scaling_impl_type
+ abits, narrow_range, min_val, max_val, scaling_impl_type, QONNX_export
):
def get_quant_type(bit_width):
if bit_width is None:
@@ -82,8 +88,15 @@ def get_quant_type(bit_width):
)
}
b_act.load_state_dict(checkpoint)
-
- bo.export_finn_onnx(b_act, ishape, export_onnx_path)
+ if QONNX_export:
+ m_path = export_onnx_path
+ BrevitasONNXManager.export(b_act, ishape, m_path)
+ qonnx_cleanup(m_path, out_file=m_path)
+ model = ModelWrapper(m_path)
+ model = model.transform(ConvertQONNXtoFINN())
+ model.save(m_path)
+ else:
+ bo.export_finn_onnx(b_act, ishape, export_onnx_path)
model = ModelWrapper(export_onnx_path)
model = model.transform(InferShapes())
inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
diff --git a/tests/brevitas/test_brevitas_validate_mobilenet.py b/tests/brevitas/test_brevitas_validate_mobilenet.py
index dd079fe2e2..12e7e7aff2 100644
--- a/tests/brevitas/test_brevitas_validate_mobilenet.py
+++ b/tests/brevitas/test_brevitas_validate_mobilenet.py
@@ -26,33 +26,35 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
+import pytest
+
+import brevitas.onnx as bo
import csv
import numpy as np
-import brevitas.onnx as bo
+import os
import torch
-from finn.util.basic import make_build_dir
-from finn.util.pytorch import NormalizePreProc
-from finn.util.test import get_test_model_trained
+import torchvision.datasets as datasets
+import torchvision.transforms as transforms
+
+import finn.core.onnx_exec as oxe
+import finn.transformation.streamline.absorb as absorb
+import finn.util.imagenet as imagenet_util
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_data_layouts import InferDataLayouts
from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.general import RemoveStaticGraphInputs
-from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.general import (
GiveReadableTensorNames,
GiveUniqueNodeNames,
GiveUniqueParameterTensors,
+ RemoveStaticGraphInputs,
)
-from finn.transformation.merge_onnx_models import MergeONNXModels
-import finn.transformation.streamline.absorb as absorb
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
from finn.transformation.insert_topk import InsertTopK
-import finn.core.onnx_exec as oxe
-import finn.util.imagenet as imagenet_util
-import pytest
-import torchvision.datasets as datasets
-import torchvision.transforms as transforms
+from finn.transformation.merge_onnx_models import MergeONNXModels
+from finn.util.basic import make_build_dir
+from finn.util.pytorch import NormalizePreProc
+from finn.util.test import get_test_model_trained
# normalization (preprocessing) settings for MobileNet-v1 w4a4
mean = [0.485, 0.456, 0.406]
diff --git a/tests/end2end/test_end2end_access_board.py b/tests/end2end/test_end2end_access_board.py
index 21b495c74c..ee15980ffb 100644
--- a/tests/end2end/test_end2end_access_board.py
+++ b/tests/end2end/test_end2end_access_board.py
@@ -27,7 +27,9 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import pytest
+
import subprocess
+
from finn.util.test import get_build_env
diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py
index a6e7ad6422..1fddc7c1c2 100644
--- a/tests/end2end/test_end2end_bnn_pynq.py
+++ b/tests/end2end/test_end2end_bnn_pynq.py
@@ -26,77 +26,80 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
import pytest
+import brevitas.onnx as bo
import numpy as np
# as of Feb'20 there is a bug that segfaults ONNX shape inference if we
# import pytorch before onnx, so we make sure to import onnx first
import onnx # NOQA
+import os
+import subprocess
import torch
-import brevitas.onnx as bo
+import warnings
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
+from collections import OrderedDict
+from dataset_loading import cifar, mnist
+from datetime import datetime
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
+from scipy.stats import linregress
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
import finn.transformation.streamline.absorb as absorb
+from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
from finn.core.onnx_exec import execute_onnx
+from finn.core.throughput_test import throughput_test_remote, throughput_test_rtlsim
from finn.custom_op.registry import getCustomOp
from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
from finn.transformation.fold_constants import FoldConstants
-
+from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
+from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.create_dataflow_partition import (
CreateDataflowPartition,
)
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
+from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.fpgadataflow.set_fifo_depths import InsertAndSetFIFODepths
from finn.transformation.general import (
- RemoveUnusedTensors,
- RemoveStaticGraphInputs,
GiveReadableTensorNames,
GiveUniqueNodeNames,
+ RemoveStaticGraphInputs,
+ RemoveUnusedTensors,
)
+from finn.transformation.infer_data_layouts import InferDataLayouts
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.streamline import Streamline
-from finn.util.test import (
- get_build_env,
- load_test_checkpoint_or_skip,
- get_example_input,
- get_trained_network_and_ishape,
- execute_parent,
- get_topk,
-)
-from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
+from finn.transformation.insert_topk import InsertTopK
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from finn.transformation.merge_onnx_models import MergeONNXModels
+from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
+from finn.transformation.streamline import Streamline
from finn.transformation.streamline.reorder import (
MakeMaxPoolNHWC,
MoveScalarLinearPastInvariants,
)
-import warnings
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
-from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
-from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
-from finn.transformation.fpgadataflow.set_fifo_depths import InsertAndSetFIFODepths
-from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
-from finn.core.modelwrapper import ModelWrapper
-from scipy.stats import linregress
-from finn.core.throughput_test import throughput_test_remote, throughput_test_rtlsim
-from finn.util.pytorch import ToTensor
-from finn.transformation.merge_onnx_models import MergeONNXModels
-from finn.transformation.insert_topk import InsertTopK
-from finn.core.datatype import DataType
-from dataset_loading import mnist, cifar
-from datetime import datetime
-import subprocess
from finn.util.gdrive import upload_to_end2end_dashboard
-from collections import OrderedDict
+from finn.util.pytorch import ToTensor
+from finn.util.test import (
+ execute_parent,
+ get_build_env,
+ get_example_input,
+ get_topk,
+ get_trained_network_and_ishape,
+ load_test_checkpoint_or_skip,
+)
build_dir = os.environ["FINN_BUILD_DIR"]
target_clk_ns = 10
@@ -104,8 +107,14 @@
rtlsim_trace = False
-def get_checkpoint_name(topology, wbits, abits, step):
- return build_dir + "/end2end_%s_w%da%d_%s.onnx" % (topology, wbits, abits, step)
+def get_checkpoint_name(topology, wbits, abits, QONNX_export, step):
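+ # e.g. <FINN_BUILD_DIR>/end2end_tfc_w1a1_QONNX-1_streamline.onnx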
+ return build_dir + "/end2end_%s_w%da%d_QONNX-%d_%s.onnx" % (
+ topology,
+ wbits,
+ abits,
+ QONNX_export,
+ step,
+ )
def get_dashboard_data(topology, wbits, abits):
@@ -303,15 +312,23 @@ def topology2dataset(topology):
@pytest.mark.parametrize("wbits", [1, 2])
@pytest.mark.parametrize("abits", [1, 2])
@pytest.mark.parametrize("topology", ["lfc", "tfc", "cnv"])
+@pytest.mark.parametrize("QONNX_export", [False, True])
class TestEnd2End:
- def test_export(self, topology, wbits, abits):
+ def test_export(self, topology, wbits, abits, QONNX_export):
if wbits > abits:
pytest.skip("No wbits > abits end2end network configs for now")
if topology == "lfc" and not (wbits == 1 and abits == 1):
pytest.skip("Skipping certain lfc configs")
(model, ishape) = get_trained_network_and_ishape(topology, wbits, abits)
- chkpt_name = get_checkpoint_name(topology, wbits, abits, "export")
- bo.export_finn_onnx(model, ishape, chkpt_name)
+ chkpt_name = get_checkpoint_name(topology, wbits, abits, QONNX_export, "export")
+ if QONNX_export:
+ BrevitasONNXManager.export(model, ishape, chkpt_name)
+ qonnx_cleanup(chkpt_name, out_file=chkpt_name)
+ model = ModelWrapper(chkpt_name)
+ model = model.transform(ConvertQONNXtoFINN())
+ model.save(chkpt_name)
+ else:
+ bo.export_finn_onnx(model, ishape, chkpt_name)
nname = "%s_w%da%d" % (topology, wbits, abits)
update_dashboard_data(topology, wbits, abits, "network", nname)
dtstr = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
@@ -323,8 +340,10 @@ def test_export(self, topology, wbits, abits):
update_dashboard_data(topology, wbits, abits, "finn-commit", finn_commit)
assert os.path.isfile(chkpt_name)
- def test_import_and_tidy(self, topology, wbits, abits):
- prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "export")
+ def test_import_and_tidy(self, topology, wbits, abits, QONNX_export):
+ prev_chkpt_name = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "export"
+ )
model = load_test_checkpoint_or_skip(prev_chkpt_name)
model = model.transform(InferShapes())
model = model.transform(FoldConstants())
@@ -332,17 +351,23 @@ def test_import_and_tidy(self, topology, wbits, abits):
model = model.transform(GiveReadableTensorNames())
model = model.transform(InferDataTypes())
model = model.transform(RemoveStaticGraphInputs())
- chkpt = get_checkpoint_name(topology, wbits, abits, "import_and_tidy")
+ chkpt = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "import_and_tidy"
+ )
model.save(chkpt)
- def test_add_pre_and_postproc(self, topology, wbits, abits):
- prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "import_and_tidy")
+ def test_add_pre_and_postproc(self, topology, wbits, abits, QONNX_export):
+ prev_chkpt_name = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "import_and_tidy"
+ )
model = load_test_checkpoint_or_skip(prev_chkpt_name)
global_inp_name = model.graph.input[0].name
ishape = model.get_tensor_shape(global_inp_name)
# preprocessing: torchvision's ToTensor divides uint8 inputs by 255
totensor_pyt = ToTensor()
- chkpt_preproc_name = get_checkpoint_name(topology, wbits, abits, "preproc")
+ chkpt_preproc_name = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "preproc"
+ )
bo.export_finn_onnx(totensor_pyt, ishape, chkpt_preproc_name)
assert os.path.isfile(chkpt_preproc_name)
# join preprocessing and core model
@@ -352,10 +377,12 @@ def test_add_pre_and_postproc(self, topology, wbits, abits):
model = model.transform(MergeONNXModels(pre_model))
# add input quantization annotation: UINT8 for all BNN-PYNQ models
global_inp_name = model.graph.input[0].name
- model.set_tensor_datatype(global_inp_name, DataType.UINT8)
+ model.set_tensor_datatype(global_inp_name, DataType["UINT8"])
# postprocessing: insert Top-1 node at the end
model = model.transform(InsertTopK(k=1))
- chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post")
+ chkpt_name = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "pre_post"
+ )
# tidy-up again
model = model.transform(InferShapes())
model = model.transform(FoldConstants())
@@ -366,8 +393,10 @@ def test_add_pre_and_postproc(self, topology, wbits, abits):
model.save(chkpt_name)
assert os.path.isfile(chkpt_name)
- def test_streamline(self, topology, wbits, abits):
- prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post")
+ def test_streamline(self, topology, wbits, abits, QONNX_export):
+ prev_chkpt_name = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "pre_post"
+ )
model = load_test_checkpoint_or_skip(prev_chkpt_name)
model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold())
# move past any reshapes to be able to streamline input scaling
@@ -383,10 +412,14 @@ def test_streamline(self, topology, wbits, abits):
model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
model = model.transform(InferDataLayouts())
model = model.transform(RemoveUnusedTensors())
- model.save(get_checkpoint_name(topology, wbits, abits, "streamline"))
+ model.save(
+ get_checkpoint_name(topology, wbits, abits, QONNX_export, "streamline")
+ )
- def test_convert_to_hls_layers(self, topology, wbits, abits):
- prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "streamline")
+ def test_convert_to_hls_layers(self, topology, wbits, abits, QONNX_export):
+ prev_chkpt_name = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "streamline"
+ )
model = load_test_checkpoint_or_skip(prev_chkpt_name)
if topology == "tfc" and wbits == 1 and abits == 1:
# use standalone thresholds for tfc-w1a1 to also exercise that option
@@ -408,16 +441,55 @@ def test_convert_to_hls_layers(self, topology, wbits, abits):
model = model.transform(absorb.AbsorbConsecutiveTransposes())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(InferDataLayouts())
- model.save(get_checkpoint_name(topology, wbits, abits, "convert_to_hls_layers"))
+ model.save(
+ get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "convert_to_hls_layers"
+ )
+ )
+ exp_layer_counts = {
+ "tfc": [
+ ("Reshape", 1),
+ ("Thresholding_Batch", 1),
+ ("StreamingFCLayer_Batch", 4),
+ ("LabelSelect_Batch", 1),
+ ],
+ "tfc-1-1": [
+ ("Reshape", 1),
+ ("Thresholding_Batch", 4),
+ ("StreamingFCLayer_Batch", 4),
+ ("LabelSelect_Batch", 1),
+ ],
+ "lfc": [
+ ("Reshape", 1),
+ ("Thresholding_Batch", 1),
+ ("StreamingFCLayer_Batch", 4),
+ ("LabelSelect_Batch", 1),
+ ],
+ "cnv": [
+ ("Transpose", 1),
+ ("Thresholding_Batch", 1),
+ ("ConvolutionInputGenerator", 6),
+ ("StreamingFCLayer_Batch", 9),
+ ("StreamingMaxPool_Batch", 2),
+ ("LabelSelect_Batch", 1),
+ ],
+ }
+ if topology == "tfc" and wbits == 1 and abits == 1:
+ exp_key = "tfc-1-1"
+ else:
+ exp_key = topology
+ exp_layer_counts = exp_layer_counts[exp_key]
+ for (op_type, exp_count) in exp_layer_counts:
+ assert len(model.get_nodes_by_op_type(op_type)) == exp_count
- def test_create_dataflow_partition(self, topology, wbits, abits):
+ def test_create_dataflow_partition(self, topology, wbits, abits, QONNX_export):
prev_chkpt_name = get_checkpoint_name(
- topology, wbits, abits, "convert_to_hls_layers"
+ topology, wbits, abits, QONNX_export, "convert_to_hls_layers"
)
model = load_test_checkpoint_or_skip(prev_chkpt_name)
parent_model = model.transform(CreateDataflowPartition())
parent_model_chkpt = get_checkpoint_name(
- topology, wbits, abits, "dataflow_parent"
+ topology, wbits, abits, QONNX_export, "dataflow_parent"
)
parent_model.save(parent_model_chkpt)
sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
@@ -425,28 +497,36 @@ def test_create_dataflow_partition(self, topology, wbits, abits):
dataflow_model_filename = sdp_node.get_nodeattr("model")
dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename)
dataflow_model_chkpt = get_checkpoint_name(
- topology, wbits, abits, "dataflow_model"
+ topology, wbits, abits, QONNX_export, "dataflow_model"
)
dataflow_model.save(dataflow_model_chkpt)
- def test_fold(self, topology, wbits, abits):
- prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "dataflow_model")
+ def test_fold(self, topology, wbits, abits, QONNX_export):
+ prev_chkpt_name = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "dataflow_model"
+ )
model = load_test_checkpoint_or_skip(prev_chkpt_name)
folding_fxn = get_folding_function(topology, wbits, abits)
model = folding_fxn(model)
- model.save(get_checkpoint_name(topology, wbits, abits, "fold"))
+ model.save(get_checkpoint_name(topology, wbits, abits, QONNX_export, "fold"))
@pytest.mark.slow
@pytest.mark.vivado
- def test_cppsim(self, topology, wbits, abits):
- prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "fold")
+ def test_cppsim(self, topology, wbits, abits, QONNX_export):
+ prev_chkpt_name = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "fold"
+ )
model = load_test_checkpoint_or_skip(prev_chkpt_name)
model = model.transform(PrepareCppSim())
model = model.transform(CompileCppSim())
model = model.transform(SetExecMode("cppsim"))
- cppsim_chkpt = get_checkpoint_name(topology, wbits, abits, "cppsim")
+ cppsim_chkpt = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "cppsim"
+ )
model.save(cppsim_chkpt)
- parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
+ parent_chkpt = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "dataflow_parent"
+ )
(input_tensor_npy, output_tensor_npy) = get_golden_io_pair(
topology, wbits, abits, return_topk=1
)
@@ -456,22 +536,28 @@ def test_cppsim(self, topology, wbits, abits):
@pytest.mark.slow
@pytest.mark.vivado
@pytest.mark.parametrize("kind", ["zynq", "alveo"])
- def test_ipgen(self, topology, wbits, abits, kind):
+ def test_ipgen(self, topology, wbits, abits, QONNX_export, kind):
if kind == "alveo" and ("VITIS_PATH" not in os.environ):
pytest.skip("VITIS_PATH not set")
- prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "fold")
+ prev_chkpt_name = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "fold"
+ )
model = load_test_checkpoint_or_skip(prev_chkpt_name)
test_fpga_part = get_build_env(kind, target_clk_ns)["part"]
model = model.transform(GiveUniqueNodeNames())
model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
model = model.transform(HLSSynthIP())
- model.save(get_checkpoint_name(topology, wbits, abits, "ipgen_" + kind))
+ model.save(
+ get_checkpoint_name(topology, wbits, abits, QONNX_export, "ipgen_" + kind)
+ )
@pytest.mark.slow
@pytest.mark.vivado
@pytest.mark.parametrize("kind", ["zynq", "alveo"])
- def test_set_fifo_depths(self, topology, wbits, abits, kind):
- prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "ipgen_" + kind)
+ def test_set_fifo_depths(self, topology, wbits, abits, QONNX_export, kind):
+ prev_chkpt_name = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "ipgen_" + kind
+ )
model = load_test_checkpoint_or_skip(prev_chkpt_name)
test_fpga_part = get_build_env(kind, target_clk_ns)["part"]
model = model.transform(InsertAndSetFIFODepths(test_fpga_part, target_clk_ns))
@@ -483,14 +569,18 @@ def test_set_fifo_depths(self, topology, wbits, abits, kind):
op_inst = getCustomOp(node)
assert op_inst.get_nodeattr("inFIFODepth") == 0
assert op_inst.get_nodeattr("outFIFODepth") == 0
- model.save(get_checkpoint_name(topology, wbits, abits, "fifodepth_" + kind))
+ model.save(
+ get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "fifodepth_" + kind
+ )
+ )
@pytest.mark.slow
@pytest.mark.vivado
@pytest.mark.parametrize("kind", ["zynq"])
- def test_ipstitch_rtlsim(self, topology, wbits, abits, kind):
+ def test_ipstitch_rtlsim(self, topology, wbits, abits, QONNX_export, kind):
prev_chkpt_name = get_checkpoint_name(
- topology, wbits, abits, "fifodepth_" + kind
+ topology, wbits, abits, QONNX_export, "fifodepth_" + kind
)
model = load_test_checkpoint_or_skip(prev_chkpt_name)
test_fpga_part = get_build_env(kind, target_clk_ns)["part"]
@@ -514,54 +604,61 @@ def test_ipstitch_rtlsim(self, topology, wbits, abits, kind):
)
os.environ["RTLSIM_TRACE_DEPTH"] = "3"
rtlsim_chkpt = get_checkpoint_name(
- topology, wbits, abits, "ipstitch_rtlsim_" + kind
+ topology, wbits, abits, QONNX_export, "ipstitch_rtlsim_" + kind
)
model.save(rtlsim_chkpt)
- parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
+ parent_chkpt = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "dataflow_parent"
+ )
(input_tensor_npy, output_tensor_npy) = get_golden_io_pair(
topology, wbits, abits, return_topk=1
)
y = execute_parent(parent_chkpt, rtlsim_chkpt, input_tensor_npy)
- model = ModelWrapper(rtlsim_chkpt)
- perf["cycles_rtlsim"] = model.get_metadata_prop("cycles_rtlsim")
- # warnings.warn("Estimated & rtlsim performance: " + str(perf))
- # for (k, v) in perf.items():
- # update_dashboard_data(topology, wbits, abits, k, v)
- update_dashboard_data(
- topology, wbits, abits, "cycles_rtlsim", perf["cycles_rtlsim"]
- )
assert np.isclose(y, output_tensor_npy).all()
@pytest.mark.slow
@pytest.mark.vivado
@pytest.mark.parametrize("kind", ["zynq"])
- def test_throughput_rtlsim(self, topology, wbits, abits, kind):
+ def test_throughput_rtlsim(self, topology, wbits, abits, QONNX_export, kind):
prev_chkpt_name = get_checkpoint_name(
- topology, wbits, abits, "ipstitch_rtlsim_" + kind
+ topology, wbits, abits, QONNX_export, "ipstitch_rtlsim_" + kind
)
model = load_test_checkpoint_or_skip(prev_chkpt_name)
n_nodes = len(model.graph.node)
perf_est = model.analysis(dataflow_performance)
- latency = int(model.get_metadata_prop("cycles_rtlsim"))
+ ret_b1 = throughput_test_rtlsim(model, batchsize=1)
+ latency = int(ret_b1["cycles"])
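+ # measure batch-1 latency with a fresh rtlsim run instead of reading the
+ # cycles_rtlsim metadata left over from a previous step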
cycles_per_sample_est = perf_est["max_cycles"]
batchsize = 2 * n_nodes
ret = throughput_test_rtlsim(model, batchsize=batchsize)
res_cycles = ret["cycles"]
est_cycles = latency + cycles_per_sample_est * batchsize
+ # warnings.warn("Estimated & rtlsim performance: " + str(perf))
+ # for (k, v) in perf.items():
+ # update_dashboard_data(topology, wbits, abits, k, v)
+ update_dashboard_data(topology, wbits, abits, "cycles_rtlsim", latency)
assert (abs(res_cycles - est_cycles) / res_cycles) < 0.15
@pytest.mark.slow
@pytest.mark.vivado
@pytest.mark.parametrize("kind", ["zynq"])
- def test_validate_top1(self, topology, wbits, abits, kind):
+ def test_validate_top1(self, topology, wbits, abits, QONNX_export, kind):
if "TEST_END2END_VALIDATE_TOP1" not in os.environ:
pytest.skip("TEST_END2END_VALIDATE_TOP1 not set")
- prepostproc_chkpt = get_checkpoint_name(topology, wbits, abits, "pre_post")
- streamline_chkpt = get_checkpoint_name(topology, wbits, abits, "streamline")
- parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
- cppsim_chkpt = get_checkpoint_name(topology, wbits, abits, "cppsim")
+ prepostproc_chkpt = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "pre_post"
+ )
+ streamline_chkpt = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "streamline"
+ )
+ parent_chkpt = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "dataflow_parent"
+ )
+ cppsim_chkpt = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "cppsim"
+ )
rtlsim_chkpt = get_checkpoint_name(
- topology, wbits, abits, "ipstitch_rtlsim_" + kind
+ topology, wbits, abits, QONNX_export, "ipstitch_rtlsim_" + kind
)
dataset = topology2dataset(topology)
assert measure_top1_accuracy(prepostproc_chkpt, dataset) > 80
@@ -573,11 +670,11 @@ def test_validate_top1(self, topology, wbits, abits, kind):
@pytest.mark.vivado
@pytest.mark.vitis
@pytest.mark.parametrize("kind", ["zynq", "alveo"])
- def test_build(self, topology, wbits, abits, kind):
+ def test_build(self, topology, wbits, abits, QONNX_export, kind):
if kind == "alveo" and ("VITIS_PATH" not in os.environ):
pytest.skip("VITIS_PATH not set")
prev_chkpt_name = get_checkpoint_name(
- topology, wbits, abits, "fifodepth_" + kind
+ topology, wbits, abits, QONNX_export, "fifodepth_" + kind
)
model = load_test_checkpoint_or_skip(prev_chkpt_name)
cfg = get_build_env(kind, target_clk_ns)
@@ -587,11 +684,32 @@ def test_build(self, topology, wbits, abits, kind):
for (k, v) in synth_dct.items():
update_dashboard_data(topology, wbits, abits, k, v)
update_dashboard_data(topology, wbits, abits, "board", cfg["board"])
- model.save(get_checkpoint_name(topology, wbits, abits, "build_" + kind))
+ model.save(
+ get_checkpoint_name(topology, wbits, abits, QONNX_export, "build_" + kind)
+ )
+ @pytest.mark.slow
+ @pytest.mark.vivado
+ @pytest.mark.vitis
@pytest.mark.parametrize("kind", ["zynq", "alveo"])
- def test_deploy(self, topology, wbits, abits, kind):
- prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "build_" + kind)
+ def test_make_pynq_driver(self, topology, wbits, abits, QONNX_export, kind):
+ if kind == "alveo" and ("VITIS_PATH" not in os.environ):
+ pytest.skip("VITIS_PATH not set")
+ prev_chkpt_name = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "build_" + kind
+ )
+ model = load_test_checkpoint_or_skip(prev_chkpt_name)
+ kind_to_driver_platform = {"zynq": "zynq-iodma", "alveo": "alveo"}
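+ # map the test board kind to the platform string expected by MakePYNQDriver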
+ model = model.transform(MakePYNQDriver(kind_to_driver_platform[kind]))
+ model.save(
+ get_checkpoint_name(topology, wbits, abits, QONNX_export, "driver_" + kind)
+ )
+
+ @pytest.mark.parametrize("kind", ["zynq", "alveo"])
+ def test_deploy(self, topology, wbits, abits, QONNX_export, kind):
+ prev_chkpt_name = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "driver_" + kind
+ )
model = load_test_checkpoint_or_skip(prev_chkpt_name)
cfg = get_build_env(kind, target_clk_ns)
if cfg["ip"] == "":
@@ -606,11 +724,15 @@ def test_deploy(self, topology, wbits, abits, kind):
)
)
# save the model to be able to link it to the parent
- model.save(get_checkpoint_name(topology, wbits, abits, "deploy_" + kind))
+ model.save(
+ get_checkpoint_name(topology, wbits, abits, QONNX_export, "deploy_" + kind)
+ )
@pytest.mark.parametrize("kind", ["zynq", "alveo"])
- def test_run_on_hw(self, topology, wbits, abits, kind):
- prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "deploy_" + kind)
+ def test_run_on_hw(self, topology, wbits, abits, QONNX_export, kind):
+ prev_chkpt_name = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "deploy_" + kind
+ )
model = load_test_checkpoint_or_skip(prev_chkpt_name) # NOQA
cfg = get_build_env(kind, target_clk_ns)
if cfg["ip"] == "":
@@ -619,7 +741,7 @@ def test_run_on_hw(self, topology, wbits, abits, kind):
topology, wbits, abits, return_topk=1
)
parent_model = load_test_checkpoint_or_skip(
- get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
+ get_checkpoint_name(topology, wbits, abits, QONNX_export, "dataflow_parent")
)
iname = parent_model.graph.input[0].name
oname = parent_model.graph.output[0].name
@@ -631,8 +753,10 @@ def test_run_on_hw(self, topology, wbits, abits, kind):
assert np.isclose(y, output_tensor_npy).all()
@pytest.mark.parametrize("kind", ["zynq", "alveo"])
- def test_throughput_hw(self, topology, wbits, abits, kind):
- prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "deploy_" + kind)
+ def test_throughput_hw(self, topology, wbits, abits, QONNX_export, kind):
+ prev_chkpt_name = get_checkpoint_name(
+ topology, wbits, abits, QONNX_export, "deploy_" + kind
+ )
end2end_example = "%s_w%da%d_%s" % (topology, wbits, abits, kind)
model = load_test_checkpoint_or_skip(prev_chkpt_name) # NOQA
cfg = get_build_env(kind, target_clk_ns)
@@ -688,9 +812,13 @@ def test_throughput_hw(self, topology, wbits, abits, kind):
ret[largest_bsize]["throughput[images/s]"],
)
- def test_upload_results_to_dashboard(self, topology, wbits, abits):
- dashboard_data = get_dashboard_data(topology, wbits, abits)
- if len(dashboard_data.keys()) > 0:
- upload_to_end2end_dashboard(dashboard_data)
+ def test_upload_results_to_dashboard(self, topology, wbits, abits, QONNX_export):
+ # TODO: Extend the dashboard to also upload QONNX-exported models?
+ if QONNX_export:
+ pytest.skip("Dashboard data upload is disabled for QONNX exported models.")
else:
- pytest.skip("No data to upload to dashboard")
+ dashboard_data = get_dashboard_data(topology, wbits, abits)
+ if len(dashboard_data.keys()) > 0:
+ upload_to_end2end_dashboard(dashboard_data)
+ else:
+ pytest.skip("No data to upload to dashboard")
diff --git a/tests/end2end/test_end2end_cybsec_mlp.py b/tests/end2end/test_end2end_cybsec_mlp.py
index 63d6a91e37..e24d87ca6a 100644
--- a/tests/end2end/test_end2end_cybsec_mlp.py
+++ b/tests/end2end/test_end2end_cybsec_mlp.py
@@ -26,40 +26,45 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import pkg_resources as pk
+
+import pytest
+
+import brevitas.onnx as bo
+import json
+import numpy as np
+import os
+import shutil
+import subprocess
import torch
-from brevitas.nn import QuantLinear, QuantReLU
-from brevitas.quant_tensor import QuantTensor
import torch.nn as nn
-import numpy as np
+import wget
from brevitas.core.quant import QuantType
-from brevitas.nn import QuantIdentity
-import brevitas.onnx as bo
-from finn.core.modelwrapper import ModelWrapper
-from finn.core.datatype import DataType
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
+from brevitas.nn import QuantIdentity, QuantLinear, QuantReLU
+from brevitas.quant_tensor import QuantTensor
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
+
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg
-import os
-import shutil
-from finn.util.test import get_build_env, load_test_checkpoint_or_skip
-import pytest
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from finn.util.basic import make_build_dir
-import pkg_resources as pk
-import json
-import wget
-import subprocess
+from finn.util.test import get_build_env, load_test_checkpoint_or_skip
target_clk_ns = 10
build_kind = "zynq"
build_dir = os.environ["FINN_BUILD_DIR"]
-def get_checkpoint_name(step):
+def get_checkpoint_name(step, QONNX_export):
if step == "build":
# checkpoint for build step is an entire dir
- return build_dir + "/end2end_cybsecmlp_build"
+ return build_dir + "/end2end_cybsecmlp_build_QONNX-%d" % (QONNX_export)
else:
# other checkpoints are onnx files
- return build_dir + "/end2end_cybsecmlp_%s.onnx" % (step)
+ return build_dir + "/end2end_cybsecmlp_QONNX-%d_%s.onnx" % (QONNX_export, step)
class CybSecMLPForExport(nn.Module):
@@ -80,7 +85,8 @@ def forward(self, x):
return out_final
-def test_end2end_cybsec_mlp_export():
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_end2end_cybsec_mlp_export(QONNX_export):
assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
# load up trained net in Brevitas
input_size = 593
@@ -114,7 +120,7 @@ def test_end2end_cybsec_mlp_export():
W_new = np.pad(W_orig, [(0, 0), (0, 7)])
model[0].weight.data = torch.from_numpy(W_new)
model_for_export = CybSecMLPForExport(model)
- export_onnx_path = get_checkpoint_name("export")
+ export_onnx_path = get_checkpoint_name("export", QONNX_export)
input_shape = (1, 600)
# create a QuantTensor instance to mark the input as bipolar during export
input_a = np.random.randint(0, 1, size=input_shape).astype(np.float32)
@@ -125,32 +131,61 @@ def test_end2end_cybsec_mlp_export():
input_t, scale=torch.tensor(scale), bit_width=torch.tensor(1.0), signed=True
)
- bo.export_finn_onnx(
- model_for_export, export_path=export_onnx_path, input_t=input_qt
- )
+ if QONNX_export:
+ # With the BrevitasONNXManager we need to manually set
+ # the FINN DataType at the input
+ BrevitasONNXManager.export(
+ model_for_export, input_shape, export_path=export_onnx_path
+ )
+ model = ModelWrapper(export_onnx_path)
+ model.set_tensor_datatype(model.graph.input[0].name, DataType["BIPOLAR"])
+ model.save(export_onnx_path)
+ qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
+ model = ModelWrapper(export_onnx_path)
+ model = model.transform(ConvertQONNXtoFINN())
+ model.save(export_onnx_path)
+ else:
+ bo.export_finn_onnx(
+ model_for_export, export_path=export_onnx_path, input_t=input_qt
+ )
assert os.path.isfile(export_onnx_path)
# fix input datatype
finn_model = ModelWrapper(export_onnx_path)
finnonnx_in_tensor_name = finn_model.graph.input[0].name
assert tuple(finn_model.get_tensor_shape(finnonnx_in_tensor_name)) == (1, 600)
# verify a few exported ops
- assert finn_model.graph.node[1].op_type == "Add"
- assert finn_model.graph.node[2].op_type == "Div"
- assert finn_model.graph.node[3].op_type == "MatMul"
- assert finn_model.graph.node[-1].op_type == "MultiThreshold"
+ if QONNX_export:
+ # The first "Mul" node doesn't exist in the QONNX export,
+ # because the QuantTensor scale is not exported.
+ # However, this node would have been unity scale anyways and
+ # the models are still equivalent.
+ assert finn_model.graph.node[0].op_type == "Add"
+ assert finn_model.graph.node[1].op_type == "Div"
+ assert finn_model.graph.node[2].op_type == "MatMul"
+ assert finn_model.graph.node[-1].op_type == "MultiThreshold"
+ else:
+ assert finn_model.graph.node[0].op_type == "Mul"
+ assert finn_model.get_initializer(finn_model.graph.node[0].input[1]) == 1.0
+ assert finn_model.graph.node[1].op_type == "Add"
+ assert finn_model.graph.node[2].op_type == "Div"
+ assert finn_model.graph.node[3].op_type == "MatMul"
+ assert finn_model.graph.node[-1].op_type == "MultiThreshold"
# verify datatypes on some tensors
- assert finn_model.get_tensor_datatype(finnonnx_in_tensor_name) == DataType.BIPOLAR
- first_matmul_w_name = finn_model.graph.node[3].input[1]
- assert finn_model.get_tensor_datatype(first_matmul_w_name) == DataType.INT2
+ assert (
+ finn_model.get_tensor_datatype(finnonnx_in_tensor_name) == DataType["BIPOLAR"]
+ )
+ first_matmul_w_name = finn_model.get_nodes_by_op_type("MatMul")[0].input[1]
+ assert finn_model.get_tensor_datatype(first_matmul_w_name) == DataType["INT2"]
@pytest.mark.slow
@pytest.mark.vivado
-def test_end2end_cybsec_mlp_build():
- model_file = get_checkpoint_name("export")
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_end2end_cybsec_mlp_build(QONNX_export):
+ model_file = get_checkpoint_name("export", QONNX_export)
load_test_checkpoint_or_skip(model_file)
build_env = get_build_env(build_kind, target_clk_ns)
- output_dir = make_build_dir("test_end2end_cybsec_mlp_build")
+ output_dir = make_build_dir(f"test_end2end_cybsec_mlp_build_QONNX-{QONNX_export}")
cfg = build.DataflowBuildConfig(
output_dir=output_dir,
@@ -188,13 +223,14 @@ def test_end2end_cybsec_mlp_build():
est_res_dict = json.load(f)
assert est_res_dict["total"]["LUT"] == 11360.0
assert est_res_dict["total"]["BRAM_18K"] == 36.0
- shutil.copytree(output_dir + "/deploy", get_checkpoint_name("build"))
+ shutil.copytree(output_dir + "/deploy", get_checkpoint_name("build", QONNX_export))
-def test_end2end_cybsec_mlp_run_on_hw():
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_end2end_cybsec_mlp_run_on_hw(QONNX_export):
build_env = get_build_env(build_kind, target_clk_ns)
assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
- deploy_dir = get_checkpoint_name("build")
+ deploy_dir = get_checkpoint_name("build", QONNX_export)
if not os.path.isdir(deploy_dir):
pytest.skip(deploy_dir + " not found from previous test step, skipping")
driver_dir = deploy_dir + "/driver"
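
For reference, the QONNX branch of the export test above boils down to a four-step recipe: export via BrevitasONNXManager, hand-set the input's FINN DataType (the generic QONNX export does not carry it), clean up with qonnx_cleanup, then lower with ConvertQONNXtoFINN. A condensed sketch of just that recipe, using the same imports as the test and assuming `model_for_export`, `input_shape`, and `export_onnx_path` are prepared as above:

```python
from brevitas.export.onnx.generic.manager import BrevitasONNXManager
from qonnx.util.cleanup import cleanup as qonnx_cleanup

from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN


def qonnx_export_to_finn(model_for_export, input_shape, export_onnx_path):
    # 1) export the Brevitas module in the generic QONNX dialect
    BrevitasONNXManager.export(
        model_for_export, input_shape, export_path=export_onnx_path
    )
    # 2) the QONNX export does not record the FINN input datatype, so set it manually
    model = ModelWrapper(export_onnx_path)
    model.set_tensor_datatype(model.graph.input[0].name, DataType["BIPOLAR"])
    model.save(export_onnx_path)
    # 3) run the QONNX cleanup transformations on the saved file
    qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
    # 4) convert QONNX Quant-style nodes into FINN's internal ONNX dialect
    model = ModelWrapper(export_onnx_path)
    return model.transform(ConvertQONNXtoFINN())
```
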
diff --git a/tests/end2end/test_end2end_mobilenet_v1.py b/tests/end2end/test_end2end_mobilenet_v1.py
index 5bfe8e1ea1..e459bfbc3e 100644
--- a/tests/end2end/test_end2end_mobilenet_v1.py
+++ b/tests/end2end/test_end2end_mobilenet_v1.py
@@ -25,57 +25,55 @@
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import time
import pytest
-from PIL import Image
-import os
-import numpy as np
import brevitas.onnx as bo
+import numpy as np
+import os
+import time
import torch
+from PIL import Image
-from finn.custom_op.registry import getCustomOp
-from finn.util.pytorch import NormalizePreProc
-from finn.util.test import (
- get_test_model_trained,
- load_test_checkpoint_or_skip,
- resize_smaller_side,
- crop_center,
-)
-
-from finn.core.modelwrapper import ModelWrapper
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+import finn.transformation.streamline.absorb as absorb
+import finn.transformation.streamline.reorder as reorder
from finn.core.datatype import DataType
-
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.core.modelwrapper import ModelWrapper
+from finn.core.onnx_exec import execute_onnx
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.change_datalayout import ChangeDataLayoutQuantAvgPool2d
+from finn.transformation.double_to_single_float import DoubleToSingleFloat
from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.create_dataflow_partition import (
+ CreateDataflowPartition,
+)
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import (
GiveReadableTensorNames,
GiveUniqueNodeNames,
GiveUniqueParameterTensors,
RemoveUnusedTensors,
)
-from finn.transformation.merge_onnx_models import MergeONNXModels
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
from finn.transformation.insert_topk import InsertTopK
-import finn.transformation.streamline.absorb as absorb
-import finn.transformation.streamline.reorder as reorder
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from finn.transformation.merge_onnx_models import MergeONNXModels
+from finn.transformation.remove import RemoveIdentityOps
from finn.transformation.streamline import Streamline
-from finn.transformation.double_to_single_float import DoubleToSingleFloat
-from finn.transformation.streamline.remove import RemoveIdentityOps
from finn.transformation.streamline.collapse_repeated import CollapseRepeatedMul
-from finn.transformation.change_datalayout import ChangeDataLayoutQuantAvgPool2d
from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
-import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-from finn.transformation.fpgadataflow.create_dataflow_partition import (
- CreateDataflowPartition,
+from finn.util.basic import alveo_default_platform, alveo_part_map
+from finn.util.pytorch import NormalizePreProc
+from finn.util.test import (
+ crop_center,
+ get_test_model_trained,
+ load_test_checkpoint_or_skip,
+ resize_smaller_side,
)
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
-from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.core.onnx_exec import execute_onnx
-from finn.util.basic import alveo_part_map, alveo_default_platform
build_dir = os.environ["FINN_BUILD_DIR"]
@@ -99,7 +97,9 @@ def test_end2end_mobilenet_export():
bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
preproc_model = ModelWrapper(preproc_onnx)
# set input finn datatype to UINT8
- preproc_model.set_tensor_datatype(preproc_model.graph.input[0].name, DataType.UINT8)
+ preproc_model.set_tensor_datatype(
+ preproc_model.graph.input[0].name, DataType["UINT8"]
+ )
preproc_model = preproc_model.transform(InferShapes())
preproc_model = preproc_model.transform(FoldConstants())
preproc_model = preproc_model.transform(GiveUniqueNodeNames())
@@ -200,6 +200,7 @@ def test_end2end_mobilenet_lowering():
)
model = model.transform(LowerConvsToMatMul())
model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
+ model = model.transform(absorb.AbsorbConsecutiveTransposes())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
model = model.transform(InferDataTypes())
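
The one functional change in this lowering hunk is the added AbsorbConsecutiveTransposes pass, which removes pairs of Transpose nodes whose permutations cancel out; the rest is the usual functional transform chain, where each `.transform()` call returns a new ModelWrapper. A sketch of the chaining idiom with the passes named above, assuming `model` is an already-loaded ModelWrapper:

```python
import finn.transformation.streamline.absorb as absorb
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul


def lower_convs(model: ModelWrapper) -> ModelWrapper:
    # each transform returns a new ModelWrapper, so passes chain functionally
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    # new in this changeset: drop Transpose pairs that cancel each other
    model = model.transform(absorb.AbsorbConsecutiveTransposes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    return model.transform(InferDataTypes())
```
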
diff --git a/tests/end2end/test_ext_weights.py b/tests/end2end/test_ext_weights.py
index aa0ce7a6c6..550dab4d03 100644
--- a/tests/end2end/test_ext_weights.py
+++ b/tests/end2end/test_ext_weights.py
@@ -26,16 +26,19 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import finn.builder.build_dataflow as build
-import finn.builder.build_dataflow_config as build_cfg
+import pkg_resources as pk
+
+import pytest
+
import os
import shutil
-from finn.util.test import get_build_env, load_test_checkpoint_or_skip
-import pytest
-from finn.util.basic import make_build_dir
-import pkg_resources as pk
-import wget
import subprocess
+import wget
+
+import finn.builder.build_dataflow as build
+import finn.builder.build_dataflow_config as build_cfg
+from finn.util.basic import make_build_dir
+from finn.util.test import get_build_env, load_test_checkpoint_or_skip
target_clk_ns = 10
build_kind = "zynq"
diff --git a/tests/fpgadataflow/test_code_gen_trafo.py b/tests/fpgadataflow/test_code_gen_trafo.py
index cf3e064804..5ddff3d36f 100644
--- a/tests/fpgadataflow/test_code_gen_trafo.py
+++ b/tests/fpgadataflow/test_code_gen_trafo.py
@@ -26,10 +26,11 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
+import pytest
+import os
from onnx import TensorProto, helper
-import pytest
+
import finn.util.basic as util
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
@@ -38,7 +39,7 @@
@pytest.mark.vivado
def test_code_gen_trafo():
- idt = wdt = odt = DataType.BIPOLAR
+ idt = wdt = odt = DataType["BIPOLAR"]
mw = 8
mh = 8
pe = 4
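
The recurring mechanical change in the remaining hunks swaps attribute access (`DataType.BIPOLAR`) for dict-style lookup by string name (`DataType["BIPOLAR"]`). Besides being the new spelling, the string-keyed form lets the name be computed at runtime. A small illustration; `.name` and `.min()` appear elsewhere in this diff, and `.max()` is assumed to be the counterpart in the same API:

```python
from finn.core.datatype import DataType

idt = DataType["BIPOLAR"]      # new string-keyed lookup style
assert idt.name == "BIPOLAR"   # .name round-trips the key (cf. inputDataType=idt.name)
assert idt.min() == -1 and idt.max() == 1

# the key can now come from data, e.g. an attribute read back from an ONNX node
dt_name = "INT4"
wdt = DataType[dt_name]
assert wdt.min() == -8 and wdt.max() == 7
```
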
diff --git a/tests/fpgadataflow/test_compilation_trafo.py b/tests/fpgadataflow/test_compilation_trafo.py
index a12c69285b..81e2ff9a7c 100644
--- a/tests/fpgadataflow/test_compilation_trafo.py
+++ b/tests/fpgadataflow/test_compilation_trafo.py
@@ -26,21 +26,21 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
+import pytest
+import os
from onnx import TensorProto, helper
-import pytest
import finn.util.basic as util
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
@pytest.mark.vivado
def test_compilation_trafo():
- idt = wdt = odt = DataType.BIPOLAR
+ idt = wdt = odt = DataType["BIPOLAR"]
mw = 8
mh = 8
pe = 4
diff --git a/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py
index dfdb21fa72..5cc5f8fa6c 100644
--- a/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py
+++ b/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py
@@ -26,30 +26,29 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from onnx import TensorProto, helper
-import numpy as np
import pytest
-from finn.core.datatype import DataType
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+import numpy as np
+from onnx import TensorProto, helper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.util.basic import gen_finn_dt_tensor
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
-from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
from finn.custom_op.general.im2col import compute_conv_output_dim
from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from finn.util.basic import gen_finn_dt_tensor
# conv_config:
@@ -73,7 +72,7 @@
def test_convert_to_hls_1d_conv_layer(conv_config, depthwise, exec_mode):
pad, kernel_size, stride, dilation = conv_config
np.random.seed(0)
- idt = DataType.UINT4
+ idt = DataType["UINT4"]
in_feature_dim_h, in_feature_dim_w = [10, 1]
in_chn = 16
@@ -102,7 +101,7 @@ def test_convert_to_hls_1d_conv_layer(conv_config, depthwise, exec_mode):
input_shape = [1, in_chn, in_feature_dim_h, in_feature_dim_w]
output_shape = [1, out_chn, out_feature_dim_h, out_feature_dim_w]
- conv_weight_dt = DataType.UINT4
+ conv_weight_dt = DataType["UINT4"]
conv_config = {}
conv_config["dilations"] = [dilation_h, dilation_w]
diff --git a/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py b/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py
index 40f0a620c6..bf690d1d68 100644
--- a/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py
+++ b/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py
@@ -28,24 +28,23 @@
import pytest
+import numpy as np
from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
-import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
-from finn.util.basic import gen_finn_dt_tensor
+from finn.transformation.infer_data_layouts import InferDataLayouts
from finn.transformation.infer_shapes import InferShapes
-import numpy as np
+from finn.util.basic import gen_finn_dt_tensor
def prepare_inputs(input_tensor):
@@ -77,9 +76,13 @@ def make_single_maxpool_modelwrapper(onnx_op_name, ishape, idt, pdt, pshape):
# parameter datatype
-@pytest.mark.parametrize("pdt", [DataType.BIPOLAR, DataType.UINT4, DataType.INT2])
+@pytest.mark.parametrize(
+ "pdt", [DataType["BIPOLAR"], DataType["UINT4"], DataType["INT2"]]
+)
# input datatype
-@pytest.mark.parametrize("idt", [DataType.INT32, DataType.UINT4, DataType.INT4])
+@pytest.mark.parametrize(
+ "idt", [DataType["INT32"], DataType["UINT4"], DataType["INT4"]]
+)
# function
@pytest.mark.parametrize("onnx_op_name", ["Add", "Mul"])
# vector parameter or scalar parameter (broadcast)
@@ -104,10 +107,10 @@ def test_convert_to_hls_channelwise_layer(
# Since there aren't DataTypes with a bit width that is not a power of 2,
# there are cases where the input won't use its full range.
- if idt == DataType.INT32:
- x = gen_finn_dt_tensor(DataType.INT16, (1, ifm_ch, ifm_dim, ifm_dim))
- elif idt == DataType.UINT32:
- x = gen_finn_dt_tensor(DataType.UINT16, (1, ifm_ch, ifm_dim, ifm_dim))
+ if idt == DataType["INT32"]:
+ x = gen_finn_dt_tensor(DataType["INT16"], (1, ifm_ch, ifm_dim, ifm_dim))
+ elif idt == DataType["UINT32"]:
+ x = gen_finn_dt_tensor(DataType["UINT16"], (1, ifm_ch, ifm_dim, ifm_dim))
else:
x = gen_finn_dt_tensor(idt, (1, ifm_ch, ifm_dim, ifm_dim))
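
To make the comment above concrete: the test deliberately feeds INT16-range data into an input declared as INT32, because FINN datatypes only exist at a limited set of bitwidths, so a tensor's declared type may over-approximate the values actually flowing through it. gen_finn_dt_tensor's contract is simply that every drawn value fits the requested type; a quick sketch, with an arbitrarily chosen shape:

```python
import numpy as np

from finn.core.datatype import DataType
from finn.util.basic import gen_finn_dt_tensor

x = gen_finn_dt_tensor(DataType["INT16"], (1, 4, 8, 8))
# all values lie inside the requested datatype's range ...
assert x.min() >= DataType["INT16"].min()
assert x.max() <= DataType["INT16"].max()
# ... and are integer-valued even though the container dtype is float
assert np.array_equal(x, np.round(x))
```
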
diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py b/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py
index 8b7b0a4b6a..9b0f3d68ae 100755
--- a/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py
+++ b/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py
@@ -26,34 +26,30 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from onnx import TensorProto, helper
-import numpy as np
import pytest
-from finn.core.datatype import DataType
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+import numpy as np
+from onnx import TensorProto, helper
+import finn.core.data_layout as DataLayout
import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.util.basic import gen_finn_dt_tensor
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+import finn.transformation.streamline.absorb as absorb
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.general.im2col import compute_conv_output_dim
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.custom_op.general.im2col import compute_conv_output_dim
-
-import finn.transformation.streamline.absorb as absorb
-from finn.transformation.general import RemoveUnusedTensors
-from finn.transformation.streamline import Streamline
+from finn.transformation.general import GiveUniqueNodeNames, RemoveUnusedTensors
from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
+from finn.transformation.streamline import Streamline
from finn.transformation.streamline.reorder import MoveScalarLinearPastInvariants
-
-import finn.core.data_layout as DataLayout
+from finn.util.basic import gen_finn_dt_tensor
def get_multithreshold_rand_params(channels, num_of_thres, seed=None):
@@ -83,10 +79,10 @@ def get_multithreshold_rand_params(channels, num_of_thres, seed=None):
@pytest.mark.slow
def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape):
np.random.seed(0)
- idt = DataType.UINT4
- odt = DataType.UINT4
- conv_weight_dt = DataType.INT4
- fc_weight_dt = DataType.INT4
+ idt = DataType["UINT4"]
+ odt = DataType["UINT4"]
+ conv_weight_dt = DataType["INT4"]
+ fc_weight_dt = DataType["INT4"]
input_shape, kernel_shape, stride, pad = conv_config
kernel_size_h, kernel_size_w = kernel_shape
@@ -190,8 +186,8 @@ def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape):
model.set_tensor_datatype("global_out", odt)
model.set_tensor_datatype("conv_param", conv_weight_dt)
model.set_tensor_datatype("matmul_param", fc_weight_dt)
- model.set_tensor_datatype("thres1_param", DataType.INT32)
- model.set_tensor_datatype("thres2_param", DataType.INT32)
+ model.set_tensor_datatype("thres1_param", DataType["INT32"])
+ model.set_tensor_datatype("thres2_param", DataType["INT32"])
model.set_initializer(
"conv_param", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape)
diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
index d88576583e..d96bc98756 100644
--- a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
+++ b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
@@ -26,30 +26,29 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from onnx import TensorProto, helper
-import numpy as np
import pytest
-from finn.core.datatype import DataType
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+import numpy as np
+from onnx import TensorProto, helper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.util.basic import gen_finn_dt_tensor
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
-from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
from finn.custom_op.general.im2col import compute_conv_output_dim
from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from finn.util.basic import gen_finn_dt_tensor
# conv_config kernel_size,stride, pad
@@ -64,7 +63,7 @@
def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode):
kernel_size, stride, pad = conv_config
np.random.seed(0)
- idt = DataType.UINT4
+ idt = DataType["UINT4"]
in_feature_dim = 7
in_chn = 16
@@ -85,7 +84,7 @@ def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode):
input_shape = [1, in_chn, in_feature_dim, in_feature_dim]
output_shape = [1, out_chn, out_feature_dim, out_feature_dim]
- conv_weight_dt = DataType.UINT4
+ conv_weight_dt = DataType["UINT4"]
conv_config = {}
conv_config["dilations"] = [1, 1]
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
index 15bf160799..3357ee6d6c 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
@@ -26,29 +26,31 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
import pkg_resources as pk
+import pytest
+
import brevitas.onnx as bo
import numpy as np
-import pytest
+import os
+
import finn.core.onnx_exec as oxe
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
import finn.transformation.streamline.absorb as absorb
-from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
from finn.transformation.fold_constants import FoldConstants
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
-from finn.transformation.infer_shapes import InferShapes
from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.streamline import Streamline
+from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
from finn.util.test import get_test_model_trained
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
-from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
-import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
-from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.custom_op.registry import getCustomOp
export_onnx_path_cnv = "test_convert_to_hls_layers_cnv.onnx"
@@ -68,6 +70,7 @@ def test_convert_to_hls_layers_cnv_w1a1(fused_activation):
model = model.transform(LowerConvsToMatMul())
model = model.transform(MakeMaxPoolNHWC())
model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
+ model = model.transform(absorb.AbsorbConsecutiveTransposes())
model = model.transform(ConvertBipolarMatMulToXnorPopcount())
model = model.transform(Streamline())
model = model.transform(InferDataLayouts())
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
index cb66fa7237..a1dc11e0ee 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
@@ -26,15 +26,16 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-from pkgutil import get_data
+import pytest
import brevitas.onnx as bo
import numpy as np
import onnx
import onnx.numpy_helper as nph
+import os
import torch
-import pytest
+from pkgutil import get_data
+
import finn.core.onnx_exec as oxe
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
import finn.transformation.streamline.absorb as absorb
@@ -42,8 +43,8 @@
from finn.custom_op.registry import getCustomOp
from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
from finn.transformation.infer_shapes import InferShapes
@@ -51,7 +52,6 @@
from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
from finn.util.test import get_test_model_trained
-
export_onnx_path = "test_convert_to_hls_layers_fc.onnx"
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py b/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py
index 86875d2ac7..6089901566 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py
@@ -26,42 +26,43 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-import numpy as np
+import pytest
+import numpy as np
+import os
from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fold_constants import FoldConstants
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import (
GiveReadableTensorNames,
GiveUniqueNodeNames,
SortGraph,
)
-from finn.transformation.streamline.reorder import MoveScalarLinearPastInvariants
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.util.basic import gen_finn_dt_tensor
-from finn.util.test import soft_verify_topk
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
from finn.transformation.insert_topk import InsertTopK
-import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
-from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.streamline.absorb import (
- AbsorbScalarMulAddIntoTopK,
AbsorbConsecutiveTransposes,
+ AbsorbScalarMulAddIntoTopK,
)
from finn.transformation.streamline.collapse_repeated import (
- CollapseRepeatedMul,
CollapseRepeatedAdd,
+ CollapseRepeatedMul,
)
-from finn.transformation.streamline.reorder import MoveAddPastMul
-
-import pytest
+from finn.transformation.streamline.reorder import (
+ MoveAddPastMul,
+ MoveScalarLinearPastInvariants,
+)
+from finn.util.basic import gen_finn_dt_tensor
+from finn.util.test import soft_verify_topk
export_onnx_path = "test_output_synthetic.onnx"
@@ -137,7 +138,7 @@ def make_model(ch, ifmdim):
# data types
-@pytest.mark.parametrize("idt", [DataType.UINT2])
+@pytest.mark.parametrize("idt", [DataType["UINT2"]])
# channels
@pytest.mark.parametrize("ch", [16])
# ifmdim
diff --git a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py
index e8f3c3ae32..3efafc040d 100644
--- a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py
+++ b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py
@@ -28,23 +28,24 @@
import pytest
-from onnx import TensorProto, helper
import numpy as np
+from onnx import TensorProto, helper
+
import finn.core.onnx_exec as oxe
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
-from finn.custom_op.registry import getCustomOp
-from finn.util.basic import gen_finn_dt_tensor
from finn.transformation.infer_shapes import InferShapes
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
+from finn.util.basic import gen_finn_dt_tensor
def make_single_maxpool_modelwrapper(k, stride, pad, ifm_ch, ifm_dim, ofm_dim, idt):
@@ -117,9 +118,9 @@ def prepare_inputs(input_tensor):
# input datatype
-@pytest.mark.parametrize("idt", [DataType.UINT4, DataType.INT4, DataType.INT8])
+@pytest.mark.parametrize("idt", [DataType["UINT4"], DataType["INT4"], DataType["INT8"]])
# output datatype
-@pytest.mark.parametrize("odt", [DataType.UINT4, DataType.INT4])
+@pytest.mark.parametrize("odt", [DataType["UINT4"], DataType["INT4"]])
# pool configuration: ( k,stride, pad, ifm_dim )
@pytest.mark.parametrize("pool_config", [(7, 7, 0, 7), (3, 2, 1, 5)])
# input channels
diff --git a/tests/fpgadataflow/test_depthwise_convolution.py b/tests/fpgadataflow/test_depthwise_convolution.py
index 3efeacb6e6..633db668d3 100644
--- a/tests/fpgadataflow/test_depthwise_convolution.py
+++ b/tests/fpgadataflow/test_depthwise_convolution.py
@@ -27,30 +27,29 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import pytest
+
+import numpy as np
import onnx.helper as oh
from onnx import TensorProto
-import numpy as np
-from finn.core.modelwrapper import ModelWrapper
+import finn.core.onnx_exec as oxe
from finn.core.datatype import DataType
-from finn.transformation.infer_shapes import InferShapes
+from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.general.im2col import compute_conv_output_dim
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.convert_to_hls_layers import (
InferConvInpGen,
InferVVAU,
)
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
-from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-
-import finn.core.onnx_exec as oxe
-from finn.custom_op.general.im2col import compute_conv_output_dim
-from finn.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor
-from finn.custom_op.registry import getCustomOp
-
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.general import GiveUniqueNodeNames
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor
def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
@@ -61,14 +60,14 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, total_pad=total_pad)
if act is None:
- odt = DataType.INT32
+ odt = DataType["INT32"]
else:
odt = act
out_act = oh.make_tensor_value_info(
"out_act", TensorProto.FLOAT, [1, ofm_dim, ofm_dim, ofm_ch]
)
T = oh.make_tensor_value_info("T", TensorProto.FLOAT, [ofm_ch, 15])
- tdt = DataType.INT32
+ tdt = DataType["INT32"]
thresh_node = oh.make_node(
"MultiThreshold",
domain="finn.custom_op.general",
@@ -162,7 +161,7 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
# PE
@pytest.mark.parametrize("pe", [1, 2, 4])
# Output activation
-@pytest.mark.parametrize("act", [None, DataType.UINT4])
+@pytest.mark.parametrize("act", [None, DataType["UINT4"]])
# kernel size
@pytest.mark.parametrize("k", [2, 4])
# stride
@@ -172,7 +171,7 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
@pytest.mark.slow
@pytest.mark.vivado
def test_depthwise_conv_hls_cppsim(act, pe, k, stride, padding):
- idt = wdt = DataType.INT4
+ idt = wdt = DataType["INT4"]
ifm_dim = 6
ifm_ch = 4
@@ -204,7 +203,7 @@ def test_depthwise_conv_hls_cppsim(act, pe, k, stride, padding):
# PE
@pytest.mark.parametrize("pe", [1, 2, 4])
# Output activation
-@pytest.mark.parametrize("act", [None, DataType.UINT4])
+@pytest.mark.parametrize("act", [None, DataType["UINT4"]])
# kernel size
@pytest.mark.parametrize("k", [2, 4])
# stride
@@ -214,7 +213,7 @@ def test_depthwise_conv_hls_cppsim(act, pe, k, stride, padding):
@pytest.mark.slow
@pytest.mark.vivado
def test_depthwise_conv_hls_rtlsim(act, pe, k, stride, padding):
- idt = wdt = DataType.INT4
+ idt = wdt = DataType["INT4"]
ifm_dim = 6
ifm_ch = 4
diff --git a/tests/fpgadataflow/test_fpgadataflow_addstreams.py b/tests/fpgadataflow/test_fpgadataflow_addstreams.py
index 0fa156e23b..8cbf54ec18 100644
--- a/tests/fpgadataflow/test_fpgadataflow_addstreams.py
+++ b/tests/fpgadataflow/test_fpgadataflow_addstreams.py
@@ -27,23 +27,23 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import pytest
-import numpy as np
+import numpy as np
from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.util.basic import gen_finn_dt_tensor
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
def make_addstreams_modelwrapper(ch, pe, idt):
@@ -62,7 +62,10 @@ def make_addstreams_modelwrapper(ch, pe, idt):
inputDataType=idt.name,
)
graph = helper.make_graph(
- nodes=[addstreams_node], name="graph", inputs=[inp1, inp2], outputs=[outp],
+ nodes=[addstreams_node],
+ name="graph",
+ inputs=[inp1, inp2],
+ outputs=[outp],
)
model = helper.make_model(graph, producer_name="addstreams-model")
@@ -79,7 +82,7 @@ def prepare_inputs(input1, input2):
# data types
-@pytest.mark.parametrize("idt", [DataType.UINT4, DataType.UINT8])
+@pytest.mark.parametrize("idt", [DataType["UINT4"], DataType["UINT8"]])
# channels
@pytest.mark.parametrize("ch", [1, 64])
# folding
diff --git a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
index e45dfe07c3..949046d4ae 100644
--- a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
+++ b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
@@ -32,19 +32,19 @@
from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.util.basic import gen_finn_dt_tensor
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
def make_modelwrapper(C, pe, idt, odt, pdt, func, vecs):
@@ -85,11 +85,11 @@ def make_modelwrapper(C, pe, idt, odt, pdt, func, vecs):
# activation: None or DataType
-@pytest.mark.parametrize("act", [DataType.INT8])
+@pytest.mark.parametrize("act", [DataType["INT8"]])
# input datatype
-@pytest.mark.parametrize("idt", [DataType.INT4])
+@pytest.mark.parametrize("idt", [DataType["INT4"]])
# param datatype
-@pytest.mark.parametrize("pdt", [DataType.INT4])
+@pytest.mark.parametrize("pdt", [DataType["INT4"]])
# folding, -1 is maximum possible
@pytest.mark.parametrize("nf", [-1, 2])
# number of input features
diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
index 1ec12263e2..47cd7e7ba1 100644
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
@@ -27,25 +27,24 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import pytest
-import numpy as np
+import numpy as np
from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
from finn.util.basic import gen_finn_dt_tensor
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-
def make_single_im2col_modelwrapper(
k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
@@ -132,7 +131,7 @@ def prepare_inputs(input_tensor):
# input datatype
-@pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT2])
+@pytest.mark.parametrize("idt", [DataType["BIPOLAR"], DataType["INT2"]])
# kernel size
@pytest.mark.parametrize("k", [2, 3])
# input dimension
diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py
index 6c83aab0d6..8440ac1fe4 100644
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py
@@ -27,26 +27,25 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import pytest
-import numpy as np
+import numpy as np
from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.general.im2col import compute_conv_output_dim
+from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
from finn.util.basic import gen_finn_dt_tensor
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.custom_op.general.im2col import compute_conv_output_dim
-
def make_single_im2col_modelwrapper(
k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
@@ -145,8 +144,8 @@ def prepare_inputs(input_tensor):
# input datatype
-# @pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT8])
-@pytest.mark.parametrize("idt", [DataType.INT8])
+# @pytest.mark.parametrize("idt", [DataType["BIPOLAR"], DataType["INT8"]])
+@pytest.mark.parametrize("idt", [DataType["INT8"]])
# kernel size
@pytest.mark.parametrize("k", [[4, 1]])
# input dimension
diff --git a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py
index 12505fdf45..73bf1165af 100644
--- a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py
+++ b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py
@@ -27,25 +27,25 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import pytest
-import numpy as np
+import numpy as np
from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
from finn.util.basic import gen_finn_dt_tensor
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
def make_dupstreams_modelwrapper(ch, pe, idim, idt):
@@ -85,7 +85,7 @@ def prepare_inputs(input_tensor, idt):
# data type
-@pytest.mark.parametrize("idt", [DataType.INT4, DataType.UINT16])
+@pytest.mark.parametrize("idt", [DataType["INT4"], DataType["UINT16"]])
# channels
@pytest.mark.parametrize("ch", [64])
# folding
diff --git a/tests/fpgadataflow/test_fpgadataflow_dwc.py b/tests/fpgadataflow/test_fpgadataflow_dwc.py
index 34930e672f..248b591eb4 100644
--- a/tests/fpgadataflow/test_fpgadataflow_dwc.py
+++ b/tests/fpgadataflow/test_fpgadataflow_dwc.py
@@ -30,15 +30,15 @@
from onnx import TensorProto, helper
+import finn.core.onnx_exec as oxe
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
from finn.util.basic import gen_finn_dt_tensor
-import finn.core.onnx_exec as oxe
def make_single_dwc_modelwrapper(Shape, INWidth, OUTWidth, finn_dtype):
@@ -82,7 +82,7 @@ def prepare_inputs(input_tensor, dt):
# outWidth
@pytest.mark.parametrize("OUTWidth", [2, 4])
# finn_dtype
-@pytest.mark.parametrize("finn_dtype", [DataType.BIPOLAR, DataType.INT2])
+@pytest.mark.parametrize("finn_dtype", [DataType["BIPOLAR"], DataType["INT2"]])
@pytest.mark.slow
@pytest.mark.vivado
def test_fpgadataflow_dwc_rtlsim(Shape, INWidth, OUTWidth, finn_dtype):
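
For intuition about what the DWC under test does: a StreamingDataWidthConverter only repacks the stream, changing how many bits arrive per cycle while preserving the payload and its order. A pure-Python sketch of that invariant under one assumed packing convention (LSB-first; the HLS core's exact bit ordering is an implementation detail and may differ):

```python
def repack(words, in_width, out_width):
    # serialize in_width-bit words LSB-first, then cut the bitstream
    # into out_width-bit words; only the framing changes, not the data
    bits = "".join(format(w, "0%db" % in_width)[::-1] for w in words)
    assert len(bits) % out_width == 0, "total bit count must divide evenly"
    return [
        int(bits[i:i + out_width][::-1], 2) for i in range(0, len(bits), out_width)
    ]


# widen 2-bit words to 4-bit words and narrow them back: payload is unchanged
packed = repack([0b01, 0b10, 0b11, 0b00], 2, 4)
assert packed == [0b1001, 0b0011]
assert repack(packed, 4, 2) == [0b01, 0b10, 0b11, 0b00]
```
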
diff --git a/tests/fpgadataflow/test_fpgadataflow_fclayer.py b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
index 00f1ba5d59..02c3a3dc95 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fclayer.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
@@ -31,22 +31,22 @@
import numpy as np
from onnx import TensorProto, helper
-from finn.custom_op.registry import getCustomOp
import finn.core.onnx_exec as oxe
import finn.custom_op.general.xnorpopcount as xp
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
from finn.custom_op.general.multithreshold import multithreshold
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=None):
@@ -59,11 +59,11 @@ def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=Non
# StreamingFC:
# - specify their datatypes as such
# - specify their datatypes as BINARY and use binaryXnorMode
- if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
+ if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
# we'll internally convert weights/inputs to binary and specify the
# datatypes as such, and also set the binaryXnorMode attribute to 1
- export_wdt = DataType.BINARY
- export_idt = DataType.BINARY
+ export_wdt = DataType["BINARY"]
+ export_idt = DataType["BINARY"]
binary_xnor_mode = 1
else:
export_wdt = wdt
@@ -75,7 +75,7 @@ def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=Non
if T is not None:
no_act = 0
node_inp_list = ["inp", "weights", "thresh"]
- if odt == DataType.BIPOLAR:
+ if odt == DataType["BIPOLAR"]:
actval = 0
else:
actval = odt.min()
@@ -123,7 +123,7 @@ def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=Non
def prepare_inputs(input_tensor, idt, wdt):
- if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
+ if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
# convert bipolar to binary
return {"inp": (input_tensor + 1) / 2}
else:
@@ -133,11 +133,11 @@ def prepare_inputs(input_tensor, idt, wdt):
# mem_mode: const or decoupled
@pytest.mark.parametrize("mem_mode", ["const", "decoupled", "external"])
# activation: None or DataType
-@pytest.mark.parametrize("act", [None, DataType.BIPOLAR, DataType.INT4])
+@pytest.mark.parametrize("act", [None, DataType["BIPOLAR"], DataType["INT4"]])
# weight datatype
-@pytest.mark.parametrize("wdt", [DataType.BIPOLAR, DataType.INT4])
+@pytest.mark.parametrize("wdt", [DataType["BIPOLAR"], DataType["INT4"]])
# input datatype
-@pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT4])
+@pytest.mark.parametrize("idt", [DataType["BIPOLAR"], DataType["INT4"]])
# neuron folding, -1 is maximum possible
@pytest.mark.parametrize("nf", [-1, 2, 1])
# synapse folding, -1 is maximum possible
@@ -165,10 +165,10 @@ def test_fpgadataflow_fclayer_cppsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
# no activation, produce accumulators
T = None
tdt = None
- if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
- odt = DataType.UINT32
+ if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+ odt = DataType["UINT32"]
else:
- odt = DataType.INT32
+ odt = DataType["INT32"]
else:
odt = act
(min, max) = calculate_signed_dot_prod_range(idt, wdt, mw)
@@ -177,13 +177,13 @@ def test_fpgadataflow_fclayer_cppsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
# provide non-decreasing thresholds
T = np.sort(T, axis=1)
# generate thresholds for activation
- if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
- tdt = DataType.UINT32
+ if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+ tdt = DataType["UINT32"]
# bias thresholds to be positive
T = np.ceil((T + mw) / 2)
assert (T >= 0).all()
else:
- tdt = DataType.INT32
+ tdt = DataType["INT32"]
model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt)
for node in model.graph.node:
# lookup op_type in registry of CustomOps
@@ -194,14 +194,14 @@ def test_fpgadataflow_fclayer_cppsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
model = model.transform(CompileCppSim())
# prepare input data
input_dict = prepare_inputs(x, idt, wdt)
- if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
+ if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
# convert inputs to binary and use xnorpopcountmatmul
y = xp.xnorpopcountmatmul((x + 1) / 2, (W + 1) / 2)
else:
y = np.matmul(x, W)
if T is not None:
y = multithreshold(y, T)
- if act == DataType.BIPOLAR:
+ if act == DataType["BIPOLAR"]:
# binary to bipolar
y = 2 * y - 1
else:
@@ -220,11 +220,11 @@ def test_fpgadataflow_fclayer_cppsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
# mem_mode: const or decoupled
@pytest.mark.parametrize("mem_mode", ["const", "decoupled", "external"])
# activation: None or DataType
-@pytest.mark.parametrize("act", [None, DataType.BIPOLAR, DataType.INT4])
+@pytest.mark.parametrize("act", [None, DataType["BIPOLAR"], DataType["INT4"]])
# weight datatype
-@pytest.mark.parametrize("wdt", [DataType.BIPOLAR, DataType.INT4])
+@pytest.mark.parametrize("wdt", [DataType["BIPOLAR"], DataType["INT4"]])
# input datatype
-@pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT4])
+@pytest.mark.parametrize("idt", [DataType["BIPOLAR"], DataType["INT4"]])
# neuron folding, -1 is maximum possible
@pytest.mark.parametrize("nf", [-1, 2, 1])
# synapse folding, -1 is maximum possible
@@ -252,10 +252,10 @@ def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
# no activation, produce accumulators
T = None
tdt = None
- if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
- odt = DataType.UINT32
+ if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+ odt = DataType["UINT32"]
else:
- odt = DataType.INT32
+ odt = DataType["INT32"]
else:
odt = act
(min, max) = calculate_signed_dot_prod_range(idt, wdt, mw)
@@ -264,13 +264,13 @@ def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
# provide non-decreasing thresholds
T = np.sort(T, axis=1)
# generate thresholds for activation
- if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
- tdt = DataType.UINT32
+ if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+ tdt = DataType["UINT32"]
# bias thresholds to be positive
T = np.ceil((T + mw) / 2)
assert (T >= 0).all()
else:
- tdt = DataType.INT32
+ tdt = DataType["INT32"]
model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt)
for node in model.graph.node:
# lookup op_type in registry of CustomOps
@@ -279,14 +279,14 @@ def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
# prepare input data
input_dict = prepare_inputs(x, idt, wdt)
- if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
+ if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
# convert inputs to binary and use xnorpopcountmatmul
y = xp.xnorpopcountmatmul((x + 1) / 2, (W + 1) / 2)
else:
y = np.matmul(x, W)
if T is not None:
y = multithreshold(y, T)
- if act == DataType.BIPOLAR:
+ if act == DataType["BIPOLAR"]:
# binary to bipolar
y = 2 * y - 1
else:
@@ -319,11 +319,11 @@ def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
# mem_mode: const or decoupled
@pytest.mark.parametrize("mem_mode", ["decoupled"])
# activation: None or DataType
-@pytest.mark.parametrize("act", [DataType.INT4])
+@pytest.mark.parametrize("act", [DataType["INT4"]])
# weight datatype
-@pytest.mark.parametrize("wdt", [DataType.INT4])
+@pytest.mark.parametrize("wdt", [DataType["INT4"]])
# input datatype
-@pytest.mark.parametrize("idt", [DataType.INT4])
+@pytest.mark.parametrize("idt", [DataType["INT4"]])
# neuron folding, -1 is maximum possible
@pytest.mark.parametrize("nf", [-1])
# synapse folding, -1 is maximum possible
@@ -352,10 +352,10 @@ def test_fpgadataflow_fclayer_large_depth_decoupled_mode_rtlsim(
# no activation, produce accumulators
T = None
tdt = None
- if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
- odt = DataType.UINT32
+ if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+ odt = DataType["UINT32"]
else:
- odt = DataType.INT32
+ odt = DataType["INT32"]
else:
odt = act
(min, max) = calculate_signed_dot_prod_range(idt, wdt, mw)
@@ -364,13 +364,13 @@ def test_fpgadataflow_fclayer_large_depth_decoupled_mode_rtlsim(
# provide non-decreasing thresholds
T = np.sort(T, axis=1)
# generate thresholds for activation
- if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
- tdt = DataType.UINT32
+ if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+ tdt = DataType["UINT32"]
# bias thresholds to be positive
T = np.ceil((T + mw) / 2)
assert (T >= 0).all()
else:
- tdt = DataType.INT32
+ tdt = DataType["INT32"]
model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt)
for node in model.graph.node:
# lookup op_type in registry of CustomOps
@@ -379,14 +379,14 @@ def test_fpgadataflow_fclayer_large_depth_decoupled_mode_rtlsim(
# prepare input data
input_dict = prepare_inputs(x, idt, wdt)
- if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
+ if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
# convert inputs to binary and use xnorpopcountmatmul
y = xp.xnorpopcountmatmul((x + 1) / 2, (W + 1) / 2)
else:
y = np.matmul(x, W)
if T is not None:
y = multithreshold(y, T)
- if act == DataType.BIPOLAR:
+ if act == DataType["BIPOLAR"]:
# binary to bipolar
y = 2 * y - 1
else:
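
The bipolar test cases above binarize both operands and use `xnorpopcountmatmul` as the golden model. A small numpy check of the identity behind that mapping: for length-N bipolar vectors, the XNOR-popcount of the {0,1}-mapped operands equals (dot product + N) / 2, which is why the accumulator datatype can be the unsigned UINT32:

```python
import numpy as np

N = 8
x = np.random.choice([-1.0, 1.0], size=N)
w = np.random.choice([-1.0, 1.0], size=N)
xb, wb = (x + 1) / 2, (w + 1) / 2        # bipolar {-1,+1} -> binary {0,1}
xnor = 1 - np.abs(xb - wb)               # 1 exactly where the bits agree
assert xnor.sum() == (np.dot(x, w) + N) / 2   # popcount == (dot + N) / 2
```
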
diff --git a/tests/fpgadataflow/test_fpgadataflow_fifo.py b/tests/fpgadataflow/test_fpgadataflow_fifo.py
index a603fc0664..4d3074fe14 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fifo.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fifo.py
@@ -27,19 +27,19 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import pytest
-import os
+import os
from onnx import TensorProto, helper
+
+import finn.core.onnx_exec as oxe
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
from finn.util.basic import gen_finn_dt_tensor
-import finn.core.onnx_exec as oxe
-
build_dir = os.environ["FINN_BUILD_DIR"]
test_fpga_part = "xc7z020clg400-1"
@@ -86,7 +86,7 @@ def prepare_inputs(input_tensor, dt):
# outWidth
@pytest.mark.parametrize("depth", [16])
# finn_dtype
-@pytest.mark.parametrize("finn_dtype", [DataType.BIPOLAR]) # , DataType.INT2])
+@pytest.mark.parametrize("finn_dtype", [DataType["BIPOLAR"]]) # , DataType["INT2"]])
@pytest.mark.slow
@pytest.mark.vivado
def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype):
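
Most hunks in this patch only reorder imports into a consistent style: pytest first, then the remaining imports, then first-party `finn` imports, each group alphabetized with plain `import x` before `from x import y`. The exact tool configuration behind this is an assumption, but the resulting shape looks like this:

```python
import pytest                             # test framework in its own group

import numpy as np                        # remaining imports, alphabetized
import os                                 # ("n" sorts before "o")
from onnx import TensorProto, helper

import finn.core.onnx_exec as oxe         # first-party `finn` group:
from finn.core.datatype import DataType   # `import x` before `from x import y`
from finn.core.modelwrapper import ModelWrapper
```
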
diff --git a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
index ab47b30013..b564273c09 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
@@ -27,26 +27,25 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import pytest
-import os
-import numpy as np
+import numpy as np
+import os
from onnx import TensorProto, helper
+
+import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
-from finn.util.basic import gen_finn_dt_tensor
-import finn.core.onnx_exec as oxe
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-
-from finn.util.basic import pynq_part_map
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import gen_finn_dt_tensor, pynq_part_map
test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
test_fpga_part = pynq_part_map[test_pynq_board]
@@ -109,7 +108,7 @@ def make_single_fmpadding_modelwrapper(idim, padding, num_ch, simd, idt, pad_sty
# PaddingStyle: selects behavior when (odim-idim)%2 != 0
@pytest.mark.parametrize("pad_style", [2])
# FINN input datatype
-@pytest.mark.parametrize("idt", [DataType.INT2, DataType.INT4])
+@pytest.mark.parametrize("idt", [DataType["INT2"], DataType["INT4"]])
# execution mode
@pytest.mark.parametrize("mode", ["cppsim", "rtlsim"])
@pytest.mark.slow
diff --git a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py
index 7fca91925a..2299cc6e8f 100644
--- a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py
+++ b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py
@@ -27,23 +27,23 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import pytest
-import numpy as np
+import numpy as np
from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.util.basic import gen_finn_dt_tensor
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
def make_accpool_modelwrapper(ch, pe, idim, idt):
@@ -78,7 +78,7 @@ def prepare_inputs(input_tensor, idt):
# data type
-@pytest.mark.parametrize("idt", [DataType.UINT4, DataType.UINT16])
+@pytest.mark.parametrize("idt", [DataType["UINT4"], DataType["UINT16"]])
# channels
@pytest.mark.parametrize("ch", [64])
# folding
@@ -127,7 +127,7 @@ def test_fpgadataflow_globalaccpool(idt, ch, fold, imdim, exec_mode):
exp_cycles_dict = model.analysis(exp_cycles_per_layer)
exp_cycles = exp_cycles_dict[node.name]
# commented out, needs performance debug:
- # test_fpgadataflow_globalaccpool[rtlsim-7-1-64-DataType.UINT4]
+ # test_fpgadataflow_globalaccpool[rtlsim-7-1-64-DataType["UINT4"]]
# assert False where False =
# (50, 103, atol=(0.1 * 103))
# assert np.isclose(exp_cycles, cycles_rtlsim, atol=0.1 * cycles_rtlsim)
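
Several of these tests close by comparing the analytical cycle estimate against the cycle count measured in rtlsim (commented out above for GlobalAccPool pending performance debug). A sketch of that check, using the names imported in this file and assuming the model was just executed in rtlsim so the `cycles_rtlsim` node attribute is populated:

```python
node = model.get_nodes_by_op_type("GlobalAccPool_Batch")[0]
inst = getCustomOp(node)
cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")    # measured in simulation
exp_cycles = model.analysis(exp_cycles_per_layer)[node.name]  # estimate
# tolerate a 10% gap between the analytical model and the measurement
assert np.isclose(exp_cycles, cycles_rtlsim, atol=0.1 * cycles_rtlsim)
```
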
diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
index 4fa780548a..a4e75f5254 100644
--- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
+++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
@@ -26,41 +26,38 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-
import pytest
import numpy as np
+import os
from onnx import TensorProto, helper
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
from finn.core.onnx_exec import execute_onnx
from finn.custom_op.registry import getCustomOp
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
from finn.transformation.fpgadataflow.create_dataflow_partition import (
CreateDataflowPartition,
)
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.floorplan import Floorplan
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
-from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
+from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext
+from finn.transformation.fpgadataflow.vitis_build import VitisBuild
from finn.transformation.general import GiveUniqueNodeNames
+from finn.transformation.infer_data_layouts import InferDataLayouts
from finn.util.basic import (
+ alveo_default_platform,
+ alveo_part_map,
gen_finn_dt_tensor,
pynq_part_map,
- alveo_part_map,
- alveo_default_platform,
)
from finn.util.pyverilator import pyverilate_stitched_ip
from finn.util.test import load_test_checkpoint_or_skip
-from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
-from finn.transformation.fpgadataflow.floorplan import Floorplan
-from finn.transformation.fpgadataflow.vitis_build import VitisBuild
-from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
-
test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
test_fpga_part = pynq_part_map[test_pynq_board]
@@ -71,9 +68,9 @@
def create_one_fc_model(mem_mode="const"):
# create a model with a StreamingFCLayer instance with no activation
# the wider range of the full accumulator makes debugging a bit easier
- wdt = DataType.INT2
- idt = DataType.INT32
- odt = DataType.INT32
+ wdt = DataType["INT2"]
+ idt = DataType["INT32"]
+ odt = DataType["INT32"]
m = 4
no_act = 1
binary_xnor_mode = 0
@@ -124,9 +121,9 @@ def create_one_fc_model(mem_mode="const"):
def create_two_fc_model(mem_mode="decoupled"):
# create a model with two StreamingFCLayer instances
- wdt = DataType.INT2
- idt = DataType.INT32
- odt = DataType.INT32
+ wdt = DataType["INT2"]
+ idt = DataType["INT32"]
+ odt = DataType["INT32"]
m = 4
actval = 0
no_act = 1
@@ -365,11 +362,6 @@ def test_fpgadataflow_ipstitch_zynqbuild(board):
assert sdp_node.__class__.__name__ == "StreamingDataflowPartition"
assert os.path.isfile(sdp_node.get_nodeattr("model"))
model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model"))
- # generate inputs for remote exec
- iname = "inp"
- idt = model.get_tensor_datatype(iname)
- ishape = model.get_tensor_shape(iname)
- x = gen_finn_dt_tensor(idt, ishape)
# bitfile using ZynqBuild
model = model.transform(ZynqBuild(board, 10))
model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_customzynq.onnx")
@@ -377,22 +369,3 @@ def test_fpgadataflow_ipstitch_zynqbuild(board):
bitfile_name = model.get_metadata_prop("bitfile")
assert bitfile_name is not None
assert os.path.isfile(bitfile_name)
- # deployment
- try:
- ip = os.environ["PYNQ_IP"] # no default for this one; skip if not defined
- if ip == "":
- pytest.skip("PYNQ board IP address not specified")
- username = os.getenv("PYNQ_USERNAME", "xilinx")
- password = os.getenv("PYNQ_PASSWORD", "xilinx")
- port = os.getenv("PYNQ_PORT", 22)
- target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
- model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
- deployment_dir = model.get_metadata_prop("pynq_deploy_dir")
- assert deployment_dir is not None
- assert os.path.isdir(deployment_dir)
- # remote exec
- input_dict = {"global_in": x}
- outp = execute_onnx(model, input_dict)
- assert np.isclose(outp["global_out"], x).all()
- except KeyError:
- pytest.skip("PYNQ board IP address not specified")
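
The deleted tail above covered remote deployment via DeployToPYNQ; the test now stops once the ZynqBuild bitfile exists. For reference, a distilled version of the env-driven guard the removed code used (names exactly as in the deleted lines):

```python
import os
import pytest

try:
    ip = os.environ["PYNQ_IP"]  # no default; missing key means unconfigured
    if ip == "":
        pytest.skip("PYNQ board IP address not specified")
    username = os.getenv("PYNQ_USERNAME", "xilinx")
    password = os.getenv("PYNQ_PASSWORD", "xilinx")
    port = os.getenv("PYNQ_PORT", 22)
    target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
except KeyError:
    pytest.skip("PYNQ board IP address not specified")
```
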
diff --git a/tests/fpgadataflow/test_fpgadataflow_labelselect.py b/tests/fpgadataflow/test_fpgadataflow_labelselect.py
index 5d496dbb33..8ed06c8bdf 100644
--- a/tests/fpgadataflow/test_fpgadataflow_labelselect.py
+++ b/tests/fpgadataflow/test_fpgadataflow_labelselect.py
@@ -27,20 +27,20 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import pytest
-import numpy as np
+import numpy as np
from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.util.basic import gen_finn_dt_tensor
from finn.util.test import soft_verify_topk
@@ -61,7 +61,10 @@ def make_labelselect_modelwrapper(labels, pe, k, idt):
inputDataType=idt.name,
)
graph = helper.make_graph(
- nodes=[labelselect_node], name="graph", inputs=[inp], outputs=[outp],
+ nodes=[labelselect_node],
+ name="graph",
+ inputs=[inp],
+ outputs=[outp],
)
model = helper.make_model(graph, producer_name="thresholding-model")
@@ -78,7 +81,9 @@ def prepare_inputs(input_tensor, idt):
return {"inp": input_tensor}
-@pytest.mark.parametrize("idt", [DataType.UINT8, DataType.UINT16, DataType.INT16])
+@pytest.mark.parametrize(
+ "idt", [DataType["UINT8"], DataType["UINT16"], DataType["INT16"]]
+)
# labels
@pytest.mark.parametrize("labels", [10, 100])
# folding
diff --git a/tests/fpgadataflow/test_fpgadataflow_lookup.py b/tests/fpgadataflow/test_fpgadataflow_lookup.py
new file mode 100644
index 0000000000..45678bbdf2
--- /dev/null
+++ b/tests/fpgadataflow/test_fpgadataflow_lookup.py
@@ -0,0 +1,132 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import numpy as np
+import torch
+from brevitas.export import FINNManager
+from torch import nn
+
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.core.onnx_exec import execute_onnx
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.convert_to_hls_layers import InferLookupLayer
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import gen_finn_dt_tensor
+
+
+def make_lookup_model(embeddings, ishape, idt, edt):
+ num_embeddings, embedding_dim = embeddings.shape
+
+ class LookupModel(nn.Module):
+ def __init__(self, num_embeddings, embedding_dim):
+ super().__init__()
+ self.lookup = nn.Embedding(
+ num_embeddings=num_embeddings, embedding_dim=embedding_dim
+ )
+
+ def forward(self, x):
+ x = self.lookup(x)
+ return x
+
+ torch_model = LookupModel(num_embeddings, embedding_dim)
+ input_t = torch.zeros(ishape, dtype=torch.int64)
+ ret = FINNManager.export(torch_model, input_t=input_t, opset_version=11)
+ model = ModelWrapper(ret)
+ iname = model.graph.input[0].name
+ ename = model.graph.node[0].input[0]
+ model.set_tensor_datatype(iname, idt)
+ eshape = model.get_tensor_shape(ename)
+ assert tuple(eshape) == embeddings.shape
+ model.set_initializer(ename, embeddings)
+ model.set_tensor_datatype(ename, edt)
+ model = model.transform(InferShapes())
+ model = model.transform(InferDataTypes())
+ return model
+
+
+# embedding DataType
+@pytest.mark.parametrize("edt", [DataType["FIXED<8,2>"]])
+# other embedding config
+@pytest.mark.parametrize(
+ "embedding_cfg", [(130, DataType["UINT8"], 25), (5145, DataType["UINT16"], 20)]
+)
+# execution mode
+@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
+@pytest.mark.vivado
+@pytest.mark.slow
+def test_fpgadataflow_lookup(edt, embedding_cfg, exec_mode):
+ ishape = (1, 10)
+ num_embeddings, idt, embedding_dim = embedding_cfg
+ eshape = (num_embeddings, embedding_dim)
+ exp_oshape = tuple(list(ishape) + [embedding_dim])
+ embeddings = gen_finn_dt_tensor(edt, eshape)
+ model = make_lookup_model(embeddings, ishape, idt, edt)
+ assert len(model.graph.node) == 1
+ assert model.graph.node[0].op_type == "Gather"
+ iname = model.graph.input[0].name
+ ename = model.graph.node[0].input[0]
+ oname = model.graph.output[0].name
+ assert model.get_tensor_datatype(iname) == idt
+ assert model.get_tensor_datatype(ename) == edt
+ assert model.get_tensor_datatype(oname) == edt
+ assert tuple(model.get_tensor_shape(ename)) == eshape
+ assert tuple(model.get_tensor_shape(oname)) == exp_oshape
+ assert (model.get_initializer(ename) == embeddings).all()
+ itensor = gen_finn_dt_tensor(idt, ishape).astype(np.int64)
+ itensor = np.clip(itensor, 0, num_embeddings - 1)
+ ret = execute_onnx(model, {iname: itensor})
+ exp_out = np.take(embeddings, itensor, axis=0)
+ assert (exp_out == ret[oname]).all()
+ # call transformation to convert to HLS and verify conversion
+ model = model.transform(InferLookupLayer())
+ assert model.graph.node[0].op_type == "Lookup"
+ assert model.graph.node[0].input[0] == iname
+ assert model.graph.node[0].input[1] == ename
+ assert model.graph.node[0].output[0] == oname
+ if exec_mode == "cppsim":
+ model = model.transform(PrepareCppSim())
+ model = model.transform(CompileCppSim())
+ model = model.transform(SetExecMode("cppsim"))
+ elif exec_mode == "rtlsim":
+ model = model.transform(GiveUniqueNodeNames())
+ model = model.transform(PrepareIP("xc7z020clg400-1", 10))
+ model = model.transform(HLSSynthIP())
+ model = model.transform(SetExecMode("rtlsim"))
+ model = model.transform(PrepareRTLSim())
+ ret_sim = execute_onnx(model, {iname: itensor})
+ assert (exp_out == ret_sim[oname]).all()
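
The new lookup test uses `np.take` along axis 0 as the golden reference for ONNX Gather (the export of `nn.Embedding`). A standalone demo of that equivalence with made-up numbers:

```python
import numpy as np

embeddings = np.arange(12, dtype=np.float32).reshape(4, 3)  # 4 entries, dim 3
indices = np.array([[0, 3, 3, 1]])                          # shape (1, 4)
out = np.take(embeddings, indices, axis=0)                  # shape (1, 4, 3)
assert out.shape == (1, 4, 3)
assert (out[0, 1] == embeddings[3]).all()  # row 3 gathered at position 1
```
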
diff --git a/tests/fpgadataflow/test_fpgadataflow_res_estimate.py b/tests/fpgadataflow/test_fpgadataflow_res_estimate.py
index 9def746c1c..fe52a73fc0 100644
--- a/tests/fpgadataflow/test_fpgadataflow_res_estimate.py
+++ b/tests/fpgadataflow/test_fpgadataflow_res_estimate.py
@@ -54,9 +54,9 @@ def test_res_estimate():
mw = mh = 4
simd = 1
pe = 1
- idt = DataType.INT2
- wdt = DataType.INT2
- odt = DataType.INT2
+ idt = DataType["INT2"]
+ wdt = DataType["INT2"]
+ odt = DataType["INT2"]
actval = odt.min()
inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, mw])
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding.py b/tests/fpgadataflow/test_fpgadataflow_thresholding.py
index bbc7e8227d..341bd3f370 100644
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding.py
@@ -29,27 +29,28 @@
import pytest
import numpy as np
+import os
from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
+from finn.core.rtlsim_exec import rtlsim_exec
from finn.custom_op.general.multithreshold import multithreshold
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.util.basic import gen_finn_dt_tensor
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-import os
from finn.util.pyverilator import axilite_read, axilite_write
-from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
-from finn.core.rtlsim_exec import rtlsim_exec
test_fpga_part = "xc7z020clg400-1"
target_clk_ns = 5
@@ -97,9 +98,9 @@ def make_single_thresholding_modelwrapper(T, pe, idt, odt, actval, mem_mode):
# activation: None or DataType
-@pytest.mark.parametrize("act", [DataType.INT4, DataType.BIPOLAR])
+@pytest.mark.parametrize("act", [DataType["INT4"], DataType["BIPOLAR"]])
# input datatype
-@pytest.mark.parametrize("idt", [DataType.INT16, DataType.UINT16])
+@pytest.mark.parametrize("idt", [DataType["INT16"], DataType["UINT16"]])
# folding, -1 is maximum possible
@pytest.mark.parametrize("nf", [-1, 2, 1])
# number of input features
@@ -124,12 +125,12 @@ def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode, mem_mode):
T = np.random.randint(idt.min(), idt.max() + 1, (ich, n_steps)).astype(np.float32)
# make the vivado_hls threshold bug appear (incorrect rtlsim result when first
# threshold of first channel is zero, while using BIPOLAR output)
- if act == DataType.BIPOLAR:
+ if act == DataType["BIPOLAR"]:
T[0][0] = 0
# provide non-decreasing thresholds
T = np.sort(T, axis=1)
- if odt == DataType.BIPOLAR:
+ if odt == DataType["BIPOLAR"]:
actval = 0
else:
actval = odt.min()
@@ -153,7 +154,7 @@ def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode, mem_mode):
input_dict = {"inp": x}
y = multithreshold(x, T)
- if act == DataType.BIPOLAR:
+ if act == DataType["BIPOLAR"]:
# binary to bipolar
y = 2 * y - 1
else:
@@ -185,8 +186,8 @@ def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode, mem_mode):
@pytest.mark.vivado
def test_runtime_thresholds_single_layer():
mem_mode = "decoupled"
- act = DataType.INT4
- idt = DataType.INT16
+ act = DataType["INT4"]
+ idt = DataType["INT16"]
nf = 8
ich = 16
pe = ich // nf
@@ -201,7 +202,7 @@ def test_runtime_thresholds_single_layer():
# provide non-decreasing thresholds
T = np.sort(T, axis=1)
- if odt == DataType.BIPOLAR:
+ if odt == DataType["BIPOLAR"]:
actval = 0
else:
actval = odt.min()
@@ -216,6 +217,7 @@ def test_runtime_thresholds_single_layer():
old_weight_stream = map(lambda x: int(x, 16), old_weight_stream.split("\n"))
old_weight_stream = list(old_weight_stream)
# need to create stitched IP for runtime weight testing
+ model = model.transform(InsertFIFO(True))
model = model.transform(GiveUniqueNodeNames())
model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
model = model.transform(HLSSynthIP())
@@ -243,7 +245,7 @@ def read_weights(sim):
# old weights (see above)
y = exec_ctx["outp"][1]
expected = multithreshold(in_tensor, T)[1]
- if act == DataType.BIPOLAR:
+ if act == DataType["BIPOLAR"]:
# binary to bipolar
expected = 2 * expected - 1
else:
@@ -272,7 +274,7 @@ def write_weights(sim):
rtlsim_exec(model, exec_ctx, pre_hook=write_weights)
y = exec_ctx["outp"][1]
expected = multithreshold(in_tensor, new_weights)[1]
- if act == DataType.BIPOLAR:
+ if act == DataType["BIPOLAR"]:
# binary to bipolar
expected = 2 * expected - 1
else:
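
The thresholding test now inserts FIFOs before stitching and rewrites the thresholds over AXI-lite from inside the simulation. A sketch of the pre-hook pattern it relies on, assuming a stitched-IP model and a prepared integer weight stream `new_weight_stream`:

```python
def write_weights(sim):
    # push each 32-bit word of the new threshold stream over AXI-lite
    addr = 0
    for nw in new_weight_stream:
        axilite_write(sim, addr, nw, basename="s_axilite_0_")
        addr += 4  # word-aligned 32-bit registers

# the hook runs on the pyverilator sim object before inputs are streamed in
rtlsim_exec(model, exec_ctx, pre_hook=write_weights)
```
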
diff --git a/tests/fpgadataflow/test_fpgadataflow_upsampler.py b/tests/fpgadataflow/test_fpgadataflow_upsampler.py
new file mode 100644
index 0000000000..1709cfe329
--- /dev/null
+++ b/tests/fpgadataflow/test_fpgadataflow_upsampler.py
@@ -0,0 +1,201 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import numpy as np
+import os
+import torch
+from brevitas.export import FINNManager
+from torch import nn
+
+import finn.core.onnx_exec as oxe
+import finn.transformation.streamline.absorb as absorb
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.base import Transformation
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.convert_to_hls_layers import InferUpsample
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.make_input_chanlast import MakeInputChannelsLast
+
+tmpdir = os.environ["FINN_BUILD_DIR"]
+
+
+class ForceDataTypeForTensors(Transformation):
+ """
+ Forces a certain datatype for all tensors in a model.
+ """
+
+ def __init__(self, dType=DataType["INT8"]):
+ super().__init__()
+ self._dType = dType
+
+ def apply(self, model):
+ graph = model.graph
+ for n in graph.node:
+ for inp in n.input:
+ model.set_tensor_datatype(inp, self._dType)
+ for outp in n.output:
+ model.set_tensor_datatype(outp, self._dType)
+
+ return model, False
+
+
+_to_chan_last_args = (0, 2, 3, 1)
+_to_chan_first_args = (0, 3, 1, 2)
+
+
+class TransposeUpsampleIO(Transformation):
+ """
+ Converts the inputs and outputs of all Upsample and Resize nodes
+ from NCHW to NHWC.
+ """
+
+ def apply(self, model):
+ graph = model.graph
+ for n in graph.node:
+ if n.op_type == "Upsample" or n.op_type == "Resize":
+ # Set input shape
+ inp = n.input[0]
+ NCHW_shape = model.get_tensor_shape(inp)
+ NHWC_shape = [NCHW_shape[idx] for idx in _to_chan_last_args]
+ model.set_tensor_shape(inp, NHWC_shape)
+ # Set output shape
+ out = n.output[0]
+ NCHW_shape = model.get_tensor_shape(out)
+ NHWC_shape = [NCHW_shape[idx] for idx in _to_chan_last_args]
+ model.set_tensor_shape(out, NHWC_shape)
+ return model, False
+
+
+class PyTorchTestModel(nn.Module):
+ def __init__(self, upscale_factor=2):
+ super(PyTorchTestModel, self).__init__()
+ self.m = nn.Upsample(
+ scale_factor=upscale_factor,
+ mode="nearest",
+ )
+
+ def forward(self, x):
+ x = self.m(x)
+ return x
+
+
+# param datatype
+@pytest.mark.parametrize("dt", [DataType["INT8"]])
+# Width/height of square input feature map
+@pytest.mark.parametrize("IFMDim", [3, 5])
+# upscaling factor
+@pytest.mark.parametrize("scale", [2, 3])
+# Number of input/output channels
+@pytest.mark.parametrize("NumChannels", [4])
+# execution mode
+@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
+@pytest.mark.vivado
+@pytest.mark.slow
+def test_fpgadataflow_upsampler(dt, IFMDim, scale, NumChannels, exec_mode):
+ atol = 1e-3
+ # Create the test model and inputs for it
+ torch_model = PyTorchTestModel(upscale_factor=scale)
+ input_shape = (1, NumChannels, IFMDim, IFMDim)
+ test_in = torch.arange(0, np.prod(np.asarray(input_shape)))
+ # Limit the input to values valid for the given datatype
+ test_in %= dt.max() - dt.min() + 1
+ test_in += dt.min()
+ # Additionally make sure we always start with 0, for convenience purposes.
+ test_in = torch.roll(test_in, dt.min())
+ test_in = test_in.view(*input_shape).type(torch.float32)
+
+ # Get golden PyTorch and ONNX inputs
+ golden_torch_float = torch_model(test_in)
+ export_path = f"{tmpdir}/Upsample_exported.onnx"
+ FINNManager.export(
+ torch_model, input_shape=input_shape, export_path=export_path, opset_version=11
+ )
+ model = ModelWrapper(export_path)
+ # feed the float tensor directly; FINN datatypes are annotated further down
+ input_dict = {model.graph.input[0].name: test_in.numpy()}
+ golden_output_dict = oxe.execute_onnx(model, input_dict, True)
+ golden_result = golden_output_dict[model.graph.output[0].name]
+
+ # Make sure PyTorch and ONNX match
+ pyTorch_onnx_match = np.isclose(golden_result, golden_torch_float).all()
+ assert pyTorch_onnx_match, "ONNX and PyTorch upsampling output don't match."
+
+ # Prep model for execution
+ model = ModelWrapper(export_path)
+ # model = model.transform(TransposeUpsampleIO())
+ model = model.transform(MakeInputChannelsLast())
+ model = model.transform(InferDataLayouts())
+ model = model.transform(absorb.AbsorbTransposeIntoResize())
+ model = model.transform(InferShapes())
+ model = model.transform(ForceDataTypeForTensors(dType=dt))
+ model = model.transform(GiveUniqueNodeNames())
+ model = model.transform(InferUpsample())
+ model = model.transform(InferShapes())
+ model = model.transform(InferDataTypes())
+
+ # Check that all nodes are UpsampleNearestNeighbour_Batch nodes
+ for n in model.get_finn_nodes():
+ node_check = n.op_type == "UpsampleNearestNeighbour_Batch"
+ assert node_check, "All nodes should be UpsampleNearestNeighbour_Batch nodes."
+
+ # Prep sim
+ if exec_mode == "cppsim":
+ model = model.transform(PrepareCppSim())
+ model = model.transform(CompileCppSim())
+ model = model.transform(SetExecMode("cppsim"))
+ elif exec_mode == "rtlsim":
+ model = model.transform(GiveUniqueNodeNames())
+ model = model.transform(PrepareIP("xc7z020clg400-1", 10))
+ model = model.transform(HLSSynthIP())
+ model = model.transform(SetExecMode("rtlsim"))
+ model = model.transform(PrepareRTLSim())
+ else:
+ raise Exception("Unknown exec_mode")
+
+ # Run sim
+ test_in_transposed = test_in.numpy().transpose(_to_chan_last_args)
+ input_dict = {model.graph.input[0].name: test_in_transposed}
+ output_dict = oxe.execute_onnx(model, input_dict, True)
+ test_result = output_dict[model.graph.output[0].name]
+ output_matches = np.isclose(golden_result, test_result, atol=atol).all()
+
+ if exec_mode == "cppsim":
+ assert output_matches, "Cppsim output doesn't match ONNX/PyTorch."
+ elif exec_mode == "rtlsim":
+ assert output_matches, "Rtlsim output doesn't match ONNX/PyTorch."
diff --git a/tests/fpgadataflow/test_fpgadataflow_vvau.py b/tests/fpgadataflow/test_fpgadataflow_vvau.py
index 4756d4fe18..6f39994bf2 100644
--- a/tests/fpgadataflow/test_fpgadataflow_vvau.py
+++ b/tests/fpgadataflow/test_fpgadataflow_vvau.py
@@ -32,20 +32,19 @@
from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
-from finn.util.basic import gen_finn_dt_tensor
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.general.multithreshold import multithreshold
+from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
-from finn.custom_op.general.multithreshold import multithreshold
-
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
+from finn.util.basic import gen_finn_dt_tensor
def _infer_sparse_weight_tensor(W_conv, k_h, k_w, channels):
@@ -142,11 +141,11 @@ def prepare_inputs(input_tensor):
# mem_mode: const or decoupled
-@pytest.mark.parametrize("idt", [DataType.UINT4, DataType.UINT8])
+@pytest.mark.parametrize("idt", [DataType["UINT4"], DataType["UINT8"]])
# weight datatype
-@pytest.mark.parametrize("wdt", [DataType.INT4])
+@pytest.mark.parametrize("wdt", [DataType["INT4"]])
# activation: None or DataType
-@pytest.mark.parametrize("act", [DataType.UINT4, None])
+@pytest.mark.parametrize("act", [DataType["UINT4"], None])
# PE
@pytest.mark.parametrize("pe", [1, "channels"])
# Input image shape
@@ -188,14 +187,14 @@ def test_fpgadataflow_vvau(
if act is None:
T = None
tdt = None
- odt = DataType.INT32
+ odt = DataType["INT32"]
else:
odt = act
(min_v, max_v) = _calculate_dot_prod_range(idt, wdt, k_h * k_w * channels)
n_steps = act.get_num_possible_values() - 1
T = np.random.randint(min_v, max_v - 1, (channels, n_steps)).astype(np.float32)
T = np.sort(T, axis=1)
- tdt = DataType.INT32
+ tdt = DataType["INT32"]
model = _make_single_vvau_modelwrapper(
W, pe, k_h, k_w, channels, dim_h, dim_w, wdt, idt, odt, T, tdt
diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
index ff88536f47..236eb2a034 100644
--- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
+++ b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
@@ -28,31 +28,34 @@
import pytest
+import numpy as np
from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
from finn.util.basic import gen_finn_dt_tensor
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.custom_op.registry import getCustomOp
-import numpy as np
def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt):
+ k_h, k_w = k
+ ifm_dim_h, ifm_dim_w = ifm_dim
+ ofm_dim_h, ofm_dim_w = ofm_dim
odt = idt
inp = helper.make_tensor_value_info(
- "inp", TensorProto.FLOAT, [1, ifm_dim, ifm_dim, ifm_ch]
+ "inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch]
)
outp = helper.make_tensor_value_info(
- "outp", TensorProto.FLOAT, [1, ofm_dim, ofm_dim, ifm_ch]
+ "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, ifm_ch]
)
mp_node = helper.make_node(
@@ -60,8 +63,8 @@ def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt):
["inp"],
["outp"],
domain="finn.custom_op.general",
- kernel_shape=[k, k],
- strides=[k, k],
+ kernel_shape=[k_h, k_w],
+ strides=[k_h, k_w],
pads=[0, 0, 0, 0],
)
graph = helper.make_graph(
@@ -78,12 +81,15 @@ def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt):
def make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt):
+ k_h, k_w = k
+ ifm_dim_h, ifm_dim_w = ifm_dim
+ ofm_dim_h, ofm_dim_w = ofm_dim
odt = idt
inp = helper.make_tensor_value_info(
- "inp", TensorProto.FLOAT, [1, ifm_dim, ifm_dim, ifm_ch]
+ "inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch]
)
outp = helper.make_tensor_value_info(
- "outp", TensorProto.FLOAT, [1, ofm_dim, ofm_dim, ifm_ch]
+ "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, ifm_ch]
)
smp_node = helper.make_node(
@@ -92,9 +98,9 @@ def make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt):
["outp"],
domain="finn.custom_op.fpgadataflow",
backend="fpgadataflow",
- PoolDim=k,
+ PoolDim=[k_h, k_w],
NumChannels=ifm_ch,
- ImgDim=ifm_dim,
+ ImgDim=[ifm_dim_h, ifm_dim_w],
dataType=idt.name,
)
graph = helper.make_graph(
@@ -115,24 +121,42 @@ def prepare_inputs(input_tensor):
# input datatype
-@pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT2])
+@pytest.mark.parametrize("idt", [DataType["BIPOLAR"], DataType["INT4"]])
+# 1d maxpool
+@pytest.mark.parametrize("dim_1d", [False, True])
# kernel size
@pytest.mark.parametrize("k", [2, 4])
# input dimension
-@pytest.mark.parametrize("ifm_dim", [4, 6, 8])
+@pytest.mark.parametrize("ifm_dim", [4, 8])
# input channels
-@pytest.mark.parametrize("ifm_ch", [1, 2]) # , 2, 3, 4])
+@pytest.mark.parametrize("ifm_ch", [1, 3]) # 1,3
# execution mode
@pytest.mark.parametrize("exec_mode", ["rtlsim", "cppsim"])
@pytest.mark.slow
@pytest.mark.vivado
-def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
- stride = k
- ofm_dim = int(((ifm_dim - k) / stride) + 1)
- if ifm_dim % k != 0:
+def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mode):
+ ifm_dim_h = ifm_dim
+ k_h = k
+ if dim_1d:
+ ifm_dim_w = 1
+ k_w = 1
+ else:
+ ifm_dim_w = ifm_dim_h
+ k_w = k_h
+ ifm_dim = (ifm_dim_h, ifm_dim_w)
+ k = (k_h, k_w)
+
+ stride_h = k_h
+ stride_w = k_w
+ ofm_dim_h = int(((ifm_dim_h - k_h) / stride_h) + 1)
+ ofm_dim_w = int(((ifm_dim_w - k_w) / stride_w) + 1)
+ ofm_dim = (ofm_dim_h, ofm_dim_w)
+ if idt == DataType["BIPOLAR"] and dim_1d:
+ pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)")
+ if ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0:
pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")
- x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
+ x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))
# prepare input data
input_dict = prepare_inputs(x)
@@ -152,7 +176,7 @@ def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
model = model.transform(HLSSynthIP())
model = model.transform(PrepareRTLSim())
else:
- raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")
+ raise Exception("Unknown exec_mode in test_layer_streaming_maxpool_batch")
# execute model
y_produced = oxe.execute_onnx(model, input_dict)["outp"]
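
The reworked test derives each output feature-map axis with the standard no-padding pooling formula, ofm = (ifm - k) / stride + 1, applied per axis so the new 1-D variant (width and kernel width fixed at 1) falls out naturally. A quick worked check:

```python
def ofm_dim(ifm, k, stride):
    # valid (no-padding) pooling output size along one axis
    return (ifm - k) // stride + 1

assert ofm_dim(8, 2, 2) == 4   # square 2-D case, per axis: 8x8 -> 4x4
assert ofm_dim(4, 4, 4) == 1
assert ofm_dim(1, 1, 1) == 1   # degenerate width axis of the 1-D variant
```
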
diff --git a/tests/fpgadataflow/test_runtime_weights.py b/tests/fpgadataflow/test_runtime_weights.py
index c487824964..0196a78d5c 100644
--- a/tests/fpgadataflow/test_runtime_weights.py
+++ b/tests/fpgadataflow/test_runtime_weights.py
@@ -26,20 +26,22 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from finn.util.create import hls_random_mlp_maker
+import pytest
+
+import numpy as np
+import os
+
from finn.core.datatype import DataType
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.custom_op.registry import getCustomOp
from finn.core.rtlsim_exec import rtlsim_exec
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.general import GiveUniqueNodeNames
from finn.util.basic import gen_finn_dt_tensor
-from finn.util.pyverilator import axilite_write, axilite_read
-import numpy as np
-import pytest
-import os
+from finn.util.create import hls_random_mlp_maker
+from finn.util.pyverilator import axilite_read, axilite_write
test_fpga_part = "xc7z020clg400-1"
target_clk_ns = 5
@@ -47,8 +49,8 @@
@pytest.mark.vivado
def test_runtime_weights_single_layer():
- idt = DataType.UINT32
- wdt = DataType.UINT4
+ idt = DataType["UINT32"]
+ wdt = DataType["UINT4"]
act = None
mw = 64
mh = 32
@@ -76,11 +78,11 @@ def test_runtime_weights_single_layer():
os.remove("old_weights.dat")
old_weight_stream = map(lambda x: int(x, 16), old_weight_stream.split("\n"))
old_weight_stream = list(old_weight_stream)
+ model = model.transform(InsertFIFO(True))
model = model.transform(GiveUniqueNodeNames())
model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
model = model.transform(HLSSynthIP())
model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
- model = model.transform(PrepareRTLSim())
model.set_metadata_prop("exec_mode", "rtlsim")
in_tensor = np.asarray(range(mw), dtype=np.float32)
# add two copies of the input tensor as the first one is just used to
diff --git a/tests/fpgadataflow/test_set_folding.py b/tests/fpgadataflow/test_set_folding.py
index fe3a1db8a4..66fd5b43a1 100644
--- a/tests/fpgadataflow/test_set_folding.py
+++ b/tests/fpgadataflow/test_set_folding.py
@@ -27,18 +27,19 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import pytest
+
import numpy as np
from onnx import TensorProto, helper
-from finn.custom_op.registry import getCustomOp
-from finn.core.datatype import DataType
from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
+from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.set_folding import SetFolding
-from finn.transformation.general import GiveUniqueNodeNames
+from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.create_dataflow_partition import (
CreateDataflowPartition,
)
+from finn.transformation.fpgadataflow.set_folding import SetFolding
+from finn.transformation.general import GiveUniqueNodeNames
from finn.util.test import load_test_checkpoint_or_skip
@@ -97,7 +98,8 @@ def make_multi_fclayer_model(ch, wdt, adt, tdt, nnodes):
model.set_tensor_datatype("outp", adt)
for i in range(1, nnodes + 1):
- model.graph.value_info.append(tensors[i])
+ if tensors[i].name != "outp":
+ model.graph.value_info.append(tensors[i])
model.set_initializer("weights_" + str(i - 1), W)
model.set_initializer("thresh_" + str(i - 1), T)
model.set_tensor_datatype("weights_" + str(i - 1), wdt)
@@ -113,7 +115,7 @@ def make_multi_fclayer_model(ch, wdt, adt, tdt, nnodes):
def test_set_folding(target_fps, platform):
model = make_multi_fclayer_model(
- 128, DataType.INT4, DataType.INT2, DataType.INT16, 5
+ 128, DataType["INT4"], DataType["INT2"], DataType["INT16"], 5
)
model = model.transform(GiveUniqueNodeNames())
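
The one-line fix above stops appending the graph's top-level output tensor to `value_info`; a tensor declared both as a graph output and in `value_info` appears twice in the protobuf and can confuse downstream shape bookkeeping. A slightly more general guard in the same spirit (sketch, assuming the surrounding `make_multi_fclayer_model` context):

```python
# never duplicate a declared graph output into value_info
graph_out_names = {o.name for o in model.graph.output}
for i in range(1, nnodes + 1):
    if tensors[i].name not in graph_out_names:
        model.graph.value_info.append(tensors[i])
```
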
diff --git a/tests/transformation/streamline/test_absorb_mul_into_topk.py b/tests/transformation/streamline/test_absorb_mul_into_topk.py
index d0a089f9e5..bc9a31d49c 100644
--- a/tests/transformation/streamline/test_absorb_mul_into_topk.py
+++ b/tests/transformation/streamline/test_absorb_mul_into_topk.py
@@ -30,13 +30,14 @@
import numpy as np
from onnx import TensorProto, helper
+import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames
+from finn.transformation.infer_shapes import InferShapes
from finn.transformation.insert_topk import InsertTopK
from finn.transformation.streamline.absorb import AbsorbScalarMulAddIntoTopK
-import finn.core.onnx_exec as oxe
+
# parameter to indicate if mul parameter is negative or positive
@pytest.mark.parametrize("mul_positive", [True, False])
diff --git a/tests/transformation/streamline/test_absorb_transp_into_flatten.py b/tests/transformation/streamline/test_absorb_transp_into_flatten.py
index cbbb33b460..1e5d5fe580 100644
--- a/tests/transformation/streamline/test_absorb_transp_into_flatten.py
+++ b/tests/transformation/streamline/test_absorb_transp_into_flatten.py
@@ -3,14 +3,15 @@
import numpy as np
from onnx import TensorProto, helper
-from finn.core.modelwrapper import ModelWrapper
import finn.core.data_layout as DataLayout
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
+import finn.core.onnx_exec as oxe
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
from finn.transformation.streamline.absorb import AbsorbTransposeIntoFlatten
-import finn.core.onnx_exec as oxe
+
# permutation of transpose node
@pytest.mark.parametrize("perm", [[0, 2, 3, 1], [0, 1, 3, 2], [3, 2, 0, 1]])
diff --git a/tests/transformation/streamline/test_collapse_repeated_op.py b/tests/transformation/streamline/test_collapse_repeated_op.py
index b74d868f9b..1741ab6b8f 100644
--- a/tests/transformation/streamline/test_collapse_repeated_op.py
+++ b/tests/transformation/streamline/test_collapse_repeated_op.py
@@ -26,6 +26,8 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import pytest
+
import numpy as np
import onnx.helper as oh
from onnx import TensorProto
@@ -34,7 +36,6 @@
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.streamline import CollapseRepeatedAdd, CollapseRepeatedMul
-import pytest
def test_collapse_repeated_op():
@@ -74,7 +75,8 @@ def test_collapse_repeated_op():
@pytest.mark.parametrize(
- "test_args", [("Add", CollapseRepeatedAdd()), ("Mul", CollapseRepeatedMul())],
+ "test_args",
+ [("Add", CollapseRepeatedAdd()), ("Mul", CollapseRepeatedMul())],
)
def test_collapse_repeated_only_if_linear(test_args):
scalar_op = test_args[0]
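The `parametrize` rewrites in this hunk (and in test_move_scalar_past_conv.py below) are mechanical: once the call carries a trailing comma, a black-style formatter keeps one argument per line. A hedged sketch of the before/after shape:

```python
import pytest

# before: one line, ending in a trailing comma
# @pytest.mark.parametrize("test_args", [("Add", add_tf), ("Mul", mul_tf)],)
# after: the trailing comma pins the expanded, one-argument-per-line form
@pytest.mark.parametrize(
    "test_args",
    [("Add", "add"), ("Mul", "mul")],
)
def test_sketch(test_args):
    scalar_op, _ = test_args
    assert scalar_op in ("Add", "Mul")
```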
diff --git a/tests/transformation/streamline/test_linear_past_eltwise.py b/tests/transformation/streamline/test_linear_past_eltwise.py
index f5af2307fb..098b3f9d4f 100644
--- a/tests/transformation/streamline/test_linear_past_eltwise.py
+++ b/tests/transformation/streamline/test_linear_past_eltwise.py
@@ -26,19 +26,18 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-import numpy as np
+import pytest
+import numpy as np
+import os
from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
-from finn.transformation.streamline.reorder import MoveLinearPastEltwiseAdd
from finn.transformation.infer_shapes import InferShapes
-
-import pytest
+from finn.transformation.streamline.reorder import MoveLinearPastEltwiseAdd
export_onnx_path = "test_linear_past_eltwise.onnx"
diff --git a/tests/transformation/streamline/test_move_chw_add_past_conv.py b/tests/transformation/streamline/test_move_chw_add_past_conv.py
index fc64a04e40..e4be8fc383 100644
--- a/tests/transformation/streamline/test_move_chw_add_past_conv.py
+++ b/tests/transformation/streamline/test_move_chw_add_past_conv.py
@@ -29,13 +29,13 @@
import pytest
import numpy as np
-from onnx import helper, TensorProto
+from onnx import TensorProto, helper
+import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.general.im2col import compute_conv_output_dim
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.streamline.reorder import MoveAddPastConv
-from finn.custom_op.general.im2col import compute_conv_output_dim
-import finn.core.onnx_exec as oxe
# input dimension
diff --git a/tests/transformation/streamline/test_move_flatten_past_affine.py b/tests/transformation/streamline/test_move_flatten_past_affine.py
index b2d5e51613..ef01436dc9 100644
--- a/tests/transformation/streamline/test_move_flatten_past_affine.py
+++ b/tests/transformation/streamline/test_move_flatten_past_affine.py
@@ -30,16 +30,17 @@
import numpy as np
from onnx import TensorProto, helper
-from finn.core.modelwrapper import ModelWrapper
-from finn.core.datatype import DataType
import finn.core.data_layout as DataLayout
-from finn.util.basic import gen_finn_dt_tensor
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
+import finn.core.onnx_exec as oxe
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
from finn.transformation.streamline.reorder import MoveFlattenPastAffine
-import finn.core.onnx_exec as oxe
+from finn.util.basic import gen_finn_dt_tensor
+
# data layout
@pytest.mark.parametrize("data_layout", [DataLayout.NHWC, DataLayout.NCHW])
@@ -76,14 +77,14 @@ def test_move_flatten_past_affine(data_layout, batch_size):
model = ModelWrapper(model)
# initialize values
- a0_values = gen_finn_dt_tensor(DataType.TERNARY, [1024, 1000])
+ a0_values = gen_finn_dt_tensor(DataType["TERNARY"], [1024, 1000])
model.set_initializer("a0", a0_values)
a1_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
model.set_initializer("a1", a1_values)
a2_values = np.random.uniform(low=-1, high=1, size=(1000)).astype(np.float32)
model.set_initializer("a2", a2_values)
- model.set_tensor_datatype("inp", DataType.INT2)
+ model.set_tensor_datatype("inp", DataType["INT2"])
model.set_tensor_layout("inp", data_layout)
model = model.transform(InferShapes())
model = model.transform(InferDataTypes())
@@ -92,7 +93,7 @@ def test_move_flatten_past_affine(data_layout, batch_size):
model = model.transform(GiveReadableTensorNames())
# compare execution before and after transformation
- inp_values = gen_finn_dt_tensor(DataType.INT2, ishape)
+ inp_values = gen_finn_dt_tensor(DataType["INT2"], ishape)
idict = {model.graph.input[0].name: inp_values}
model_transformed = model.transform(MoveFlattenPastAffine())
assert oxe.compare_execution(model, model_transformed, idict)
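The closing lines above show the verification pattern shared by most streamline tests in this patch: draw a random tensor of the declared FINN datatype, then check that execution is unchanged by the transformation. A minimal sketch of that pattern, assuming `model` is already a `ModelWrapper` with a single input:

```python
import finn.core.onnx_exec as oxe
from finn.core.datatype import DataType
from finn.transformation.streamline.reorder import MoveFlattenPastAffine
from finn.util.basic import gen_finn_dt_tensor

# random input drawn from the INT2 value set (shape is illustrative;
# `model` is assumed to be a ModelWrapper built as in the test above)
inp_values = gen_finn_dt_tensor(DataType["INT2"], [1, 1024, 1, 1])
idict = {model.graph.input[0].name: inp_values}

# a graph rewrite must not change functional behavior
model_transformed = model.transform(MoveFlattenPastAffine())
assert oxe.compare_execution(model, model_transformed, idict)
```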
diff --git a/tests/transformation/streamline/test_move_flatten_past_topk.py b/tests/transformation/streamline/test_move_flatten_past_topk.py
index 65da92c22d..6086f7804e 100644
--- a/tests/transformation/streamline/test_move_flatten_past_topk.py
+++ b/tests/transformation/streamline/test_move_flatten_past_topk.py
@@ -29,17 +29,18 @@
from onnx import TensorProto, helper
-from finn.core.modelwrapper import ModelWrapper
-from finn.core.datatype import DataType
import finn.core.data_layout as DataLayout
-from finn.util.basic import gen_finn_dt_tensor
-from finn.transformation.insert_topk import InsertTopK
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
+import finn.core.onnx_exec as oxe
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.insert_topk import InsertTopK
from finn.transformation.streamline.reorder import MoveFlattenPastTopK
-import finn.core.onnx_exec as oxe
+from finn.util.basic import gen_finn_dt_tensor
+
# data layout
@pytest.mark.parametrize("data_layout", [DataLayout.NHWC, DataLayout.NCHW])
@@ -59,13 +60,16 @@ def test_move_flatten_past_affine(data_layout, batch_size):
flatten_node = helper.make_node("Flatten", ["inp"], ["outp"])
graph = helper.make_graph(
- nodes=[flatten_node], name="move-flatten-graph", inputs=[inp], outputs=[outp],
+ nodes=[flatten_node],
+ name="move-flatten-graph",
+ inputs=[inp],
+ outputs=[outp],
)
model = helper.make_model(graph, producer_name="move_flatten_model")
model = ModelWrapper(model)
- model.set_tensor_datatype("inp", DataType.INT2)
+ model.set_tensor_datatype("inp", DataType["INT2"])
model.set_tensor_layout("inp", data_layout)
model = model.transform(InsertTopK())
model = model.transform(InferShapes())
@@ -75,7 +79,7 @@ def test_move_flatten_past_affine(data_layout, batch_size):
model = model.transform(GiveReadableTensorNames())
# compare execution before and after transformation
- inp_values = gen_finn_dt_tensor(DataType.INT2, ishape)
+ inp_values = gen_finn_dt_tensor(DataType["INT2"], ishape)
idict = {model.graph.input[0].name: inp_values}
model_transformed = model.transform(MoveFlattenPastTopK())
assert oxe.compare_execution(model, model_transformed, idict)
diff --git a/tests/transformation/streamline/test_move_identical_op_past_join_op.py b/tests/transformation/streamline/test_move_identical_op_past_join_op.py
index 94eb52835b..60e76b8b07 100644
--- a/tests/transformation/streamline/test_move_identical_op_past_join_op.py
+++ b/tests/transformation/streamline/test_move_identical_op_past_join_op.py
@@ -1,12 +1,12 @@
import pytest
-from onnx import helper as oh
from onnx import TensorProto
+from onnx import helper as oh
+import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.streamline.reorder import MoveTransposePastJoinAdd
from finn.util.basic import gen_finn_dt_tensor
-import finn.core.onnx_exec as oxe
def create_model(perm):
diff --git a/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py b/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py
index 7c49baf8cd..fca05afa5b 100644
--- a/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py
+++ b/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py
@@ -1,11 +1,11 @@
-from onnx import TensorProto, helper
import numpy as np
+from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.streamline.reorder import MoveMaxPoolPastMultiThreshold
-from finn.transformation.infer_shapes import InferShapes
from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.streamline.reorder import MoveMaxPoolPastMultiThreshold
def get_multithreshold_rand_params(channels, num_of_thres, seed=None):
diff --git a/tests/transformation/streamline/test_move_mul_past_dw_conv.py b/tests/transformation/streamline/test_move_mul_past_dw_conv.py
index ce0cbcd040..e9e956d845 100644
--- a/tests/transformation/streamline/test_move_mul_past_dw_conv.py
+++ b/tests/transformation/streamline/test_move_mul_past_dw_conv.py
@@ -1,14 +1,15 @@
import pytest
-from onnx import helper, TensorProto
-from finn.custom_op.general.im2col import compute_conv_output_dim
+from onnx import TensorProto, helper
+
import finn.core.onnx_exec as oxe
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.general.im2col import compute_conv_output_dim
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.infer_shapes import InferShapes
-from finn.util.basic import gen_finn_dt_tensor
from finn.transformation.streamline.reorder import MoveMulPastDWConv
+from finn.util.basic import gen_finn_dt_tensor
# input dimension
@@ -67,9 +68,9 @@ def test_move_mul_past_dw_conv(ifm_dim, ifm_ch, k, stride, pad_amt, dw):
model = helper.make_model(graph, producer_name="mulpastconv-model")
model = ModelWrapper(model)
- inp_values = gen_finn_dt_tensor(DataType.INT2, [1, ifm_ch, ifm_dim, ifm_dim])
- mul_values = gen_finn_dt_tensor(DataType.INT2, [1, ifm_ch, 1, 1])
- W_values = gen_finn_dt_tensor(DataType.INT2, W_shape)
+ inp_values = gen_finn_dt_tensor(DataType["INT2"], [1, ifm_ch, ifm_dim, ifm_dim])
+ mul_values = gen_finn_dt_tensor(DataType["INT2"], [1, ifm_ch, 1, 1])
+ W_values = gen_finn_dt_tensor(DataType["INT2"], W_shape)
model.set_initializer("W", W_values)
model.set_initializer("mul", mul_values)
model = model.transform(InferShapes())
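`compute_conv_output_dim`, whose import moves in this hunk, wraps the standard convolution output-size arithmetic. A quick worked check under that assumption, with illustrative parameters:

```python
# (ifm + 2*pad - k) // stride + 1 -- the usual conv output-dim formula,
# assumed here to be what the helper computes
ifm_dim, k, stride, pad_amt = 4, 2, 1, 0
ofm_dim = (ifm_dim + 2 * pad_amt - k) // stride + 1
print(ofm_dim)  # 3
```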
diff --git a/tests/transformation/streamline/test_move_mul_past_maxpool.py b/tests/transformation/streamline/test_move_mul_past_maxpool.py
index f612841020..2c51aaf36a 100755
--- a/tests/transformation/streamline/test_move_mul_past_maxpool.py
+++ b/tests/transformation/streamline/test_move_mul_past_maxpool.py
@@ -1,15 +1,16 @@
-import numpy as np
import pytest
-from onnx import helper, TensorProto
-from finn.custom_op.general.maxpoolnhwc import compute_pool_output_dim
+import numpy as np
+from onnx import TensorProto, helper
+
import finn.core.onnx_exec as oxe
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.general.maxpoolnhwc import compute_pool_output_dim
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.infer_shapes import InferShapes
-from finn.util.basic import gen_finn_dt_tensor
from finn.transformation.streamline.reorder import MoveMulPastMaxPool
+from finn.util.basic import gen_finn_dt_tensor
# input dimension
@@ -65,7 +66,7 @@ def test_move_mul_past_maxpool(ifm_dim, ifm_ch, k, stride, pad, cw, negative):
model = helper.make_model(graph, producer_name="mulpastmaxpool-model")
model = ModelWrapper(model)
- inp_values = gen_finn_dt_tensor(DataType.INT2, [1, ifm_ch, ifm_dim, ifm_dim])
+ inp_values = gen_finn_dt_tensor(DataType["INT2"], [1, ifm_ch, ifm_dim, ifm_dim])
mul_values = np.random.random_sample(mul_shape).astype(np.float32)
if negative == 1:
mul_values = mul_values * (-1)
diff --git a/tests/transformation/streamline/test_move_past_fork.py b/tests/transformation/streamline/test_move_past_fork.py
index f3d37bd60c..364590f933 100644
--- a/tests/transformation/streamline/test_move_past_fork.py
+++ b/tests/transformation/streamline/test_move_past_fork.py
@@ -1,12 +1,12 @@
-from onnx import TensorProto, helper
+import pytest
+
import numpy as np
+from onnx import TensorProto, helper
import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.streamline.reorder import MoveLinearPastFork
from finn.transformation.infer_shapes import InferShapes
-
-import pytest
+from finn.transformation.streamline.reorder import MoveLinearPastFork
@pytest.mark.parametrize("ch", [64, 1])
diff --git a/tests/transformation/streamline/test_move_scalar_past_conv.py b/tests/transformation/streamline/test_move_scalar_past_conv.py
index 94fee7907d..5e2ded0174 100644
--- a/tests/transformation/streamline/test_move_scalar_past_conv.py
+++ b/tests/transformation/streamline/test_move_scalar_past_conv.py
@@ -1,20 +1,19 @@
+import pytest
+
import numpy as np
import onnx.helper as oh
-import pytest
from onnx import TensorProto
import finn.core.onnx_exec as ox
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.streamline import (
- MoveAddPastConv,
- MoveScalarMulPastConv,
-)
+from finn.transformation.streamline import MoveAddPastConv, MoveScalarMulPastConv
@pytest.mark.parametrize("padding", [False, True])
@pytest.mark.parametrize(
- "test_args", [("Add", MoveAddPastConv()), ("Mul", MoveScalarMulPastConv())],
+ "test_args",
+ [("Add", MoveAddPastConv()), ("Mul", MoveScalarMulPastConv())],
)
def test_move_scalar_past_conv(test_args, padding):
scalar_op = test_args[0]
@@ -92,7 +91,8 @@ def test_move_scalar_past_conv(test_args, padding):
@pytest.mark.parametrize(
- "test_args", [("Add", MoveAddPastConv()), ("Mul", MoveScalarMulPastConv())],
+ "test_args",
+ [("Add", MoveAddPastConv()), ("Mul", MoveScalarMulPastConv())],
)
def test_move_scalar_past_conv_only_if_linear(test_args):
scalar_op = test_args[0]
diff --git a/tests/transformation/streamline/test_move_scalar_past_matmul.py b/tests/transformation/streamline/test_move_scalar_past_matmul.py
index e432dbf4ec..b15f84303b 100644
--- a/tests/transformation/streamline/test_move_scalar_past_matmul.py
+++ b/tests/transformation/streamline/test_move_scalar_past_matmul.py
@@ -26,8 +26,9 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import numpy as np
import pytest
+
+import numpy as np
import onnx.helper as oh
from onnx import TensorProto
diff --git a/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py b/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py
index e434fc7d4f..9110ede98d 100644
--- a/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py
+++ b/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py
@@ -3,14 +3,15 @@
import numpy as np
from onnx import TensorProto, helper
-from finn.core.modelwrapper import ModelWrapper
import finn.core.data_layout as DataLayout
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
+import finn.core.onnx_exec as oxe
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
from finn.transformation.streamline.reorder import MoveTransposePastScalarMul
-import finn.core.onnx_exec as oxe
+
# permutation of transpose node
@pytest.mark.parametrize("perm", [[0, 2, 3, 1], [0, 1, 3, 2], [3, 2, 0, 1]])
diff --git a/tests/transformation/streamline/test_remove_identity_ops.py b/tests/transformation/streamline/test_remove_identity_ops.py
deleted file mode 100644
index d02e1d3975..0000000000
--- a/tests/transformation/streamline/test_remove_identity_ops.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import pytest
-
-import numpy as np
-from onnx import helper, TensorProto
-import finn.core.onnx_exec as oxe
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.streamline.remove import RemoveIdentityOps
-from finn.util.basic import gen_finn_dt_tensor
-
-
-def insert_identity_op(model, op, as_first_node, approx):
- if approx:
- zero_val = 0.000001
- one_val = 0.999999
- else:
- zero_val = 0.0
- one_val = 1.0
- if op in ["Add", "Sub"]:
- val = np.asarray([zero_val], dtype=np.float32)
- elif op in ["Mul", "Div"]:
- val = np.asarray([one_val], dtype=np.float32)
- else:
- return
-
- graph = model.graph
- if as_first_node:
- identity_node = helper.make_node(op, ["inp", "value"], ["ident_out"])
- graph.node.insert(0, identity_node)
- graph.node[1].input[0] = "ident_out"
- else:
- identity_node = helper.make_node(op, ["div_out", "value"], ["ident_out"])
- graph.node.insert(3, identity_node)
- graph.node[-1].input[0] = "ident_out"
- model.set_initializer("value", val)
-
- return model
-
-
-# identity operations to be inserted
-@pytest.mark.parametrize("op", ["Add", "Sub", "Mul", "Div"])
-@pytest.mark.parametrize("approx", [False, True])
-@pytest.mark.parametrize("as_first_node", [False, True])
-def test_remove_identity_ops(op, as_first_node, approx):
-
- # set up onnx model
- inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 4, 1, 1])
- mul = helper.make_tensor_value_info("mul", TensorProto.FLOAT, [])
- shape = helper.make_tensor_value_info("shape", TensorProto.FLOAT, [2])
- div = helper.make_tensor_value_info("div", TensorProto.FLOAT, [])
- matmul = helper.make_tensor_value_info("matmul", TensorProto.FLOAT, [4, 2])
- outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, 2])
-
- mul_node = helper.make_node("Mul", ["inp", "mul"], ["mul_out"])
- reshape_node = helper.make_node("Reshape", ["mul_out", "shape"], ["reshape_out"])
- div_node = helper.make_node("Div", ["reshape_out", "div"], ["div_out"])
- matmul_node = helper.make_node("MatMul", ["div_out", "matmul"], ["outp"])
-
- graph = helper.make_graph(
- nodes=[mul_node, reshape_node, div_node, matmul_node],
- name="identity-graph",
- inputs=[inp],
- outputs=[outp],
- value_info=[mul, shape, div, matmul],
- )
-
- model = helper.make_model(graph, producer_name="mulpastconv-model")
- model = ModelWrapper(model)
- inp_values = gen_finn_dt_tensor(DataType.INT2, [1, 4, 1, 1])
- mul_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
- shape_values = np.asarray([1, -1], dtype=np.int64)
- div_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
- matmul_values = gen_finn_dt_tensor(DataType.INT2, [4, 2])
- model.set_initializer("mul", mul_values)
- model.set_initializer("shape", shape_values)
- model.set_initializer("div", div_values)
- model.set_initializer("matmul", matmul_values)
- insert_identity_op(model, op, as_first_node, approx)
- model = model.transform(InferShapes())
- model = model.transform(InferDataTypes())
- idict = {"inp": inp_values}
- odict = oxe.execute_onnx(model, idict)
- out_before = odict["outp"]
- num_of_nodes_before = len(model.graph.node)
-
- model = model.transform(RemoveIdentityOps())
- num_of_nodes_after = len(model.graph.node)
- assert num_of_nodes_before - 1 == num_of_nodes_after
-
- odict = oxe.execute_onnx(model, idict)
- out_after = odict["outp"]
- assert np.isclose(out_before, out_after, atol=1e-3).all()
diff --git a/tests/transformation/streamline/test_round_thresholds.py b/tests/transformation/streamline/test_round_thresholds.py
index f9259908a2..2e57f1c85f 100644
--- a/tests/transformation/streamline/test_round_thresholds.py
+++ b/tests/transformation/streamline/test_round_thresholds.py
@@ -47,17 +47,17 @@ def test_round_thresholds():
model = ModelWrapper(model_def)
threshold_val = np.asarray([[-1.1], [0.7], [2.3], [5.1]], dtype=np.float32)
model.set_initializer("thresholds", threshold_val)
- model.set_tensor_datatype("v", DataType.INT8)
+ model.set_tensor_datatype("v", DataType["INT8"])
inp_dict_f = {"v": np.floor(threshold_val).T}
inp_dict_n = {"v": np.round(threshold_val).T}
inp_dict_c = {"v": np.ceil(threshold_val).T}
orig_f = oxe.execute_onnx(model, inp_dict_f)["out"]
orig_n = oxe.execute_onnx(model, inp_dict_n)["out"]
orig_c = oxe.execute_onnx(model, inp_dict_c)["out"]
- assert model.get_tensor_datatype("thresholds") == DataType.FLOAT32
+ assert model.get_tensor_datatype("thresholds") == DataType["FLOAT32"]
new_model = model.transform(RoundAndClipThresholds())
# rounded up thresholds should have same dtype as input
- assert new_model.get_tensor_datatype("thresholds") == DataType.INT8
+ assert new_model.get_tensor_datatype("thresholds") == DataType["INT8"]
new_f = oxe.execute_onnx(new_model, inp_dict_f)["out"]
new_n = oxe.execute_onnx(new_model, inp_dict_n)["out"]
new_c = oxe.execute_onnx(new_model, inp_dict_c)["out"]
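The assertion rewrite above leans on what `RoundAndClipThresholds` does to the threshold initializer once the input is known to be integer: each float threshold can be pulled up to the next integer without changing which integer inputs clear it, since MultiThreshold counts inputs >= threshold. A worked sketch, assuming the pass ceils as the "rounded up" comment suggests:

```python
import numpy as np

threshold_val = np.asarray([[-1.1], [0.7], [2.3], [5.1]], dtype=np.float32)
# for integer x: x >= 2.3 is equivalent to x >= 3, so ceiling is lossless
print(np.ceil(threshold_val).flatten())  # [-1.  1.  3.  6.]
```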
diff --git a/tests/transformation/streamline/test_sign_to_thres.py b/tests/transformation/streamline/test_sign_to_thres.py
index 4618dffc43..2ffb5713c0 100644
--- a/tests/transformation/streamline/test_sign_to_thres.py
+++ b/tests/transformation/streamline/test_sign_to_thres.py
@@ -26,12 +26,11 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-from pkgutil import get_data
-
import brevitas.onnx as bo
import onnx
import onnx.numpy_helper as nph
+import os
+from pkgutil import get_data
import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper
diff --git a/tests/transformation/streamline/test_streamline_cnv.py b/tests/transformation/streamline/test_streamline_cnv.py
index ca8cf3b1ce..ed25953303 100644
--- a/tests/transformation/streamline/test_streamline_cnv.py
+++ b/tests/transformation/streamline/test_streamline_cnv.py
@@ -26,27 +26,30 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import pkg_resources as pk
+
+import pytest
+
import brevitas.onnx as bo
import numpy as np
-import pytest
-import pkg_resources as pk
import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import (
- RemoveUnusedTensors,
- RemoveStaticGraphInputs,
GiveReadableTensorNames,
GiveUniqueNodeNames,
+ RemoveStaticGraphInputs,
+ RemoveUnusedTensors,
)
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.streamline import Streamline
-from finn.util.test import get_test_model_trained
from finn.util.basic import make_build_dir
+from finn.util.test import get_test_model_trained
export_onnx_path = make_build_dir("test_streamline_cnv_")
+
# act bits
@pytest.mark.parametrize("abits", [1, 2])
# weight bits
diff --git a/tests/transformation/streamline/test_streamline_fc.py b/tests/transformation/streamline/test_streamline_fc.py
index d88bf14913..3563b87c45 100644
--- a/tests/transformation/streamline/test_streamline_fc.py
+++ b/tests/transformation/streamline/test_streamline_fc.py
@@ -26,30 +26,31 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from pkgutil import get_data
+import pytest
import brevitas.onnx as bo
import numpy as np
import onnx
import onnx.numpy_helper as nph
-import pytest
+from pkgutil import get_data
import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import (
- RemoveUnusedTensors,
- RemoveStaticGraphInputs,
GiveReadableTensorNames,
GiveUniqueNodeNames,
+ RemoveStaticGraphInputs,
+ RemoveUnusedTensors,
)
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.streamline import Streamline
-from finn.util.test import get_test_model_trained
from finn.util.basic import make_build_dir
+from finn.util.test import get_test_model_trained
export_onnx_path = make_build_dir("test_streamline_fc_")
+
# act bits
@pytest.mark.parametrize("abits", [1, 2])
# weight bits
diff --git a/tests/transformation/test_batchnorm_to_affine_bnn_pynq.py b/tests/transformation/test_batchnorm_to_affine_bnn_pynq.py
index 7e894c078b..300ef85faa 100644
--- a/tests/transformation/test_batchnorm_to_affine_bnn_pynq.py
+++ b/tests/transformation/test_batchnorm_to_affine_bnn_pynq.py
@@ -26,14 +26,14 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-from pkgutil import get_data
import pkg_resources as pk
import brevitas.onnx as bo
+import numpy as np
import onnx
import onnx.numpy_helper as nph
-import numpy as np
+import os
+from pkgutil import get_data
import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper
diff --git a/tests/transformation/test_infer_data_layouts_cnv.py b/tests/transformation/test_infer_data_layouts_cnv.py
index a8ba81dff6..10bc687d13 100644
--- a/tests/transformation/test_infer_data_layouts_cnv.py
+++ b/tests/transformation/test_infer_data_layouts_cnv.py
@@ -26,22 +26,22 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import brevitas.onnx as bo
import os
-import brevitas.onnx as bo
+import finn.core.data_layout as DataLayout
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
import finn.transformation.streamline.absorb as absorb
-from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from finn.transformation.infer_data_layouts import InferDataLayouts
from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.streamline import Streamline
+from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
from finn.util.test import get_test_model_trained
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
-from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
-import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-from finn.transformation.infer_data_layouts import InferDataLayouts
-import finn.core.data_layout as DataLayout
export_onnx_path_cnv = "test_infer_data_layouts.onnx"
diff --git a/tests/transformation/test_infer_datatypes_lfc.py b/tests/transformation/test_infer_datatypes_lfc.py
index 0802c50c7d..8883dac7a5 100644
--- a/tests/transformation/test_infer_datatypes_lfc.py
+++ b/tests/transformation/test_infer_datatypes_lfc.py
@@ -26,9 +26,8 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-
import brevitas.onnx as bo
+import os
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
@@ -50,12 +49,12 @@ def test_infer_datatypes_lfc():
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
model = model.transform(InferDataTypes())
- assert model.get_tensor_datatype("MatMul_0_out0") == DataType.INT32
- assert model.get_tensor_datatype("MatMul_1_out0") == DataType.INT32
- assert model.get_tensor_datatype("MatMul_2_out0") == DataType.INT32
- assert model.get_tensor_datatype("MatMul_3_out0") == DataType.INT32
- assert model.get_tensor_datatype("MultiThreshold_0_out0") == DataType.BIPOLAR
- assert model.get_tensor_datatype("MultiThreshold_1_out0") == DataType.BIPOLAR
- assert model.get_tensor_datatype("MultiThreshold_2_out0") == DataType.BIPOLAR
- assert model.get_tensor_datatype("MultiThreshold_3_out0") == DataType.BIPOLAR
+ assert model.get_tensor_datatype("MatMul_0_out0") == DataType["INT32"]
+ assert model.get_tensor_datatype("MatMul_1_out0") == DataType["INT32"]
+ assert model.get_tensor_datatype("MatMul_2_out0") == DataType["INT32"]
+ assert model.get_tensor_datatype("MatMul_3_out0") == DataType["INT32"]
+ assert model.get_tensor_datatype("MultiThreshold_0_out0") == DataType["BIPOLAR"]
+ assert model.get_tensor_datatype("MultiThreshold_1_out0") == DataType["BIPOLAR"]
+ assert model.get_tensor_datatype("MultiThreshold_2_out0") == DataType["BIPOLAR"]
+ assert model.get_tensor_datatype("MultiThreshold_3_out0") == DataType["BIPOLAR"]
os.remove(export_onnx_path)
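The INT32/BIPOLAR expectations above follow from how `InferDataTypes` reasons about matmuls of bipolar operands: a dot product of ±1-valued vectors is always an integer, bounded by the vector length, so a wide signed integer type is a safe inference, and each MultiThreshold restores BIPOLAR. A small numeric illustration:

```python
import numpy as np

x = np.array([+1, -1, +1, +1], dtype=np.float32)
w = np.array([-1, -1, +1, -1], dtype=np.float32)
print(x @ w)  # 0.0 -- always an integer in [-4, 4] for length-4 operands
```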
diff --git a/tests/transformation/test_qonnx_to_finn.py b/tests/transformation/test_qonnx_to_finn.py
new file mode 100644
index 0000000000..df7d63e3d2
--- /dev/null
+++ b/tests/transformation/test_qonnx_to_finn.py
@@ -0,0 +1,175 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import pkg_resources as pk
+
+import pytest
+
+import brevitas.export.onnx.generic as b_onnx
+import brevitas.onnx as bo
+import numpy as np
+import onnx
+import onnx.numpy_helper as nph
+import torch
+from pkgutil import get_data
+from qonnx.util.cleanup import cleanup
+from tempfile import TemporaryDirectory
+
+import finn.core.onnx_exec as oxe
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.fold_constants import FoldConstants
+from finn.transformation.general import GiveUniqueNodeNames, RemoveStaticGraphInputs
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
+from finn.util.test import get_test_model_trained
+
+
+def get_brev_model_and_sample_inputs(model_name, wbits, abits):
+ if "FC" in model_name:
+ in_shape = (1, 1, 28, 28)
+ raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
+ input_tensor = onnx.load_tensor_from_string(raw_i)
+ input_tensor = nph.to_array(input_tensor)
+ brev_model = get_test_model_trained(model_name, wbits, abits)
+ elif model_name == "CNV":
+ in_shape = (1, 3, 32, 32)
+ fn = pk.resource_filename(
+ "finn.qnn-data", "cifar10/cifar10-test-data-class3.npz"
+ )
+ input_tensor = np.load(fn)["arr_0"].astype(np.float32)
+ input_tensor = input_tensor / 255
+ brev_model = get_test_model_trained(model_name, wbits, abits)
+ elif model_name == "mobilenet":
+ in_shape = (1, 3, 224, 224)
+ np.random.seed(42)
+ input_tensor = np.random.normal(size=in_shape).astype(dtype=np.float32)
+ brev_model = get_test_model_trained(model_name, 4, 4)
+ else:
+ raise RuntimeError(f"The model with the name {model_name} is not supported.")
+
+ return brev_model, in_shape, input_tensor
+
+
+def analysis_testing_for_no_quant_nodes(model):
+ # Test that all Quant nodes have been converted to MultiThreshold nodes
+ # or folded into tensor initializers.
+
+ for op_type in ["BinaryQuant", "Quant", "Trunc"]:
+ q_count = len(model.get_nodes_by_op_type(op_type))
+ if q_count > 0:
+ raise ValueError(f"There should be no {op_type} nodes left in the graph.")
+
+ return dict()
+
+
+# This test currently takes about 4 min and 20 seconds
+@pytest.mark.parametrize("abits", [1, 2])
+@pytest.mark.parametrize("wbits", [1, 2])
+@pytest.mark.parametrize("model_name", ["TFC", "SFC", "LFC", "CNV", "mobilenet"])
+def test_QONNX_to_FINN(model_name, wbits, abits):
+ if wbits > abits:
+ pytest.skip("No wbits > abits cases at the moment")
+ if model_name == "LFC" and wbits == 2 and abits == 2:
+ pytest.skip("No LFC-w2a2 present at the moment")
+ if model_name == "mobilenet" and (wbits != 2 or abits != 2):
+ pytest.skip("Mobilenet only runs at W2A2, though it's technically W4A4.")
+
+ # Get test config and model
+ ATOL = 1e-7
+ brev_model, in_shape, input_tensor = get_brev_model_and_sample_inputs(
+ model_name, wbits, abits
+ )
+ temp_dir = TemporaryDirectory()
+ qonnx_base_path = temp_dir.name + "/qonnx_{}.onnx"
+ finn_base_path = temp_dir.name + "/finn_{}.onnx"
+
+ # Get Brevitas output
+ torch_input_tensor = torch.from_numpy(input_tensor).float()
+ brev_output = brev_model.forward(torch_input_tensor).detach().numpy()
+
+ # Get "clean" FINN model and it's output
+ _ = bo.export_finn_onnx(brev_model, in_shape, finn_base_path.format("raw"))
+ model = ModelWrapper(finn_base_path.format("raw"))
+ model = model.transform(GiveUniqueNodeNames())
+ model = model.transform(InferShapes())
+ model = model.transform(FoldConstants())
+ model = model.transform(RemoveStaticGraphInputs())
+ model.save(finn_base_path.format("clean"))
+
+ model = ModelWrapper(finn_base_path.format("clean"))
+ input_dict = {model.graph.input[0].name: input_tensor}
+ output_dict = oxe.execute_onnx(model, input_dict, False)
+ finn_export_output = output_dict[model.graph.output[0].name]
+ # This test always fails on MobileNet for some reason
+ if model_name != "mobilenet":
+ assert np.isclose(
+ brev_output, finn_export_output, atol=ATOL
+ ).all(), "The output of the Brevitas model and the FINN model should match."
+
+ # Get the equivalent QONNX model
+ b_onnx.function.DOMAIN_STRING = "finn.custom_op.general"
+ _ = b_onnx.manager.BrevitasONNXManager.export(
+ brev_model, in_shape, qonnx_base_path.format("raw")
+ )
+ cleanup(qonnx_base_path.format("raw"), out_file=qonnx_base_path.format("clean"))
+
+ # Compare output
+ model = ModelWrapper(qonnx_base_path.format("clean"))
+ input_dict = {model.graph.input[0].name: input_tensor}
+ output_dict = oxe.execute_onnx(model, input_dict, False)
+ qonnx_export_output = output_dict[model.graph.output[0].name]
+ assert np.isclose(
+ brev_output, qonnx_export_output, atol=ATOL
+ ).all(), "The output of the Brevitas model and the QONNX model should match."
+ # This test always fails on MobileNet for some reason
+ if model_name != "mobilenet":
+ assert np.isclose(
+ qonnx_export_output, finn_export_output, atol=ATOL
+ ).all(), "The output of the FINN model and the QONNX model should match."
+
+ # Run QONNX to FINN conversion
+ model = ModelWrapper(qonnx_base_path.format("clean"))
+ model = model.transform(ConvertQONNXtoFINN())
+ model.save(qonnx_base_path.format("whole_trafo"))
+
+ # Compare output
+ model = ModelWrapper(qonnx_base_path.format("whole_trafo"))
+ input_dict = {model.graph.input[0].name: input_tensor}
+ output_dict = oxe.execute_onnx(model, input_dict, False)
+ test_output = output_dict[model.graph.output[0].name]
+ assert np.isclose(test_output, finn_export_output, atol=ATOL).all(), (
+ "The output of the FINN model "
+ "and the QONNX -> FINN converted model should match."
+ )
+
+ # Run analysis passes on the converted model
+ model = ModelWrapper(qonnx_base_path.format("whole_trafo"))
+ _ = model.analysis(analysis_testing_for_no_quant_nodes)
+
+ temp_dir.cleanup()
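The final step of this new test uses FINN's analysis-pass mechanism: `model.analysis(fn)` calls `fn(model)` and returns its dict, so a structural check like `analysis_testing_for_no_quant_nodes` raising on leftover nodes doubles as a reusable assertion. A hedged sketch of an equivalent counting pass:

```python
from finn.core.modelwrapper import ModelWrapper

def count_quant_nodes(model):
    # analysis passes take the wrapped model and return a dict of results
    return {
        op: len(model.get_nodes_by_op_type(op))
        for op in ["BinaryQuant", "Quant", "Trunc"]
    }

# assumes a converted model saved as in the test above (hypothetical path)
model = ModelWrapper("qonnx_whole_trafo.onnx")
counts = model.analysis(count_quant_nodes)
assert all(n == 0 for n in counts.values())
```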
diff --git a/tests/util/test_build_dataflow.py b/tests/util/test_build_dataflow.py
index c8e886ddb0..de1b3abcc3 100644
--- a/tests/util/test_build_dataflow.py
+++ b/tests/util/test_build_dataflow.py
@@ -26,12 +26,15 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import pytest
import pkg_resources as pk
+
+import pytest
+
+import os
from shutil import copytree
-from finn.util.basic import make_build_dir
+
from finn.builder.build_dataflow import build_dataflow_directory
-import os
+from finn.util.basic import make_build_dir
@pytest.mark.slow
@@ -45,11 +48,14 @@ def test_build_dataflow_directory():
# check the generated files
output_dir = target_dir + "/output_tfc_w1a1_Pynq-Z1"
assert os.path.isfile(output_dir + "/time_per_step.json")
+ assert os.path.isfile(output_dir + "/auto_folding_config.json")
assert os.path.isfile(output_dir + "/final_hw_config.json")
assert os.path.isfile(output_dir + "/stitched_ip/ip/component.xml")
assert os.path.isfile(output_dir + "/driver/driver.py")
assert os.path.isfile(output_dir + "/report/estimate_layer_cycles.json")
assert os.path.isfile(output_dir + "/report/estimate_layer_resources.json")
+ assert os.path.isfile(output_dir + "/report/verify_rtlsim.vcd")
+ assert os.path.isfile(output_dir + "/report/rtlsim_perf_batch_1.vcd")
assert os.path.isfile(
output_dir + "/report/estimate_layer_config_alternatives.json"
)
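The new assertions extend the checked build outputs with the auto-folding config and the rtlsim verification and performance waveforms. For context, a hedged sketch of the flow the test drives: `build_dataflow_directory` is pointed at a directory carrying the build configuration, and the output directory name comes from that configuration.

```python
import os

from finn.builder.build_dataflow import build_dataflow_directory

target_dir = "/tmp/sample_build"  # hypothetical; the test copies qnn-data here
build_dataflow_directory(target_dir)

output_dir = target_dir + "/output_tfc_w1a1_Pynq-Z1"
assert os.path.isfile(output_dir + "/auto_folding_config.json")
assert os.path.isfile(output_dir + "/report/verify_rtlsim.vcd")
```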
diff --git a/tests/util/test_create.py b/tests/util/test_create.py
index 42a288b74e..c11e60175e 100644
--- a/tests/util/test_create.py
+++ b/tests/util/test_create.py
@@ -32,7 +32,9 @@
from finn.core.datatype import DataType
-@pytest.mark.parametrize("bitwidth", [DataType.BIPOLAR, DataType.INT2, DataType.INT4])
+@pytest.mark.parametrize(
+ "bitwidth", [DataType["BIPOLAR"], DataType["INT2"], DataType["INT4"]]
+)
def test_hls_random_mlp_maker(bitwidth):
w = bitwidth
a = bitwidth
@@ -42,7 +44,7 @@ def test_hls_random_mlp_maker(bitwidth):
"mh": 100,
"simd": 185,
"pe": 100,
- "idt": DataType.BIPOLAR,
+ "idt": DataType["BIPOLAR"],
"wdt": w,
"act": a,
},
@@ -56,7 +58,7 @@ def test_hls_random_mlp_maker(bitwidth):
"pe": 1,
"idt": a,
"wdt": w,
- "act": DataType.BIPOLAR,
+ "act": DataType["BIPOLAR"],
},
]
diff --git a/tests/util/test_data_packing_hls.py b/tests/util/test_data_packing_hls.py
index a926bc4068..7113a3051b 100644
--- a/tests/util/test_data_packing_hls.py
+++ b/tests/util/test_data_packing_hls.py
@@ -26,20 +26,28 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
-import shutil
-import subprocess
-
import pytest
import numpy as np
+import os
+import shutil
+import subprocess
import finn.util.basic as cutil
from finn.core.datatype import DataType
from finn.util.data_packing import numpy_to_hls_code
-@pytest.mark.parametrize("dtype", [DataType.BINARY, DataType.INT2, DataType.INT32])
+@pytest.mark.parametrize(
+ "dtype",
+ [
+ DataType["BINARY"],
+ DataType["INT2"],
+ DataType["INT32"],
+ DataType["FIXED<9,6>"],
+ DataType["FLOAT32"],
+ ],
+)
@pytest.mark.parametrize("test_shape", [(1, 2, 4), (1, 1, 64), (2, 64)])
@pytest.mark.vivado
def test_npy2apintstream(test_shape, dtype):
@@ -120,17 +128,17 @@ def remove_all_whitespace(s):
return "".join(s.split())
A = [[1, 1, 1, 0], [0, 1, 1, 0]]
- ret = numpy_to_hls_code(A, DataType.BINARY, "test", True)
+ ret = numpy_to_hls_code(A, DataType["BINARY"], "test", True)
eA = """ap_uint<4> test[2] =
{ap_uint<4>("0xe", 16), ap_uint<4>("0x6", 16)};"""
assert remove_all_whitespace(ret) == remove_all_whitespace(eA)
B = [[[3, 3], [3, 3]], [[1, 3], [3, 1]]]
- ret = numpy_to_hls_code(B, DataType.UINT2, "test", True)
+ ret = numpy_to_hls_code(B, DataType["UINT2"], "test", True)
eB = """ap_uint<4> test[2][2] =
{{ap_uint<4>("0xf", 16), ap_uint<4>("0xf", 16)},
{ap_uint<4>("0x7", 16), ap_uint<4>("0xd", 16)}};"""
assert remove_all_whitespace(ret) == remove_all_whitespace(eB)
- ret = numpy_to_hls_code(B, DataType.UINT2, "test", True, True)
+ ret = numpy_to_hls_code(B, DataType["UINT2"], "test", True, True)
eB = """{{ap_uint<4>("0xf", 16), ap_uint<4>("0xf", 16)},
{ap_uint<4>("0x7", 16), ap_uint<4>("0xd", 16)}};"""
assert remove_all_whitespace(ret) == remove_all_whitespace(eB)
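The expected strings above pin down the packing order of `numpy_to_hls_code`: each innermost vector becomes one `ap_uint`, most significant bit first. A worked check of the first case, a sketch under that MSB-first assumption:

```python
# A's rows pack MSB-first into 4-bit words:
for row in ([1, 1, 1, 0], [0, 1, 1, 0]):
    val = 0
    for b in row:
        val = (val << 1) | b
    print(hex(val))  # 0xe then 0x6, matching the expected eA above
```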