diff --git a/lib/Targets/AIETargetHSA.cpp b/lib/Targets/AIETargetHSA.cpp
index a79c01cc63..36701fc96d 100644
--- a/lib/Targets/AIETargetHSA.cpp
+++ b/lib/Targets/AIETargetHSA.cpp
@@ -74,13 +74,18 @@ mlir::LogicalResult AIETranslateToHSA(ModuleOp module, raw_ostream &output) {
   }
 
   // Getting the sequence function op which contains the instructions
-  mlir::func::FuncOp funcOp;
+  mlir::func::FuncOp funcOp = nullptr;
   for (auto op : targetOp.getOps<mlir::func::FuncOp>()) {
     if (op.getName().str().compare("sequence") == 0) {
       funcOp = op;
     }
   }
 
+  // No func named "sequence" was found: stop here and report success
+  if (!funcOp) {
+    return success();
+  }
+
   collectTiles(targetOp, tiles);
   collectBuffers(targetOp, buffers);
 
diff --git a/python/ILP_solution.py b/python/ILP_solution.py
index aed6f33506..edd89370e9 100644
--- a/python/ILP_solution.py
+++ b/python/ILP_solution.py
@@ -4,289 +4,316 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 #
 # (c) Copyright 2021 Xilinx Inc.
-#===============================================================================#
+# ===============================================================================#
 # This file implements an experimental ILP solver for
 # automatic tiling space exploration.
-#===============================================================================#
+# ===============================================================================#
 
-import gurobipy as gp 
-from gurobipy import GRB 
+import gurobipy as gp
+from gurobipy import GRB
 import numpy as np
 import time
 
-#-------------------------------------------------------------------------------#
+# -------------------------------------------------------------------------------#
 # Algorithmic parameters
-#-------------------------------------------------------------------------------#
+# -------------------------------------------------------------------------------#
 
 # The list of loop bounds
-loop_bounds=[64, 64, 64]
+loop_bounds = [64, 64, 64]
 
 # The constant matrix that reflects how data tensors are related with
 # loop induction variables
 # +--------------------+
-# |     | L0 | L1 | L2 | 
+# |     | L0 | L1 | L2 |
 # +--------------------+
-# | in1 | 1  | 0  | 1  | 
+# | in1 | 1  | 0  | 1  |
 # +--------------------+
-# | in2 | 0  | 1  | 1  | 
+# | in2 | 0  | 1  | 1  |
 # +--------------------+
-# | out | 1  | 1  | 0  | 
+# | out | 1  | 1  | 0  |
 # +--------------------+
-tensor_IV = [[1,0,1],[0,1,1],[1,1,0]]
+tensor_IV = [[1, 0, 1], [0, 1, 1], [1, 1, 0]]
 
-#-------------------------------------------------------------------------------#
+# -------------------------------------------------------------------------------#
 # Architectural parameters
-#-------------------------------------------------------------------------------#
+# -------------------------------------------------------------------------------#
 
-# In AIE, we typically have three architectural (memory/compute) hierarchy levels. 
-# L3->L2 copies data from L3 memory to L2 shared cache. L2->L1 copies data from 
-# L2 cache to L1 private cache. L2->L1 also indicates the transition from 
-# temporal to spatial execution. L1 indicates the transition from spatial 
+# In AIE, we typically have three architectural (memory/compute) hierarchy levels.
+# L3->L2 copies data from L3 memory to L2 shared cache. L2->L1 copies data from
+# L2 cache to L1 private cache. L2->L1 also indicates the transition from
+# temporal to spatial execution. L1 indicates the transition from spatial
 # to temporal task on each compute core.
 mem_levels = 3
 
 # memory capacity for L3, L2, L1
-mem_capacity=[["L3", 2**20], ["L2", 2**16], ["L1", 2**11]]
+mem_capacity = [["L3", 2**20], ["L2", 2**16], ["L1", 2**11]]
 
-# The ratios according to which the memory spaces are allocated for each data 
-# tensor, ignoring the L3 level. For example, [0.3, 0.3, 0.4] means 30% of 
-# memory space is reserved for two input tensors, and 40% of memory space is 
+# The ratios according to which the memory spaces are allocated for each data
+# tensor, ignoring the L3 level. For example, [0.3, 0.3, 0.4] means 30% of
+# memory space is reserved for two input tensors, and 40% of memory space is
 # estimated to store the output tensor.
 # L2, L1: [in1, in2, out]
-mem_ratios=[[0.3, 0.3, 0.4], [0.3, 0.3, 0.4]]
+mem_ratios = [[0.3, 0.3, 0.4], [0.3, 0.3, 0.4]]
 
 # memory bandwidth for L3, L2, L1
 # recalculated as log(data_size/#cycles)
-mem_bandwidth=[["L3-L2", 2**30], ["L2-L1", 2*2**30]]
+mem_bandwidth = [["L3-L2", 2**30], ["L2-L1", 2 * 2**30]]
 
 # frequency
-freq = 600*10**6
+freq = 600 * 10**6
 
 # compute cores of which L2 is in charge
-spatial_dim=[8,8]
+spatial_dim = [8, 8]
 
-#-------------------------------------------------------------------------------#
+# -------------------------------------------------------------------------------#
 # ILP formulation
-#-------------------------------------------------------------------------------#
+# -------------------------------------------------------------------------------#
+
 
 def prime_factorize(loop_bounds):
-	""" Factorize the original loops bounds into a list of prime factors.
-	Input: a list of loop bounds
-	Output: a super-list of prime factor lists
-	"""
-	prime_factor_list = []
-	for loop_bound in loop_bounds:
-		prime_factors = []
-		while loop_bound%2 == 0:
-			prime_factors.append(2)
-			loop_bound /= 2
-		if loop_bound > 3:
-			for i in range(3, loop_bound, 2):
-				while loop_bound%i == 0:
-					prime_factors.append(i)
-					loop_bound /= i 
-		if loop_bound > 2:
-			prime_factors.append(loop_bound)
-		prime_factor_list.append(prime_factors)
-	return prime_factor_list
-
-def ILP_formulation(util_factor = 0.5, compute_factor = 1, traffic_factor = 0.2):
-	# Create a new model
-	m = gp.Model("loopnest")
-	
-	# Initialize inputs 
-	prime_factor_list = prime_factorize(loop_bounds)
-	prime_factor_len = sum([len(x) for x in prime_factor_list])
-
-	# Create decision variables
-	## Binary decision variables - from factorized subloops to a specific 
-	## loop order
-	x = {}
-	for f1, pf_list in enumerate(prime_factor_list):
-		for f2, prime_factors in enumerate(pf_list):
-			for p in range(prime_factor_len):
-				var_name = "X_{}_{}_{}".format(f1, f2, p)
-				x[(f1, f2, p)] = m.addVar(vtype=GRB.BINARY, name=var_name)
-	
-	## Integer decision variables
-	## Y[(0, p)]: L3 mem, Y[(1, p)]: L2 mem, Y[(2, p)]: L1 mem
-	y = {}
-	for l in range(mem_levels):
-		for p in range(prime_factor_len):
-			var_name = "Y_{}_{}".format(l, p)
-			y[(l, p)] = m.addVar(lb=0, ub=1, vtype=GRB.INTEGER, name=var_name)
-
-	# Add Constraints
-	## One prime factor subloop has one assignment
-	for f1, pf_list in enumerate(prime_factor_list):
-		for f2, prime_factors in enumerate(pf_list):
-			col_sum = 0
-			for p in range(prime_factor_len):
-				col_sum += x[(f1, f2, p)]
-			m.addConstr(col_sum == 1, "col_sum_{}_{}".format(f1, f2))
-	
-	## One ordering slot has only one subloop
-	for p in range(prime_factor_len):
-		row_sum = 0
-		for f1, pf_list in enumerate(prime_factor_list):
-			for f2, prime_factors in enumerate(pf_list):
-				row_sum += x[(f1, f2, p)]
-		m.addConstr(row_sum == 1, "row_sum_{}".format(p))
-	
-	## monotone non-decreasing
-	for l in range(mem_levels):
-		for p in range(prime_factor_len - 1):
-			m.addConstr(y[(l, p)] <= y[(l, p+1)], "y_leq_{}_{}".format(l, p))
-	
-	## L3 region > L2 region > L1 region
-	L2_region = 0
-	L1_region = 0
-	for p in range(prime_factor_len):
-		L2_region += y[(0, p)] - y[(1, p)]
-		L1_region += y[(1, p)] - y[(2, p)]
-	m.addConstr(L2_region >= 1, "y_l2_region")
-	m.addConstr(L1_region >= 2, "y_l1_region")
-
-	## memory capacity
-	### L2: accommodates all tensors within L2 region
-	L2_util={}
-	for v, iv_map in enumerate(tensor_IV):
-		L2_util[v] = 0
-		for f1, pf_list in enumerate(prime_factor_list):
-			for f2, prime_factors in enumerate(pf_list):
-				for p in range(prime_factor_len):
-					L2_util[v] += tensor_IV[v][f1] * np.log2(prime_factor_list[f1][f2]) * \
-									x[(f1, f2, p)] * y[(0, p)] 
-		v_available = mem_capacity[1][1] * mem_ratios[0][v]
-		m.addConstr(L2_util[v] <= np.log2(v_available), "mem_capacity_L2_{}".format(v))
-
-	### L1: accommodates all tensors mapped to temporal dimension within L1 region
-	L1_util={}
-	for v, iv_map in enumerate(tensor_IV):
-		L1_util[v] = 0
-		for f1, pf_list in enumerate(prime_factor_list):
-			for f2, prime_factors in enumerate(pf_list):
-				for p in range(prime_factor_len):
-					L1_util[v] += tensor_IV[v][f1] * np.log2(prime_factor_list[f1][f2]) * \
-									x[(f1, f2, p)] * y[(2, p)]
-		v_available = mem_capacity[2][1] * mem_ratios[1][v]
-		m.addConstr(L1_util[v] <= np.log2(v_available), "mem_capacity_L1_{}".format(v))
-
-	## memory bandwidth - the amount of data copy size in a unit of time
-	## #Data_size_that_requires_moving / #compute_cycles
-	### L3->L2 traffic
-	L3_L2_tensor_traffic = {}
-	data_L2 = L2_util
-	cycles_L2 = 0
-	for f1, pf_list in enumerate(prime_factor_list):
-		for f2, prime_factors in enumerate(pf_list):
-			for p in range(prime_factor_len):
-				cycles_L2 += np.log2(prime_factor_list[f1][f2]) * x[(f1, f2, p)] *\
-					(y[(0, p)] - y[(1, p)] + y[(2, p)])
-	for v, iv_map in enumerate(tensor_IV):
-		bw_log = np.log2(mem_bandwidth[0][1]) + np.log2(mem_ratios[0][v]) - np.log2(freq)
-		L3_L2_tensor_traffic[v] = data_L2[v] - cycles_L2
-		m.addConstr(L3_L2_tensor_traffic[v] <= bw_log, "L2_bandwidth_{}".format(v))
-	L3_L2_traffic = sum(data_L2.values()) - cycles_L2
-	
-	### L2->L1 traffic
-	L2_L1_tensor_traffic = {}
-	data_L1 = {}
-	cycles_L1 = 0
-	for v, iv_map in enumerate(tensor_IV):
-		data_L1[v] = 0
-		for f1, pf_list in enumerate(prime_factor_list):
-			for f2, prime_factors in enumerate(pf_list):
-				for p in range(prime_factor_len):
-					data_L1[v] += tensor_IV[v][f1] * np.log2(prime_factor_list[f1][f2]) * \
-						x[(f1, f2, p)] * y[(1, p)]
-	for f1, pf_list in enumerate(prime_factor_list):
-		for f2, prime_factors in enumerate(pf_list):
-			for p in range(prime_factor_len):
-				cycles_L1 += np.log2(prime_factor_list[f1][f2]) * x[(f1, f2, p)] * y[(2, p)]
-	for v, iv_map in enumerate(tensor_IV):
-		bw_log = np.log2(mem_bandwidth[1][1]) + np.log2(mem_ratios[1][v]) - np.log2(freq)
-		L2_L1_tensor_traffic[v] = data_L1[v] - cycles_L1
-		m.addConstr(L2_L1_tensor_traffic[v] <= bw_log, "L1_bandwidth_{}".format(v))
-	L2_L1_traffic = sum(data_L1.values()) - cycles_L1
-
-
-	## spatial resource limitation
-	spatial_tile = 0
-	for f1, pf_list in enumerate(prime_factor_list):
-		for f2, prime_factors in enumerate(pf_list):
-			for p in range(prime_factor_len):
-				spatial_tile += np.log2(prime_factor_list[f1][f2]) * x[(f1, f2, p)] *\
-					(y[(1, p)] - y[(2, p)])
-	m.addConstr(spatial_tile <= sum(np.log2(spatial_dim)), "spatial_tile_limit")
-	
-	# Set objective function
-	## utilization
-	total_util = 0
-	for v, iv_map in enumerate(tensor_IV):
-		total_util += L2_util[v] + L1_util[v]
-	total_util += spatial_tile
-	
-	## compute latency
-	## The product of all dimensions that map to temporal 
-	total_cycles = 0
-	for f1, pf_list in enumerate(prime_factor_list):
-		for f2, prime_factors in enumerate(pf_list):
-			for p in range(prime_factor_len):
-				total_cycles += np.log2(prime_factor_list[f1][f2]) * x[(f1, f2, p)] *\
-					(1 - y[(1, p)] + y[(2, p)])
-	
-	
-	## traffic
-	total_traffic = L3_L2_traffic + L2_L1_traffic
-
-	loopnest_obj = - util_factor * total_util + \
-		compute_factor * total_cycles 
-
-	m.setObjective(loopnest_obj, GRB.MINIMIZE)
-
-	begin_time = time.time()
-	m.optimize()
-	end_time = time.time()
-	runtime = end_time - begin_time
+    """Factorize the original loop bounds into lists of prime factors.
+    Input: a list of loop bounds (positive integers)
+    Output: a super-list of prime factor lists, one list of ints per bound
+    """
+    prime_factor_list = []
+    for loop_bound in loop_bounds:
+        prime_factors = []
+        while loop_bound % 2 == 0:
+            prime_factors.append(2)
+            loop_bound //= 2  # floor division keeps the bound an int for range()
+        if loop_bound > 3:
+            for i in range(3, loop_bound, 2):
+                while loop_bound % i == 0:
+                    prime_factors.append(i)
+                    loop_bound //= i
+        if loop_bound > 2:
+            prime_factors.append(loop_bound)
+        prime_factor_list.append(prime_factors)
+    return prime_factor_list
+
+
+def ILP_formulation(util_factor=0.5, compute_factor=1, traffic_factor=0.2):
+    # Create a new model
+    m = gp.Model("loopnest")
+
+    # Initialize inputs
+    prime_factor_list = prime_factorize(loop_bounds)
+    prime_factor_len = sum([len(x) for x in prime_factor_list])
+
+    # Create decision variables
+    ## Binary decision variables - from factorized subloops to a specific
+    ## loop order
+    x = {}
+    for f1, pf_list in enumerate(prime_factor_list):
+        for f2, prime_factors in enumerate(pf_list):
+            for p in range(prime_factor_len):
+                var_name = "X_{}_{}_{}".format(f1, f2, p)
+                x[(f1, f2, p)] = m.addVar(vtype=GRB.BINARY, name=var_name)
+
+    ## Integer decision variables
+    ## Y[(0, p)]: L3 mem, Y[(1, p)]: L2 mem, Y[(2, p)]: L1 mem
+    y = {}
+    for l in range(mem_levels):
+        for p in range(prime_factor_len):
+            var_name = "Y_{}_{}".format(l, p)
+            y[(l, p)] = m.addVar(lb=0, ub=1, vtype=GRB.INTEGER, name=var_name)
+
+    # Add Constraints
+    ## One prime factor subloop has one assignment
+    for f1, pf_list in enumerate(prime_factor_list):
+        for f2, prime_factors in enumerate(pf_list):
+            col_sum = 0
+            for p in range(prime_factor_len):
+                col_sum += x[(f1, f2, p)]
+            m.addConstr(col_sum == 1, "col_sum_{}_{}".format(f1, f2))
+
+    ## One ordering slot has only one subloop
+    for p in range(prime_factor_len):
+        row_sum = 0
+        for f1, pf_list in enumerate(prime_factor_list):
+            for f2, prime_factors in enumerate(pf_list):
+                row_sum += x[(f1, f2, p)]
+        m.addConstr(row_sum == 1, "row_sum_{}".format(p))
+
+    ## monotone non-decreasing
+    for l in range(mem_levels):
+        for p in range(prime_factor_len - 1):
+            m.addConstr(y[(l, p)] <= y[(l, p + 1)], "y_leq_{}_{}".format(l, p))
+
+    ## L3 region > L2 region > L1 region
+    L2_region = 0
+    L1_region = 0
+    for p in range(prime_factor_len):
+        L2_region += y[(0, p)] - y[(1, p)]
+        L1_region += y[(1, p)] - y[(2, p)]
+    m.addConstr(L2_region >= 1, "y_l2_region")
+    m.addConstr(L1_region >= 2, "y_l1_region")
+
+    ## memory capacity
+    ### L2: accommodates all tensors within L2 region
+    L2_util = {}
+    for v, iv_map in enumerate(tensor_IV):
+        L2_util[v] = 0
+        for f1, pf_list in enumerate(prime_factor_list):
+            for f2, prime_factors in enumerate(pf_list):
+                for p in range(prime_factor_len):
+                    L2_util[v] += (
+                        tensor_IV[v][f1]
+                        * np.log2(prime_factor_list[f1][f2])
+                        * x[(f1, f2, p)]
+                        * y[(0, p)]
+                    )
+        v_available = mem_capacity[1][1] * mem_ratios[0][v]
+        m.addConstr(L2_util[v] <= np.log2(v_available), "mem_capacity_L2_{}".format(v))
+
+    ### L1: accommodates all tensors mapped to temporal dimension within L1 region
+    L1_util = {}
+    for v, iv_map in enumerate(tensor_IV):
+        L1_util[v] = 0
+        for f1, pf_list in enumerate(prime_factor_list):
+            for f2, prime_factors in enumerate(pf_list):
+                for p in range(prime_factor_len):
+                    L1_util[v] += (
+                        tensor_IV[v][f1]
+                        * np.log2(prime_factor_list[f1][f2])
+                        * x[(f1, f2, p)]
+                        * y[(2, p)]
+                    )
+        v_available = mem_capacity[2][1] * mem_ratios[1][v]
+        m.addConstr(L1_util[v] <= np.log2(v_available), "mem_capacity_L1_{}".format(v))
+
+    ## memory bandwidth - the amount of data copy size in a unit of time
+    ## #Data_size_that_requires_moving / #compute_cycles
+    ### L3->L2 traffic
+    L3_L2_tensor_traffic = {}
+    data_L2 = L2_util
+    cycles_L2 = 0
+    for f1, pf_list in enumerate(prime_factor_list):
+        for f2, prime_factors in enumerate(pf_list):
+            for p in range(prime_factor_len):
+                cycles_L2 += (
+                    np.log2(prime_factor_list[f1][f2])
+                    * x[(f1, f2, p)]
+                    * (y[(0, p)] - y[(1, p)] + y[(2, p)])
+                )
+    for v, iv_map in enumerate(tensor_IV):
+        bw_log = (
+            np.log2(mem_bandwidth[0][1]) + np.log2(mem_ratios[0][v]) - np.log2(freq)
+        )
+        L3_L2_tensor_traffic[v] = data_L2[v] - cycles_L2
+        m.addConstr(L3_L2_tensor_traffic[v] <= bw_log, "L2_bandwidth_{}".format(v))
+    L3_L2_traffic = sum(data_L2.values()) - cycles_L2
+
+    ### L2->L1 traffic
+    L2_L1_tensor_traffic = {}
+    data_L1 = {}
+    cycles_L1 = 0
+    for v, iv_map in enumerate(tensor_IV):
+        data_L1[v] = 0
+        for f1, pf_list in enumerate(prime_factor_list):
+            for f2, prime_factors in enumerate(pf_list):
+                for p in range(prime_factor_len):
+                    data_L1[v] += (
+                        tensor_IV[v][f1]
+                        * np.log2(prime_factor_list[f1][f2])
+                        * x[(f1, f2, p)]
+                        * y[(1, p)]
+                    )
+    for f1, pf_list in enumerate(prime_factor_list):
+        for f2, prime_factors in enumerate(pf_list):
+            for p in range(prime_factor_len):
+                cycles_L1 += (
+                    np.log2(prime_factor_list[f1][f2]) * x[(f1, f2, p)] * y[(2, p)]
+                )
+    for v, iv_map in enumerate(tensor_IV):
+        bw_log = (
+            np.log2(mem_bandwidth[1][1]) + np.log2(mem_ratios[1][v]) - np.log2(freq)
+        )
+        L2_L1_tensor_traffic[v] = data_L1[v] - cycles_L1
+        m.addConstr(L2_L1_tensor_traffic[v] <= bw_log, "L1_bandwidth_{}".format(v))
+    L2_L1_traffic = sum(data_L1.values()) - cycles_L1
+
+    ## spatial resource limitation
+    spatial_tile = 0
+    for f1, pf_list in enumerate(prime_factor_list):
+        for f2, prime_factors in enumerate(pf_list):
+            for p in range(prime_factor_len):
+                spatial_tile += (
+                    np.log2(prime_factor_list[f1][f2])
+                    * x[(f1, f2, p)]
+                    * (y[(1, p)] - y[(2, p)])
+                )
+    m.addConstr(spatial_tile <= sum(np.log2(spatial_dim)), "spatial_tile_limit")
+
+    # Set objective function
+    ## utilization
+    total_util = 0
+    for v, iv_map in enumerate(tensor_IV):
+        total_util += L2_util[v] + L1_util[v]
+    total_util += spatial_tile
+
+    ## compute latency
+    ## The product of all dimensions that map to temporal
+    total_cycles = 0
+    for f1, pf_list in enumerate(prime_factor_list):
+        for f2, prime_factors in enumerate(pf_list):
+            for p in range(prime_factor_len):
+                total_cycles += (
+                    np.log2(prime_factor_list[f1][f2])
+                    * x[(f1, f2, p)]
+                    * (1 - y[(1, p)] + y[(2, p)])
+                )
+
+    ## traffic
+    total_traffic = L3_L2_traffic + L2_L1_traffic
+
+    loopnest_obj = -util_factor * total_util + compute_factor * total_cycles
+
+    m.setObjective(loopnest_obj, GRB.MINIMIZE)
+
+    begin_time = time.time()
+    m.optimize()
+    end_time = time.time()
+    runtime = end_time - begin_time
 
     # Logging to a file
-	m.write("debug.lp")
-
-	# print results
-	print("---runtime--- ", runtime)
-	m.printAttr('X')
-
-	## L2 utilization
-	print("---L2_utilization---")
-	for key, val in L2_util.items():
-		print(key, "-", val.getValue())
-
-	## L1 utilization
-	print("---L1_utilization---")
-	for key, val in L1_util.items():
-		print(key, "-", val.getValue())
-	
-	## spatial tile limit
-	print("---spatial_tile_limit---")
-	print(spatial_tile.getValue())
-
-	## objective - utilization
-	print("objective: total_utilization = ", total_util.getValue())
-	print("objective: total_cycles = ", total_cycles.getValue())
-	print("objective: L3_L2_traffic")
-	for key, val in L3_L2_tensor_traffic.items():
-		print(key, "-", val.getValue())
-	print("objective: L2_L1_traffic")
-	for key, val in L2_L1_tensor_traffic.items():
-		print(key, "-", val.getValue())
-
-if __name__=="__main__":
-	try:
-		ILP_formulation()
-
-	except gp.GurobiError as e:
-		print('Error code ' + str(e.errno) + ": " + str(e))
-
-	except AttributeError:
-		print('Encountered an attribute error')
+    m.write("debug.lp")
+
+    # print results
+    print("---runtime--- ", runtime)
+    m.printAttr("X")
+
+    ## L2 utilization
+    print("---L2_utilization---")
+    for key, val in L2_util.items():
+        print(key, "-", val.getValue())
+
+    ## L1 utilization
+    print("---L1_utilization---")
+    for key, val in L1_util.items():
+        print(key, "-", val.getValue())
+
+    ## spatial tile limit
+    print("---spatial_tile_limit---")
+    print(spatial_tile.getValue())
+
+    ## objective - utilization
+    print("objective: total_utilization = ", total_util.getValue())
+    print("objective: total_cycles = ", total_cycles.getValue())
+    print("objective: L3_L2_traffic")
+    for key, val in L3_L2_tensor_traffic.items():
+        print(key, "-", val.getValue())
+    print("objective: L2_L1_traffic")
+    for key, val in L2_L1_tensor_traffic.items():
+        print(key, "-", val.getValue())
+
+
+if __name__ == "__main__":
+    try:
+        ILP_formulation()
+
+    except gp.GurobiError as e:
+        print("Error code " + str(e.errno) + ": " + str(e))
+
+    except AttributeError:
+        print("Encountered an attribute error")
diff --git a/python/compiler/aiecc/main.py b/python/compiler/aiecc/main.py
index e67c8c7fd7..3296704e8e 100644
--- a/python/compiler/aiecc/main.py
+++ b/python/compiler/aiecc/main.py
@@ -671,7 +671,6 @@ async def process_host_cgen(self, aie_target, file_with_addresses):
                         file_inc_cpp,
                     ],
                 )
-              
 
             cmd = ["clang++", "-std=c++11"]
             if opts.host_target:
diff --git a/reference_designs/CMakeLists.txt b/reference_designs/CMakeLists.txt
index e732b4655c..08cb8a3155 100755
--- a/reference_designs/CMakeLists.txt
+++ b/reference_designs/CMakeLists.txt
@@ -57,7 +57,12 @@ option(ENABLE_CHESS_TESTS "Enable backend tests using xchesscc" ${DEFAULT_ENABLE
 if(${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL aarch64)
   set(DEFAULT_ENABLE_BOARD_TESTS ON)
 else()
-  set(DEFAULT_ENABLE_BOARD_TESTS OFF)
+  # On non-aarch64 hosts (e.g. x86), default to ON when hsa-runtime64 was found
+  if(hsa-runtime64_FOUND)
+    set(DEFAULT_ENABLE_BOARD_TESTS ON)
+  else()
+    set(DEFAULT_ENABLE_BOARD_TESTS OFF)
+  endif()
 endif()
 option(ENABLE_BOARD_TESTS "Enable board tests" ${DEFAULT_ENABLE_BOARD_TESTS})
 
@@ -144,4 +149,4 @@ add_lit_testsuite(check-reference-designs "Running the aie reference designs"
   DEPENDS ${TEST_DEPENDS}
   ARGS "-sv --timeout 600"
 )
-set_target_properties(check-reference-designs PROPERTIES FOLDER "Tutorials")
\ No newline at end of file
+set_target_properties(check-reference-designs PROPERTIES FOLDER "Tutorials")
diff --git a/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/aie2.py b/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/aie2.py
index 18ea6a5672..e39d918e67 100644
--- a/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/aie2.py
+++ b/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/aie2.py
@@ -14,18 +14,19 @@
 # Used to get command line arguments
 import sys
 
+
 def my_add_one_objFifo():
     with mlir_mod_ctx() as ctx:
 
-        if(len(sys.argv) != 3):
-          raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
-        
-        if sys.argv[1] == 'ipu':
-          dev = AIEDevice.ipu
-        elif sys.argv[1] == 'xcvc1902':
-          dev = AIEDevice.xcvc1902
+        if len(sys.argv) != 3:
+            raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
+
+        if sys.argv[1] == "ipu":
+            dev = AIEDevice.ipu
+        elif sys.argv[1] == "xcvc1902":
+            dev = AIEDevice.xcvc1902
         else:
-          raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
+            raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
 
         @device(dev)
         def device_body():
diff --git a/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/run.lit b/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/run.lit
index 49cd75e360..a429e99221 100644
--- a/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/run.lit
+++ b/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/run.lit
@@ -3,7 +3,7 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: %python %S/aie2.py > ./aie.mlir
+// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
 // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
 // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
diff --git a/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/run_vck5000.lit b/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/run_vck5000.lit
new file mode 100644
index 0000000000..3583a5f393
--- /dev/null
+++ b/reference_designs/IRON-examples/add_one_objFifo_no_mem_tile/run_vck5000.lit
@@ -0,0 +1,10 @@
+// (c) Copyright 2023 Advanced Micro Devices, Inc.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// REQUIRES: hsa
+//
+// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir
+// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib
+// RUN: %run_on_board ./test.elf | FileCheck %s
+// CHECK: PASS!
+
diff --git a/reference_designs/IRON-examples/lit.local.cfg b/reference_designs/IRON-examples/lit.local.cfg
new file mode 100644
index 0000000000..cd7e7f2f7a
--- /dev/null
+++ b/reference_designs/IRON-examples/lit.local.cfg
@@ -0,0 +1,12 @@
+#
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2023 AMD Inc.
+
+config.suffixes = ['.lit']
+config.unsupported = []
+
+if not config.has_libxaie:
+    config.unsupported = ['IRON-examples']
diff --git a/reference_designs/IRON-examples/matrix_add_one/aie2.py b/reference_designs/IRON-examples/matrix_add_one/aie2.py
index 6e74ddc765..36eb3a3d38 100644
--- a/reference_designs/IRON-examples/matrix_add_one/aie2.py
+++ b/reference_designs/IRON-examples/matrix_add_one/aie2.py
@@ -28,18 +28,19 @@
 
 objfifo_capacity = 4
 
+
 def my_matrix_add_one():
     with mlir_mod_ctx() as ctx:
 
-        if(len(sys.argv) != 3):
-          raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
-        
-        if sys.argv[1] == 'ipu':
-          dev = AIEDevice.ipu
-        elif sys.argv[1] == 'xcvc1902':
-          dev = AIEDevice.xcvc1902
+        if len(sys.argv) != 3:
+            raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
+
+        if sys.argv[1] == "ipu":
+            dev = AIEDevice.ipu
+        elif sys.argv[1] == "xcvc1902":
+            dev = AIEDevice.xcvc1902
         else:
-          raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
+            raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
 
         @device(dev)
         def device_body():
@@ -51,10 +52,14 @@ def device_body():
 
             # AIE-array data movement with object fifos
             # Input
-            of_in1 = object_fifo("in0", ShimTile, ComputeTile2, objfifo_capacity, memRef_ty)
+            of_in1 = object_fifo(
+                "in0", ShimTile, ComputeTile2, objfifo_capacity, memRef_ty
+            )
 
             # Output
-            of_out1 = object_fifo("out0", ComputeTile2, ShimTile, objfifo_capacity, memRef_ty)
+            of_out1 = object_fifo(
+                "out0", ComputeTile2, ShimTile, objfifo_capacity, memRef_ty
+            )
 
             # Set up compute tiles
 
@@ -66,10 +71,10 @@ def core_body():
                     elem_in = of_in1.acquire(ObjectFifoPort.Consume, 1)
                     elem_out = of_out1.acquire(ObjectFifoPort.Produce, 1)
                     for i in for_(TILE_SIZE):
-                      v0 = memref.load(elem_in, [i])
-                      v1 = arith.addi(v0, arith.constant(1, T.i32()))
-                      memref.store(v1, elem_out, [i])
-                      yield_([])
+                        v0 = memref.load(elem_in, [i])
+                        v1 = arith.addi(v0, arith.constant(1, T.i32()))
+                        memref.store(v1, elem_out, [i])
+                        yield_([])
                     of_in1.release(ObjectFifoPort.Consume, 1)
                     of_out1.release(ObjectFifoPort.Produce, 1)
                     yield_([])
@@ -81,10 +86,18 @@ def core_body():
             @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty)
             def sequence(inTensor, notUsed, outTensor):
                 ipu_dma_memcpy_nd(
-                    metadata="out0", bd_id=0, mem=outTensor, sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH], strides=[1, 1, IMAGE_WIDTH]
+                    metadata="out0",
+                    bd_id=0,
+                    mem=outTensor,
+                    sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH],
+                    strides=[1, 1, IMAGE_WIDTH],
                 )
                 ipu_dma_memcpy_nd(
-                    metadata="in0", bd_id=1, mem=inTensor, sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH], strides=[1, 1, IMAGE_WIDTH]
+                    metadata="in0",
+                    bd_id=1,
+                    mem=inTensor,
+                    sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH],
+                    strides=[1, 1, IMAGE_WIDTH],
                 )
                 ipu_sync(column=0, row=0, direction=0, channel=0)
 
diff --git a/reference_designs/IRON-examples/matrix_add_one/run.lit b/reference_designs/IRON-examples/matrix_add_one/run.lit
index 49cd75e360..a429e99221 100644
--- a/reference_designs/IRON-examples/matrix_add_one/run.lit
+++ b/reference_designs/IRON-examples/matrix_add_one/run.lit
@@ -3,7 +3,7 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: %python %S/aie2.py > ./aie.mlir
+// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
 // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
 // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
diff --git a/reference_designs/IRON-examples/matrix_add_one/run_vck5000.lit b/reference_designs/IRON-examples/matrix_add_one/run_vck5000.lit
new file mode 100644
index 0000000000..3583a5f393
--- /dev/null
+++ b/reference_designs/IRON-examples/matrix_add_one/run_vck5000.lit
@@ -0,0 +1,10 @@
+// (c) Copyright 2023 Advanced Micro Devices, Inc.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// REQUIRES: hsa
+//
+// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir
+// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib
+// RUN: %run_on_board ./test.elf | FileCheck %s
+// CHECK: PASS!
+
diff --git a/reference_designs/IRON-examples/matrix_multiplication_scalar/aie2.py b/reference_designs/IRON-examples/matrix_multiplication_scalar/aie2.py
index 6dc6a4c78e..7aacb0ab10 100644
--- a/reference_designs/IRON-examples/matrix_multiplication_scalar/aie2.py
+++ b/reference_designs/IRON-examples/matrix_multiplication_scalar/aie2.py
@@ -12,9 +12,9 @@
 from aie.extras.context import mlir_mod_ctx
 
 # Size of the matrices
-M = 4 
-N = 4 
-K = 4 
+M = 4
+N = 4
+K = 4
 
 A_SIZE = M * K
 B_SIZE = K * N
@@ -22,12 +22,13 @@
 
 objfifo_capacity = 4
 
+
 def my_matrix_multiplication_scalar():
     with mlir_mod_ctx() as ctx:
 
         @device(AIEDevice.xcvc1902)
         def device_body():
-            #memRef_ty = T.memref(A_SIZE, T.i32())
+            # memRef_ty = T.memref(A_SIZE, T.i32())
             memRef_ty = T.memref(M, N, T.i32())
 
             # Tile declarations
@@ -36,11 +37,17 @@ def device_body():
 
             # AIE-array data movement with object fifos
             # Input
-            of_in0 = object_fifo("in0", ShimTile, ComputeTile2, objfifo_capacity, memRef_ty)
-            of_in1 = object_fifo("in1", ShimTile, ComputeTile2, objfifo_capacity, memRef_ty)
+            of_in0 = object_fifo(
+                "in0", ShimTile, ComputeTile2, objfifo_capacity, memRef_ty
+            )
+            of_in1 = object_fifo(
+                "in1", ShimTile, ComputeTile2, objfifo_capacity, memRef_ty
+            )
 
             # Output
-            of_out0 = object_fifo("out0", ComputeTile2, ShimTile, objfifo_capacity, memRef_ty)
+            of_out0 = object_fifo(
+                "out0", ComputeTile2, ShimTile, objfifo_capacity, memRef_ty
+            )
 
             # Set up compute tiles
 
@@ -49,27 +56,27 @@ def device_body():
             def core_body():
                 # Effective while(1)
                 for _ in for_(8):
-                  elem_in0 = of_in0.acquire(ObjectFifoPort.Consume, 1)
-                  elem_in1 = of_in1.acquire(ObjectFifoPort.Consume, 1)
-                  elem_out = of_out0.acquire(ObjectFifoPort.Produce, 1)
-                  for n in for_(N):
-                    for m in for_(M):
-                      for k in for_(K):
-                        v0 = memref.load(elem_in0, [m, k])
-                        v1 = memref.load(elem_in1, [k, n])
-                        v2 = memref.load(elem_out, [m, n])
-                        v3 = arith.muli(v0, v1)
-                        v4 = arith.addi(v2, v3)
-                        memref.store(v4, elem_out, [m, n])
-                        yield_([]) # K
-
-                      yield_([]) # N
-                    yield_([]) # M
-                    
-                  of_in0.release(ObjectFifoPort.Consume, 1)
-                  of_in1.release(ObjectFifoPort.Consume, 1)
-                  of_out0.release(ObjectFifoPort.Produce, 1)
-                  yield_([])
+                    elem_in0 = of_in0.acquire(ObjectFifoPort.Consume, 1)
+                    elem_in1 = of_in1.acquire(ObjectFifoPort.Consume, 1)
+                    elem_out = of_out0.acquire(ObjectFifoPort.Produce, 1)
+                    for n in for_(N):
+                        for m in for_(M):
+                            for k in for_(K):
+                                v0 = memref.load(elem_in0, [m, k])
+                                v1 = memref.load(elem_in1, [k, n])
+                                v2 = memref.load(elem_out, [m, n])
+                                v3 = arith.muli(v0, v1)
+                                v4 = arith.addi(v2, v3)
+                                memref.store(v4, elem_out, [m, n])
+                                yield_([])  # K
+
+                            yield_([])  # M
+                        yield_([])  # N
+
+                    of_in0.release(ObjectFifoPort.Consume, 1)
+                    of_in1.release(ObjectFifoPort.Consume, 1)
+                    of_out0.release(ObjectFifoPort.Produce, 1)
+                    yield_([])
 
             # To/from AIE-array data movement
 
@@ -77,15 +84,15 @@ def core_body():
 
             @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty)
             def sequence(inTensorA, inTensorB, outTensor):
-                #ipu_dma_memcpy_nd(
+                # ipu_dma_memcpy_nd(
                 #    metadata="out0", bd_id=0, mem=outTensor, sizes=[1, 1, 1, C_SIZE]
-                #)
-                #ipu_dma_memcpy_nd(
+                # )
+                # ipu_dma_memcpy_nd(
                 #    metadata="in0", bd_id=1, mem=inTensorA, sizes=[1, 1, 1, A_SIZE]
-                #)
-                #ipu_dma_memcpy_nd(
+                # )
+                # ipu_dma_memcpy_nd(
                 #    metadata="in1", bd_id=1, mem=inTensorB, sizes=[1, 1, 1, B_SIZE]
-                #)
+                # )
                 ipu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
diff --git a/reference_designs/IRON-examples/matrix_multiplication_scalar/run.lit b/reference_designs/IRON-examples/matrix_multiplication_scalar/run.lit
index 49cd75e360..bb92a10b48 100644
--- a/reference_designs/IRON-examples/matrix_multiplication_scalar/run.lit
+++ b/reference_designs/IRON-examples/matrix_multiplication_scalar/run.lit
@@ -3,9 +3,9 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: %python %S/aie2.py > ./aie.mlir
+// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
 // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
 // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
-
+// XFAIL: *
diff --git a/reference_designs/IRON-examples/matrix_multiplication_scalar/run_vck5000.lit b/reference_designs/IRON-examples/matrix_multiplication_scalar/run_vck5000.lit
new file mode 100644
index 0000000000..752032bfc3
--- /dev/null
+++ b/reference_designs/IRON-examples/matrix_multiplication_scalar/run_vck5000.lit
@@ -0,0 +1,12 @@
+// (c) Copyright 2023 Advanced Micro Devices, Inc.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// REQUIRES: hsa
+//
+// XFAIL: *
+//
+// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir
+// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib
+// RUN: %run_on_board ./test.elf | FileCheck %s
+// CHECK: PASS!
+
diff --git a/reference_designs/IRON-examples/passthrough_hardware/aie2.py b/reference_designs/IRON-examples/passthrough_hardware/aie2.py
index 0863359ef0..3931b5ffc0 100644
--- a/reference_designs/IRON-examples/passthrough_hardware/aie2.py
+++ b/reference_designs/IRON-examples/passthrough_hardware/aie2.py
@@ -15,18 +15,19 @@
 
 N = 4096
 
+
 def my_add_one_objFifo():
     with mlir_mod_ctx() as ctx:
 
-        if(len(sys.argv) != 3):
-          raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
-        
-        if sys.argv[1] == 'ipu':
-          dev = AIEDevice.ipu
-        elif sys.argv[1] == 'xcvc1902':
-          dev = AIEDevice.xcvc1902
+        if len(sys.argv) != 3:
+            raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
+
+        if sys.argv[1] == "ipu":
+            dev = AIEDevice.ipu
+        elif sys.argv[1] == "xcvc1902":
+            dev = AIEDevice.xcvc1902
         else:
-          raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
+            raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
 
         @device(dev)
         def device_body():
@@ -53,7 +54,7 @@ def core_body():
                 v0 = arith.constant(0, T.i32())
                 memref.store(v0, tmp, [0])
 
-           # To/from AIE-array data movement
+            # To/from AIE-array data movement
             tensor_ty = T.memref(N, T.i32())
 
             @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty)
diff --git a/reference_designs/IRON-examples/passthrough_hardware/run.lit b/reference_designs/IRON-examples/passthrough_hardware/run.lit
index 3e9a05eb94..62d66040ff 100644
--- a/reference_designs/IRON-examples/passthrough_hardware/run.lit
+++ b/reference_designs/IRON-examples/passthrough_hardware/run.lit
@@ -3,7 +3,7 @@
 //
 // REQUIRES: ryzen_ai, chess
 //
-// RUN: %python %S/aie2.py > ./aie.mlir
+// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
 // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
 // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -l 4096 | FileCheck %s
diff --git a/reference_designs/IRON-examples/passthrough_hardware/run_vck5000.lit b/reference_designs/IRON-examples/passthrough_hardware/run_vck5000.lit
new file mode 100644
index 0000000000..3583a5f393
--- /dev/null
+++ b/reference_designs/IRON-examples/passthrough_hardware/run_vck5000.lit
@@ -0,0 +1,10 @@
+// (c) Copyright 2023 Advanced Micro Devices, Inc.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// REQUIRES: hsa
+//
+// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir
+// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib
+// RUN: %run_on_board ./test.elf | FileCheck %s
+// CHECK: PASS!
+
diff --git a/reference_designs/IRON-examples/vector_add/aie2.py b/reference_designs/IRON-examples/vector_add/aie2.py
index df0cfa3c0a..6f8ad2d5b6 100755
--- a/reference_designs/IRON-examples/vector_add/aie2.py
+++ b/reference_designs/IRON-examples/vector_add/aie2.py
@@ -15,6 +15,7 @@
 
 import sys
 
+
 def my_vector_add():
     N = 64
     n = 16
@@ -24,15 +25,15 @@ def my_vector_add():
 
     with mlir_mod_ctx() as ctx:
 
-        if(len(sys.argv) != 3):
-          raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
-        
-        if sys.argv[1] == 'ipu':
-          dev = AIEDevice.ipu
-        elif sys.argv[1] == 'xcvc1902':
-          dev = AIEDevice.xcvc1902
+        if len(sys.argv) != 3:
+            raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
+
+        if sys.argv[1] == "ipu":
+            dev = AIEDevice.ipu
+        elif sys.argv[1] == "xcvc1902":
+            dev = AIEDevice.xcvc1902
         else:
-          raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
+            raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
 
         @device(dev)
         def device_body():
@@ -62,11 +63,11 @@ def core_body():
                         elem_in2 = of_in2.acquire(ObjectFifoPort.Consume, 1)
                         elem_out = of_out.acquire(ObjectFifoPort.Produce, 1)
                         for i in for_(n):
-                          v0 = memref.load(elem_in1, [i])
-                          v1 = memref.load(elem_in2, [i])
-                          v2 = arith.addi(v0, v1)
-                          memref.store(v2, elem_out, [i])
-                          yield_([])
+                            v0 = memref.load(elem_in1, [i])
+                            v1 = memref.load(elem_in2, [i])
+                            v2 = arith.addi(v0, v1)
+                            memref.store(v2, elem_out, [i])
+                            yield_([])
                         of_in1.release(ObjectFifoPort.Consume, 1)
                         of_in2.release(ObjectFifoPort.Consume, 1)
                         of_out.release(ObjectFifoPort.Produce, 1)
diff --git a/reference_designs/IRON-examples/vector_add/run.lit b/reference_designs/IRON-examples/vector_add/run.lit
index 212f474fea..a429e99221 100644
--- a/reference_designs/IRON-examples/vector_add/run.lit
+++ b/reference_designs/IRON-examples/vector_add/run.lit
@@ -3,8 +3,7 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o
-// RUN: %python %S/aie2.py > ./aie.mlir
+// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
 // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
 // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
diff --git a/reference_designs/IRON-examples/vector_add/run_vck5000.lit b/reference_designs/IRON-examples/vector_add/run_vck5000.lit
new file mode 100644
index 0000000000..3583a5f393
--- /dev/null
+++ b/reference_designs/IRON-examples/vector_add/run_vck5000.lit
@@ -0,0 +1,10 @@
+// (c) Copyright 2023 Advanced Micro Devices, Inc.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// REQUIRES: hsa
+//
+// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir
+// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib
+// RUN: %run_on_board ./test.elf | FileCheck %s
+// CHECK: PASS!
+
diff --git a/reference_designs/IRON-examples/vector_max/aie2.py b/reference_designs/IRON-examples/vector_max/aie2.py
index ff362f2ba8..5d094a621d 100755
--- a/reference_designs/IRON-examples/vector_max/aie2.py
+++ b/reference_designs/IRON-examples/vector_max/aie2.py
@@ -15,6 +15,7 @@
 
 import sys
 
+
 def my_vector_max():
     N = 64
 
@@ -22,15 +23,15 @@ def my_vector_max():
 
     with mlir_mod_ctx() as ctx:
 
-        if(len(sys.argv) != 3):
-          raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
-        
-        if sys.argv[1] == 'ipu':
-          dev = AIEDevice.ipu
-        elif sys.argv[1] == 'xcvc1902':
-          dev = AIEDevice.xcvc1902
+        if len(sys.argv) != 3:
+            raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
+
+        if sys.argv[1] == "ipu":
+            dev = AIEDevice.ipu
+        elif sys.argv[1] == "xcvc1902":
+            dev = AIEDevice.xcvc1902
         else:
-          raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
+            raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
 
         @device(dev)
         def device_body():
@@ -59,12 +60,12 @@ def core_body():
                     elem_in = of_in.acquire(ObjectFifoPort.Consume, 1)
                     elem_out = of_out.acquire(ObjectFifoPort.Produce, 1)
                     for i in for_(N):
-                      v0 = memref.load(elem_in, [i])
-                      v1 = memref.load(max_val, [0])
-                      v2 = arith.maxui(v1, v0)
-                      memref.store(v2, max_val, [0])
-                      yield_([])
-                    
+                        v0 = memref.load(elem_in, [i])
+                        v1 = memref.load(max_val, [0])
+                        v2 = arith.maxui(v1, v0)
+                        memref.store(v2, max_val, [0])
+                        yield_([])
+
                     v3 = memref.load(max_val, [0])
                     memref.store(v3, elem_out, [0])
                     of_in.release(ObjectFifoPort.Consume, 1)
diff --git a/reference_designs/IRON-examples/vector_max/run.lit b/reference_designs/IRON-examples/vector_max/run.lit
index 212f474fea..a429e99221 100644
--- a/reference_designs/IRON-examples/vector_max/run.lit
+++ b/reference_designs/IRON-examples/vector_max/run.lit
@@ -3,8 +3,7 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o
-// RUN: %python %S/aie2.py > ./aie.mlir
+// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
 // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
 // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
diff --git a/reference_designs/IRON-examples/vector_max/run_vck5000.lit b/reference_designs/IRON-examples/vector_max/run_vck5000.lit
new file mode 100644
index 0000000000..3583a5f393
--- /dev/null
+++ b/reference_designs/IRON-examples/vector_max/run_vck5000.lit
@@ -0,0 +1,10 @@
+// (c) Copyright 2023 Advanced Micro Devices, Inc.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// REQUIRES: hsa
+//
+// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir
+// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib
+// RUN: %run_on_board ./test.elf | FileCheck %s
+// CHECK: PASS!
+
diff --git a/reference_designs/IRON-examples/vector_min/aie2.py b/reference_designs/IRON-examples/vector_min/aie2.py
index a62c8ddb88..f0b931431a 100755
--- a/reference_designs/IRON-examples/vector_min/aie2.py
+++ b/reference_designs/IRON-examples/vector_min/aie2.py
@@ -15,6 +15,7 @@
 
 import sys
 
+
 def my_vector_max():
     N = 64
 
@@ -22,15 +23,15 @@ def my_vector_max():
 
     with mlir_mod_ctx() as ctx:
 
-        if(len(sys.argv) != 3):
-          raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
-        
-        if sys.argv[1] == 'ipu':
-          dev = AIEDevice.ipu
-        elif sys.argv[1] == 'xcvc1902':
-          dev = AIEDevice.xcvc1902
+        if len(sys.argv) != 3:
+            raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
+
+        if sys.argv[1] == "ipu":
+            dev = AIEDevice.ipu
+        elif sys.argv[1] == "xcvc1902":
+            dev = AIEDevice.xcvc1902
         else:
-          raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
+            raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
 
         @device(dev)
         def device_body():
@@ -59,12 +60,12 @@ def core_body():
                     elem_in = of_in.acquire(ObjectFifoPort.Consume, 1)
                     elem_out = of_out.acquire(ObjectFifoPort.Produce, 1)
                     for i in for_(N):
-                      v0 = memref.load(elem_in, [i])
-                      v1 = memref.load(min_val, [0])
-                      v2 = arith.minui(v1, v0)
-                      memref.store(v2, min_val, [0])
-                      yield_([])
-                    
+                        v0 = memref.load(elem_in, [i])
+                        v1 = memref.load(min_val, [0])
+                        v2 = arith.minui(v1, v0)
+                        memref.store(v2, min_val, [0])
+                        yield_([])
+
                     v3 = memref.load(min_val, [0])
                     memref.store(v3, elem_out, [0])
                     of_in.release(ObjectFifoPort.Consume, 1)
diff --git a/reference_designs/IRON-examples/vector_min/run.lit b/reference_designs/IRON-examples/vector_min/run.lit
index 212f474fea..a429e99221 100644
--- a/reference_designs/IRON-examples/vector_min/run.lit
+++ b/reference_designs/IRON-examples/vector_min/run.lit
@@ -3,8 +3,7 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o
-// RUN: %python %S/aie2.py > ./aie.mlir
+// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
 // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
 // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
diff --git a/reference_designs/IRON-examples/vector_min/run_vck5000.lit b/reference_designs/IRON-examples/vector_min/run_vck5000.lit
new file mode 100644
index 0000000000..3583a5f393
--- /dev/null
+++ b/reference_designs/IRON-examples/vector_min/run_vck5000.lit
@@ -0,0 +1,10 @@
+// (c) Copyright 2023 Advanced Micro Devices, Inc.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// REQUIRES: hsa
+//
+// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir
+// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib
+// RUN: %run_on_board ./test.elf | FileCheck %s
+// CHECK: PASS!
+
diff --git a/reference_designs/IRON-examples/vector_mult/aie2.py b/reference_designs/IRON-examples/vector_mult/aie2.py
index 7621a04861..5a36f85a33 100755
--- a/reference_designs/IRON-examples/vector_mult/aie2.py
+++ b/reference_designs/IRON-examples/vector_mult/aie2.py
@@ -15,6 +15,7 @@
 
 import sys
 
+
 def my_vector_add():
     N = 64
     n = 16
@@ -24,15 +25,15 @@ def my_vector_add():
 
     with mlir_mod_ctx() as ctx:
 
-        if(len(sys.argv) != 3):
-          raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
+        if len(sys.argv) != 3:
+            raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
 
-        if sys.argv[1] == 'ipu':
-          dev = AIEDevice.ipu
-        elif sys.argv[1] == 'xcvc1902':
-          dev = AIEDevice.xcvc1902
+        if sys.argv[1] == "ipu":
+            dev = AIEDevice.ipu
+        elif sys.argv[1] == "xcvc1902":
+            dev = AIEDevice.xcvc1902
         else:
-          raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
+            raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
 
         @device(dev)
         def device_body():
@@ -62,11 +63,11 @@ def core_body():
                         elem_in2 = of_in2.acquire(ObjectFifoPort.Consume, 1)
                         elem_out = of_out.acquire(ObjectFifoPort.Produce, 1)
                         for i in for_(n):
-                          v0 = memref.load(elem_in1, [i])
-                          v1 = memref.load(elem_in2, [i])
-                          v2 = arith.muli(v0, v1)
-                          memref.store(v2, elem_out, [i])
-                          yield_([])
+                            v0 = memref.load(elem_in1, [i])
+                            v1 = memref.load(elem_in2, [i])
+                            v2 = arith.muli(v0, v1)
+                            memref.store(v2, elem_out, [i])
+                            yield_([])
                         of_in1.release(ObjectFifoPort.Consume, 1)
                         of_in2.release(ObjectFifoPort.Consume, 1)
                         of_out.release(ObjectFifoPort.Produce, 1)
diff --git a/reference_designs/IRON-examples/vector_mult/run.lit b/reference_designs/IRON-examples/vector_mult/run.lit
index 212f474fea..a429e99221 100644
--- a/reference_designs/IRON-examples/vector_mult/run.lit
+++ b/reference_designs/IRON-examples/vector_mult/run.lit
@@ -3,8 +3,7 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o
-// RUN: %python %S/aie2.py > ./aie.mlir
+// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
 // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
 // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
diff --git a/reference_designs/IRON-examples/vector_mult/run_vck5000.lit b/reference_designs/IRON-examples/vector_mult/run_vck5000.lit
new file mode 100644
index 0000000000..3583a5f393
--- /dev/null
+++ b/reference_designs/IRON-examples/vector_mult/run_vck5000.lit
@@ -0,0 +1,10 @@
+// (c) Copyright 2023 Advanced Micro Devices, Inc.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// REQUIRES: hsa
+//
+// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir
+// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib
+// RUN: %run_on_board ./test.elf | FileCheck %s
+// CHECK: PASS!
+
diff --git a/reference_designs/IRON-examples/vector_scalar/aie2.py b/reference_designs/IRON-examples/vector_scalar/aie2.py
index 19fd4dffd5..540210596c 100755
--- a/reference_designs/IRON-examples/vector_scalar/aie2.py
+++ b/reference_designs/IRON-examples/vector_scalar/aie2.py
@@ -15,6 +15,7 @@
 
 import sys
 
+
 def my_vector_scalar():
     N = 64
     n = 16
@@ -24,15 +25,15 @@ def my_vector_scalar():
 
     with mlir_mod_ctx() as ctx:
 
-        if(len(sys.argv) != 3):
-          raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
-        
-        if sys.argv[1] == 'ipu':
-          dev = AIEDevice.ipu
-        elif sys.argv[1] == 'xcvc1902':
-          dev = AIEDevice.xcvc1902
+        if len(sys.argv) != 3:
+            raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
+
+        if sys.argv[1] == "ipu":
+            dev = AIEDevice.ipu
+        elif sys.argv[1] == "xcvc1902":
+            dev = AIEDevice.xcvc1902
         else:
-          raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
+            raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
 
         @device(dev)
         def device_body():
@@ -60,10 +61,10 @@ def core_body():
                         elem_in = of_in.acquire(ObjectFifoPort.Consume, 1)
                         elem_out = of_out.acquire(ObjectFifoPort.Produce, 1)
                         for i in for_(n):
-                          v0 = memref.load(elem_in, [i])
-                          v1 = arith.muli(v0, arith.constant(3, T.i32()))
-                          memref.store(v1, elem_out, [i])
-                          yield_([])
+                            v0 = memref.load(elem_in, [i])
+                            v1 = arith.muli(v0, arith.constant(3, T.i32()))
+                            memref.store(v1, elem_out, [i])
+                            yield_([])
                         of_in.release(ObjectFifoPort.Consume, 1)
                         of_out.release(ObjectFifoPort.Produce, 1)
                         yield_([])
diff --git a/reference_designs/IRON-examples/vector_scalar/run.lit b/reference_designs/IRON-examples/vector_scalar/run.lit
index 212f474fea..a429e99221 100644
--- a/reference_designs/IRON-examples/vector_scalar/run.lit
+++ b/reference_designs/IRON-examples/vector_scalar/run.lit
@@ -3,8 +3,7 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o
-// RUN: %python %S/aie2.py > ./aie.mlir
+// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
 // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
 // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
diff --git a/reference_designs/IRON-examples/vector_scalar/run_vck5000.lit b/reference_designs/IRON-examples/vector_scalar/run_vck5000.lit
new file mode 100644
index 0000000000..3583a5f393
--- /dev/null
+++ b/reference_designs/IRON-examples/vector_scalar/run_vck5000.lit
@@ -0,0 +1,10 @@
+// (c) Copyright 2023 Advanced Micro Devices, Inc.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// REQUIRES: hsa
+//
+// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir
+// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib
+// RUN: %run_on_board ./test.elf | FileCheck %s
+// CHECK: PASS!
+
diff --git a/reference_designs/IRON-examples/vector_scalar_kernel/run.lit b/reference_designs/IRON-examples/vector_scalar_kernel/run.lit
deleted file mode 100644
index 212f474fea..0000000000
--- a/reference_designs/IRON-examples/vector_scalar_kernel/run.lit
+++ /dev/null
@@ -1,12 +0,0 @@
-// (c) Copyright 2023 Advanced Micro Devices, Inc.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-// REQUIRES: ryzen_ai
-//
-// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o
-// RUN: %python %S/aie2.py > ./aie.mlir
-// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
-// RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
-// CHECK: PASS!
-
diff --git a/reference_designs/IRON-examples/vector_scalar_kernel/run_vck5000.lit b/reference_designs/IRON-examples/vector_scalar_kernel/run_vck5000.lit
new file mode 100644
index 0000000000..65dcc6a9a4
--- /dev/null
+++ b/reference_designs/IRON-examples/vector_scalar_kernel/run_vck5000.lit
@@ -0,0 +1,11 @@
+// (c) Copyright 2023 Advanced Micro Devices, Inc.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// REQUIRES: hsa
+//
+// RUN: xchesscc -p me -P %aietools/data/versal_prod/lib/ -L %aietools/data/versal_prod/lib/ -c %S/scale.cc
+// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir
+// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib
+// RUN: %run_on_board ./test.elf | FileCheck %s
+// CHECK: PASS!
+
diff --git a/reference_designs/IRON-examples/vector_scalar_kernel/test.cpp b/reference_designs/IRON-examples/vector_scalar_kernel/test_vck5000.cpp
similarity index 100%
rename from reference_designs/IRON-examples/vector_scalar_kernel/test.cpp
rename to reference_designs/IRON-examples/vector_scalar_kernel/test_vck5000.cpp
diff --git a/reference_designs/IRON-examples/vector_sum/aie2.py b/reference_designs/IRON-examples/vector_sum/aie2.py
index fee3e165d9..4e40b8009c 100755
--- a/reference_designs/IRON-examples/vector_sum/aie2.py
+++ b/reference_designs/IRON-examples/vector_sum/aie2.py
@@ -15,6 +15,7 @@
 
 import sys
 
+
 def my_vector_sum():
     N = 64
 
@@ -22,15 +23,15 @@ def my_vector_sum():
 
     with mlir_mod_ctx() as ctx:
 
-        if(len(sys.argv) != 3):
-          raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
-        
-        if sys.argv[1] == 'ipu':
-          dev = AIEDevice.ipu
-        elif sys.argv[1] == 'xcvc1902':
-          dev = AIEDevice.xcvc1902
+        if len(sys.argv) != 3:
+            raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
+
+        if sys.argv[1] == "ipu":
+            dev = AIEDevice.ipu
+        elif sys.argv[1] == "xcvc1902":
+            dev = AIEDevice.xcvc1902
         else:
-          raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
+            raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
 
         @device(dev)
         def device_body():
@@ -59,12 +60,12 @@ def core_body():
                     elem_in = of_in.acquire(ObjectFifoPort.Consume, 1)
                     elem_out = of_out.acquire(ObjectFifoPort.Produce, 1)
                     for i in for_(N):
-                      v0 = memref.load(elem_in, [i])
-                      v1 = memref.load(sum_val, [0])
-                      v2 = arith.addi(v1, v0)
-                      memref.store(v2, sum_val, [0])
-                      yield_([])
-                    
+                        v0 = memref.load(elem_in, [i])
+                        v1 = memref.load(sum_val, [0])
+                        v2 = arith.addi(v1, v0)
+                        memref.store(v2, sum_val, [0])
+                        yield_([])
+
                     v3 = memref.load(sum_val, [0])
                     memref.store(v3, elem_out, [0])
                     of_in.release(ObjectFifoPort.Consume, 1)
diff --git a/reference_designs/IRON-examples/vector_sum/run.lit b/reference_designs/IRON-examples/vector_sum/run.lit
index 212f474fea..a429e99221 100644
--- a/reference_designs/IRON-examples/vector_sum/run.lit
+++ b/reference_designs/IRON-examples/vector_sum/run.lit
@@ -3,8 +3,7 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o
-// RUN: %python %S/aie2.py > ./aie.mlir
+// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
 // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
 // RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
diff --git a/reference_designs/IRON-examples/vector_sum/run_vck5000.lit b/reference_designs/IRON-examples/vector_sum/run_vck5000.lit
new file mode 100644
index 0000000000..3583a5f393
--- /dev/null
+++ b/reference_designs/IRON-examples/vector_sum/run_vck5000.lit
@@ -0,0 +1,10 @@
+// (c) Copyright 2023 Advanced Micro Devices, Inc.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// REQUIRES: hsa
+//
+// RUN: %python %S/aie2.py xcvc1902 6 > ./aie.mlir
+// RUN: %python aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %link_against_hsa% ./aie.mlir -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test_vck5000.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib
+// RUN: %run_on_board ./test.elf | FileCheck %s
+// CHECK: PASS!
+
diff --git a/reference_designs/dynamic_dma_config_add_one/test.cpp b/reference_designs/dynamic_dma_config_add_one/test.cpp
index c26cb76355..8f101d4f1d 100644
--- a/reference_designs/dynamic_dma_config_add_one/test.cpp
+++ b/reference_designs/dynamic_dma_config_add_one/test.cpp
@@ -113,11 +113,11 @@ int main(int argc, char *argv[]) {
   uint64_t wr_idx = hsa_queue_add_write_index_relaxed(queues[0], 1);
   uint64_t packet_id = wr_idx % queues[0]->size;
   hsa_agent_dispatch_packet_t write_pkt;
-  air_packet_nd_memcpy(&write_pkt, 0, col, 1, 0, 4, 2,
-                       reinterpret_cast<uint64_t>(src),
-                       DMA_COUNT * sizeof(float), 1, 0, 1, 0, 1, 0);
-  air_queue_dispatch_and_wait(&agents[0], queues[0], packet_id, wr_idx,
-                              &write_pkt);
+  mlir_aie_packet_nd_memcpy(&write_pkt, 0, col, 1, 0, 4, 2,
+                            reinterpret_cast<uint64_t>(src),
+                            DMA_COUNT * sizeof(float), 1, 0, 1, 0, 1, 0);
+  mlir_aie_queue_dispatch_and_wait(&agents[0], queues[0], packet_id, wr_idx,
+                                   &write_pkt);
 
   //
   // read the data
@@ -126,11 +126,11 @@ int main(int argc, char *argv[]) {
   wr_idx = hsa_queue_add_write_index_relaxed(queues[0], 1);
   packet_id = wr_idx % queues[0]->size;
   hsa_agent_dispatch_packet_t read_pkt;
-  air_packet_nd_memcpy(&read_pkt, 0, col, 0, 0, 4, 2,
-                       reinterpret_cast<uint64_t>(dst),
-                       DMA_COUNT * sizeof(float), 1, 0, 1, 0, 1, 0);
-  air_queue_dispatch_and_wait(&agents[0], queues[0], packet_id, wr_idx,
-                              &read_pkt);
+  mlir_aie_packet_nd_memcpy(&read_pkt, 0, col, 0, 0, 4, 2,
+                            reinterpret_cast<uint64_t>(dst),
+                            DMA_COUNT * sizeof(float), 1, 0, 1, 0, 1, 0);
+  mlir_aie_queue_dispatch_and_wait(&agents[0], queues[0], packet_id, wr_idx,
+                                   &read_pkt);
 
   int errors = 0;
 
diff --git a/reference_designs/dynamic_dma_config_add_one_kernel/aie.mlir.no_func b/reference_designs/dynamic_dma_config_add_one_kernel/aie.mlir.no_func
deleted file mode 100644
index bfef549e04..0000000000
--- a/reference_designs/dynamic_dma_config_add_one_kernel/aie.mlir.no_func
+++ /dev/null
@@ -1,62 +0,0 @@
-//===- aie.mlir ------------------------------------------------*- MLIR -*-===//
-//
-// Copyright (C) 2020-2022, Xilinx Inc.
-// Copyright (C) 2022, Advanced Micro Devices, Inc.
-// SPDX-License-Identifier: MIT
-//
-//===----------------------------------------------------------------------===//
-
-module {
-  %t70 = aie.tile(6, 0)
-  %t71 = aie.tile(6, 1)
-  %t72 = aie.tile(6, 2)
-
-  aie.flow(%t70, "DMA" : 0, %t72, "DMA" : 0)
-  aie.flow(%t70, "DMA" : 1, %t72, "DMA" : 1)
-  aie.flow(%t72, "DMA" : 0, %t70, "DMA" : 0)
-  aie.flow(%t72, "DMA" : 1, %t70, "DMA" : 1)
-
-  %buf72_0 = aie.buffer(%t72) { sym_name = "in" } : memref<16xi32>
-  %buf72_1 = aie.buffer(%t72) { sym_name = "out" } : memref<16xi32>
-
-  %l72_0 = aie.lock(%t72, 0)
-  %l72_1 = aie.lock(%t72, 1)
-
-  %m72 = aie.mem(%t72) {
-      %srcDma = aie.dma_start(S2MM, 0, ^bd0, ^dma0)
-    ^dma0:
-      %dstDma = aie.dma_start(MM2S, 0, ^bd1, ^end)
-    ^bd0:
-      aie.use_lock(%l72_0, "Acquire", 0)
-      aie.dma_bd(%buf72_0 : memref<16xi32>, 0, 16)
-      aie.use_lock(%l72_0, "Release", 1)
-      aie.next_bd ^bd1
-    ^bd1:
-      aie.use_lock(%l72_1, "Acquire", 1)
-      aie.dma_bd(%buf72_1 : memref<16xi32>, 0, 16)
-      aie.use_lock(%l72_1, "Release", 0)
-      aie.next_bd ^bd0
-    ^end:
-      aie.end
-  }
-
-  aie.core(%t72) {
-    %c16 = arith.constant 16 : index
-    %c0 = arith.constant 0 : index
-    %c1 = arith.constant 1 : index
-    %c1_32 = arith.constant 1 : i32
-
-    aie.use_lock(%l72_0, "Acquire", 1)
-    aie.use_lock(%l72_1, "Acquire", 0)
-    scf.for %arg3 = %c0 to %c16 step %c1 {
-        %0 = memref.load %buf72_0[%arg3] : memref<16xi32>
-        %1 = arith.addi %0, %c1_32 : i32
-        memref.store %1, %buf72_1[%arg3] : memref<16xi32>
-    }
-    aie.use_lock(%l72_0, "Release", 0)
-    aie.use_lock(%l72_1, "Release", 1)
-
-    aie.end
-  }
-
-}
diff --git a/reference_designs/dynamic_dma_config_add_one_kernel/test.cpp b/reference_designs/dynamic_dma_config_add_one_kernel/test.cpp
index c26cb76355..8f101d4f1d 100644
--- a/reference_designs/dynamic_dma_config_add_one_kernel/test.cpp
+++ b/reference_designs/dynamic_dma_config_add_one_kernel/test.cpp
@@ -113,11 +113,11 @@ int main(int argc, char *argv[]) {
   uint64_t wr_idx = hsa_queue_add_write_index_relaxed(queues[0], 1);
   uint64_t packet_id = wr_idx % queues[0]->size;
   hsa_agent_dispatch_packet_t write_pkt;
-  air_packet_nd_memcpy(&write_pkt, 0, col, 1, 0, 4, 2,
-                       reinterpret_cast<uint64_t>(src),
-                       DMA_COUNT * sizeof(float), 1, 0, 1, 0, 1, 0);
-  air_queue_dispatch_and_wait(&agents[0], queues[0], packet_id, wr_idx,
-                              &write_pkt);
+  mlir_aie_packet_nd_memcpy(&write_pkt, 0, col, 1, 0, 4, 2,
+                            reinterpret_cast<uint64_t>(src),
+                            DMA_COUNT * sizeof(float), 1, 0, 1, 0, 1, 0);
+  mlir_aie_queue_dispatch_and_wait(&agents[0], queues[0], packet_id, wr_idx,
+                                   &write_pkt);
 
   //
   // read the data
@@ -126,11 +126,11 @@ int main(int argc, char *argv[]) {
   wr_idx = hsa_queue_add_write_index_relaxed(queues[0], 1);
   packet_id = wr_idx % queues[0]->size;
   hsa_agent_dispatch_packet_t read_pkt;
-  air_packet_nd_memcpy(&read_pkt, 0, col, 0, 0, 4, 2,
-                       reinterpret_cast<uint64_t>(dst),
-                       DMA_COUNT * sizeof(float), 1, 0, 1, 0, 1, 0);
-  air_queue_dispatch_and_wait(&agents[0], queues[0], packet_id, wr_idx,
-                              &read_pkt);
+  mlir_aie_packet_nd_memcpy(&read_pkt, 0, col, 0, 0, 4, 2,
+                            reinterpret_cast<uint64_t>(dst),
+                            DMA_COUNT * sizeof(float), 1, 0, 1, 0, 1, 0);
+  mlir_aie_queue_dispatch_and_wait(&agents[0], queues[0], packet_id, wr_idx,
+                                   &read_pkt);
 
   int errors = 0;
 
diff --git a/reference_designs/dynamic_dma_config_add_one_kernel_obj_fifo/test.cpp b/reference_designs/dynamic_dma_config_add_one_kernel_obj_fifo/test.cpp
index 68b523f873..1fe7607220 100644
--- a/reference_designs/dynamic_dma_config_add_one_kernel_obj_fifo/test.cpp
+++ b/reference_designs/dynamic_dma_config_add_one_kernel_obj_fifo/test.cpp
@@ -100,7 +100,7 @@ int main(int argc, char *argv[]) {
     dst[i] = 0xdeface;
   }
 
-  invoke_data_movement(queues[0], &agents[0], src, dst);
+  invoke_data_movement(queues[0], &agents[0], dst, src);
 
   int errors = 0;
 
diff --git a/reference_designs/ipu-xrt/matrix_add_one/aie2.py b/reference_designs/ipu-xrt/matrix_add_one/aie2.py
index d5a528c43a..6f62deaa1c 100644
--- a/reference_designs/ipu-xrt/matrix_add_one/aie2.py
+++ b/reference_designs/ipu-xrt/matrix_add_one/aie2.py
@@ -26,6 +26,7 @@
 
 objfifo_capacity = 4
 
+
 def my_matrix_add_one():
     with mlir_mod_ctx() as ctx:
 
@@ -39,10 +40,14 @@ def device_body():
 
             # AIE-array data movement with object fifos
             # Input
-            of_in1 = object_fifo("in0", ShimTile, ComputeTile2, objfifo_capacity, memRef_ty)
+            of_in1 = object_fifo(
+                "in0", ShimTile, ComputeTile2, objfifo_capacity, memRef_ty
+            )
 
             # Output
-            of_out1 = object_fifo("out0", ComputeTile2, ShimTile, objfifo_capacity, memRef_ty)
+            of_out1 = object_fifo(
+                "out0", ComputeTile2, ShimTile, objfifo_capacity, memRef_ty
+            )
 
             # Set up compute tiles
 
@@ -54,10 +59,10 @@ def core_body():
                     elem_in = of_in1.acquire(ObjectFifoPort.Consume, 1)
                     elem_out = of_out1.acquire(ObjectFifoPort.Produce, 1)
                     for i in for_(TILE_SIZE):
-                      v0 = memref.load(elem_in, [i])
-                      v1 = arith.addi(v0, arith.constant(1, T.i32()))
-                      memref.store(v1, elem_out, [i])
-                      yield_([])
+                        v0 = memref.load(elem_in, [i])
+                        v1 = arith.addi(v0, arith.constant(1, T.i32()))
+                        memref.store(v1, elem_out, [i])
+                        yield_([])
                     of_in1.release(ObjectFifoPort.Consume, 1)
                     of_out1.release(ObjectFifoPort.Produce, 1)
                     yield_([])
@@ -69,10 +74,18 @@ def core_body():
             @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty)
             def sequence(inTensor, notUsed, outTensor):
                 ipu_dma_memcpy_nd(
-                    metadata="out0", bd_id=0, mem=outTensor, sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH], strides=[1, 1, IMAGE_WIDTH]
+                    metadata="out0",
+                    bd_id=0,
+                    mem=outTensor,
+                    sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH],
+                    strides=[1, 1, IMAGE_WIDTH],
                 )
                 ipu_dma_memcpy_nd(
-                    metadata="in0", bd_id=1, mem=inTensor, sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH], strides=[1, 1, IMAGE_WIDTH]
+                    metadata="in0",
+                    bd_id=1,
+                    mem=inTensor,
+                    sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH],
+                    strides=[1, 1, IMAGE_WIDTH],
                 )
                 ipu_sync(column=0, row=0, direction=0, channel=0)
 
diff --git a/reference_designs/ipu-xrt/vector_max/aie2.py b/reference_designs/ipu-xrt/vector_max/aie2.py
index 89f9948296..b1b7c8c389 100755
--- a/reference_designs/ipu-xrt/vector_max/aie2.py
+++ b/reference_designs/ipu-xrt/vector_max/aie2.py
@@ -48,12 +48,12 @@ def core_body():
                     elem_in = of_in.acquire(ObjectFifoPort.Consume, 1)
                     elem_out = of_out.acquire(ObjectFifoPort.Produce, 1)
                     for i in for_(N):
-                      v0 = memref.load(elem_in, [i])
-                      v1 = memref.load(max_val, [0])
-                      v2 = arith.maxui(v1, v0)
-                      memref.store(v2, max_val, [0])
-                      yield_([])
-                    
+                        v0 = memref.load(elem_in, [i])
+                        v1 = memref.load(max_val, [0])
+                        v2 = arith.maxui(v1, v0)
+                        memref.store(v2, max_val, [0])
+                        yield_([])
+
                     v3 = memref.load(max_val, [0])
                     memref.store(v3, elem_out, [0])
                     of_in.release(ObjectFifoPort.Consume, 1)
diff --git a/reference_designs/ipu-xrt/vector_max/run.lit b/reference_designs/ipu-xrt/vector_max/run.lit
index 212f474fea..49cd75e360 100644
--- a/reference_designs/ipu-xrt/vector_max/run.lit
+++ b/reference_designs/ipu-xrt/vector_max/run.lit
@@ -3,7 +3,6 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o
 // RUN: %python %S/aie2.py > ./aie.mlir
 // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
diff --git a/reference_designs/ipu-xrt/vector_min/aie2.py b/reference_designs/ipu-xrt/vector_min/aie2.py
index 0e6e05896d..f59ab8ab6e 100755
--- a/reference_designs/ipu-xrt/vector_min/aie2.py
+++ b/reference_designs/ipu-xrt/vector_min/aie2.py
@@ -48,12 +48,12 @@ def core_body():
                     elem_in = of_in.acquire(ObjectFifoPort.Consume, 1)
                     elem_out = of_out.acquire(ObjectFifoPort.Produce, 1)
                     for i in for_(N):
-                      v0 = memref.load(elem_in, [i])
-                      v1 = memref.load(min_val, [0])
-                      v2 = arith.minui(v1, v0)
-                      memref.store(v2, min_val, [0])
-                      yield_([])
-                    
+                        v0 = memref.load(elem_in, [i])
+                        v1 = memref.load(min_val, [0])
+                        v2 = arith.minui(v1, v0)
+                        memref.store(v2, min_val, [0])
+                        yield_([])
+
                     v3 = memref.load(min_val, [0])
                     memref.store(v3, elem_out, [0])
                     of_in.release(ObjectFifoPort.Consume, 1)
diff --git a/reference_designs/ipu-xrt/vector_min/run.lit b/reference_designs/ipu-xrt/vector_min/run.lit
index 212f474fea..49cd75e360 100644
--- a/reference_designs/ipu-xrt/vector_min/run.lit
+++ b/reference_designs/ipu-xrt/vector_min/run.lit
@@ -3,7 +3,6 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o
 // RUN: %python %S/aie2.py > ./aie.mlir
 // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
diff --git a/reference_designs/ipu-xrt/vector_sum/aie2.py b/reference_designs/ipu-xrt/vector_sum/aie2.py
index 1c44c24125..66dd3eb9a8 100755
--- a/reference_designs/ipu-xrt/vector_sum/aie2.py
+++ b/reference_designs/ipu-xrt/vector_sum/aie2.py
@@ -48,12 +48,12 @@ def core_body():
                     elem_in = of_in.acquire(ObjectFifoPort.Consume, 1)
                     elem_out = of_out.acquire(ObjectFifoPort.Produce, 1)
                     for i in for_(N):
-                      v0 = memref.load(elem_in, [i])
-                      v1 = memref.load(sum_val, [0])
-                      v2 = arith.addi(v1, v0)
-                      memref.store(v2, sum_val, [0])
-                      yield_([])
-                    
+                        v0 = memref.load(elem_in, [i])
+                        v1 = memref.load(sum_val, [0])
+                        v2 = arith.addi(v1, v0)
+                        memref.store(v2, sum_val, [0])
+                        yield_([])
+
                     v3 = memref.load(sum_val, [0])
                     memref.store(v3, elem_out, [0])
                     of_in.release(ObjectFifoPort.Consume, 1)
diff --git a/reference_designs/ipu-xrt/vector_sum/run.lit b/reference_designs/ipu-xrt/vector_sum/run.lit
index 212f474fea..49cd75e360 100644
--- a/reference_designs/ipu-xrt/vector_sum/run.lit
+++ b/reference_designs/ipu-xrt/vector_sum/run.lit
@@ -3,7 +3,6 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/scale.cc -o ./scale.o
 // RUN: %python %S/aie2.py > ./aie.mlir
 // RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
diff --git a/reference_designs/lit.cfg.py b/reference_designs/lit.cfg.py
index 340e53f343..066f4fb0b9 100755
--- a/reference_designs/lit.cfg.py
+++ b/reference_designs/lit.cfg.py
@@ -45,27 +45,40 @@
 # for python
 llvm_config.with_environment("PYTHONPATH", os.path.join(config.aie_obj_root, "python"))
 
-if config.hsa_found:
+# "%run_on_board" is deliberately NOT registered here: lit applies
+# substitutions in list order, so an entry added at this point would shadow
+# the lock wrapper / "echo" fallback chosen in the ROCm detection below and
+# turn those later registrations into dead code.
+
+run_on_ipu = "echo"
+xrt_flags = ""
+
+if config.hsa_dir and (not ("NOTFOUND" in config.hsa_dir)):
     if not "hsa" in config.aieHostTarget:
-        print("ROCm found, but disabled because host target '{config.aieHostTarget}'")
+        print(
+            "ROCm found, but disabled because host target {}".format(
+                config.aieHostTarget
+            )
+        )
     # Getting the path to the ROCm directory. hsa-runtime64 points to the cmake
     # directory so need to go up three directories
     rocm_root = os.path.join(config.hsa_dir, "..", "..", "..")
     print("Found ROCm:", rocm_root)
     config.available_features.add("hsa")
-    config.substitutions.append(('%HSA_DIR%', "{}".format(rocm_root)))
-else:
-    print("ROCm not found")
+    config.substitutions.append(("%HSA_DIR%", "{}".format(rocm_root)))
+    config.substitutions.append(("%link_against_hsa%", "--link_against_hsa"))
 
-if config.enable_board_tests:
-    config.substitutions.append(
-        ("%run_on_board", "sudo flock /tmp/board.lock")
-    )
+    if config.enable_board_tests:
+        config.substitutions.append(("%run_on_board", "flock /tmp/vck5000.lock sudo"))
+    else:
+        print("Skipping execution of unit tests (ENABLE_BOARD_TESTS=OFF)")
+        config.substitutions.append(("%run_on_board", "echo"))
 else:
+    print("ROCm not found")
     config.substitutions.append(("%run_on_board", "echo"))
+    config.substitutions.append(("%link_against_hsa%", ""))
+    config.substitutions.append(("%HSA_DIR%", ""))
 
-run_on_ipu = "echo"
-xrt_flags = ""
 if config.xrt_lib_dir:
     print("xrt found at", os.path.dirname(config.xrt_lib_dir))
     xrt_flags = "-I{} -L{} -luuid -lxrt_coreutil".format(
diff --git a/test/Targets/AIETargetHSA/input_with_addresses.mlir b/test/Targets/AIETargetHSA/input_with_addresses.mlir
new file mode 100644
index 0000000000..1cf762054b
--- /dev/null
+++ b/test/Targets/AIETargetHSA/input_with_addresses.mlir
@@ -0,0 +1,55 @@
+
+// RUN: aie-translate --aie-generate-hsa %s | FileCheck %s
+
+//CHECK: void invoke_data_movement(hsa_queue_t *q, hsa_agent_t *a, void *buf2, void *buf0) {
+//CHECK: 	uint64_t wr_idx = 0;
+//CHECK: 	uint64_t packet_id = 0;
+//CHECK: 	hsa_agent_dispatch_packet_t pkt0 ;
+//CHECK: 	wr_idx  = hsa_queue_add_write_index_relaxed(q, 1);
+//CHECK: 	packet_id  = wr_idx % q->size;
+//CHECK: 	mlir_aie_packet_nd_memcpy(&pkt0, 0 /* herd_id */, 6 /* col */, 0 /* dir */, 0/* channel */, 4 /* Burst length */, 2 /* Memory space */, (uint64_t)buf2 + 0 /* Address */, 256 /* 1d_length */, 1 /* 2d_length */, 0 /* 2d_stride */, 1 /* 3d_length */, 0 /* 3d_stride */ , 1 /* 4d_length */, 0 /* 4d_stride */);
+//CHECK: 	hsa_amd_signal_create_on_agent(1, 0, nullptr, a, 0, &pkt0.completion_signal);
+//CHECK: 	mlir_aie_write_pkt<hsa_agent_dispatch_packet_t>(q, packet_id, &pkt0);
+//CHECK: 	hsa_agent_dispatch_packet_t pkt1 ;
+//CHECK: 	wr_idx  = hsa_queue_add_write_index_relaxed(q, 1);
+//CHECK: 	packet_id  = wr_idx % q->size;
+//CHECK: 	mlir_aie_packet_nd_memcpy(&pkt1, 0 /* herd_id */, 6 /* col */, 1 /* dir */, 0/* channel */, 4 /* Burst length */, 2 /* Memory space */, (uint64_t)buf0 + 0 /* Address */, 256 /* 1d_length */, 1 /* 2d_length */, 0 /* 2d_stride */, 1 /* 3d_length */, 0 /* 3d_stride */ , 1 /* 4d_length */, 0 /* 4d_stride */);
+//CHECK: 	mlir_aie_queue_dispatch_and_wait(a, q, packet_id, wr_idx, &pkt1, false);
+//CHECK: 	while (hsa_signal_wait_scacquire(pkt0.completion_signal,
+//CHECK: 	HSA_SIGNAL_CONDITION_EQ, 0, 0x80000,
+//CHECK: 	HSA_WAIT_STATE_ACTIVE) != 0);
+//CHECK: 	while (hsa_signal_wait_scacquire(pkt1.completion_signal,
+//CHECK: 	HSA_SIGNAL_CONDITION_EQ, 0, 0x80000,
+//CHECK: 	HSA_WAIT_STATE_ACTIVE) != 0);
+//CHECK: 	hsa_signal_destroy(pkt0.completion_signal);
+//CHECK: 	hsa_signal_destroy(pkt1.completion_signal);
+//CHECK: }
+
+module {
+  aie.device(xcvc1902) {
+    memref.global "public" @out0 : memref<16xi32>
+    memref.global "public" @in0 : memref<16xi32>
+    %tile_6_0 = aie.tile(6, 0)
+    %switchbox_6_0 = aie.switchbox(%tile_6_0) {
+    }
+    %tile_6_2 = aie.tile(6, 2)
+    %switchbox_6_2 = aie.switchbox(%tile_6_2) {
+    }
+
+    aie.flow(%tile_6_0, DMA : 0, %tile_6_2, DMA : 0)
+    aie.flow(%tile_6_2, DMA : 0, %tile_6_0, DMA : 0)
+    %core_6_2 = aie.core(%tile_6_2) {
+      aie.end
+    }
+
+    aie.shim_dma_allocation @in0(MM2S, 0, 6)
+    aie.shim_dma_allocation @out0(S2MM, 0, 6)
+
+    func.func @sequence(%arg0: memref<64xi32>, %arg1: memref<32xi32>, %arg2: memref<64xi32>) {
+      aiex.ipu.dma_memcpy_nd(0, 0, %arg2[0, 0, 0, 0][1, 1, 1, 64][0, 0, 0]) {id = 0 : i64, metadata = @out0} : memref<64xi32>
+      aiex.ipu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 1, 64][0, 0, 0]) {id = 1 : i64, metadata = @in0} : memref<64xi32>
+      aiex.ipu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
+      return
+    }
+  }
+}
diff --git a/test/Targets/AIETargetHSA/lit.local.cfg b/test/Targets/AIETargetHSA/lit.local.cfg
new file mode 100644
index 0000000000..f87326bcf2
--- /dev/null
+++ b/test/Targets/AIETargetHSA/lit.local.cfg
@@ -0,0 +1,8 @@
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2023 Advanced Micro Devices Inc.
+
+if "hsa" not in config.available_features:  # feature is set by test/lit.cfg.py
+    config.unsupported = True  # skip this directory when ROCm/HSA is absent
diff --git a/test/lit.cfg.py b/test/lit.cfg.py
index d0523a40a8..26eaf3951a 100644
--- a/test/lit.cfg.py
+++ b/test/lit.cfg.py
@@ -57,21 +57,33 @@
 run_on_ipu = "echo"
 xrt_flags = ""
 
-if config.hsa_found:
+if config.hsa_dir and (not ("NOTFOUND" in config.hsa_dir)):
+    if not "hsa" in config.aieHostTarget:
+        print(
+            "ROCm found, but disabled because host target {}".format(
+                config.aieHostTarget
+            )
+        )
+
     # Getting the path to the ROCm directory. hsa-runtime64 points to the cmake
     # directory so need to go up three directories
     rocm_root = os.path.join(config.hsa_dir, "..", "..", "..")
     print("Found ROCm:", rocm_root)
-    config.substitutions.append(('%link_against_hsa%', "--link_against_hsa"))
+    print("Found hsa_dir:", config.hsa_dir)
+    config.available_features.add("hsa")
+    config.substitutions.append(("%HSA_DIR%", "{}".format(rocm_root)))
+    config.substitutions.append(("%link_against_hsa%", "--link_against_hsa"))
 
     if config.enable_board_tests:
-        config.substitutions.append(('%run_on_board', "flock /tmp/vck5000.lock sudo"))
+        config.substitutions.append(("%run_on_board", "flock /tmp/vck5000.lock sudo"))
     else:
         print("Skipping execution of unit tests (ENABLE_BOARD_TESTS=OFF)")
-        config.substitutions.append(('%run_on_board', "echo"))
+        config.substitutions.append(("%run_on_board", "echo"))
 else:
     print("ROCm not found")
-    config.substitutions.append(('%link_against_hsa%', ""))
+    config.substitutions.append(("%run_on_board", "echo"))
+    config.substitutions.append(("%link_against_hsa%", ""))
+    config.substitutions.append(("%HSA_DIR%", ""))
 
 if config.xrt_lib_dir:
     print("xrt found at", os.path.dirname(config.xrt_lib_dir))
diff --git a/test/unit_tests/aie/27_single_L1_single_lock/aie2.mlir b/test/unit_tests/aie/27_single_L1_single_lock/aie2.mlir
index 5ffae91ba7..f583981529 100644
--- a/test/unit_tests/aie/27_single_L1_single_lock/aie2.mlir
+++ b/test/unit_tests/aie/27_single_L1_single_lock/aie2.mlir
@@ -8,9 +8,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+// REQUIRES: hsa
 // RUN: %PYTHON aiecc.py %VitisSysrootFlag% --host-target=%aieHostTargetTriplet% %s -I%host_runtime_lib%/test_lib/include %extraAieCcFlags% %S/test.cpp -o test.elf -L%host_runtime_lib%/test_lib/lib -ltest_lib
 // RUN: %run_on_board ./test.elf
 
+// XFAIL: *
+
 module @test27_simple_shim_dma_single_lock {
   aie.device(xcve2802) {
     %tile72 = aie.tile(7, 3)
diff --git a/tools/aie-routing-command-line/visualize.py b/tools/aie-routing-command-line/visualize.py
index 976ce4d17e..b772575894 100755
--- a/tools/aie-routing-command-line/visualize.py
+++ b/tools/aie-routing-command-line/visualize.py
@@ -8,9 +8,10 @@
 
 from enum import Enum
 
+
 class Direction(Enum):
-    Horz = 0;
-    Vert = 1;
+    Horz = 0
+    Vert = 1
 
 
 class canvas:
@@ -20,345 +21,403 @@ def __init__(self, width, height):
         self.vert_line_list = []
         self.horz_line_list = []
         self.characters = []
-        
-    def direction(self, line) :
-        Horz_Stationary = False;
-        Vert_Stationary = False;
-        
-        if (line[0][0] == line[1][0]):
+
+    def direction(self, line):
+        Horz_Stationary = False
+        Vert_Stationary = False
+
+        if line[0][0] == line[1][0]:
             # Line Stationary in Horz Axis
             Horz_Stationary = True
-        if (line[0][1] == line[1][1]):
+        if line[0][1] == line[1][1]:
             # Line Stationary in Vert Axis
             Vert_Stationary = True
 
-        if (Horz_Stationary and Vert_Stationary):
+        if Horz_Stationary and Vert_Stationary:
             # Crash
-            raise Exception("Line is Diagonal");
-        if (not Horz_Stationary and not Vert_Stationary):
+            raise Exception("Line is Diagonal")
+        if not Horz_Stationary and not Vert_Stationary:
             # Crash
-            raise Exception("Line is a dot");
-        if ((not Horz_Stationary) and Vert_Stationary):
-            return Direction.Horz;
-        
-        if (Horz_Stationary and (not Vert_Stationary)):
-            return Direction.Vert;
-        
+            raise Exception("Line is a dot")
+        if (not Horz_Stationary) and Vert_Stationary:
+            return Direction.Horz
+
+        if Horz_Stationary and (not Vert_Stationary):
+            return Direction.Vert
+
     def draw_character(self, point, character):
-        self.characters.append([point, character]);
+        self.characters.append([point, character])
 
     def replace_character(self, point, character, replacement):
-        if (self.characters.count([point, character])):
-            self.characters.remove([point, character]);
-        self.characters.append([point, replacement]);
-        
+        if self.characters.count([point, character]):
+            self.characters.remove([point, character])
+        self.characters.append([point, replacement])
+
     def draw_line(self, start, finish):
-        if (self.direction([start,finish]) == Direction.Vert):
+        if self.direction([start, finish]) == Direction.Vert:
             self.vert_line_list.append([start, finish])
         else:
             self.horz_line_list.append([start, finish])
-        
+
     def draw_square(self, center, size):
-        horz_origin = math.floor((center[0] + 0.5) - (size/2));
-        horz_extent = math.ceil((center[0] + 0.5) + (size/2) + 3);
-
-        vert_origin = math.floor( (center[1] + 0.5)- (size/2));
-        vert_extent =  math.ceil( (center[1] + 0.5) + (size/2));
-        
-        top_left = (horz_origin, vert_origin );
-        top_right = (horz_extent, vert_origin);
-        bottom_left = (horz_origin, vert_extent);
-        bottom_right = (horz_extent, vert_extent);
-        
+        horz_origin = math.floor((center[0] + 0.5) - (size / 2))
+        horz_extent = math.ceil((center[0] + 0.5) + (size / 2) + 3)
+
+        vert_origin = math.floor((center[1] + 0.5) - (size / 2))
+        vert_extent = math.ceil((center[1] + 0.5) + (size / 2))
+
+        top_left = (horz_origin, vert_origin)
+        top_right = (horz_extent, vert_origin)
+        bottom_left = (horz_origin, vert_extent)
+        bottom_right = (horz_extent, vert_extent)
+
         self.draw_line(top_left, top_right)
         self.draw_line(top_right, bottom_right)
         self.draw_line(bottom_left, bottom_right)
         self.draw_line(top_left, bottom_left)
 
-    def vertical_index(self,point):
-        return point[1];
-    def horizontal_index(self,point):
-        return point[0];
-        
+    def vertical_index(self, point):
+        return point[1]
+
+    def horizontal_index(self, point):
+        return point[0]
+
     def within_line(self, point, line):
-        Horz_Stationary = False;
-        Vert_Stationary = False;
-        
-        if (line[0][0] == line[1][0]):
+        Horz_Stationary = False
+        Vert_Stationary = False
+
+        if line[0][0] == line[1][0]:
             # Line Stationary in Horz Axis
             Horz_Stationary = True
-        if (line[0][1] == line[1][1]):
+        if line[0][1] == line[1][1]:
             # Line Stationary in Vert Axis
             Vert_Stationary = True
 
-       # print("HS: {}, VS: {}, {}".format(Horz_Stationary, Vert_Stationary, line));
-            
-        if (Horz_Stationary and Vert_Stationary):
+        # print("HS: {}, VS: {}, {}".format(Horz_Stationary, Vert_Stationary, line));
+
+        if Horz_Stationary and Vert_Stationary:
             # Crash
-            raise Exception("Line is Diagonal");
-        if (not Horz_Stationary and not Vert_Stationary):
+            raise Exception("Line is Diagonal")
+        if not Horz_Stationary and not Vert_Stationary:
             # Crash
-            raise Exception("Line is a dot");
-        
-        if (Horz_Stationary and (not Vert_Stationary)):
+            raise Exception("Line is a dot")
+
+        if Horz_Stationary and (not Vert_Stationary):
             # it's a vertical line
             # Sort the tuples by Horz
-            line.sort(key=self.vertical_index);
-            start_line = ( (point[1] == line[0][1]) and (point[1] <= line[1][1]) and (point[0] == line[0][0]) )
-            in_line = ( (point[1] > line[0][1]) and (point[1] < line[1][1]) and (point[0] == line[0][0]) )
-            end_line = ( (point[1] > line[0][1]) and (point[1] == line[1][1]) and (point[0] == line[0][0]) )
-            
-            #print("Vert {}, point {}, {}".format(line, point, in_line));
-            
-        if ((not Horz_Stationary) and Vert_Stationary):
+            line.sort(key=self.vertical_index)
+            start_line = (
+                (point[1] == line[0][1])
+                and (point[1] <= line[1][1])
+                and (point[0] == line[0][0])
+            )
+            in_line = (
+                (point[1] > line[0][1])
+                and (point[1] < line[1][1])
+                and (point[0] == line[0][0])
+            )
+            end_line = (
+                (point[1] > line[0][1])
+                and (point[1] == line[1][1])
+                and (point[0] == line[0][0])
+            )
+
+            # print("Vert {}, point {}, {}".format(line, point, in_line));
+
+        if (not Horz_Stationary) and Vert_Stationary:
             # it's a horizonal line
             line.sort(key=self.horizontal_index)
-            start_line = ( (point[0] == line[0][0]) and (point[0] <= line[1][0]) and ( point[1] == line[0][1]))
-            in_line = ( (point[0] > line[0][0]) and (point[0] < line[1][0]) and ( point[1] == line[0][1]))
-            end_line = ( (point[0] > line[0][0]) and (point[0] == line[1][0]) and ( point[1] == line[0][1]))
-            #print("Horz {}, point {}, {}".format(line, point, in_line));
+            start_line = (
+                (point[0] == line[0][0])
+                and (point[0] <= line[1][0])
+                and (point[1] == line[0][1])
+            )
+            in_line = (
+                (point[0] > line[0][0])
+                and (point[0] < line[1][0])
+                and (point[1] == line[0][1])
+            )
+            end_line = (
+                (point[0] > line[0][0])
+                and (point[0] == line[1][0])
+                and (point[1] == line[0][1])
+            )
+            # print("Horz {}, point {}, {}".format(line, point, in_line));
 
         return (start_line, in_line, end_line)
-    
-    def find_horz_index(self,line_points):
-        index =0;
 
-        if (line_points[1]):
-            index+=2
+    def find_horz_index(self, line_points):
+        index = 0
+
+        if line_points[1]:
+            index += 2
         else:
-            if (line_points[0]):
-                index += 1;
-            if (line_points[2]):
-                index +=4
-        return index;
+            if line_points[0]:
+                index += 1
+            if line_points[2]:
+                index += 4
+        return index
 
     def transform(self, index):
 
         # if horz through, clear bits for horz end
         # if vert through, clear bits for vert end
-        
-        
+
         chars = {
-            0 : ' ',
-            2 : u'\u2500', # horz line
-            16 : u'\u2502',# vert line
-            9 : u'\u250c', # box top left
-            33 : u'\u2514',# box bot left
-            12 : u'\u2510',# box top right
-            36 : u'\u2518',# box bot right
-            1 : u'\u2576', # right half horz line 
-            8 : u'\u2577', # lower half vert line
-            32 : u'\u2575',# upper half vert line
-            4 : u'\u2574', # left half horz line
-            18 : u'\u253c',# vert AND horz
-            25 : u'\u251c',# vert and right
-            17 : u'\u251c',
-            20 : u'\u2524',# vert and left
-            10 : u'\u252c',# horz and bot
-            34 : u'\u2534',# horz and top
-            21 : u'\u253c',# vert AND horz
-            42 : u'\u253c'
-            
+            0: " ",
+            2: "\u2500",  # horz line
+            16: "\u2502",  # vert line
+            9: "\u250c",  # box top left
+            33: "\u2514",  # box bot left
+            12: "\u2510",  # box top right
+            36: "\u2518",  # box bot right
+            1: "\u2576",  # right half horz line
+            8: "\u2577",  # lower half vert line
+            32: "\u2575",  # upper half vert line
+            4: "\u2574",  # left half horz line
+            18: "\u253c",  # vert AND horz
+            25: "\u251c",  # vert and right
+            17: "\u251c",
+            20: "\u2524",  # vert and left
+            10: "\u252c",  # horz and bot
+            34: "\u2534",  # horz and top
+            21: "\u253c",  # vert AND horz
+            42: "\u253c",
         }
         try:
-         char = chars[index];
+            char = chars[index]
         except KeyError:
-         char = "x"
-         
+            char = "x"
+
         return char
 
     def combine(self, a, b):
-        return [ a[0] or b[0], a[1] or b[1], a[2] or b[2]];
-    
+        return [a[0] or b[0], a[1] or b[1], a[2] or b[2]]
+
     def rasterize(self):
-       for x in range(self.height):
-           for y in range(self.width):
-               char = "({},{})".format(y,x);
-               index = 0;
-               horz_line_points = [False, False, False];
-               vert_line_points = [False, False, False];
-
-               for charloc in self.characters:
-                   if (charloc[0][0] == y and charloc[0][1] == x):
-                       print(charloc[1], end='', sep='');
-                       index = -1;
-                       break
-               if index == 0: # not a character, either vert or horz line 
-                   for line in self.horz_line_list:
-                       horz_line_points = self.combine(horz_line_points, self.within_line((y,x),line));
-                   index += self.find_horz_index(horz_line_points)
-
-                   for line in self.vert_line_list:
-                       vert_line_points = self.combine(vert_line_points, self.within_line((y,x),line));
-                   index += 8*self.find_horz_index(vert_line_points)
-
-                   print("{}".format(self.transform(index)), end='', sep='')
-                   #print(" {} ".format(index), end='', sep='')
-   
-           print("")
+        for x in range(self.height):
+            for y in range(self.width):
+                char = "({},{})".format(y, x)
+                index = 0
+                horz_line_points = [False, False, False]
+                vert_line_points = [False, False, False]
+
+                for charloc in self.characters:
+                    if charloc[0][0] == y and charloc[0][1] == x:
+                        print(charloc[1], end="", sep="")
+                        index = -1
+                        break
+                if index == 0:  # not a character, either vert or horz line
+                    for line in self.horz_line_list:
+                        horz_line_points = self.combine(
+                            horz_line_points, self.within_line((y, x), line)
+                        )
+                    index += self.find_horz_index(horz_line_points)
+
+                    for line in self.vert_line_list:
+                        vert_line_points = self.combine(
+                            vert_line_points, self.within_line((y, x), line)
+                        )
+                    index += 8 * self.find_horz_index(vert_line_points)
+
+                    print("{}".format(self.transform(index)), end="", sep="")
+                    # print(" {} ".format(index), end='', sep='')
+
+            print("")
+
 
 superscripts = {
-   # 0 : u'\u2070', 
-    0 : ' ', 
-    1 : u'\u00b9', 
-    2 : u'\u00b2', 
-    3 : u'\u00b3', 
-    4 : u'\u2074', 
-    5 : u'\u2075', 
-    6 : u'\u2076', 
-    7 : u'\u2077', 
-    8 : u'\u2078', 
-    9 : u'\u2079', 
+    # 0 : u'\u2070',
+    0: " ",
+    1: "\u00b9",
+    2: "\u00b2",
+    3: "\u00b3",
+    4: "\u2074",
+    5: "\u2075",
+    6: "\u2076",
+    7: "\u2077",
+    8: "\u2078",
+    9: "\u2079",
 }
 subscripts = {
-   # 0 : u'\u2080', 
-    0 : ' ', 
-    1 : u'\u2081', 
-    2 : u'\u2082', 
-    3 : u'\u2083', 
-    4 : u'\u2084', 
-    5 : u'\u2085', 
-    6 : u'\u2086', 
-    7 : u'\u2087', 
-    8 : u'\u2088', 
-    9 : u'\u2089', 
+    # 0 : u'\u2080',
+    0: " ",
+    1: "\u2081",
+    2: "\u2082",
+    3: "\u2083",
+    4: "\u2084",
+    5: "\u2085",
+    6: "\u2086",
+    7: "\u2087",
+    8: "\u2088",
+    9: "\u2089",
 }
 
-def draw_switchbox(canvas, xoffset, yoffset, source_count, destination_count,
-         northbound, southbound, eastbound, westbound, draw_demand=True, name=""):
-    c.draw_square((xoffset+5,yoffset+4),2)
+
+def draw_switchbox(
+    canvas,
+    xoffset,
+    yoffset,
+    source_count,
+    destination_count,
+    northbound,
+    southbound,
+    eastbound,
+    westbound,
+    draw_demand=True,
+    name="",
+):
+    c.draw_square((xoffset + 5, yoffset + 4), 2)
 
     # label it
     if len(name) > 0:
-        c.draw_character((xoffset+6,yoffset+4), name[0])
+        c.draw_character((xoffset + 6, yoffset + 4), name[0])
     if len(name) > 1:
-        c.draw_character((xoffset+7,yoffset+4), name[1])
+        c.draw_character((xoffset + 7, yoffset + 4), name[1])
     if len(name) > 2:
-        c.draw_character((xoffset+8,yoffset+4), name[2])
+        c.draw_character((xoffset + 8, yoffset + 4), name[2])
     if len(name) > 3:
-        c.draw_character((xoffset+9,yoffset+4), name[3])
+        c.draw_character((xoffset + 9, yoffset + 4), name[3])
 
     # draw source and destination count
-    if(source_count > 0 or destination_count > 0):
-        c.draw_character((xoffset+7,yoffset+5), '*')
+    if source_count > 0 or destination_count > 0:
+        c.draw_character((xoffset + 7, yoffset + 5), "*")
 
     # left of the switchbox (south)
-    if northbound > 0: 
-        c.draw_line((xoffset+10,yoffset+4), (xoffset+14,yoffset+4))
-        if(draw_demand):
-            c.draw_character((xoffset+12,yoffset+3), subscripts[northbound])
-            if(northbound > 6): # if overcapacity, mark with an 'x'            
-                c.draw_character((xoffset+10,yoffset+4), 'x')
-                #c.draw_character((xoffset+11,yoffset+4), 'x')
-                c.draw_character((xoffset+12,yoffset+4), 'x')
-    if southbound > 0: 
-        c.draw_line((xoffset+0,yoffset+5), (xoffset+4,yoffset+5))
-        if(draw_demand):
-            c.draw_character((xoffset+2,yoffset+6), superscripts[southbound])
-            if(southbound > 4): # if overcapacity, mark with an 'x'
-                c.draw_character((xoffset+1, yoffset+5), 'x')
-                #c.draw_character((xoffset+2, yoffset+5), 'x')
-                c.draw_character((xoffset+3, yoffset+5), 'x')
+    if northbound > 0:
+        c.draw_line((xoffset + 10, yoffset + 4), (xoffset + 14, yoffset + 4))
+        if draw_demand:
+            c.draw_character((xoffset + 12, yoffset + 3), subscripts[northbound])
+            if northbound > 6:  # if overcapacity, mark with an 'x'
+                c.draw_character((xoffset + 10, yoffset + 4), "x")
+                # c.draw_character((xoffset+11,yoffset+4), 'x')
+                c.draw_character((xoffset + 12, yoffset + 4), "x")
+    if southbound > 0:
+        c.draw_line((xoffset + 0, yoffset + 5), (xoffset + 4, yoffset + 5))
+        if draw_demand:
+            c.draw_character((xoffset + 2, yoffset + 6), superscripts[southbound])
+            if southbound > 4:  # if overcapacity, mark with an 'x'
+                c.draw_character((xoffset + 1, yoffset + 5), "x")
+                # c.draw_character((xoffset+2, yoffset+5), 'x')
+                c.draw_character((xoffset + 3, yoffset + 5), "x")
 
     # below the switchbox (east)
-    if eastbound > 0: 
-        c.draw_line((xoffset+6,yoffset+6), (xoffset+6,yoffset+8))
-        if(draw_demand):
-            c.draw_character((xoffset+5,yoffset+7), superscripts[eastbound])
-            if(eastbound > 4): # if overcapacity, mark with an 'x'
-                c.draw_character((xoffset+6, yoffset+6), 'x')
-                #c.draw_character((xoffset+6, yoffset+7), 'x')
-                c.draw_character((xoffset+6, yoffset+8), 'x')
-    if westbound > 0: 
-        c.draw_line((xoffset+8,yoffset+1), (xoffset+8,yoffset+3))
-        if(draw_demand):
-            c.draw_character((xoffset+9,yoffset+2), superscripts[westbound])
-            if(westbound > 4): # if overcapacity, mark with an 'x'
-                c.draw_character((xoffset+8, yoffset+1), 'x')
-                #c.draw_character((xoffset+7, yoffset+2), 'x')
-                c.draw_character((xoffset+8, yoffset+3), 'x')
-
-
-SB_WIDTH = 10; SB_HEIGHT = 5 # distances between switchboxes
+    if eastbound > 0:
+        c.draw_line((xoffset + 6, yoffset + 6), (xoffset + 6, yoffset + 8))
+        if draw_demand:
+            c.draw_character((xoffset + 5, yoffset + 7), superscripts[eastbound])
+            if eastbound > 4:  # if overcapacity, mark with an 'x'
+                c.draw_character((xoffset + 6, yoffset + 6), "x")
+                # c.draw_character((xoffset+6, yoffset+7), 'x')
+                c.draw_character((xoffset + 6, yoffset + 8), "x")
+    if westbound > 0:
+        c.draw_line((xoffset + 8, yoffset + 1), (xoffset + 8, yoffset + 3))
+        if draw_demand:
+            c.draw_character((xoffset + 9, yoffset + 2), superscripts[westbound])
+            if westbound > 4:  # if overcapacity, mark with an 'x'
+                c.draw_character((xoffset + 8, yoffset + 1), "x")
+                # c.draw_character((xoffset+7, yoffset+2), 'x')
+                c.draw_character((xoffset + 8, yoffset + 3), "x")
+
+
+SB_WIDTH = 10
+SB_HEIGHT = 5  # distances between switchboxes
+
+
 def draw_switchboxes(c, switchboxes):
     for item in switchboxes:
-        draw_switchbox(c, SB_WIDTH*item['row'], SB_HEIGHT*item['col'], 
-            item['source_count'], item['destination_count'],
-            item['northbound'], item['southbound'],
-            item['eastbound'], item['westbound'], draw_demand=True,
-            name="{},{}".format(item['col'], item['row'] ))
-    
+        draw_switchbox(
+            c,
+            SB_WIDTH * item["row"],
+            SB_HEIGHT * item["col"],
+            item["source_count"],
+            item["destination_count"],
+            item["northbound"],
+            item["southbound"],
+            item["eastbound"],
+            item["westbound"],
+            draw_demand=True,
+            name="{},{}".format(item["col"], item["row"]),
+        )
+
+
 # given a route, draw arrow characters to indicate the route
 # route is a list of switchboxes, represented as int tuple coordinates
-left_arrow = u'\u2190'
-up_arrow   = u'\u2191'
-right_arrow= u'\u2192'
-down_arrow = u'\u2193'
+left_arrow = "\u2190"
+up_arrow = "\u2191"
+right_arrow = "\u2192"
+down_arrow = "\u2193"
+
+
 def draw_route(c, route):
-    for i in range(len(route)-1):
+    for i in range(len(route) - 1):
         col = route[i][0][0]
         row = route[i][0][1]
-        xoffset = SB_WIDTH*row
-        yoffset = SB_HEIGHT*col
-        if len(route[i]) == 1: continue
+        xoffset = SB_WIDTH * row
+        yoffset = SB_HEIGHT * col
+        if len(route[i]) == 1:
+            continue
         dirs = route[i][1]
 
         # draw source and destination
-        if(i == 0):
-            c.draw_character((xoffset+5,yoffset+5), 'S')
-        if(i == (len(route)-2)):
-            c.draw_character((xoffset+9,yoffset+5), 'D')
+        if i == 0:
+            c.draw_character((xoffset + 5, yoffset + 5), "S")
+        if i == (len(route) - 2):
+            c.draw_character((xoffset + 9, yoffset + 5), "D")
 
-        if(i == 0): 
-            if(row == 0): # for routes starting in the shim, draw arrows coming from PL
-                c.draw_character((xoffset+1, yoffset+4), right_arrow)
-                c.draw_character((xoffset+2, yoffset+4), right_arrow)
-                c.draw_character((xoffset+3, yoffset+4), right_arrow)
+        if i == 0:
+            if row == 0:  # for routes starting in the shim, draw arrows coming from PL
+                c.draw_character((xoffset + 1, yoffset + 4), right_arrow)
+                c.draw_character((xoffset + 2, yoffset + 4), right_arrow)
+                c.draw_character((xoffset + 3, yoffset + 4), right_arrow)
 
         for j in range(len(dirs)):
             # draw indications for cores the route passes through
-            c.replace_character((xoffset+7,yoffset+5), '*', '#')
+            c.replace_character((xoffset + 7, yoffset + 5), "*", "#")
             # 0 = North, 1 = East, 2 = South, 3 = West
-            if(dirs[j] == "North"):
-                c.draw_character((xoffset+11, yoffset+4), right_arrow)
-                c.draw_character((xoffset+12, yoffset+4), right_arrow)
-                c.draw_character((xoffset+13, yoffset+4), right_arrow)
-                row = row+1
-            elif(dirs[j] == "East"):
-                c.draw_character((xoffset+6, yoffset+7), down_arrow)
-                col = col+1
-            elif(dirs[j] == "South"):
-                c.draw_character((xoffset+1, yoffset+5), left_arrow)
-                c.draw_character((xoffset+2, yoffset+5), left_arrow)
-                c.draw_character((xoffset+3, yoffset+5), left_arrow)
-                row = row-1
-            elif(dirs[j] == "West"):
-                c.draw_character((xoffset+8, yoffset+2), up_arrow)
-                col = col-1
-            elif(dirs[j] == "DMA"):
+            if dirs[j] == "North":
+                c.draw_character((xoffset + 11, yoffset + 4), right_arrow)
+                c.draw_character((xoffset + 12, yoffset + 4), right_arrow)
+                c.draw_character((xoffset + 13, yoffset + 4), right_arrow)
+                row = row + 1
+            elif dirs[j] == "East":
+                c.draw_character((xoffset + 6, yoffset + 7), down_arrow)
+                col = col + 1
+            elif dirs[j] == "South":
+                c.draw_character((xoffset + 1, yoffset + 5), left_arrow)
+                c.draw_character((xoffset + 2, yoffset + 5), left_arrow)
+                c.draw_character((xoffset + 3, yoffset + 5), left_arrow)
+                row = row - 1
+            elif dirs[j] == "West":
+                c.draw_character((xoffset + 8, yoffset + 2), up_arrow)
+                col = col - 1
+            elif dirs[j] == "DMA":
                 # draw destination
-                c.draw_character((xoffset+9,yoffset+5), 'D')
-
+                c.draw_character((xoffset + 9, yoffset + 5), "D")
 
 
-
-    
-if __name__ == '__main__':
+if __name__ == "__main__":
     # setup python unicode encoding
     os.system("export PYTHONIOENCODING=utf8")
 
-    parser = argparse.ArgumentParser(description='Draw switchboxes, demands and routes')
-    parser.add_argument('-j', '--json', help='Filepath for JSON file to read')
-    parser.add_argument('-r', '--route_list', help='List of routes to print')
-    parser.add_argument('-o', '--output', help='Path to output directory. Text files of the routes will be stored here.')
+    parser = argparse.ArgumentParser(description="Draw switchboxes, demands and routes")
+    parser.add_argument("-j", "--json", help="Filepath for JSON file to read")
+    parser.add_argument("-r", "--route_list", help="List of routes to print")
+    parser.add_argument(
+        "-o",
+        "--output",
+        help="Path to output directory. Text files of the routes will be stored here.",
+    )
     args = parser.parse_args()
 
-    if args.json: json_file_path = args.json
-    else: json_file_path = "switchbox.json" # default JSON
+    if args.json:
+        json_file_path = args.json
+    else:
+        json_file_path = "switchbox.json"  # default JSON
 
     with open(json_file_path) as f:
         json_data = json.load(f)
-    
+
     switchboxes = []
     routes = []
 
@@ -367,22 +426,23 @@ def draw_route(c, route):
             switchboxes.append(item)
         if "route" in key:
             routes.append(item)
-    
+
     max_col = 0
     max_row = 0
     for switchbox in switchboxes:
-        if switchbox['col'] > max_col:
-            max_col = switchbox['col']
-        if switchbox['row'] > max_row:
-            max_row = switchbox['row']
+        if switchbox["col"] > max_col:
+            max_col = switchbox["col"]
+        if switchbox["row"] > max_row:
+            max_row = switchbox["row"]
 
     routes_to_print = []
-    if args.route_list: 
+    if args.route_list:
         for route in args.route_list.split(","):
             routes_to_print.append(int(route.strip()))
-    else: routes_to_print = range(len(routes))
+    else:
+        routes_to_print = range(len(routes))
 
-    output_directory = json_file_path.split('.')[0] + '/'
+    output_directory = json_file_path.split(".")[0] + "/"
     if args.output:
         output_directory = args.output
 
@@ -390,12 +450,14 @@ def draw_route(c, route):
         os.mkdir(output_directory)
 
     for i in routes_to_print:
-        c = canvas(12*(max_row+1), 5+5*(max_col+1));
+        c = canvas(12 * (max_row + 1), 5 + 5 * (max_col + 1))
         draw_switchboxes(c, switchboxes)
         filename = os.path.join(output_directory, "route{}.txt".format(i))
         sys.stdout = sys.__stdout__
-        print("Printing route {} of {}: {}".format(i, len(routes_to_print)-1, filename))
-        with open(filename, 'w') as f:
+        print(
+            "Printing route {} of {}: {}".format(i, len(routes_to_print) - 1, filename)
+        )
+        with open(filename, "w") as f:
             sys.stdout = f
             print("Route {}: {}".format(i, routes[i]))
             draw_route(c, routes[i])
diff --git a/tutorials/lit.cfg.py b/tutorials/lit.cfg.py
index 9ad07492b9..a09651649f 100755
--- a/tutorials/lit.cfg.py
+++ b/tutorials/lit.cfg.py
@@ -17,10 +17,11 @@
 import lit.util
 
 from lit.llvm import llvm_config
+
 # Configuration file for the 'lit' test runner.
 
 # name: The name of this test suite.
-config.name = 'AIE_TUTORIALS'
+config.name = "AIE_TUTORIALS"
 
 config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell)
 config.environment["PYTHONPATH"] = "{}".format(
@@ -28,75 +29,94 @@
 )
 
 # suffixes: A list of file extensions to treat as test files.
-config.suffixes = ['.mlir']
+config.suffixes = [".mlir"]
 
 # test_source_root: The root path where tests are located.
 config.test_source_root = os.path.dirname(__file__)
 
-config.substitutions.append(('%PATH%', config.environment['PATH']))
-config.substitutions.append(('%shlibext', config.llvm_shlib_ext))
-config.substitutions.append(('%extraAieCcFlags%', config.extraAieCcFlags))
-config.substitutions.append(('%host_runtime_lib%', os.path.join(config.aie_obj_root, "runtime_lib",config.aieHostTarget)))
-config.substitutions.append(('%aietools', config.vitis_aietools_dir))
+config.substitutions.append(("%PATH%", config.environment["PATH"]))
+config.substitutions.append(("%shlibext", config.llvm_shlib_ext))
+config.substitutions.append(("%extraAieCcFlags%", config.extraAieCcFlags))
+config.substitutions.append(
+    (
+        "%host_runtime_lib%",
+        os.path.join(config.aie_obj_root, "runtime_lib", config.aieHostTarget),
+    )
+)
+config.substitutions.append(("%aietools", config.vitis_aietools_dir))
 # for xchesscc_wrapper
-llvm_config.with_environment('AIETOOLS', config.vitis_aietools_dir)
+llvm_config.with_environment("AIETOOLS", config.vitis_aietools_dir)
 
-if(config.enable_board_tests):
-    config.substitutions.append(('%run_on_board', "echo %T >> /home/xilinx/testlog | sync | sudo"))
+if config.enable_board_tests:
+    config.substitutions.append(
+        ("%run_on_board", "echo %T >> /home/xilinx/testlog | sync | sudo")
+    )
 else:
-    config.substitutions.append(('%run_on_board', "echo"))
+    config.substitutions.append(("%run_on_board", "echo"))
 
-VitisSysrootFlag = ''
-if (config.aieHostTarget == 'x86_64'):
-    config.substitutions.append(('%aieHostTargetTriplet%', 'x86_64-unknown-linux-gnu'))
-elif (config.aieHostTarget == 'aarch64'):
-    config.substitutions.append(('%aieHostTargetTriplet%', 'aarch64-linux-gnu'))
-    VitisSysrootFlag = '--sysroot='+config.vitis_sysroot
+VitisSysrootFlag = ""
+if config.aieHostTarget == "x86_64":
+    config.substitutions.append(("%aieHostTargetTriplet%", "x86_64-unknown-linux-gnu"))
+elif config.aieHostTarget == "aarch64":
+    config.substitutions.append(("%aieHostTargetTriplet%", "aarch64-linux-gnu"))
+    VitisSysrootFlag = "--sysroot=" + config.vitis_sysroot
 
-config.substitutions.append(('%VitisSysrootFlag%', VitisSysrootFlag))
-config.substitutions.append(('%aieHostTargetArch%', config.aieHostTarget))
+config.substitutions.append(("%VitisSysrootFlag%", VitisSysrootFlag))
+config.substitutions.append(("%aieHostTargetArch%", config.aieHostTarget))
 
-llvm_config.with_system_environment(
-    ['HOME', 'INCLUDE', 'LIB', 'TMP', 'TEMP'])
+llvm_config.with_system_environment(["HOME", "INCLUDE", "LIB", "TMP", "TEMP"])
 
 llvm_config.use_default_substitutions()
 
 # excludes: A list of directories to exclude from the testsuite. The 'Inputs'
 # subdirectories contain auxiliary inputs for various tests in their parent
 # directories.
-config.excludes = ['Inputs', 'Examples', 'CMakeLists.txt', 'README.txt', 'LICENSE.txt', 'aie.mlir.prj']
+config.excludes = [
+    "Inputs",
+    "Examples",
+    "CMakeLists.txt",
+    "README.txt",
+    "LICENSE.txt",
+    "aie.mlir.prj",
+]
+
+config.aie_tools_dir = os.path.join(config.aie_obj_root, "bin")
 
-config.aie_tools_dir = os.path.join(config.aie_obj_root, 'bin')
 
 def prepend_path(path):
     global llvm_config
     paths = [path]
 
-    current_paths = llvm_config.config.environment.get('PATH', None)
+    current_paths = llvm_config.config.environment.get("PATH", None)
     if current_paths:
         paths.extend(current_paths.split(os.path.pathsep))
         paths = [os.path.normcase(os.path.normpath(p)) for p in paths]
     else:
         paths = []
 
-    llvm_config.config.environment['PATH'] = os.pathsep.join(paths)
+    llvm_config.config.environment["PATH"] = os.pathsep.join(paths)
+
 
 # Setup the path.
 prepend_path(config.llvm_tools_dir)
 prepend_path(config.peano_tools_dir)
 prepend_path(config.aie_tools_dir)
-#llvm_config.with_environment('LM_LICENSE_FILE', os.getenv('LM_LICENSE_FILE'))
-#llvm_config.with_environment('XILINXD_LICENSE_FILE', os.getenv('XILINXD_LICENSE_FILE'))
-if(config.vitis_root):
-  config.vitis_aietools_bin = os.path.join(config.vitis_aietools_dir, "bin")
-  prepend_path(config.vitis_aietools_bin)
-  llvm_config.with_environment('VITIS', config.vitis_root)
+# llvm_config.with_environment('LM_LICENSE_FILE', os.getenv('LM_LICENSE_FILE'))
+# llvm_config.with_environment('XILINXD_LICENSE_FILE', os.getenv('XILINXD_LICENSE_FILE'))
+if config.vitis_root:
+    config.vitis_aietools_bin = os.path.join(config.vitis_aietools_dir, "bin")
+    prepend_path(config.vitis_aietools_bin)
+    llvm_config.with_environment("VITIS", config.vitis_root)
 
 # Test to see if we have the peano backend.
 try:
-    result = subprocess.run([os.path.join(config.peano_tools_dir, 'llc'),'-mtriple=aie','--version'],stdout=subprocess.PIPE,stderr=subprocess.PIPE)
-    if (re.search("Xilinx AI Engine", result.stdout.decode('utf-8')) is not None):
-        config.available_features.add('peano')
+    result = subprocess.run(
+        [os.path.join(config.peano_tools_dir, "llc"), "-mtriple=aie", "--version"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+    if re.search("Xilinx AI Engine", result.stdout.decode("utf-8")) is not None:
+        config.available_features.add("peano")
         print("Peano found: " + shutil.which("llc"))
     else:
         print("Peano not found, but expected at ", config.peano_tools_dir)
@@ -104,51 +124,56 @@ def prepend_path(path):
     print("Peano not found, but expected at ", config.peano_tools_dir)
 
 print("Looking for Chess...")
-#test if LM_LICENSE_FILE valid
-if(config.enable_chess_tests):
+# test if LM_LICENSE_FILE valid
+if config.enable_chess_tests:
     result = None
-    if(config.vitis_root):
+    if config.vitis_root:
         result = shutil.which("xchesscc")
 
     if result != None:
         print("Chess found: " + result)
-        config.available_features.add('chess')
-        config.available_features.add('valid_xchess_license')
-        lm_license_file = os.getenv('LM_LICENSE_FILE')
-        if(lm_license_file != None):
-            llvm_config.with_environment('LM_LICENSE_FILE', lm_license_file)
-        xilinxd_license_file = os.getenv('XILINXD_LICENSE_FILE')
-        if(xilinxd_license_file != None):
-            llvm_config.with_environment('XILINXD_LICENSE_FILE', xilinxd_license_file)
+        config.available_features.add("chess")
+        config.available_features.add("valid_xchess_license")
+        lm_license_file = os.getenv("LM_LICENSE_FILE")
+        if lm_license_file != None:
+            llvm_config.with_environment("LM_LICENSE_FILE", lm_license_file)
+        xilinxd_license_file = os.getenv("XILINXD_LICENSE_FILE")
+        if xilinxd_license_file != None:
+            llvm_config.with_environment("XILINXD_LICENSE_FILE", xilinxd_license_file)
 
         validate_chess = False
-        if(validate_chess):
+        if validate_chess:
             import subprocess
-            result = subprocess.run(['xchesscc','+v'],stdout=subprocess.PIPE,stderr=subprocess.PIPE)
-            validLMLicense = (len(result.stderr.decode('utf-8')) == 0)
+
+            result = subprocess.run(
+                ["xchesscc", "+v"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
+            )
+            validLMLicense = len(result.stderr.decode("utf-8")) == 0
         else:
             validLMLicense = lm_license_file or xilinxd_license_file
 
-        if(not lm_license_file and not xilinxd_license_file):
-            print("WARNING: no valid xchess license that is required by some of the lit tests")
+        if not lm_license_file and not xilinxd_license_file:
+            print(
+                "WARNING: no valid xchess license that is required by some of the lit tests"
+            )
 
     else:
         print("Chess not found")
 
 tool_dirs = [config.aie_tools_dir, config.peano_tools_dir, config.llvm_tools_dir]
 tools = [
-    'aie-opt',
-    'aie-translate',
-    'aiecc.py',
-    'ld.lld',
-    'llc',
-    'llvm-objdump',
-    'opt',
-    'xchesscc_wrapper',
+    "aie-opt",
+    "aie-translate",
+    "aiecc.py",
+    "ld.lld",
+    "llc",
+    "llvm-objdump",
+    "opt",
+    "xchesscc_wrapper",
 ]
 
 llvm_config.add_tool_substitutions(tools, tool_dirs)
 
-if(config.enable_board_tests):
+if config.enable_board_tests:
     lit_config.parallelism_groups["board"] = 1
     config.parallelism_group = "board"
diff --git a/tutorials/scripts/visualize.py b/tutorials/scripts/visualize.py
index 976ce4d17e..b772575894 100755
--- a/tutorials/scripts/visualize.py
+++ b/tutorials/scripts/visualize.py
@@ -8,9 +8,10 @@
 
 from enum import Enum
 
+
 class Direction(Enum):
-    Horz = 0;
-    Vert = 1;
+    Horz = 0
+    Vert = 1
 
 
 class canvas:
@@ -20,345 +21,403 @@ def __init__(self, width, height):
         self.vert_line_list = []
         self.horz_line_list = []
         self.characters = []
-        
-    def direction(self, line) :
-        Horz_Stationary = False;
-        Vert_Stationary = False;
-        
-        if (line[0][0] == line[1][0]):
+
+    def direction(self, line):
+        Horz_Stationary = False
+        Vert_Stationary = False
+
+        if line[0][0] == line[1][0]:
             # Line Stationary in Horz Axis
             Horz_Stationary = True
-        if (line[0][1] == line[1][1]):
+        if line[0][1] == line[1][1]:
             # Line Stationary in Vert Axis
             Vert_Stationary = True
 
-        if (Horz_Stationary and Vert_Stationary):
+        if Horz_Stationary and Vert_Stationary:
             # Crash
-            raise Exception("Line is Diagonal");
-        if (not Horz_Stationary and not Vert_Stationary):
+            raise Exception("Line is Diagonal")
+        if not Horz_Stationary and not Vert_Stationary:
             # Crash
-            raise Exception("Line is a dot");
-        if ((not Horz_Stationary) and Vert_Stationary):
-            return Direction.Horz;
-        
-        if (Horz_Stationary and (not Vert_Stationary)):
-            return Direction.Vert;
-        
+            raise Exception("Line is a dot")
+        if (not Horz_Stationary) and Vert_Stationary:
+            return Direction.Horz
+
+        if Horz_Stationary and (not Vert_Stationary):
+            return Direction.Vert
+
     def draw_character(self, point, character):
-        self.characters.append([point, character]);
+        self.characters.append([point, character])
 
     def replace_character(self, point, character, replacement):
-        if (self.characters.count([point, character])):
-            self.characters.remove([point, character]);
-        self.characters.append([point, replacement]);
-        
+        if self.characters.count([point, character]):
+            self.characters.remove([point, character])
+        self.characters.append([point, replacement])
+
     def draw_line(self, start, finish):
-        if (self.direction([start,finish]) == Direction.Vert):
+        if self.direction([start, finish]) == Direction.Vert:
             self.vert_line_list.append([start, finish])
         else:
             self.horz_line_list.append([start, finish])
-        
+
     def draw_square(self, center, size):
-        horz_origin = math.floor((center[0] + 0.5) - (size/2));
-        horz_extent = math.ceil((center[0] + 0.5) + (size/2) + 3);
-
-        vert_origin = math.floor( (center[1] + 0.5)- (size/2));
-        vert_extent =  math.ceil( (center[1] + 0.5) + (size/2));
-        
-        top_left = (horz_origin, vert_origin );
-        top_right = (horz_extent, vert_origin);
-        bottom_left = (horz_origin, vert_extent);
-        bottom_right = (horz_extent, vert_extent);
-        
+        horz_origin = math.floor((center[0] + 0.5) - (size / 2))
+        horz_extent = math.ceil((center[0] + 0.5) + (size / 2) + 3)
+
+        vert_origin = math.floor((center[1] + 0.5) - (size / 2))
+        vert_extent = math.ceil((center[1] + 0.5) + (size / 2))
+
+        top_left = (horz_origin, vert_origin)
+        top_right = (horz_extent, vert_origin)
+        bottom_left = (horz_origin, vert_extent)
+        bottom_right = (horz_extent, vert_extent)
+
         self.draw_line(top_left, top_right)
         self.draw_line(top_right, bottom_right)
         self.draw_line(bottom_left, bottom_right)
         self.draw_line(top_left, bottom_left)
 
-    def vertical_index(self,point):
-        return point[1];
-    def horizontal_index(self,point):
-        return point[0];
-        
+    def vertical_index(self, point):
+        return point[1]
+
+    def horizontal_index(self, point):
+        return point[0]
+
     def within_line(self, point, line):
-        Horz_Stationary = False;
-        Vert_Stationary = False;
-        
-        if (line[0][0] == line[1][0]):
+        Horz_Stationary = False
+        Vert_Stationary = False
+
+        if line[0][0] == line[1][0]:
             # Line Stationary in Horz Axis
             Horz_Stationary = True
-        if (line[0][1] == line[1][1]):
+        if line[0][1] == line[1][1]:
             # Line Stationary in Vert Axis
             Vert_Stationary = True
 
-       # print("HS: {}, VS: {}, {}".format(Horz_Stationary, Vert_Stationary, line));
-            
-        if (Horz_Stationary and Vert_Stationary):
+        # print("HS: {}, VS: {}, {}".format(Horz_Stationary, Vert_Stationary, line));
+
+        if Horz_Stationary and Vert_Stationary:
             # Crash
-            raise Exception("Line is Diagonal");
-        if (not Horz_Stationary and not Vert_Stationary):
+            raise Exception("Line is Diagonal")
+        if not Horz_Stationary and not Vert_Stationary:
             # Crash
-            raise Exception("Line is a dot");
-        
-        if (Horz_Stationary and (not Vert_Stationary)):
+            raise Exception("Line is a dot")
+
+        if Horz_Stationary and (not Vert_Stationary):
             # it's a vertical line
             # Sort the tuples by Horz
-            line.sort(key=self.vertical_index);
-            start_line = ( (point[1] == line[0][1]) and (point[1] <= line[1][1]) and (point[0] == line[0][0]) )
-            in_line = ( (point[1] > line[0][1]) and (point[1] < line[1][1]) and (point[0] == line[0][0]) )
-            end_line = ( (point[1] > line[0][1]) and (point[1] == line[1][1]) and (point[0] == line[0][0]) )
-            
-            #print("Vert {}, point {}, {}".format(line, point, in_line));
-            
-        if ((not Horz_Stationary) and Vert_Stationary):
+            line.sort(key=self.vertical_index)
+            start_line = (
+                (point[1] == line[0][1])
+                and (point[1] <= line[1][1])
+                and (point[0] == line[0][0])
+            )
+            in_line = (
+                (point[1] > line[0][1])
+                and (point[1] < line[1][1])
+                and (point[0] == line[0][0])
+            )
+            end_line = (
+                (point[1] > line[0][1])
+                and (point[1] == line[1][1])
+                and (point[0] == line[0][0])
+            )
+
+            # print("Vert {}, point {}, {}".format(line, point, in_line));
+
+        if (not Horz_Stationary) and Vert_Stationary:
             # it's a horizonal line
             line.sort(key=self.horizontal_index)
-            start_line = ( (point[0] == line[0][0]) and (point[0] <= line[1][0]) and ( point[1] == line[0][1]))
-            in_line = ( (point[0] > line[0][0]) and (point[0] < line[1][0]) and ( point[1] == line[0][1]))
-            end_line = ( (point[0] > line[0][0]) and (point[0] == line[1][0]) and ( point[1] == line[0][1]))
-            #print("Horz {}, point {}, {}".format(line, point, in_line));
+            start_line = (
+                (point[0] == line[0][0])
+                and (point[0] <= line[1][0])
+                and (point[1] == line[0][1])
+            )
+            in_line = (
+                (point[0] > line[0][0])
+                and (point[0] < line[1][0])
+                and (point[1] == line[0][1])
+            )
+            end_line = (
+                (point[0] > line[0][0])
+                and (point[0] == line[1][0])
+                and (point[1] == line[0][1])
+            )
+            # print("Horz {}, point {}, {}".format(line, point, in_line));
 
         return (start_line, in_line, end_line)
-    
-    def find_horz_index(self,line_points):
-        index =0;
 
-        if (line_points[1]):
-            index+=2
+    def find_horz_index(self, line_points):
+        index = 0
+
+        if line_points[1]:
+            index += 2
         else:
-            if (line_points[0]):
-                index += 1;
-            if (line_points[2]):
-                index +=4
-        return index;
+            if line_points[0]:
+                index += 1
+            if line_points[2]:
+                index += 4
+        return index
 
     def transform(self, index):
 
         # if horz through, clear bits for horz end
         # if vert through, clear bits for vert end
-        
-        
+
         chars = {
-            0 : ' ',
-            2 : u'\u2500', # horz line
-            16 : u'\u2502',# vert line
-            9 : u'\u250c', # box top left
-            33 : u'\u2514',# box bot left
-            12 : u'\u2510',# box top right
-            36 : u'\u2518',# box bot right
-            1 : u'\u2576', # right half horz line 
-            8 : u'\u2577', # lower half vert line
-            32 : u'\u2575',# upper half vert line
-            4 : u'\u2574', # left half horz line
-            18 : u'\u253c',# vert AND horz
-            25 : u'\u251c',# vert and right
-            17 : u'\u251c',
-            20 : u'\u2524',# vert and left
-            10 : u'\u252c',# horz and bot
-            34 : u'\u2534',# horz and top
-            21 : u'\u253c',# vert AND horz
-            42 : u'\u253c'
-            
+            0: " ",
+            2: "\u2500",  # horz line
+            16: "\u2502",  # vert line
+            9: "\u250c",  # box top left
+            33: "\u2514",  # box bot left
+            12: "\u2510",  # box top right
+            36: "\u2518",  # box bot right
+            1: "\u2576",  # right half horz line
+            8: "\u2577",  # lower half vert line
+            32: "\u2575",  # upper half vert line
+            4: "\u2574",  # left half horz line
+            18: "\u253c",  # vert AND horz
+            25: "\u251c",  # vert and right
+            17: "\u251c",
+            20: "\u2524",  # vert and left
+            10: "\u252c",  # horz and bot
+            34: "\u2534",  # horz and top
+            21: "\u253c",  # vert AND horz
+            42: "\u253c",
         }
         try:
-         char = chars[index];
+            char = chars[index]
         except KeyError:
-         char = "x"
-         
+            char = "x"
+
         return char
 
     def combine(self, a, b):
-        return [ a[0] or b[0], a[1] or b[1], a[2] or b[2]];
-    
+        return [a[0] or b[0], a[1] or b[1], a[2] or b[2]]
+
     def rasterize(self):
-       for x in range(self.height):
-           for y in range(self.width):
-               char = "({},{})".format(y,x);
-               index = 0;
-               horz_line_points = [False, False, False];
-               vert_line_points = [False, False, False];
-
-               for charloc in self.characters:
-                   if (charloc[0][0] == y and charloc[0][1] == x):
-                       print(charloc[1], end='', sep='');
-                       index = -1;
-                       break
-               if index == 0: # not a character, either vert or horz line 
-                   for line in self.horz_line_list:
-                       horz_line_points = self.combine(horz_line_points, self.within_line((y,x),line));
-                   index += self.find_horz_index(horz_line_points)
-
-                   for line in self.vert_line_list:
-                       vert_line_points = self.combine(vert_line_points, self.within_line((y,x),line));
-                   index += 8*self.find_horz_index(vert_line_points)
-
-                   print("{}".format(self.transform(index)), end='', sep='')
-                   #print(" {} ".format(index), end='', sep='')
-   
-           print("")
+        for x in range(self.height):
+            for y in range(self.width):
+                char = "({},{})".format(y, x)
+                index = 0
+                horz_line_points = [False, False, False]
+                vert_line_points = [False, False, False]
+
+                for charloc in self.characters:
+                    if charloc[0][0] == y and charloc[0][1] == x:
+                        print(charloc[1], end="", sep="")
+                        index = -1
+                        break
+                if index == 0:  # not a character, either vert or horz line
+                    for line in self.horz_line_list:
+                        horz_line_points = self.combine(
+                            horz_line_points, self.within_line((y, x), line)
+                        )
+                    index += self.find_horz_index(horz_line_points)
+
+                    for line in self.vert_line_list:
+                        vert_line_points = self.combine(
+                            vert_line_points, self.within_line((y, x), line)
+                        )
+                    index += 8 * self.find_horz_index(vert_line_points)
+
+                    print("{}".format(self.transform(index)), end="", sep="")
+                    # print(" {} ".format(index), end='', sep='')
+
+            print("")
+
 
 superscripts = {
-   # 0 : u'\u2070', 
-    0 : ' ', 
-    1 : u'\u00b9', 
-    2 : u'\u00b2', 
-    3 : u'\u00b3', 
-    4 : u'\u2074', 
-    5 : u'\u2075', 
-    6 : u'\u2076', 
-    7 : u'\u2077', 
-    8 : u'\u2078', 
-    9 : u'\u2079', 
+    # 0 : u'\u2070',
+    0: " ",
+    1: "\u00b9",
+    2: "\u00b2",
+    3: "\u00b3",
+    4: "\u2074",
+    5: "\u2075",
+    6: "\u2076",
+    7: "\u2077",
+    8: "\u2078",
+    9: "\u2079",
 }
 subscripts = {
-   # 0 : u'\u2080', 
-    0 : ' ', 
-    1 : u'\u2081', 
-    2 : u'\u2082', 
-    3 : u'\u2083', 
-    4 : u'\u2084', 
-    5 : u'\u2085', 
-    6 : u'\u2086', 
-    7 : u'\u2087', 
-    8 : u'\u2088', 
-    9 : u'\u2089', 
+    # 0 : u'\u2080',
+    0: " ",
+    1: "\u2081",
+    2: "\u2082",
+    3: "\u2083",
+    4: "\u2084",
+    5: "\u2085",
+    6: "\u2086",
+    7: "\u2087",
+    8: "\u2088",
+    9: "\u2089",
 }
 
-def draw_switchbox(canvas, xoffset, yoffset, source_count, destination_count,
-         northbound, southbound, eastbound, westbound, draw_demand=True, name=""):
-    c.draw_square((xoffset+5,yoffset+4),2)
+
+def draw_switchbox(
+    canvas,
+    xoffset,
+    yoffset,
+    source_count,
+    destination_count,
+    northbound,
+    southbound,
+    eastbound,
+    westbound,
+    draw_demand=True,
+    name="",
+):
+    c.draw_square((xoffset + 5, yoffset + 4), 2)
 
     # label it
     if len(name) > 0:
-        c.draw_character((xoffset+6,yoffset+4), name[0])
+        c.draw_character((xoffset + 6, yoffset + 4), name[0])
     if len(name) > 1:
-        c.draw_character((xoffset+7,yoffset+4), name[1])
+        c.draw_character((xoffset + 7, yoffset + 4), name[1])
     if len(name) > 2:
-        c.draw_character((xoffset+8,yoffset+4), name[2])
+        c.draw_character((xoffset + 8, yoffset + 4), name[2])
     if len(name) > 3:
-        c.draw_character((xoffset+9,yoffset+4), name[3])
+        c.draw_character((xoffset + 9, yoffset + 4), name[3])
 
     # draw source and destination count
-    if(source_count > 0 or destination_count > 0):
-        c.draw_character((xoffset+7,yoffset+5), '*')
+    if source_count > 0 or destination_count > 0:
+        c.draw_character((xoffset + 7, yoffset + 5), "*")
 
     # left of the switchbox (south)
-    if northbound > 0: 
-        c.draw_line((xoffset+10,yoffset+4), (xoffset+14,yoffset+4))
-        if(draw_demand):
-            c.draw_character((xoffset+12,yoffset+3), subscripts[northbound])
-            if(northbound > 6): # if overcapacity, mark with an 'x'            
-                c.draw_character((xoffset+10,yoffset+4), 'x')
-                #c.draw_character((xoffset+11,yoffset+4), 'x')
-                c.draw_character((xoffset+12,yoffset+4), 'x')
-    if southbound > 0: 
-        c.draw_line((xoffset+0,yoffset+5), (xoffset+4,yoffset+5))
-        if(draw_demand):
-            c.draw_character((xoffset+2,yoffset+6), superscripts[southbound])
-            if(southbound > 4): # if overcapacity, mark with an 'x'
-                c.draw_character((xoffset+1, yoffset+5), 'x')
-                #c.draw_character((xoffset+2, yoffset+5), 'x')
-                c.draw_character((xoffset+3, yoffset+5), 'x')
+    if northbound > 0:
+        c.draw_line((xoffset + 10, yoffset + 4), (xoffset + 14, yoffset + 4))
+        if draw_demand:
+            c.draw_character((xoffset + 12, yoffset + 3), subscripts[northbound])
+            if northbound > 6:  # if overcapacity, mark with an 'x'
+                c.draw_character((xoffset + 10, yoffset + 4), "x")
+                # c.draw_character((xoffset+11,yoffset+4), 'x')
+                c.draw_character((xoffset + 12, yoffset + 4), "x")
+    if southbound > 0:
+        c.draw_line((xoffset + 0, yoffset + 5), (xoffset + 4, yoffset + 5))
+        if draw_demand:
+            c.draw_character((xoffset + 2, yoffset + 6), superscripts[southbound])
+            if southbound > 4:  # if overcapacity, mark with an 'x'
+                c.draw_character((xoffset + 1, yoffset + 5), "x")
+                # c.draw_character((xoffset+2, yoffset+5), 'x')
+                c.draw_character((xoffset + 3, yoffset + 5), "x")
 
     # below the switchbox (east)
-    if eastbound > 0: 
-        c.draw_line((xoffset+6,yoffset+6), (xoffset+6,yoffset+8))
-        if(draw_demand):
-            c.draw_character((xoffset+5,yoffset+7), superscripts[eastbound])
-            if(eastbound > 4): # if overcapacity, mark with an 'x'
-                c.draw_character((xoffset+6, yoffset+6), 'x')
-                #c.draw_character((xoffset+6, yoffset+7), 'x')
-                c.draw_character((xoffset+6, yoffset+8), 'x')
-    if westbound > 0: 
-        c.draw_line((xoffset+8,yoffset+1), (xoffset+8,yoffset+3))
-        if(draw_demand):
-            c.draw_character((xoffset+9,yoffset+2), superscripts[westbound])
-            if(westbound > 4): # if overcapacity, mark with an 'x'
-                c.draw_character((xoffset+8, yoffset+1), 'x')
-                #c.draw_character((xoffset+7, yoffset+2), 'x')
-                c.draw_character((xoffset+8, yoffset+3), 'x')
-
-
-SB_WIDTH = 10; SB_HEIGHT = 5 # distances between switchboxes
+    if eastbound > 0:
+        c.draw_line((xoffset + 6, yoffset + 6), (xoffset + 6, yoffset + 8))
+        if draw_demand:
+            c.draw_character((xoffset + 5, yoffset + 7), superscripts[eastbound])
+            if eastbound > 4:  # if overcapacity, mark with an 'x'
+                c.draw_character((xoffset + 6, yoffset + 6), "x")
+                # c.draw_character((xoffset+6, yoffset+7), 'x')
+                c.draw_character((xoffset + 6, yoffset + 8), "x")
+    if westbound > 0:
+        c.draw_line((xoffset + 8, yoffset + 1), (xoffset + 8, yoffset + 3))
+        if draw_demand:
+            c.draw_character((xoffset + 9, yoffset + 2), superscripts[westbound])
+            if westbound > 4:  # if overcapacity, mark with an 'x'
+                c.draw_character((xoffset + 8, yoffset + 1), "x")
+                # c.draw_character((xoffset+7, yoffset+2), 'x')
+                c.draw_character((xoffset + 8, yoffset + 3), "x")
+
+
+SB_WIDTH = 10
+SB_HEIGHT = 5  # distances between switchboxes
+
+
 def draw_switchboxes(c, switchboxes):
     for item in switchboxes:
-        draw_switchbox(c, SB_WIDTH*item['row'], SB_HEIGHT*item['col'], 
-            item['source_count'], item['destination_count'],
-            item['northbound'], item['southbound'],
-            item['eastbound'], item['westbound'], draw_demand=True,
-            name="{},{}".format(item['col'], item['row'] ))
-    
+        draw_switchbox(
+            c,
+            SB_WIDTH * item["row"],
+            SB_HEIGHT * item["col"],
+            item["source_count"],
+            item["destination_count"],
+            item["northbound"],
+            item["southbound"],
+            item["eastbound"],
+            item["westbound"],
+            draw_demand=True,
+            name="{},{}".format(item["col"], item["row"]),
+        )
+
+
 # given a route, draw arrow characters to indicate the route
 # route is a list of switchboxes, represented as int tuple coordinates
-left_arrow = u'\u2190'
-up_arrow   = u'\u2191'
-right_arrow= u'\u2192'
-down_arrow = u'\u2193'
+left_arrow = "\u2190"
+up_arrow = "\u2191"
+right_arrow = "\u2192"
+down_arrow = "\u2193"
+
+
 def draw_route(c, route):
-    for i in range(len(route)-1):
+    for i in range(len(route) - 1):
         col = route[i][0][0]
         row = route[i][0][1]
-        xoffset = SB_WIDTH*row
-        yoffset = SB_HEIGHT*col
-        if len(route[i]) == 1: continue
+        xoffset = SB_WIDTH * row
+        yoffset = SB_HEIGHT * col
+        if len(route[i]) == 1:
+            continue
         dirs = route[i][1]
 
         # draw source and destination
-        if(i == 0):
-            c.draw_character((xoffset+5,yoffset+5), 'S')
-        if(i == (len(route)-2)):
-            c.draw_character((xoffset+9,yoffset+5), 'D')
+        if i == 0:
+            c.draw_character((xoffset + 5, yoffset + 5), "S")
+        if i == (len(route) - 2):
+            c.draw_character((xoffset + 9, yoffset + 5), "D")
 
-        if(i == 0): 
-            if(row == 0): # for routes starting in the shim, draw arrows coming from PL
-                c.draw_character((xoffset+1, yoffset+4), right_arrow)
-                c.draw_character((xoffset+2, yoffset+4), right_arrow)
-                c.draw_character((xoffset+3, yoffset+4), right_arrow)
+        if i == 0:
+            if row == 0:  # for routes starting in the shim, draw arrows coming from PL
+                c.draw_character((xoffset + 1, yoffset + 4), right_arrow)
+                c.draw_character((xoffset + 2, yoffset + 4), right_arrow)
+                c.draw_character((xoffset + 3, yoffset + 4), right_arrow)
 
         for j in range(len(dirs)):
             # draw indications for cores the route passes through
-            c.replace_character((xoffset+7,yoffset+5), '*', '#')
+            c.replace_character((xoffset + 7, yoffset + 5), "*", "#")
             # 0 = North, 1 = East, 2 = South, 3 = West
-            if(dirs[j] == "North"):
-                c.draw_character((xoffset+11, yoffset+4), right_arrow)
-                c.draw_character((xoffset+12, yoffset+4), right_arrow)
-                c.draw_character((xoffset+13, yoffset+4), right_arrow)
-                row = row+1
-            elif(dirs[j] == "East"):
-                c.draw_character((xoffset+6, yoffset+7), down_arrow)
-                col = col+1
-            elif(dirs[j] == "South"):
-                c.draw_character((xoffset+1, yoffset+5), left_arrow)
-                c.draw_character((xoffset+2, yoffset+5), left_arrow)
-                c.draw_character((xoffset+3, yoffset+5), left_arrow)
-                row = row-1
-            elif(dirs[j] == "West"):
-                c.draw_character((xoffset+8, yoffset+2), up_arrow)
-                col = col-1
-            elif(dirs[j] == "DMA"):
+            if dirs[j] == "North":
+                c.draw_character((xoffset + 11, yoffset + 4), right_arrow)
+                c.draw_character((xoffset + 12, yoffset + 4), right_arrow)
+                c.draw_character((xoffset + 13, yoffset + 4), right_arrow)
+                row = row + 1
+            elif dirs[j] == "East":
+                c.draw_character((xoffset + 6, yoffset + 7), down_arrow)
+                col = col + 1
+            elif dirs[j] == "South":
+                c.draw_character((xoffset + 1, yoffset + 5), left_arrow)
+                c.draw_character((xoffset + 2, yoffset + 5), left_arrow)
+                c.draw_character((xoffset + 3, yoffset + 5), left_arrow)
+                row = row - 1
+            elif dirs[j] == "West":
+                c.draw_character((xoffset + 8, yoffset + 2), up_arrow)
+                col = col - 1
+            elif dirs[j] == "DMA":
                 # draw destination
-                c.draw_character((xoffset+9,yoffset+5), 'D')
-
+                c.draw_character((xoffset + 9, yoffset + 5), "D")
 
 
-
-    
-if __name__ == '__main__':
+if __name__ == "__main__":
     # setup python unicode encoding
     os.system("export PYTHONIOENCODING=utf8")
 
-    parser = argparse.ArgumentParser(description='Draw switchboxes, demands and routes')
-    parser.add_argument('-j', '--json', help='Filepath for JSON file to read')
-    parser.add_argument('-r', '--route_list', help='List of routes to print')
-    parser.add_argument('-o', '--output', help='Path to output directory. Text files of the routes will be stored here.')
+    parser = argparse.ArgumentParser(description="Draw switchboxes, demands and routes")
+    parser.add_argument("-j", "--json", help="Filepath for JSON file to read")
+    parser.add_argument("-r", "--route_list", help="List of routes to print")
+    parser.add_argument(
+        "-o",
+        "--output",
+        help="Path to output directory. Text files of the routes will be stored here.",
+    )
     args = parser.parse_args()
 
-    if args.json: json_file_path = args.json
-    else: json_file_path = "switchbox.json" # default JSON
+    if args.json:
+        json_file_path = args.json
+    else:
+        json_file_path = "switchbox.json"  # default JSON
 
     with open(json_file_path) as f:
         json_data = json.load(f)
-    
+
     switchboxes = []
     routes = []
 
@@ -367,22 +426,23 @@ def draw_route(c, route):
             switchboxes.append(item)
         if "route" in key:
             routes.append(item)
-    
+
     max_col = 0
     max_row = 0
     for switchbox in switchboxes:
-        if switchbox['col'] > max_col:
-            max_col = switchbox['col']
-        if switchbox['row'] > max_row:
-            max_row = switchbox['row']
+        if switchbox["col"] > max_col:
+            max_col = switchbox["col"]
+        if switchbox["row"] > max_row:
+            max_row = switchbox["row"]
 
     routes_to_print = []
-    if args.route_list: 
+    if args.route_list:
         for route in args.route_list.split(","):
             routes_to_print.append(int(route.strip()))
-    else: routes_to_print = range(len(routes))
+    else:
+        routes_to_print = range(len(routes))
 
-    output_directory = json_file_path.split('.')[0] + '/'
+    output_directory = json_file_path.split(".")[0] + "/"
     if args.output:
         output_directory = args.output
 
@@ -390,12 +450,14 @@ def draw_route(c, route):
         os.mkdir(output_directory)
 
     for i in routes_to_print:
-        c = canvas(12*(max_row+1), 5+5*(max_col+1));
+        c = canvas(12 * (max_row + 1), 5 + 5 * (max_col + 1))
         draw_switchboxes(c, switchboxes)
         filename = os.path.join(output_directory, "route{}.txt".format(i))
         sys.stdout = sys.__stdout__
-        print("Printing route {} of {}: {}".format(i, len(routes_to_print)-1, filename))
-        with open(filename, 'w') as f:
+        print(
+            "Printing route {} of {}: {}".format(i, len(routes_to_print) - 1, filename)
+        )
+        with open(filename, "w") as f:
             sys.stdout = f
             print("Route {}: {}".format(i, routes[i]))
             draw_route(c, routes[i])
diff --git a/utils/build-mlir-aie-pcie.sh b/utils/build-mlir-aie-pcie.sh
index 4d371d42bf..f715ed8596 100755
--- a/utils/build-mlir-aie-pcie.sh
+++ b/utils/build-mlir-aie-pcie.sh
@@ -63,6 +63,7 @@ CMAKE_CONFIGS="\
     -DLLVM_ENABLE_ASSERTIONS=ON \
     -DLLVM_ENABLE_RTTI=$LLVM_ENABLE_RTTI \
     -DAIE_RUNTIME_TARGETS=x86_64-hsa \
+    -DAIE_RUNTIME_TEST_TARGET=x86_64-hsa \
     -DAIE_ENABLE_PYTHON_PASSES=OFF \
     .. |& tee cmake.log"
 
diff --git a/utils/generate-test-checks.py b/utils/generate-test-checks.py
index a115d4b343..6237f2e186 100755
--- a/utils/generate-test-checks.py
+++ b/utils/generate-test-checks.py
@@ -46,15 +46,15 @@
 SSA_RE = re.compile(SSA_RE_STR)
 
 # Regex matching the left-hand side of an assignment
-SSA_RESULTS_STR = r'\s*(%' + SSA_RE_STR + r')(\s*,\s*(%' + SSA_RE_STR + r'))*\s*='
+SSA_RESULTS_STR = r"\s*(%" + SSA_RE_STR + r")(\s*,\s*(%" + SSA_RE_STR + r"))*\s*="
 SSA_RESULTS_RE = re.compile(SSA_RESULTS_STR)
 
 # Regex matching attributes
-ATTR_RE_STR = r'(#[a-zA-Z._-][a-zA-Z0-9._-]*)'
+ATTR_RE_STR = r"(#[a-zA-Z._-][a-zA-Z0-9._-]*)"
 ATTR_RE = re.compile(ATTR_RE_STR)
 
 # Regex matching the left-hand side of an attribute definition
-ATTR_DEF_RE_STR = r'\s*' + ATTR_RE_STR + r'\s*='
+ATTR_DEF_RE_STR = r"\s*" + ATTR_RE_STR + r"\s*="
 ATTR_DEF_RE = re.compile(ATTR_DEF_RE_STR)
 
 
@@ -69,7 +69,7 @@ def __init__(self, variable_names):
         self.generate_in_parent_scope_left = 0
 
         # Parse variable names
-        self.variable_names = [name.upper() for name in variable_names.split(',')]
+        self.variable_names = [name.upper() for name in variable_names.split(",")]
         self.used_variable_names = set()
 
     # Generate the following 'n' variable names in the parent scope.
@@ -80,8 +80,10 @@ def generate_in_parent_scope(self, n):
     def generate_name(self, source_variable_name):
 
         # Compute variable name
-        variable_name = self.variable_names.pop(0) if len(self.variable_names) > 0 else ''
-        if variable_name == '':
+        variable_name = (
+            self.variable_names.pop(0) if len(self.variable_names) > 0 else ""
+        )
+        if variable_name == "":
             variable_name = "VAL_" + str(self.name_counter)
             self.name_counter += 1
 
@@ -90,11 +92,11 @@ def generate_name(self, source_variable_name):
         if self.generate_in_parent_scope_left > 0:
             self.generate_in_parent_scope_left -= 1
             scope = len(self.scopes) - 2
-        assert(scope >= 0)
+        assert scope >= 0
 
         # Save variable
         if variable_name in self.used_variable_names:
-            raise RuntimeError(variable_name + ': duplicate variable name')
+            raise RuntimeError(variable_name + ": duplicate variable name")
         self.scopes[scope][source_variable_name] = variable_name
         self.used_variable_names.add(variable_name)
 
@@ -117,11 +119,12 @@ def clear_names(self):
         self.name_counter = 0
         self.used_variable_names = set()
 
+
 class AttributeNamer:
 
     def __init__(self, attribute_names):
         self.name_counter = 0
-        self.attribute_names = [name.upper() for name in attribute_names.split(',')]
+        self.attribute_names = [name.upper() for name in attribute_names.split(",")]
         self.map = {}
         self.used_attribute_names = set()
 
@@ -129,17 +132,19 @@ def __init__(self, attribute_names):
     def generate_name(self, source_attribute_name):
 
         # Compute FileCheck name
-        attribute_name = self.attribute_names.pop(0) if len(self.attribute_names) > 0 else ''
-        if attribute_name == '':
+        attribute_name = (
+            self.attribute_names.pop(0) if len(self.attribute_names) > 0 else ""
+        )
+        if attribute_name == "":
             attribute_name = "ATTR_" + str(self.name_counter)
             self.name_counter += 1
 
         # Prepend global symbol
-        attribute_name = '$' + attribute_name
+        attribute_name = "$" + attribute_name
 
         # Save attribute
         if attribute_name in self.used_attribute_names:
-            raise RuntimeError(attribute_name + ': duplicate attribute name')
+            raise RuntimeError(attribute_name + ": duplicate attribute name")
         self.map[source_attribute_name] = attribute_name
         self.used_attribute_names.add(attribute_name)
         return attribute_name
@@ -148,14 +153,19 @@ def generate_name(self, source_attribute_name):
     # has been generated for the given attribute yet, the source attribute name
     # itself is returned.
     def get_name(self, source_attribute_name):
-        return self.map[source_attribute_name] if source_attribute_name in self.map else '?'
+        return (
+            self.map[source_attribute_name]
+            if source_attribute_name in self.map
+            else "?"
+        )
+
 
 # Return the number of SSA results in a line of type
 #   %0, %1, ... = ...
 # The function returns 0 if there are no results.
 def get_num_ssa_results(input_line):
     m = SSA_RESULTS_RE.match(input_line)
-    return m.group().count('%') if m else 0
+    return m.group().count("%") if m else 0
 
 
 # Process a line of input that has been split at each SSA identifier '%'.
@@ -165,7 +175,7 @@ def process_line(line_chunks, variable_namer):
     # Process the rest that contained an SSA value name.
     for chunk in line_chunks:
         m = SSA_RE.match(chunk)
-        ssa_name = m.group(0) if m is not None else ''
+        ssa_name = m.group(0) if m is not None else ""
 
         # Check if an existing variable exists for this name.
         variable = None
@@ -207,26 +217,35 @@ def process_source_lines(source_lines, note, args):
         source_segments[-1].append(line + "\n")
     return source_segments
 
+
 def process_attribute_definition(line, attribute_namer, output):
     m = ATTR_DEF_RE.match(line)
     if m:
         attribute_name = attribute_namer.generate_name(m.group(1))
-        line = '// CHECK: #[[' + attribute_name + ':.+]] =' + line[len(m.group(0)):] + '\n'
+        line = (
+            "// CHECK: #[["
+            + attribute_name
+            + ":.+]] ="
+            + line[len(m.group(0)) :]
+            + "\n"
+        )
         output.write(line)
 
+
 def process_attribute_references(line, attribute_namer):
 
-    output_line = ''
+    output_line = ""
     components = ATTR_RE.split(line)
     for component in components:
         m = ATTR_RE.match(component)
         if m:
-            output_line += '#[[' + attribute_namer.get_name(m.group(1)) + ']]'
-            output_line += component[len(m.group()):]
+            output_line += "#[[" + attribute_namer.get_name(m.group(1)) + "]]"
+            output_line += component[len(m.group()) :]
         else:
             output_line += component
     return output_line
 
+
 # Pre-process a line of input to remove any character sequences that will be
 # problematic with FileCheck.
 def preprocess_line(line):
@@ -274,17 +293,19 @@ def main():
     parser.add_argument(
         "--variable_names",
         type=str,
-        default='',
+        default="",
         help="Names to be used in FileCheck regular expression to represent SSA "
         "variables in the order they are encountered. Separate names with commas, "
-        "and leave empty entries for default names (e.g.: 'DIM,,SUM,RESULT')")
+        "and leave empty entries for default names (e.g.: 'DIM,,SUM,RESULT')",
+    )
     parser.add_argument(
         "--attribute_names",
         type=str,
-        default='',
+        default="",
         help="Names to be used in FileCheck regular expression to represent "
         "attributes in the order they are defined. Separate names with commas,"
-        "commas, and leave empty entries for default names (e.g.: 'MAP0,,,MAP1')")
+        " and leave empty entries for default names (e.g.: 'MAP0,,,MAP1')",
+    )
 
     args = parser.parse_args()
 
@@ -400,7 +421,9 @@ def main():
     output_segments = list(filter(None, output_segments))
     # Write the output.
     if source_segments:
-        assert len(output_segments) == len(source_segments), f"{len(output_segments)=}, {len(source_segments)=}"
+        assert len(output_segments) == len(
+            source_segments
+        ), f"{len(output_segments)=}, {len(source_segments)=}"
         for check_segment, source_segment in zip(output_segments, source_segments):
             for line in check_segment:
                 output.write(line)