diff --git a/programming_examples/basic/matrix_multiplication/rtp/aie2.py b/programming_examples/basic/matrix_multiplication/rtp/aie2.py
index 0ba3d5f47c..52b82bfad9 100644
--- a/programming_examples/basic/matrix_multiplication/rtp/aie2.py
+++ b/programming_examples/basic/matrix_multiplication/rtp/aie2.py
@@ -8,9 +8,6 @@
 # B matrix is assumed to be column major in this design!
 
 import argparse
-from ml_dtypes import bfloat16
-import numpy as np
-import sys
 
 from aie.extras.context import mlir_mod_ctx
 
@@ -21,14 +18,13 @@
 import aie.dialects.arith as arith_dialect
 
 dtype_map = {
-    "bf16": bfloat16,
-    "i8": np.int8,
-    "i16": np.int16,
-    "f32": np.float32,
-    "i32": np.int32,
+    "bf16": T.bf16,
+    "i8": T.i8,
+    "i16": T.i16,
+    "f32": T.f32,
+    "i32": T.i32,
 }
 
-
 def main():
     argparser = argparse.ArgumentParser(
         prog="AIE Matrix Multiplication MLIR Design (Whole Array)",
@@ -80,15 +76,8 @@ def my_matmul(M, K, N, m, k, n, n_aie_cols, dtype_in_str, dtype_out_str):
     dtype_out = dtype_map[dtype_out_str]
 
     # Only tested with these types thus far
-    assert dtype_in == bfloat16
-    assert dtype_out == np.float32
-
-    assert np.issubdtype(dtype_in, np.integer) == np.issubdtype(
-        dtype_out, np.integer
-    ), f"Input dtype ({dtype_in}) and output dtype ({dtype_out}) must either both be integral or both be float"
-    assert (
-        np.dtype(dtype_out).itemsize >= np.dtype(dtype_in).itemsize
-    ), f"Output dtype ({dtype_out}) must be equal or larger to input dtype ({dtype_in})"
+    assert dtype_in == T.bf16
+    assert dtype_out == T.f32
 
     if dtype_in_str == "bf16":
         r = 4
@@ -147,13 +136,13 @@ def my_matmul(M, K, N, m, k, n, n_aie_cols, dtype_in_str, dtype_out_str):
 
     @device(dev)
     def device_body():
-        A_l2_ty = np.ndarray[(m * k * n_A_tiles_per_shim,), np.dtype[dtype_in]]
-        B_l2_ty = np.ndarray[(k * n,), np.dtype[dtype_in]]
-        C_l2_ty = np.ndarray[(m * n * n_aie_rows,), np.dtype[dtype_out]]
-        A_l1_ty = np.ndarray[(m, k), np.dtype[dtype_in]]
-        B_l1_ty = np.ndarray[(k, n), np.dtype[dtype_in]]
-        C_l1_ty = np.ndarray[(m, n), np.dtype[dtype_out]]
-        rtp_ty = np.ndarray[(3,), np.dtype[np.int32]]
+        A_l2_ty = T.memref(m * k * n_A_tiles_per_shim, dtype_in())
+        B_l2_ty = T.memref(k * n, dtype_in())
+        C_l2_ty = T.memref(m * n * n_aie_rows, dtype_out())
+        A_l1_ty = T.memref(m, k, dtype_in())
+        B_l1_ty = T.memref(k, n, dtype_in())
+        C_l1_ty = T.memref(m, n, dtype_out())
+        rtp_ty = T.memref(3, T.i32())
 
         # AIE Core Function declarations
         zero = external_func(f"zero_{dtype_out_str}", inputs=[C_l1_ty])
@@ -338,9 +327,9 @@ def core_body():
 
         # To/from AIE-array data movement
         @runtime_sequence(
-            np.ndarray[(M * K,), np.dtype[dtype_in]],
-            np.ndarray[(K * N,), np.dtype[dtype_in]],
-            np.ndarray[(M * N,), np.dtype[dtype_out]],
+            T.memref(M * K, dtype_in()),
+            T.memref(K * N, dtype_in()),
+            T.memref(M * N, dtype_out()),
         )
         def sequence(A, B, C):
             # Write number of inner loop iterations for cores to use as run-time parameter.