From b19ce168dedc2c3a04b80832f1fcb5b82b6c75da Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Mon, 16 Sep 2024 02:31:13 +0000 Subject: [PATCH 01/10] Initial commit of parser changes to handle MatMulIntegerToFloat --- src/onnx/parse_matmul.cpp | 54 +++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index 9ded26a4b8f..410a8cbc6e9 100644 --- a/src/onnx/parse_matmul.cpp +++ b/src/onnx/parse_matmul.cpp @@ -35,7 +35,9 @@ struct parse_matmul : op_parser { std::vector operators() const { - return {{"MatMul", "dot"}, {"MatMulInteger", "quant_dot"}}; + return {{"MatMul", "dot"}, + {"MatMulInteger", "quant_dot"}, + {"MatMulIntegerToFloat", "quant_dot_scaled"}}; } static void broadcast_dimensions(const onnx_parser::node_info& info, @@ -106,6 +108,20 @@ struct parse_matmul : op_parser return all_zeros; } + static instruction_ref set_scale_arg(const std::vector& args, const int index) + { + instruction_ref scale_arg = args[index]; + std::set supported_dq_types = {migraphx::shape::float_type, + migraphx::shape::half_type}; + + if(not(contains(supported_dq_types, scale_arg->get_shape().type()))) + { + MIGRAPHX_THROW("PARSE_QUANT_DOT_SCALDED: Scales must be float or half_type"); + } + + return scale_arg; + } + static instruction_ref set_bias_arg(const std::vector& args, const int index, const instruction_ref& input, @@ -172,7 +188,8 @@ struct parse_matmul : op_parser a1 = info.add_instruction(make_op("unsqueeze", {{"axes", {1}}}), args[1]); } - auto is_quant_dot = opd.op_name == "quant_dot"; + auto is_quant_dot = opd.op_name == "quant_dot" or opd.op_name == "quant_dot_scaled"; + auto has_scales = opd.op_name == "quant_dot_scaled"; if(s0.dynamic() or s1.dynamic()) { if(is_quant_dot) @@ -207,8 +224,23 @@ struct parse_matmul : op_parser bool has_ba0 = false; bool has_ba1 = false; - instruction_ref ba0 = set_bias_arg(args, 2, a0, has_ba0); - instruction_ref ba1 = 
set_bias_arg(args, 3, a1, has_ba1); + + int a0_zp_index = 2; + int a1_zp_index = 3; + + instruction_ref scale_a0; + instruction_ref scale_a1; + // Handles case with for when scales are present in operator + if(has_scales) + { + a0_zp_index = 4; + a1_zp_index = 5; + scale_a0 = set_scale_arg(args, 2); + scale_a1 = set_scale_arg(args, 3); + } + + instruction_ref ba0 = set_bias_arg(args, a0_zp_index, a0, has_ba0); + instruction_ref ba1 = set_bias_arg(args, a1_zp_index, a1, has_ba1); // Only INT8 or UINT8 type currently supported std::set supported_types = {migraphx::shape::uint8_type, @@ -254,7 +286,19 @@ struct parse_matmul : op_parser broadcast_dimensions(info, s0_lens, s1_lens, a0, a1, ba0, ba1); - dot_res = info.add_instruction(make_op(opd.op_name), ba0, ba1); + // Apply the scale to dequantize input to then perform a simple dot + // after the zero points are applied otherwise get a int32 output from the quantized + // equivalent + if(has_scales) + { + auto dq_a0 = info.add_common_op("mul", ba0, scale_a0); + auto dq_a1 = info.add_common_op("mul", ba1, scale_a1); + dot_res = info.add_instruction(make_op("dot"), dq_a0, dq_a1); + } + else + { + dot_res = info.add_instruction(make_op(opd.op_name), ba0, ba1); + } } // squeeze the appended or prepended dimensions From ae9f722e17909d8c80223955762e8887e7c7c413 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Mon, 16 Sep 2024 02:47:36 +0000 Subject: [PATCH 02/10] Update output to handle dot and broadcasted instead of mul --- src/onnx/parse_matmul.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index 410a8cbc6e9..36844ad0960 100644 --- a/src/onnx/parse_matmul.cpp +++ b/src/onnx/parse_matmul.cpp @@ -217,7 +217,7 @@ struct parse_matmul : op_parser auto s0_lens = a0->get_shape().lens(); auto s1_lens = a1->get_shape().lens(); - if(not is_quant_dot and args.size() > 2) + if(not is_quant_dot and args.size() > 2 and not has_scales) { 
MIGRAPHX_THROW("PARSE_MATMUL: Bias Args not supported for MatMul"); } @@ -288,11 +288,12 @@ struct parse_matmul : op_parser // Apply the scale to dequantize input to then perform a simple dot // after the zero points are applied otherwise get a int32 output from the quantized - // equivalent + // equivalent. Ensure these are broadcasted accordingly before we perform a dot if(has_scales) { - auto dq_a0 = info.add_common_op("mul", ba0, scale_a0); - auto dq_a1 = info.add_common_op("mul", ba1, scale_a1); + broadcast_dimensions(info, s0_lens, s1_lens, a0, a1, scale_a0, scale_a1); + auto dq_a0 = info.add_instrution(make_op("dot"), ba0, scale_a0); + auto dq_a1 = info.add_instruction(make_op("dot"), ba1, scale_a1); dot_res = info.add_instruction(make_op("dot"), dq_a0, dq_a1); } else From 7f62a33c48a7b8284761beaabf9a5b825722e116 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Mon, 16 Sep 2024 18:07:51 +0000 Subject: [PATCH 03/10] Fix typo --- src/onnx/parse_matmul.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index 36844ad0960..828da02e32e 100644 --- a/src/onnx/parse_matmul.cpp +++ b/src/onnx/parse_matmul.cpp @@ -292,7 +292,7 @@ struct parse_matmul : op_parser if(has_scales) { broadcast_dimensions(info, s0_lens, s1_lens, a0, a1, scale_a0, scale_a1); - auto dq_a0 = info.add_instrution(make_op("dot"), ba0, scale_a0); + auto dq_a0 = info.add_instruction(make_op("dot"), ba0, scale_a0); auto dq_a1 = info.add_instruction(make_op("dot"), ba1, scale_a1); dot_res = info.add_instruction(make_op("dot"), dq_a0, dq_a1); } From 92d8ea47ac5f1a6259a841a38757ee44125bea2f Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Tue, 17 Sep 2024 03:30:38 +0000 Subject: [PATCH 04/10] Add parser tests and updated gen onnx Updated parser to handle bias case as well as bad scale conditions Initial float/half tests bad scale tests bad bias tests --- src/onnx/parse_matmul.cpp | 60 ++++- test/onnx/gen_onnx.py | 229 
++++++++++++++++++ .../matmulintegertofloat_bad_bias_test.onnx | Bin 0 -> 270 bytes .../matmulintegertofloat_bad_bias_test2.onnx | Bin 0 -> 274 bytes .../matmulintegertofloat_bad_bias_test3.onnx | Bin 0 -> 269 bytes .../matmulintegertofloat_bad_scale2_test.onnx | Bin 0 -> 256 bytes .../matmulintegertofloat_bad_scale3_test.onnx | Bin 0 -> 256 bytes .../matmulintegertofloat_bad_scale_test.onnx | Bin 0 -> 254 bytes test/onnx/matmulintegertofloat_half_test.onnx | 28 +++ ...atmulintegertofloat_half_zp_bias_test.onnx | Bin 0 -> 280 bytes .../matmulintegertofloat_half_zp_test.onnx | Bin 0 -> 250 bytes test/onnx/matmulintegertofloat_test.onnx | 26 ++ .../matmulintegertofloat_zp_bias_test.onnx | Bin 0 -> 276 bytes test/onnx/matmulintegertofloat_zp_test.onnx | Bin 0 -> 240 bytes .../matmulintegerToFloat_bad_bias_test.cpp | 30 +++ .../matmulintegerToFloat_bad_bias_test2.cpp | 30 +++ .../matmulintegerToFloat_bad_bias_test3.cpp | 30 +++ .../matmulintegerToFloat_bad_scale2_test.cpp | 31 +++ .../matmulintegerToFloat_bad_scale3_test.cpp | 31 +++ .../matmulintegerToFloat_bad_scale_test.cpp | 30 +++ .../parse/matmulintegertofloat_half_test.cpp | 47 ++++ test/onnx/parse/matmulintegertofloat_test.cpp | 47 ++++ 22 files changed, 617 insertions(+), 2 deletions(-) create mode 100644 test/onnx/matmulintegertofloat_bad_bias_test.onnx create mode 100644 test/onnx/matmulintegertofloat_bad_bias_test2.onnx create mode 100644 test/onnx/matmulintegertofloat_bad_bias_test3.onnx create mode 100644 test/onnx/matmulintegertofloat_bad_scale2_test.onnx create mode 100644 test/onnx/matmulintegertofloat_bad_scale3_test.onnx create mode 100644 test/onnx/matmulintegertofloat_bad_scale_test.onnx create mode 100644 test/onnx/matmulintegertofloat_half_test.onnx create mode 100644 test/onnx/matmulintegertofloat_half_zp_bias_test.onnx create mode 100644 test/onnx/matmulintegertofloat_half_zp_test.onnx create mode 100644 test/onnx/matmulintegertofloat_test.onnx create mode 100644 
test/onnx/matmulintegertofloat_zp_bias_test.onnx create mode 100644 test/onnx/matmulintegertofloat_zp_test.onnx create mode 100644 test/onnx/parse/matmulintegerToFloat_bad_bias_test.cpp create mode 100644 test/onnx/parse/matmulintegerToFloat_bad_bias_test2.cpp create mode 100644 test/onnx/parse/matmulintegerToFloat_bad_bias_test3.cpp create mode 100644 test/onnx/parse/matmulintegerToFloat_bad_scale2_test.cpp create mode 100644 test/onnx/parse/matmulintegerToFloat_bad_scale3_test.cpp create mode 100644 test/onnx/parse/matmulintegerToFloat_bad_scale_test.cpp create mode 100644 test/onnx/parse/matmulintegertofloat_half_test.cpp create mode 100644 test/onnx/parse/matmulintegertofloat_test.cpp diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index 828da02e32e..af6ea509cef 100644 --- a/src/onnx/parse_matmul.cpp +++ b/src/onnx/parse_matmul.cpp @@ -122,6 +122,41 @@ struct parse_matmul : op_parser return scale_arg; } + static instruction_ref set_scale_bias(const std::vector& args, + const int index, + const migraphx::shape& scale_arg_shape, + const instruction_ref& compare_arg, + bool& has_valid_scale_bias) + { + has_valid_scale_bias = false; + + if(args.size() > index) + { + instruction_ref scale_bias_arg = args[index]; + std::set supported_dq_types = {migraphx::shape::float_type, + migraphx::shape::half_type}; + + if(not(contains(supported_dq_types, scale_bias_arg->get_shape().type()))) + { + MIGRAPHX_THROW("PARSE_QUANT_DOT_SCALDED: Bias must be float or half_type"); + } + + if(scale_bias_arg->get_shape().type() != scale_arg_shape.type()) + { + MIGRAPHX_THROW("PARSE_QUANT_DOT_SCALED: Bias must be the same type as scales"); + } + + if(scale_bias_arg->get_shape().lens().at(0) != compare_arg->get_shape().lens().at(1)) + { + MIGRAPHX_THROW("PARSE_QUANT_DOT_SCALED: Bias have same dim as matrix B column"); + } + + has_valid_scale_bias = true; + return scale_bias_arg; + } + return compare_arg; + } + static instruction_ref set_bias_arg(const std::vector& args, 
const int index, const instruction_ref& input, @@ -224,6 +259,7 @@ struct parse_matmul : op_parser bool has_ba0 = false; bool has_ba1 = false; + bool has_scale_bias = false; int a0_zp_index = 2; int a1_zp_index = 3; @@ -237,11 +273,22 @@ struct parse_matmul : op_parser a1_zp_index = 5; scale_a0 = set_scale_arg(args, 2); scale_a1 = set_scale_arg(args, 3); + if(scale_a0->get_shape().type() != scale_a1->get_shape().type()) + { + MIGRAPHX_THROW("PARSE_MATMULINTEGERTOFLOAT: Scales must be the same type"); + } } instruction_ref ba0 = set_bias_arg(args, a0_zp_index, a0, has_ba0); instruction_ref ba1 = set_bias_arg(args, a1_zp_index, a1, has_ba1); + // handle optional bias arg to the result + instruction_ref scaled_bias; + if(has_scales) + { + scaled_bias = set_scale_bias(args, 6, scale_a1->get_shape(), a1, has_scale_bias); + } + // Only INT8 or UINT8 type currently supported std::set supported_types = {migraphx::shape::uint8_type, migraphx::shape::int8_type}; @@ -292,9 +339,18 @@ struct parse_matmul : op_parser if(has_scales) { broadcast_dimensions(info, s0_lens, s1_lens, a0, a1, scale_a0, scale_a1); - auto dq_a0 = info.add_instruction(make_op("dot"), ba0, scale_a0); - auto dq_a1 = info.add_instruction(make_op("dot"), ba1, scale_a1); + // Convert if we're half types as dot will scream if we try to multipy half int8 + ba0 = info.add_instruction( + make_op("convert", {{"target_type", scale_a0->get_shape().type()}}), ba0); + ba1 = info.add_instruction( + make_op("convert", {{"target_type", scale_a1->get_shape().type()}}), ba1); + auto dq_a0 = info.add_instruction(make_op("mul"), ba0, scale_a0); + auto dq_a1 = info.add_instruction(make_op("mul"), ba1, scale_a1); dot_res = info.add_instruction(make_op("dot"), dq_a0, dq_a1); + + // Handle case of the bias after scaling + if(has_scale_bias) + dot_res = info.add_common_op("sub", dot_res, scaled_bias); } else { diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index eeb459630a2..1a2ea05ea7b 100644 --- 
a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -6833,6 +6833,235 @@ def matmulinteger_int8_uint8_dual_zero_zp_test(): return ([node], [m1, m2], [y], [zp1, zp2]) +@onnx_test() +def matmulintegertofloat_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], []) + + +@onnx_test() +def matmulintegertofloat_zp_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2]) + + +@onnx_test() +def matmulintegertofloat_zp_bias_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + b1 = helper.make_tensor('7', TensorProto.UINT8, [2], [128, -128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 
2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6', '7'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2, b1]) + + +@onnx_test() +def matmulintegertofloat_bad_scale_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.INT8, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2]) + + +@onnx_test() +def matmulintegertofloat_bad_scale2_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.INT8, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2]) + + +@onnx_test() +def matmulintegertofloat_bad_scale3_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + 
zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2]) + + +@onnx_test() +def matmulintegertofloat_bad_bias_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + b1 = helper.make_tensor('7', TensorProto.UINT8, [2], [128, 128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6', '7'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2, b1]) + + +@onnx_test() +def matmulintegertofloat_bad_bias_test2(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + b1 = helper.make_tensor('7', TensorProto.FLOAT16, [2], [128, -128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6', '7'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2, b1]) + + +@onnx_test() +def matmulintegertofloat_bad_bias_test3(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 
= helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + b1 = helper.make_tensor('7', TensorProto.FLOAT16, [], [128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6', '7'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2, b1]) + + +@onnx_test() +def matmulintegertofloat_half_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [3, 2]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], []) + + +@onnx_test() +def matmulintegertofloat_half_zp_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2]) + + +@onnx_test() +def matmulintegertofloat_half_zp_bias_test(): + m1 = 
helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + b1 = helper.make_tensor('7', TensorProto.FLOAT16, [2], [128, -128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6', '7'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2, b1]) + + @onnx_test() def max_test(): a = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3]) diff --git a/test/onnx/matmulintegertofloat_bad_bias_test.onnx b/test/onnx/matmulintegertofloat_bad_bias_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f733a4642b784b0e8c6b0824550eb4aa611be7bd GIT binary patch literal 270 zcma*gy$ZrG6a`>!QY5{ol+-8am_e|=;MhTM=;q{7Qmj&FE!a48>#O=ECJ{t8hdZ2) z!@11mK_^xBXq%g=uk~H^m$sLxtmL_@8ab4z9@Op$xIx$=ga{ErjF2Ft2pNYlSQkSpWb4 literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_bad_bias_test2.onnx b/test/onnx/matmulintegertofloat_bad_bias_test2.onnx new file mode 100644 index 0000000000000000000000000000000000000000..cd0379b482f4e5fd2ff2802235695eeccd75de6a GIT binary patch literal 274 zcma*gy$ZrG6oz3>A|*YDmeecIErVcx!Lftj(9OxEq*$fUTCj2G)=TO|c_F3|L^p@C zJiL5|*?e6~RX?ioriyEESIwnrq{?!6&hk>WnJQZ4?Vf-ege`)H;3EVGAwq-@bLdvf zRI2n*9o`GaWt6%du&KTtk37hJp eBYFmjL8_6A=o^IJGWbGM=DO?*TAz|6#BpC)Yc((c literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_bad_bias_test3.onnx b/test/onnx/matmulintegertofloat_bad_bias_test3.onnx new file mode 100644 index 0000000000000000000000000000000000000000..67a095396127bece30f5936b121de61d37a209b8 GIT binary patch literal 269 
zcma*gy$ZrG6oz3>B9b0NLh2RhmO-$;;MhTM=;q{7Qmj&FE!cGE)=Tn^ycN?3ikm~= z@bL15IbvNaU3XP^)5W#8>*i85N@uw`XL+gGOc$;8cTd0#!WO|p@DT!p5FtW{1@xLSbU2Y@fCM6m93>qd=8jLU`Bf}%&ZuBW*wj2aqGyp< aqz1`|zD4{k1B>u2b5r&fXP=TJgu5^4uru)h literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_bad_scale2_test.onnx b/test/onnx/matmulintegertofloat_bad_scale2_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d900df4348b0688b5567f0f7c802fefdc0532e9e GIT binary patch literal 256 zcmdNBO;0T<$xqA4Pb`T~N=%6_PEO28HHt4uEiSRT$jGJ5#c0UI zXvD>6%*ANJ#c0aKXePv1sU+f?SmIln;|b9plJ5r6Pn@Y*90JT*j0{eUrdpf=Oj=A0 zj82SZQNlobg!s64IGBYvxR^LtfS4I1Xap8y0t+$&1(`sC#$Z7%RFx)3+QBN5fGR7& Nf^2Y=OioM!!T>)IGE4vf literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_bad_scale3_test.onnx b/test/onnx/matmulintegertofloat_bad_scale3_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..10264b6f7404f79cc735a2f6ac098b5c57003845 GIT binary patch literal 256 zcmdNBO;0T<$xqA4Pb`T~N=%6_PEO28HI6SyEiSRT$jGJ5#c0UI zXvD>6%*ANJ#c0aKXePv1sU+f?SmIln;|b9plJ5r6Pn@Y*90JT*j0{eUrdpf=Oj=A0 zj82SZQNlobg!s64IGBYvxR^LtfS4I1Xap8y0t+$&1(`sC#$Z7%RFx)RK}LkiB%sPl Oupk>;C6g1AfG_|+E;392 literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_bad_scale_test.onnx b/test/onnx/matmulintegertofloat_bad_scale_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..7059938598089bbbd64a84b75b841d16efacd342 GIT binary patch literal 254 zcmdNBO;0T<$xqA4Pb`T~N=%6_PEO28jW0I3DF*s?*`INl%ZN20?b;B3{H%uTATt*T1*X$ zPK;(z!a!?;__%mDn1wjFm^fH~m>DE!1Quih3o-)*nLvWZ*i@Q;1-TF^lYlBK!GdgX KGnt&21cU*?sWIgM literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_half_test.onnx b/test/onnx/matmulintegertofloat_half_test.onnx new file mode 100644 index 00000000000..bd7f74cb641 --- /dev/null +++ b/test/onnx/matmulintegertofloat_half_test.onnx @@ -0,0 +1,28 @@ + matmulintegertofloat_half_test: +% +1 +2 +3 +4y"MatMulIntegerToFloatmatmulintegertofloat_half_testZ +1 +  + +Z +2 
+  + +Z +3 +  + + +Z +4 +  + + +b +y +  + +B \ No newline at end of file diff --git a/test/onnx/matmulintegertofloat_half_zp_bias_test.onnx b/test/onnx/matmulintegertofloat_half_zp_bias_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0b4ffefe0fdccc865aad7b90c37cd1d4b13f0df7 GIT binary patch literal 280 zcma*gzY2m-6vy$-y&=~_lxR=T&~S$!|3aff(BRh8B3IB7G$XHrO+J!u)oY|Th}s&? z^5M(xFx_02LY7UDUuAku@3P8@O33&kiY$KI;v^S!EYn)9fkN?x?iZuWGGvO+NUHTF{~G6E;wNT literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_half_zp_test.onnx b/test/onnx/matmulintegertofloat_half_zp_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..10e47d781e30fdd37aea97f69707b96b6c748285 GIT binary patch literal 250 zcmdNBO;0T<$xqA4Pb`VgNX$u#uPTTyNi8n1I>*SR&BbWQ#c0IE zXw1cE!o_IH#b_qPSg9o9n^@von&S!49Fp$_(oBSrS{wq*T8s=%jHX(g0!&&=4UA5V zW>LaGON98icsQ7aIJlTNSb&%rBxr;r$P5%@0tp&}1-VdFnji^+RVD#dR)PiD;3}D% Hm;{6Ya2qg_ literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_test.onnx b/test/onnx/matmulintegertofloat_test.onnx new file mode 100644 index 00000000000..1eb839d3baa --- /dev/null +++ b/test/onnx/matmulintegertofloat_test.onnx @@ -0,0 +1,26 @@ + matmulintegertofloat_test: +% +1 +2 +3 +4y"MatMulIntegerToFloatmatmulintegertofloat_testZ +1 +  + +Z +2 +  + +Z +3 +  + +Z +4 +  + +b +y +  + +B \ No newline at end of file diff --git a/test/onnx/matmulintegertofloat_zp_bias_test.onnx b/test/onnx/matmulintegertofloat_zp_bias_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..07af336b78b5d1bb9faeacef568beb2a08ab6956 GIT binary patch literal 276 zcmdNBO;0T<$xqA4Pb`VADu_?YOe~HsNi8n1dcnx0$Hi#K#c0IE zXw1cE!o_IH#c0OGXfDK9sU+f?SmIln;|b9plJ5r6PlTyj90JT*j0{eUrdpf=Oj=A0 zj82SZTB00GKn_m>W5a(a01BH&2?MPc;^X4sU>4%wV&Y%{VrGz_5t1M?P>=~EXbcu) aL{({mBnVcS1XNiG7G#5~WO8B>5C#BTr8?~Z literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_zp_test.onnx b/test/onnx/matmulintegertofloat_zp_test.onnx new file mode 100644 
index 0000000000000000000000000000000000000000..7d2b46c74ee1bd91b0d8a6023a59eece01163b31 GIT binary patch literal 240 zcmdNBO;0T<$xqA4Pb`VADu^#hEiSP-$;hS6#c0UIXvD>6%*ANJ z#c0aKXePv1sU+f?SmIln;|b9hlJ5r6hu<_U4gqE@Mg}KFQ!P#bCM~80MkhwIC}E)a zLVR329Lz!-TudA+K+FshG(r+&1`0BP1dYLhjHoJ2kOaXhlYlBK!GdgXl}t`d0>S{p CO)P!@ literal 0 HcmV?d00001 diff --git a/test/onnx/parse/matmulintegerToFloat_bad_bias_test.cpp b/test/onnx/parse/matmulintegerToFloat_bad_bias_test.cpp new file mode 100644 index 00000000000..9527bf8fd03 --- /dev/null +++ b/test/onnx/parse/matmulintegerToFloat_bad_bias_test.cpp @@ -0,0 +1,30 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_bad_boas_test) +{ + EXPECT(test::throws([&] { migraphx::parse_onnx("matmulintegertofloat_bad_bias_test.onnx"); })); +} diff --git a/test/onnx/parse/matmulintegerToFloat_bad_bias_test2.cpp b/test/onnx/parse/matmulintegerToFloat_bad_bias_test2.cpp new file mode 100644 index 00000000000..c7fe8161666 --- /dev/null +++ b/test/onnx/parse/matmulintegerToFloat_bad_bias_test2.cpp @@ -0,0 +1,30 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_bad_bias_test2) +{ + EXPECT(test::throws([&] { migraphx::parse_onnx("matmulintegertofloat_bad_boas_test2.onnx"); })); +} diff --git a/test/onnx/parse/matmulintegerToFloat_bad_bias_test3.cpp b/test/onnx/parse/matmulintegerToFloat_bad_bias_test3.cpp new file mode 100644 index 00000000000..becd4dba1a8 --- /dev/null +++ b/test/onnx/parse/matmulintegerToFloat_bad_bias_test3.cpp @@ -0,0 +1,30 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_bad_bias_test) +{ + EXPECT(test::throws([&] { migraphx::parse_onnx("matmulintegertofloat_bad_bias_test3.onnx"); })); +} diff --git a/test/onnx/parse/matmulintegerToFloat_bad_scale2_test.cpp b/test/onnx/parse/matmulintegerToFloat_bad_scale2_test.cpp new file mode 100644 index 00000000000..f8505c7af7e --- /dev/null +++ b/test/onnx/parse/matmulintegerToFloat_bad_scale2_test.cpp @@ -0,0 +1,31 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_bad_scale2_test) +{ + EXPECT( + test::throws([&] { migraphx::parse_onnx("matmulintegertofloat_bad_scale2_test.onnx"); })); +} diff --git a/test/onnx/parse/matmulintegerToFloat_bad_scale3_test.cpp b/test/onnx/parse/matmulintegerToFloat_bad_scale3_test.cpp new file mode 100644 index 00000000000..f286e875322 --- /dev/null +++ b/test/onnx/parse/matmulintegerToFloat_bad_scale3_test.cpp @@ -0,0 +1,31 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_bad_scale3_test) +{ + EXPECT( + test::throws([&] { migraphx::parse_onnx("matmulintegertofloat_bad_scale3_test.onnx"); })); +} diff --git a/test/onnx/parse/matmulintegerToFloat_bad_scale_test.cpp b/test/onnx/parse/matmulintegerToFloat_bad_scale_test.cpp new file mode 100644 index 00000000000..46663319192 --- /dev/null +++ b/test/onnx/parse/matmulintegerToFloat_bad_scale_test.cpp @@ -0,0 +1,30 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_bad_scale_test) +{ + EXPECT(test::throws([&] { migraphx::parse_onnx("matmulintegertofloat_bad_scale_test.onnx"); })); +} diff --git a/test/onnx/parse/matmulintegertofloat_half_test.cpp b/test/onnx/parse/matmulintegertofloat_half_test.cpp new file mode 100644 index 00000000000..a280aec93d5 --- /dev/null +++ b/test/onnx/parse/matmulintegertofloat_half_test.cpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_half_test) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + auto l0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {3, 6, 16}}); + auto l1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 16, 8}}); + auto s0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::half_type, {3, 6, 16}}); + auto s1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::half_type, {3, 16, 8}}); + s0 = mm->add_instruction( + migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), s0); + s1 = mm->add_instruction( + migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), s1); + + auto r0 = mm->add_instruction(migraphx::make_op("dot"), l0, s0); + auto r1 = mm->add_instruction(migraphx::make_op("dot"), l1, s1); + mm->add_instruction(migraphx::make_op("dot"), r0, r1); + + auto prog = optimize_onnx("matmulintegertofloat_half_test.onnx"); + + EXPECT(p == prog); +} diff --git a/test/onnx/parse/matmulintegertofloat_test.cpp b/test/onnx/parse/matmulintegertofloat_test.cpp new file mode 100644 index 00000000000..6c77656006c --- /dev/null +++ b/test/onnx/parse/matmulintegertofloat_test.cpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include + +TEST_CASE(matmulintegertofloat_test) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + auto l0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {3, 6, 16}}); + auto l1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 16, 8}}); + auto s0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::half_type, {3, 6, 16}}); + auto s1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::half_type, {3, 16, 8}}); + s0 = mm->add_instruction( + migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), s0); + s1 = mm->add_instruction( + migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), s1); + + auto r0 = mm->add_instruction(migraphx::make_op("dot"), l0, s0); + auto r1 = mm->add_instruction(migraphx::make_op("dot"), l1, s1); + mm->add_instruction(migraphx::make_op("dot"), r0, r1); + + auto prog = optimize_onnx("matmulintegertofloat_test.onnx"); + + EXPECT(p == prog); +} From cdb307d2ddada5511b84b03f7ec7d9017fbd22f0 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Tue, 17 Sep 2024 15:48:40 +0000 Subject: [PATCH 05/10] Handle scaled output result better avoid tidy screaming about complexity --- src/onnx/parse_matmul.cpp | 40 +++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index af6ea509cef..01fca0a9ce3 100644 --- a/src/onnx/parse_matmul.cpp +++ 
b/src/onnx/parse_matmul.cpp @@ -199,6 +199,32 @@ struct parse_matmul : op_parser } } + static instruction_ref handle_scaled_output(const onnx_parser::node_info& info, + const instruction_ref& ba0, + const instruction_ref& ba1, + const instruction_ref& scale_a0, + const instruction_ref& scale_a1, + const instruction_ref& scaled_bias, + const bool has_scale_bias) + { + auto bias_a0 = ba0; + auto bias_a1 = ba1; + // Convert if we're half types as dot will scream if we try to multipy half int8 + bias_a0 = info.add_instruction( + make_op("convert", {{"target_type", scale_a0->get_shape().type()}}), bias_a0); + bias_a1 = info.add_instruction( + make_op("convert", {{"target_type", scale_a1->get_shape().type()}}), bias_a1); + auto dq_a0 = info.add_instruction(make_op("dot"), ba0, scale_a0); + auto dq_a1 = info.add_instruction(make_op("dot"), ba1, scale_a1); + auto res = info.add_instruction(make_op("dot"), dq_a0, dq_a1); + + // Handle case of the bias after scaling + if(has_scale_bias) + res = info.add_common_op("sub", res, scaled_bias); + + return res; + } + instruction_ref parse(const op_desc& opd, const onnx_parser& /*parser*/, const onnx_parser::node_info& info, @@ -339,18 +365,8 @@ struct parse_matmul : op_parser if(has_scales) { broadcast_dimensions(info, s0_lens, s1_lens, a0, a1, scale_a0, scale_a1); - // Convert if we're half types as dot will scream if we try to multipy half int8 - ba0 = info.add_instruction( - make_op("convert", {{"target_type", scale_a0->get_shape().type()}}), ba0); - ba1 = info.add_instruction( - make_op("convert", {{"target_type", scale_a1->get_shape().type()}}), ba1); - auto dq_a0 = info.add_instruction(make_op("mul"), ba0, scale_a0); - auto dq_a1 = info.add_instruction(make_op("mul"), ba1, scale_a1); - dot_res = info.add_instruction(make_op("dot"), dq_a0, dq_a1); - - // Handle case of the bias after scaling - if(has_scale_bias) - dot_res = info.add_common_op("sub", dot_res, scaled_bias); + dot_res = handle_scaled_output( + info, ba0, ba1, 
scale_a0, scale_a1, scaled_bias, has_scale_bias); } else { From 3ca3e6afbdb865cc0041d696836fd9a79fb5d6bb Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Fri, 11 Oct 2024 23:24:42 +0000 Subject: [PATCH 06/10] Update fixes for parser tests Use dequantizelinear which elminates the need to add in shifts due to int8/uint8 mismatches still needs parser tests --- src/onnx/parse_matmul.cpp | 46 +++++++++++++------ test/onnx/gen_onnx.py | 24 +++++----- test/onnx/matmulintegertofloat_half_test.onnx | 24 +++++----- test/onnx/matmulintegertofloat_test.onnx | 18 ++++---- .../parse/matmulintegertofloat_half_test.cpp | 16 +++---- test/onnx/parse/matmulintegertofloat_test.cpp | 16 +++---- 6 files changed, 76 insertions(+), 68 deletions(-) diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index 01fca0a9ce3..68b0a2e9e4e 100644 --- a/src/onnx/parse_matmul.cpp +++ b/src/onnx/parse_matmul.cpp @@ -199,23 +199,33 @@ struct parse_matmul : op_parser } } + static instruction_ref handle_dequantized(const onnx_parser::node_info& info, + const instruction_ref& a0, + const instruction_ref& scale_a0, + const instruction_ref& zp_a0) + { + instruction_ref dequantized_op; + + if(a0 == zp_a0) + dequantized_op = info.add_instruction(make_op("dequantizelinear"), a0, scale_a0); + else + dequantized_op = info.add_instruction(make_op("dequantizelinear"), a0, scale_a0, zp_a0); + + return dequantized_op; + } + static instruction_ref handle_scaled_output(const onnx_parser::node_info& info, - const instruction_ref& ba0, - const instruction_ref& ba1, + const instruction_ref& a0, + const instruction_ref& a1, const instruction_ref& scale_a0, const instruction_ref& scale_a1, + const instruction_ref& zp_a0, + const instruction_ref& zp_a1, const instruction_ref& scaled_bias, const bool has_scale_bias) { - auto bias_a0 = ba0; - auto bias_a1 = ba1; - // Convert if we're half types as dot will scream if we try to multipy half int8 - bias_a0 = info.add_instruction( - make_op("convert", 
{{"target_type", scale_a0->get_shape().type()}}), bias_a0); - bias_a1 = info.add_instruction( - make_op("convert", {{"target_type", scale_a1->get_shape().type()}}), bias_a1); - auto dq_a0 = info.add_instruction(make_op("dot"), ba0, scale_a0); - auto dq_a1 = info.add_instruction(make_op("dot"), ba1, scale_a1); + auto dq_a0 = handle_dequantized(info, a0, scale_a0, zp_a0); + auto dq_a1 = handle_dequantized(info, a1, scale_a1, zp_a1); auto res = info.add_instruction(make_op("dot"), dq_a0, dq_a1); // Handle case of the bias after scaling @@ -249,7 +259,7 @@ struct parse_matmul : op_parser a1 = info.add_instruction(make_op("unsqueeze", {{"axes", {1}}}), args[1]); } - auto is_quant_dot = opd.op_name == "quant_dot" or opd.op_name == "quant_dot_scaled"; + auto is_quant_dot = opd.op_name == "quant_dot"; auto has_scales = opd.op_name == "quant_dot_scaled"; if(s0.dynamic() or s1.dynamic()) { @@ -257,6 +267,13 @@ struct parse_matmul : op_parser { MIGRAPHX_THROW("PARSE_MATMUL: dynamic MatMulInteger not supported"); } + + if(has_scales) + { + MIGRAPHX_THROW( + "PARSE_MATMULINTEGERTOFLOAT: dynamic MatMulIntegerToFloat not supported"); + } + auto s0_dds = a0->get_shape().to_dynamic().dyn_dims(); auto s1_dds = a1->get_shape().to_dynamic().dyn_dims(); @@ -321,7 +338,7 @@ struct parse_matmul : op_parser const auto a0_type = a0->get_shape().type(); const auto a1_type = a1->get_shape().type(); - if(is_quant_dot and + if((is_quant_dot or has_scales) and (not contains(supported_types, a0_type) or not contains(supported_types, a1_type))) { MIGRAPHX_THROW("PARSE_MATMULINTEGER: Unsupported type"); @@ -364,9 +381,8 @@ struct parse_matmul : op_parser // equivalent. 
Ensure these are broadcasted accordingly before we perform a dot if(has_scales) { - broadcast_dimensions(info, s0_lens, s1_lens, a0, a1, scale_a0, scale_a1); dot_res = handle_scaled_output( - info, ba0, ba1, scale_a0, scale_a1, scaled_bias, has_scale_bias); + info, a0, a1, scale_a0, scale_a1, ba0, ba1, scaled_bias, has_scale_bias); } else { diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index c058e988a5b..bc743ff2a6b 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -7477,8 +7477,8 @@ def matmulinteger_int8_uint8_dual_zero_zp_test(): def matmulintegertofloat_test(): m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) - s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4, 3]) - s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [2]) y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) node = onnx.helper.make_node( @@ -7494,8 +7494,8 @@ def matmulintegertofloat_test(): def matmulintegertofloat_zp_test(): m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) - s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4, 3]) - s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [2]) zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) @@ -7513,8 +7513,8 @@ def matmulintegertofloat_zp_test(): def matmulintegertofloat_zp_bias_test(): m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) m2 = helper.make_tensor_value_info('2', TensorProto.INT8, 
[3, 2]) - s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4, 3]) - s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [2]) zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) b1 = helper.make_tensor('7', TensorProto.UINT8, [2], [128, -128]) @@ -7650,8 +7650,8 @@ def matmulintegertofloat_bad_bias_test3(): def matmulintegertofloat_half_test(): m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) - s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) - s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [2]) y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) node = onnx.helper.make_node( @@ -7667,8 +7667,8 @@ def matmulintegertofloat_half_test(): def matmulintegertofloat_half_zp_test(): m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) - s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) - s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [2]) zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) @@ -7686,8 +7686,8 @@ def matmulintegertofloat_half_zp_test(): def matmulintegertofloat_half_zp_bias_test(): m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, 
[3, 2]) - s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) - s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [2]) zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) b1 = helper.make_tensor('7', TensorProto.FLOAT16, [2], [128, -128]) diff --git a/test/onnx/matmulintegertofloat_half_test.onnx b/test/onnx/matmulintegertofloat_half_test.onnx index bd7f74cb641..d692ab3193e 100644 --- a/test/onnx/matmulintegertofloat_half_test.onnx +++ b/test/onnx/matmulintegertofloat_half_test.onnx @@ -1,4 +1,4 @@ - matmulintegertofloat_half_test: + matmulintegertofloat_half_test: % 1 2 @@ -9,18 +9,18 @@  Z 2 -  - -Z -3 -  - - -Z -4 -  - +   +Z +3 + + + +Z +4 + + + b y  diff --git a/test/onnx/matmulintegertofloat_test.onnx b/test/onnx/matmulintegertofloat_test.onnx index 1eb839d3baa..cd63e7f5ac7 100644 --- a/test/onnx/matmulintegertofloat_test.onnx +++ b/test/onnx/matmulintegertofloat_test.onnx @@ -1,4 +1,4 @@ - matmulintegertofloat_test: + matmulintegertofloat_test: % 1 2 @@ -11,14 +11,14 @@ 2   -Z -3 -  - -Z -4 -  - +Z +3 + + +Z +4 + + b y  diff --git a/test/onnx/parse/matmulintegertofloat_half_test.cpp b/test/onnx/parse/matmulintegertofloat_half_test.cpp index a280aec93d5..44f7ab7db14 100644 --- a/test/onnx/parse/matmulintegertofloat_half_test.cpp +++ b/test/onnx/parse/matmulintegertofloat_half_test.cpp @@ -28,17 +28,13 @@ TEST_CASE(matmulintegertofloat_half_test) { migraphx::program p; auto* mm = p.get_main_module(); - auto l0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {3, 6, 16}}); - auto l1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 16, 8}}); - auto s0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::half_type, {3, 6, 16}}); - auto s1 = mm->add_parameter("2", 
migraphx::shape{migraphx::shape::half_type, {3, 16, 8}}); - s0 = mm->add_instruction( - migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), s0); - s1 = mm->add_instruction( - migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), s1); + auto x0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {4, 3}}); + auto x1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::uint8_type, {3, 2}}); + auto scale_x0 = mm->add_parameter("3", migraphx::shape{migraphx::shape::half_type, {4}}); + auto scale_x1 = mm->add_parameter("4", migraphx::shape{migraphx::shape::half_type, {2}}); - auto r0 = mm->add_instruction(migraphx::make_op("dot"), l0, s0); - auto r1 = mm->add_instruction(migraphx::make_op("dot"), l1, s1); + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, scale_x0); + auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, scale_x1); mm->add_instruction(migraphx::make_op("dot"), r0, r1); auto prog = optimize_onnx("matmulintegertofloat_half_test.onnx"); diff --git a/test/onnx/parse/matmulintegertofloat_test.cpp b/test/onnx/parse/matmulintegertofloat_test.cpp index 6c77656006c..0bdd2e68934 100644 --- a/test/onnx/parse/matmulintegertofloat_test.cpp +++ b/test/onnx/parse/matmulintegertofloat_test.cpp @@ -28,17 +28,13 @@ TEST_CASE(matmulintegertofloat_test) { migraphx::program p; auto* mm = p.get_main_module(); - auto l0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {3, 6, 16}}); - auto l1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 16, 8}}); - auto s0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::half_type, {3, 6, 16}}); - auto s1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::half_type, {3, 16, 8}}); - s0 = mm->add_instruction( - migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), s0); - s1 = mm->add_instruction( - migraphx::make_op("convert", {{"target_type", 
migraphx::shape::float_type}}), s1); + auto x0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {4, 3}}); + auto x1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 2}}); + auto scale_x0 = mm->add_parameter("3", migraphx::shape{migraphx::shape::float_type, {4}}); + auto scale_x1 = mm->add_parameter("4", migraphx::shape{migraphx::shape::float_type, {2}}); - auto r0 = mm->add_instruction(migraphx::make_op("dot"), l0, s0); - auto r1 = mm->add_instruction(migraphx::make_op("dot"), l1, s1); + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, scale_x0); + auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, scale_x1); mm->add_instruction(migraphx::make_op("dot"), r0, r1); auto prog = optimize_onnx("matmulintegertofloat_test.onnx"); From 547826d88db0c2c7ec13e922209a5e22838a30d3 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Wed, 16 Oct 2024 02:46:35 +0000 Subject: [PATCH 07/10] Update parser to use dequantizelinear --- src/onnx/parse_matmul.cpp | 58 ++++++++++++++++--- .../parse/matmulintegertofloat_half_test.cpp | 16 ++++- test/onnx/parse/matmulintegertofloat_test.cpp | 16 ++++- 3 files changed, 79 insertions(+), 11 deletions(-) diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index 68b0a2e9e4e..4600b694c37 100644 --- a/src/onnx/parse_matmul.cpp +++ b/src/onnx/parse_matmul.cpp @@ -199,18 +199,47 @@ struct parse_matmul : op_parser } } + static void handle_scaled_transposes(const onnx_parser::node_info& info, + instruction_ref& scale_a0, + instruction_ref& zp_a0, + bool has_zp) + { + if(has_zp) + { + scale_a0 = + info.add_instruction(make_op("transpose", {{"permutation", {0, 1}}}), scale_a0); + } + else + { + scale_a0 = + info.add_instruction(make_op("transpose", {{"permutation", {0, 1}}}), scale_a0); + zp_a0 = info.add_instruction(make_op("transpose", {{"permutation", {0, 1}}}), zp_a0); + } + } + static instruction_ref handle_dequantized(const 
onnx_parser::node_info& info, const instruction_ref& a0, const instruction_ref& scale_a0, - const instruction_ref& zp_a0) + const instruction_ref& zp_a0, + bool has_zp) { instruction_ref dequantized_op; - if(a0 == zp_a0) - dequantized_op = info.add_instruction(make_op("dequantizelinear"), a0, scale_a0); + if(has_zp) + { + auto bc_scale_a0 = info.add_instruction( + make_op("multibroadcast", {{"out_lens", a0->get_shape().lens()}}), scale_a0); + dequantized_op = info.add_instruction(make_op("dequantizelinear"), a0, bc_scale_a0); + } else - dequantized_op = info.add_instruction(make_op("dequantizelinear"), a0, scale_a0, zp_a0); - + { + auto bc_scale_a0 = info.add_instruction( + make_op("multibroadcast", {{"out_lens", a0->get_shape().lens()}}), scale_a0); + auto bc_zp_a0 = info.add_instruction( + make_op("multibroadcast", {{"out_lens", a0->get_shape().lens()}}), zp_a0); + dequantized_op = + info.add_instruction(make_op("dequantizelinear"), a0, bc_scale_a0, bc_zp_a0); + } return dequantized_op; } @@ -224,8 +253,23 @@ struct parse_matmul : op_parser const instruction_ref& scaled_bias, const bool has_scale_bias) { - auto dq_a0 = handle_dequantized(info, a0, scale_a0, zp_a0); - auto dq_a1 = handle_dequantized(info, a1, scale_a1, zp_a1); + + instruction_ref unsq_zp_a0; + instruction_ref unsq_zp_a1; + + bool a0_has_no_zp = (a0 == zp_a0); + bool a1_has_no_zp = (a1 == zp_a1); + + auto unsq_scale_a0 = info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), scale_a0); + auto dq_a0 = handle_dequantized(info, a0, unsq_scale_a0, unsq_zp_a0, a0_has_no_zp); + + // Transpose second input to get column dims before we broadcast to dequantizelinear + auto unsq_scale_a1 = info.add_instruction(make_op("unsqueeze", {{"axes", {0}}}), scale_a1); + instruction_ref scale_a1_tp = unsq_scale_a1; + instruction_ref zp_a1_tp = unsq_zp_a1; + handle_scaled_transposes(info, scale_a1_tp, zp_a1_tp, a1_has_no_zp); + + auto dq_a1 = handle_dequantized(info, a1, scale_a1_tp, zp_a1_tp, a1_has_no_zp); auto 
res = info.add_instruction(make_op("dot"), dq_a0, dq_a1); // Handle case of the bias after scaling diff --git a/test/onnx/parse/matmulintegertofloat_half_test.cpp b/test/onnx/parse/matmulintegertofloat_half_test.cpp index 44f7ab7db14..654fda27101 100644 --- a/test/onnx/parse/matmulintegertofloat_half_test.cpp +++ b/test/onnx/parse/matmulintegertofloat_half_test.cpp @@ -33,8 +33,20 @@ TEST_CASE(matmulintegertofloat_half_test) auto scale_x0 = mm->add_parameter("3", migraphx::shape{migraphx::shape::half_type, {4}}); auto scale_x1 = mm->add_parameter("4", migraphx::shape{migraphx::shape::half_type, {2}}); - auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, scale_x0); - auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, scale_x1); + auto sq_scale_x0 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), scale_x0); + auto bc_scale_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_scale_x0); + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, bc_scale_x0); + + auto sq_scale_x1 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), scale_x1); + auto t_sq_scale_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1}}}), sq_scale_x1); + auto bc_scale_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_scale_x1); + + auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, bc_scale_x1); mm->add_instruction(migraphx::make_op("dot"), r0, r1); auto prog = optimize_onnx("matmulintegertofloat_half_test.onnx"); diff --git a/test/onnx/parse/matmulintegertofloat_test.cpp b/test/onnx/parse/matmulintegertofloat_test.cpp index 0bdd2e68934..7570d2080f5 100644 --- a/test/onnx/parse/matmulintegertofloat_test.cpp +++ b/test/onnx/parse/matmulintegertofloat_test.cpp @@ -33,8 +33,20 @@ TEST_CASE(matmulintegertofloat_test) auto scale_x0 
= mm->add_parameter("3", migraphx::shape{migraphx::shape::float_type, {4}}); auto scale_x1 = mm->add_parameter("4", migraphx::shape{migraphx::shape::float_type, {2}}); - auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, scale_x0); - auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, scale_x1); + auto sq_scale_x0 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), scale_x0); + auto bc_scale_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_scale_x0); + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, bc_scale_x0); + + auto sq_scale_x1 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), scale_x1); + auto t_sq_scale_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1}}}), sq_scale_x1); + auto bc_scale_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_scale_x1); + + auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, bc_scale_x1); mm->add_instruction(migraphx::make_op("dot"), r0, r1); auto prog = optimize_onnx("matmulintegertofloat_test.onnx"); From c6d8679c68706d3e168b6173729e2087592bb4ec Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Thu, 17 Oct 2024 20:58:02 +0000 Subject: [PATCH 08/10] Add test case for zero point in matmulintegertofloat Needed to update initial tests and gen onnx --- src/onnx/parse_matmul.cpp | 20 +++-- test/onnx/gen_onnx.py | 25 ++++++- .../matmulintegertofloat_half_zp_test.onnx | Bin 250 -> 255 bytes test/onnx/matmulintegertofloat_zp_test.onnx | Bin 240 -> 245 bytes .../matmulintegertofloat_half_zp_test.cpp | 69 ++++++++++++++++++ .../parse/matmulintegertofloat_zp_test.cpp | 69 ++++++++++++++++++ 6 files changed, 174 insertions(+), 9 deletions(-) create mode 100644 test/onnx/parse/matmulintegertofloat_half_zp_test.cpp create mode 100644 
test/onnx/parse/matmulintegertofloat_zp_test.cpp diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index 4600b694c37..24d316aa48c 100644 --- a/src/onnx/parse_matmul.cpp +++ b/src/onnx/parse_matmul.cpp @@ -202,9 +202,9 @@ struct parse_matmul : op_parser static void handle_scaled_transposes(const onnx_parser::node_info& info, instruction_ref& scale_a0, instruction_ref& zp_a0, - bool has_zp) + bool no_zp) { - if(has_zp) + if(no_zp) { scale_a0 = info.add_instruction(make_op("transpose", {{"permutation", {0, 1}}}), scale_a0); @@ -213,7 +213,7 @@ struct parse_matmul : op_parser { scale_a0 = info.add_instruction(make_op("transpose", {{"permutation", {0, 1}}}), scale_a0); - zp_a0 = info.add_instruction(make_op("transpose", {{"permutation", {0, 1}}}), zp_a0); + zp_a0 = info.add_instruction(make_op("transpose", {{"permutation", {1, 0}}}), zp_a0); } } @@ -221,11 +221,11 @@ struct parse_matmul : op_parser const instruction_ref& a0, const instruction_ref& scale_a0, const instruction_ref& zp_a0, - bool has_zp) + bool no_zp) { instruction_ref dequantized_op; - if(has_zp) + if(no_zp) { auto bc_scale_a0 = info.add_instruction( make_op("multibroadcast", {{"out_lens", a0->get_shape().lens()}}), scale_a0); @@ -235,8 +235,10 @@ struct parse_matmul : op_parser { auto bc_scale_a0 = info.add_instruction( make_op("multibroadcast", {{"out_lens", a0->get_shape().lens()}}), scale_a0); + auto bc_zp_a0 = info.add_instruction( make_op("multibroadcast", {{"out_lens", a0->get_shape().lens()}}), zp_a0); + dequantized_op = info.add_instruction(make_op("dequantizelinear"), a0, bc_scale_a0, bc_zp_a0); } @@ -261,7 +263,13 @@ struct parse_matmul : op_parser bool a1_has_no_zp = (a1 == zp_a1); auto unsq_scale_a0 = info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), scale_a0); - auto dq_a0 = handle_dequantized(info, a0, unsq_scale_a0, unsq_zp_a0, a0_has_no_zp); + if(not a0_has_no_zp) + unsq_zp_a0 = info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), zp_a0); + + if(not 
a1_has_no_zp) + unsq_zp_a1 = info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), zp_a1); + + auto dq_a0 = handle_dequantized(info, a0, unsq_scale_a0, unsq_zp_a0, a0_has_no_zp); // Transpose second input to get column dims before we broadcast to dequantizelinear auto unsq_scale_a1 = info.add_instruction(make_op("unsqueeze", {{"axes", {0}}}), scale_a1); diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index bc743ff2a6b..57b02347579 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -7496,8 +7496,8 @@ def matmulintegertofloat_zp_test(): m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4]) s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [2]) - zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) - zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + zp1 = helper.make_tensor_value_info('5', TensorProto.INT8, [4]) + zp2 = helper.make_tensor_value_info('6', TensorProto.INT8, [2]) y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) node = onnx.helper.make_node( @@ -7506,7 +7506,7 @@ def matmulintegertofloat_zp_test(): outputs=['y'], ) - return ([node], [m1, m2, s1, s2], [y], [zp1, zp2]) + return ([node], [m1, m2, s1, s2, zp1, zp2], [y], []) @onnx_test() @@ -7665,6 +7665,25 @@ def matmulintegertofloat_half_test(): @onnx_test() def matmulintegertofloat_half_zp_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [2]) + zp1 = helper.make_tensor_value_info('5', TensorProto.INT8, [4]) + zp2 = helper.make_tensor_value_info('6', TensorProto.UINT8, [2]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', 
'6'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2, zp1, zp2], [y], []) + + +@onnx_test() +def matmulintegertofloat_half_scalar_zp_test(): m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4]) diff --git a/test/onnx/matmulintegertofloat_half_zp_test.onnx b/test/onnx/matmulintegertofloat_half_zp_test.onnx index 10e47d781e30fdd37aea97f69707b96b6c748285..05a324c2aae2dec0e419e5fab731c81a46eae4a7 100644 GIT binary patch delta 93 zcmeyx_@8lt8sp`O>VXq`<-M7NIJlTNn1Prnil2+oScr>@gM&+m1t`D*5->p$0IM(s S3os*8n1KbDgjgotl?4FDfD9Y} delta 115 zcmey*_=|CZ8soW%>VcwK90JT*j0{eUrdpf=Oj=A0j82SZ6D#EXn1wjFm^he$m?=t_ mi_ut!kBf(c3oOV2k^l*sAPIt1CIJO2!GdgXl}t`d0>S|Mr41DT diff --git a/test/onnx/matmulintegertofloat_zp_test.onnx b/test/onnx/matmulintegertofloat_zp_test.onnx index 7d2b46c74ee1bd91b0d8a6023a59eece01163b31..5054fb793a13784800d73e8a48be7e5f4c4eb5bd 100644 GIT binary patch delta 107 zcmeys_?2;jBICJ$LvxHvc%g;= + +TEST_CASE(matmulintegertofloat_half_zp_test) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + auto x0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {4, 3}}); + auto x1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::uint8_type, {3, 2}}); + auto scale_x0 = mm->add_parameter("3", migraphx::shape{migraphx::shape::half_type, {4}}); + auto scale_x1 = mm->add_parameter("4", migraphx::shape{migraphx::shape::half_type, {2}}); + auto zp_x0 = mm->add_parameter("5", migraphx::shape{migraphx::shape::int8_type, {4}}); + auto zp_x1 = mm->add_parameter("6", migraphx::shape{migraphx::shape::uint8_type, {2}}); + + auto sq_scale_x0 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), scale_x0); + auto sq_zp_x0 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), zp_x0); + auto sq_zp_x1 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", 
{-1}}}), zp_x1); + + auto bc_scale_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_scale_x0); + auto bc_zp_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_zp_x0); + + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, bc_scale_x0, bc_zp_x0); + + auto sq_scale_x1 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), scale_x1); + + auto t_sq_scale_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1}}}), sq_scale_x1); + auto t_sq_zp_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {1, 0}}}), sq_zp_x1); + auto bc_scale_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_scale_x1); + + auto bc_zp_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_zp_x1); + + auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, bc_scale_x1, bc_zp_x1); + mm->add_instruction(migraphx::make_op("dot"), r0, r1); + + auto prog = optimize_onnx("matmulintegertofloat_half_zp_test.onnx"); + + EXPECT(p == prog); +} diff --git a/test/onnx/parse/matmulintegertofloat_zp_test.cpp b/test/onnx/parse/matmulintegertofloat_zp_test.cpp new file mode 100644 index 00000000000..26af4b00430 --- /dev/null +++ b/test/onnx/parse/matmulintegertofloat_zp_test.cpp @@ -0,0 +1,69 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_zp_test) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + auto x0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {4, 3}}); + auto x1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 2}}); + auto scale_x0 = mm->add_parameter("3", migraphx::shape{migraphx::shape::float_type, {4}}); + auto scale_x1 = mm->add_parameter("4", migraphx::shape{migraphx::shape::float_type, {2}}); + auto zp_x0 = mm->add_parameter("5", migraphx::shape{migraphx::shape::int8_type, {4}}); + auto zp_x1 = mm->add_parameter("6", migraphx::shape{migraphx::shape::int8_type, {2}}); + + auto sq_scale_x0 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), scale_x0); + auto sq_zp_x0 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), zp_x0); + auto sq_zp_x1 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), zp_x1); + + auto bc_scale_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_scale_x0); + auto bc_zp_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_zp_x0); + + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, bc_scale_x0, bc_zp_x0); + + auto sq_scale_x1 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), scale_x1); + + auto t_sq_scale_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1}}}), sq_scale_x1); + auto t_sq_zp_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {1, 0}}}), sq_zp_x1); + auto bc_scale_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_scale_x1); + + auto bc_zp_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_zp_x1); + + auto r1 = 
mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, bc_scale_x1, bc_zp_x1); + mm->add_instruction(migraphx::make_op("dot"), r0, r1); + + auto prog = optimize_onnx("matmulintegertofloat_zp_test.onnx"); + + EXPECT(p == prog); +} From 6d89fdd628f3bb5979ef2f2f44b6c8e56e9b688c Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Thu, 17 Oct 2024 22:32:34 +0000 Subject: [PATCH 09/10] Add parser tests for scalar inputs in matmulintegertofloat --- src/onnx/parse_matmul.cpp | 27 ++++++- test/onnx/gen_onnx.py | 38 +++++++++- ...atmulintegertofloat_scalar_scale_test.onnx | Bin 0 -> 221 bytes .../matmulintegertofloat_scalar_zp_test.onnx | 32 ++++++++ ...matmulintegertofloat_scalar_scale_test.cpp | 59 +++++++++++++++ .../matmulintegertofloat_scalar_zp_test.cpp | 71 ++++++++++++++++++ 6 files changed, 223 insertions(+), 4 deletions(-) create mode 100644 test/onnx/matmulintegertofloat_scalar_scale_test.onnx create mode 100644 test/onnx/matmulintegertofloat_scalar_zp_test.onnx create mode 100644 test/onnx/parse/matmulintegertofloat_scalar_scale_test.cpp create mode 100644 test/onnx/parse/matmulintegertofloat_scalar_zp_test.cpp diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index 24d316aa48c..f62d839c480 100644 --- a/src/onnx/parse_matmul.cpp +++ b/src/onnx/parse_matmul.cpp @@ -108,7 +108,9 @@ struct parse_matmul : op_parser return all_zeros; } - static instruction_ref set_scale_arg(const std::vector& args, const int index) + static instruction_ref set_scale_arg(const onnx_parser::node_info& info, + const std::vector& args, + const int index) { instruction_ref scale_arg = args[index]; std::set supported_dq_types = {migraphx::shape::float_type, @@ -119,6 +121,11 @@ struct parse_matmul : op_parser MIGRAPHX_THROW("PARSE_QUANT_DOT_SCALDED: Scales must be float or half_type"); } + if(scale_arg->get_shape().scalar()) + { + scale_arg = info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), scale_arg); + } + return scale_arg; } @@ -264,10 +271,24 @@ 
struct parse_matmul : op_parser auto unsq_scale_a0 = info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), scale_a0); if(not a0_has_no_zp) + { unsq_zp_a0 = info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), zp_a0); + if(zp_a0->get_shape().scalar()) + { + unsq_zp_a0 = + info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), unsq_zp_a0); + } + } if(not a1_has_no_zp) + { unsq_zp_a1 = info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), zp_a1); + if(zp_a1->get_shape().scalar()) + { + unsq_zp_a1 = + info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), unsq_zp_a1); + } + } auto dq_a0 = handle_dequantized(info, a0, unsq_scale_a0, unsq_zp_a0, a0_has_no_zp); @@ -366,8 +387,8 @@ struct parse_matmul : op_parser { a0_zp_index = 4; a1_zp_index = 5; - scale_a0 = set_scale_arg(args, 2); - scale_a1 = set_scale_arg(args, 3); + scale_a0 = set_scale_arg(info, args, 2); + scale_a1 = set_scale_arg(info, args, 3); if(scale_a0->get_shape().type() != scale_a1->get_shape().type()) { MIGRAPHX_THROW("PARSE_MATMULINTEGERTOFLOAT: Scales must be the same type"); diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index 57b02347579..42cd02e05b0 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -7509,6 +7509,42 @@ def matmulintegertofloat_zp_test(): return ([node], [m1, m2, s1, s2, zp1, zp2], [y], []) +@onnx_test() +def matmulintegertofloat_scalar_zp_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [2]) + zp1 = helper.make_tensor_value_info('5', TensorProto.INT8, [4]) + zp2 = helper.make_tensor('6', TensorProto.INT8, [], [129]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6'], + outputs=['y'], + ) + + 
return ([node], [m1, m2, s1, s2, zp1], [y], [zp2]) + + +@onnx_test() +def matmulintegertofloat_scalar_scale_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4]) + s2 = helper.make_tensor('4', TensorProto.FLOAT, [], [10]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1], [y], [s2]) + + @onnx_test() def matmulintegertofloat_zp_bias_test(): m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) @@ -7526,7 +7562,7 @@ def matmulintegertofloat_zp_bias_test(): outputs=['y'], ) - return ([node], [m1, m2, s1, s2], [y], [zp1, zp2, b1]) + return ([node], [m1, m2, s1, s2, zp1, zp2, b1], [y], []) @onnx_test() diff --git a/test/onnx/matmulintegertofloat_scalar_scale_test.onnx b/test/onnx/matmulintegertofloat_scalar_scale_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0c7401ea7429c9db956e559bdc22c1795d18dfce GIT binary patch literal 221 zcmdNBO;0T<$xqA4Pb`TqPEO28ECN%h@g=FnC01(~xm39r4Y?SN zxEPJO7)^v2E0sii6H9zcb37q>L-O4~dWkhri(7zEiG_hd!O@A)BuW@)wh$i|4+pal w2Nx3u3lKAd1dWgcnSp{#QT#yrg}AslI2eUMs#ubMsw%;%*x*{3oR|cJ0h0eQYybcN literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_scalar_zp_test.onnx b/test/onnx/matmulintegertofloat_scalar_zp_test.onnx new file mode 100644 index 00000000000..4ae2e16da23 --- /dev/null +++ b/test/onnx/matmulintegertofloat_scalar_zp_test.onnx @@ -0,0 +1,32 @@ + #matmulintegertofloat_scalar_zp_test: ++ +1 +2 +3 +4 +5 +6y"MatMulIntegerToFloat#matmulintegertofloat_scalar_zp_test* *B6Z +1 +  + +Z +2 +  + +Z +3 + + +Z +4 + + +Z +5 + + +b +y +  + +B \ No newline at end of file diff --git a/test/onnx/parse/matmulintegertofloat_scalar_scale_test.cpp 
b/test/onnx/parse/matmulintegertofloat_scalar_scale_test.cpp new file mode 100644 index 00000000000..b4c7be6ff11 --- /dev/null +++ b/test/onnx/parse/matmulintegertofloat_scalar_scale_test.cpp @@ -0,0 +1,59 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_scalar_scale_test) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + auto scale_x1 = mm->add_literal( + migraphx::literal(migraphx::shape{migraphx::shape::float_type, {1}, {0}}, {10})); + auto x0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {4, 3}}); + auto x1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 2}}); + auto scale_x0 = mm->add_parameter("3", migraphx::shape{migraphx::shape::float_type, {4}}); + + auto sq_scale_x1 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), scale_x1); + + auto sq_scale_x0 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), scale_x0); + auto bc_scale_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_scale_x0); + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, bc_scale_x0); + + sq_scale_x1 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), sq_scale_x1); + + auto t_sq_scale_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1}}}), sq_scale_x1); + auto bc_scale_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_scale_x1); + + auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, bc_scale_x1); + mm->add_instruction(migraphx::make_op("dot"), r0, r1); + + auto prog = optimize_onnx("matmulintegertofloat_scalar_scale_test.onnx"); + + EXPECT(p == prog); +} diff --git a/test/onnx/parse/matmulintegertofloat_scalar_zp_test.cpp b/test/onnx/parse/matmulintegertofloat_scalar_zp_test.cpp new file mode 100644 index 00000000000..5ce4240950c --- /dev/null +++ b/test/onnx/parse/matmulintegertofloat_scalar_zp_test.cpp @@ -0,0 +1,71 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_scalar_zp_test) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + auto zp_x1 = mm->add_literal( + migraphx::literal(migraphx::shape{migraphx::shape::int8_type, {1}, {0}}, {129})); + auto x0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {4, 3}}); + auto x1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 2}}); + auto scale_x0 = mm->add_parameter("3", migraphx::shape{migraphx::shape::float_type, {4}}); + auto scale_x1 = mm->add_parameter("4", migraphx::shape{migraphx::shape::float_type, {2}}); + auto zp_x0 = mm->add_parameter("5", migraphx::shape{migraphx::shape::int8_type, {4}}); + + auto sq_scale_x0 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), scale_x0); + auto sq_zp_x0 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), zp_x0); + auto sq_zp_x1 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), zp_x1); + sq_zp_x1 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), sq_zp_x1); + + auto bc_scale_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_scale_x0); + auto bc_zp_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_zp_x0); + + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, bc_scale_x0, bc_zp_x0); + + auto sq_scale_x1 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), scale_x1); + + auto t_sq_scale_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1}}}), sq_scale_x1); + auto t_sq_zp_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {1, 0}}}), sq_zp_x1); + auto bc_scale_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_scale_x1); + + auto bc_zp_x1 = mm->add_instruction( + 
migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_zp_x1); + + auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, bc_scale_x1, bc_zp_x1); + mm->add_instruction(migraphx::make_op("dot"), r0, r1); + + auto prog = optimize_onnx("matmulintegertofloat_scalar_zp_test.onnx"); + + EXPECT(p == prog); +} From c0c8120f47dc739573d7d01d9339aaeb6943e661 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Thu, 17 Oct 2024 23:13:09 +0000 Subject: [PATCH 10/10] Add parser for bias with zero points --- test/onnx/gen_onnx.py | 6 +- .../matmulintegertofloat_zp_bias_test.onnx | Bin 276 -> 275 bytes .../matmulintegertofloat_zp_bias_test.cpp | 75 ++++++++++++++++++ 3 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 test/onnx/parse/matmulintegertofloat_zp_bias_test.cpp diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index 42cd02e05b0..e5692e4e855 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -7551,9 +7551,9 @@ def matmulintegertofloat_zp_bias_test(): m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4]) s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [2]) - zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) - zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) - b1 = helper.make_tensor('7', TensorProto.UINT8, [2], [128, -128]) + zp1 = helper.make_tensor_value_info('5', TensorProto.INT8, [4]) + zp2 = helper.make_tensor_value_info('6', TensorProto.INT8, [2]) + b1 = helper.make_tensor_value_info('7', TensorProto.FLOAT, [2]) y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) node = onnx.helper.make_node( diff --git a/test/onnx/matmulintegertofloat_zp_bias_test.onnx b/test/onnx/matmulintegertofloat_zp_bias_test.onnx index 07af336b78b5d1bb9faeacef568beb2a08ab6956..87d5aeab53789fff3e58b7c1072c3ad8dded9cf8 100644 GIT binary patch delta 98 
zcmbQjG?{6F8sqbc>LC-CYX|UiF&YbTadB`k3bAl8aj--I1x%0xm_P!iU;$=?3Ns`D MunKdSmWkhG0n5M)0ssI2 delta 126 zcmbQtG=*t`8sm$J>LEH>90JT*j0{eUrdpf=Oj=A0j82SZTB00GKn_m>W5a(a01BH= ytk!lC=3+D!;^X4sU=-ruV&Y%{V&*8Ipb3&7Gf5C#C${1wXp diff --git a/test/onnx/parse/matmulintegertofloat_zp_bias_test.cpp b/test/onnx/parse/matmulintegertofloat_zp_bias_test.cpp new file mode 100644 index 00000000000..b60ad31a6f5 --- /dev/null +++ b/test/onnx/parse/matmulintegertofloat_zp_bias_test.cpp @@ -0,0 +1,75 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_zp_bias_test) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + auto x0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {4, 3}}); + auto x1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 2}}); + auto scale_x0 = mm->add_parameter("3", migraphx::shape{migraphx::shape::float_type, {4}}); + auto scale_x1 = mm->add_parameter("4", migraphx::shape{migraphx::shape::float_type, {2}}); + auto zp_x0 = mm->add_parameter("5", migraphx::shape{migraphx::shape::int8_type, {4}}); + auto zp_x1 = mm->add_parameter("6", migraphx::shape{migraphx::shape::int8_type, {2}}); + auto bias = mm->add_parameter("7", migraphx::shape{migraphx::shape::float_type, {2}}); + + auto sq_scale_x0 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), scale_x0); + auto sq_zp_x0 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), zp_x0); + auto sq_zp_x1 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), zp_x1); + + auto bc_scale_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_scale_x0); + auto bc_zp_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_zp_x0); + + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, bc_scale_x0, bc_zp_x0); + + auto sq_scale_x1 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), scale_x1); + + auto t_sq_scale_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1}}}), sq_scale_x1); + auto t_sq_zp_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {1, 0}}}), sq_zp_x1); + auto bc_scale_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_scale_x1); + + auto bc_zp_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", 
x1->get_shape().lens()}}), t_sq_zp_x1); + + auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, bc_scale_x1, bc_zp_x1); + auto dot = mm->add_instruction(migraphx::make_op("dot"), r0, r1); + + auto mb_bias = + mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {4, 2}}}), bias); + + mm->add_instruction(migraphx::make_op("sub"), dot, mb_bias); + + auto prog = optimize_onnx("matmulintegertofloat_zp_bias_test.onnx"); + + EXPECT(p == prog); +}