From b19ce168dedc2c3a04b80832f1fcb5b82b6c75da Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Mon, 16 Sep 2024 02:31:13 +0000 Subject: [PATCH 01/10] Initial commit of parser changes to handle MatMulIntegerToFloat --- src/onnx/parse_matmul.cpp | 54 +++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index 9ded26a4b8f..410a8cbc6e9 100644 --- a/src/onnx/parse_matmul.cpp +++ b/src/onnx/parse_matmul.cpp @@ -35,7 +35,9 @@ struct parse_matmul : op_parser { std::vector operators() const { - return {{"MatMul", "dot"}, {"MatMulInteger", "quant_dot"}}; + return {{"MatMul", "dot"}, + {"MatMulInteger", "quant_dot"}, + {"MatMulIntegerToFloat", "quant_dot_scaled"}}; } static void broadcast_dimensions(const onnx_parser::node_info& info, @@ -106,6 +108,20 @@ struct parse_matmul : op_parser return all_zeros; } + static instruction_ref set_scale_arg(const std::vector& args, const int index) + { + instruction_ref scale_arg = args[index]; + std::set supported_dq_types = {migraphx::shape::float_type, + migraphx::shape::half_type}; + + if(not(contains(supported_dq_types, scale_arg->get_shape().type()))) + { + MIGRAPHX_THROW("PARSE_QUANT_DOT_SCALDED: Scales must be float or half_type"); + } + + return scale_arg; + } + static instruction_ref set_bias_arg(const std::vector& args, const int index, const instruction_ref& input, @@ -172,7 +188,8 @@ struct parse_matmul : op_parser a1 = info.add_instruction(make_op("unsqueeze", {{"axes", {1}}}), args[1]); } - auto is_quant_dot = opd.op_name == "quant_dot"; + auto is_quant_dot = opd.op_name == "quant_dot" or opd.op_name == "quant_dot_scaled"; + auto has_scales = opd.op_name == "quant_dot_scaled"; if(s0.dynamic() or s1.dynamic()) { if(is_quant_dot) @@ -207,8 +224,23 @@ struct parse_matmul : op_parser bool has_ba0 = false; bool has_ba1 = false; - instruction_ref ba0 = set_bias_arg(args, 2, a0, has_ba0); - instruction_ref ba1 = 
set_bias_arg(args, 3, a1, has_ba1); + + int a0_zp_index = 2; + int a1_zp_index = 3; + + instruction_ref scale_a0; + instruction_ref scale_a1; + // Handles case with for when scales are present in operator + if(has_scales) + { + a0_zp_index = 4; + a1_zp_index = 5; + scale_a0 = set_scale_arg(args, 2); + scale_a1 = set_scale_arg(args, 3); + } + + instruction_ref ba0 = set_bias_arg(args, a0_zp_index, a0, has_ba0); + instruction_ref ba1 = set_bias_arg(args, a1_zp_index, a1, has_ba1); // Only INT8 or UINT8 type currently supported std::set supported_types = {migraphx::shape::uint8_type, @@ -254,7 +286,19 @@ struct parse_matmul : op_parser broadcast_dimensions(info, s0_lens, s1_lens, a0, a1, ba0, ba1); - dot_res = info.add_instruction(make_op(opd.op_name), ba0, ba1); + // Apply the scale to dequantize input to then perform a simple dot + // after the zero points are applied otherwise get a int32 output from the quantized + // equivalent + if(has_scales) + { + auto dq_a0 = info.add_common_op("mul", ba0, scale_a0); + auto dq_a1 = info.add_common_op("mul", ba1, scale_a1); + dot_res = info.add_instruction(make_op("dot"), dq_a0, dq_a1); + } + else + { + dot_res = info.add_instruction(make_op(opd.op_name), ba0, ba1); + } } // squeeze the appended or prepended dimensions From ae9f722e17909d8c80223955762e8887e7c7c413 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Mon, 16 Sep 2024 02:47:36 +0000 Subject: [PATCH 02/10] Update output to handle dot and broadcasted instead of mul --- src/onnx/parse_matmul.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index 410a8cbc6e9..36844ad0960 100644 --- a/src/onnx/parse_matmul.cpp +++ b/src/onnx/parse_matmul.cpp @@ -217,7 +217,7 @@ struct parse_matmul : op_parser auto s0_lens = a0->get_shape().lens(); auto s1_lens = a1->get_shape().lens(); - if(not is_quant_dot and args.size() > 2) + if(not is_quant_dot and args.size() > 2 and not has_scales) { 
MIGRAPHX_THROW("PARSE_MATMUL: Bias Args not supported for MatMul"); } @@ -288,11 +288,12 @@ struct parse_matmul : op_parser // Apply the scale to dequantize input to then perform a simple dot // after the zero points are applied otherwise get a int32 output from the quantized - // equivalent + // equivalent. Ensure these are broadcasted accordingly before we perform a dot if(has_scales) { - auto dq_a0 = info.add_common_op("mul", ba0, scale_a0); - auto dq_a1 = info.add_common_op("mul", ba1, scale_a1); + broadcast_dimensions(info, s0_lens, s1_lens, a0, a1, scale_a0, scale_a1); + auto dq_a0 = info.add_instrution(make_op("dot"), ba0, scale_a0); + auto dq_a1 = info.add_instruction(make_op("dot"), ba1, scale_a1); dot_res = info.add_instruction(make_op("dot"), dq_a0, dq_a1); } else From 7f62a33c48a7b8284761beaabf9a5b825722e116 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Mon, 16 Sep 2024 18:07:51 +0000 Subject: [PATCH 03/10] Fix typo --- src/onnx/parse_matmul.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index 36844ad0960..828da02e32e 100644 --- a/src/onnx/parse_matmul.cpp +++ b/src/onnx/parse_matmul.cpp @@ -292,7 +292,7 @@ struct parse_matmul : op_parser if(has_scales) { broadcast_dimensions(info, s0_lens, s1_lens, a0, a1, scale_a0, scale_a1); - auto dq_a0 = info.add_instrution(make_op("dot"), ba0, scale_a0); + auto dq_a0 = info.add_instruction(make_op("dot"), ba0, scale_a0); auto dq_a1 = info.add_instruction(make_op("dot"), ba1, scale_a1); dot_res = info.add_instruction(make_op("dot"), dq_a0, dq_a1); } From 92d8ea47ac5f1a6259a841a38757ee44125bea2f Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Tue, 17 Sep 2024 03:30:38 +0000 Subject: [PATCH 04/10] Add parser tests and updated gen onnx Updated parser to handle bias case as well as bad scale conditions Initial float/half tests bad scale tests bad bias tests --- src/onnx/parse_matmul.cpp | 60 ++++- test/onnx/gen_onnx.py | 229 
++++++++++++++++++ .../matmulintegertofloat_bad_bias_test.onnx | Bin 0 -> 270 bytes .../matmulintegertofloat_bad_bias_test2.onnx | Bin 0 -> 274 bytes .../matmulintegertofloat_bad_bias_test3.onnx | Bin 0 -> 269 bytes .../matmulintegertofloat_bad_scale2_test.onnx | Bin 0 -> 256 bytes .../matmulintegertofloat_bad_scale3_test.onnx | Bin 0 -> 256 bytes .../matmulintegertofloat_bad_scale_test.onnx | Bin 0 -> 254 bytes test/onnx/matmulintegertofloat_half_test.onnx | 28 +++ ...atmulintegertofloat_half_zp_bias_test.onnx | Bin 0 -> 280 bytes .../matmulintegertofloat_half_zp_test.onnx | Bin 0 -> 250 bytes test/onnx/matmulintegertofloat_test.onnx | 26 ++ .../matmulintegertofloat_zp_bias_test.onnx | Bin 0 -> 276 bytes test/onnx/matmulintegertofloat_zp_test.onnx | Bin 0 -> 240 bytes .../matmulintegerToFloat_bad_bias_test.cpp | 30 +++ .../matmulintegerToFloat_bad_bias_test2.cpp | 30 +++ .../matmulintegerToFloat_bad_bias_test3.cpp | 30 +++ .../matmulintegerToFloat_bad_scale2_test.cpp | 31 +++ .../matmulintegerToFloat_bad_scale3_test.cpp | 31 +++ .../matmulintegerToFloat_bad_scale_test.cpp | 30 +++ .../parse/matmulintegertofloat_half_test.cpp | 47 ++++ test/onnx/parse/matmulintegertofloat_test.cpp | 47 ++++ 22 files changed, 617 insertions(+), 2 deletions(-) create mode 100644 test/onnx/matmulintegertofloat_bad_bias_test.onnx create mode 100644 test/onnx/matmulintegertofloat_bad_bias_test2.onnx create mode 100644 test/onnx/matmulintegertofloat_bad_bias_test3.onnx create mode 100644 test/onnx/matmulintegertofloat_bad_scale2_test.onnx create mode 100644 test/onnx/matmulintegertofloat_bad_scale3_test.onnx create mode 100644 test/onnx/matmulintegertofloat_bad_scale_test.onnx create mode 100644 test/onnx/matmulintegertofloat_half_test.onnx create mode 100644 test/onnx/matmulintegertofloat_half_zp_bias_test.onnx create mode 100644 test/onnx/matmulintegertofloat_half_zp_test.onnx create mode 100644 test/onnx/matmulintegertofloat_test.onnx create mode 100644 
test/onnx/matmulintegertofloat_zp_bias_test.onnx create mode 100644 test/onnx/matmulintegertofloat_zp_test.onnx create mode 100644 test/onnx/parse/matmulintegerToFloat_bad_bias_test.cpp create mode 100644 test/onnx/parse/matmulintegerToFloat_bad_bias_test2.cpp create mode 100644 test/onnx/parse/matmulintegerToFloat_bad_bias_test3.cpp create mode 100644 test/onnx/parse/matmulintegerToFloat_bad_scale2_test.cpp create mode 100644 test/onnx/parse/matmulintegerToFloat_bad_scale3_test.cpp create mode 100644 test/onnx/parse/matmulintegerToFloat_bad_scale_test.cpp create mode 100644 test/onnx/parse/matmulintegertofloat_half_test.cpp create mode 100644 test/onnx/parse/matmulintegertofloat_test.cpp diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index 828da02e32e..af6ea509cef 100644 --- a/src/onnx/parse_matmul.cpp +++ b/src/onnx/parse_matmul.cpp @@ -122,6 +122,41 @@ struct parse_matmul : op_parser return scale_arg; } + static instruction_ref set_scale_bias(const std::vector& args, + const int index, + const migraphx::shape& scale_arg_shape, + const instruction_ref& compare_arg, + bool& has_valid_scale_bias) + { + has_valid_scale_bias = false; + + if(args.size() > index) + { + instruction_ref scale_bias_arg = args[index]; + std::set supported_dq_types = {migraphx::shape::float_type, + migraphx::shape::half_type}; + + if(not(contains(supported_dq_types, scale_bias_arg->get_shape().type()))) + { + MIGRAPHX_THROW("PARSE_QUANT_DOT_SCALDED: Bias must be float or half_type"); + } + + if(scale_bias_arg->get_shape().type() != scale_arg_shape.type()) + { + MIGRAPHX_THROW("PARSE_QUANT_DOT_SCALED: Bias must be the same type as scales"); + } + + if(scale_bias_arg->get_shape().lens().at(0) != compare_arg->get_shape().lens().at(1)) + { + MIGRAPHX_THROW("PARSE_QUANT_DOT_SCALED: Bias have same dim as matrix B column"); + } + + has_valid_scale_bias = true; + return scale_bias_arg; + } + return compare_arg; + } + static instruction_ref set_bias_arg(const std::vector& args, 
const int index, const instruction_ref& input, @@ -224,6 +259,7 @@ struct parse_matmul : op_parser bool has_ba0 = false; bool has_ba1 = false; + bool has_scale_bias = false; int a0_zp_index = 2; int a1_zp_index = 3; @@ -237,11 +273,22 @@ struct parse_matmul : op_parser a1_zp_index = 5; scale_a0 = set_scale_arg(args, 2); scale_a1 = set_scale_arg(args, 3); + if(scale_a0->get_shape().type() != scale_a1->get_shape().type()) + { + MIGRAPHX_THROW("PARSE_MATMULINTEGERTOFLOAT: Scales must be the same type"); + } } instruction_ref ba0 = set_bias_arg(args, a0_zp_index, a0, has_ba0); instruction_ref ba1 = set_bias_arg(args, a1_zp_index, a1, has_ba1); + // handle optional bias arg to the result + instruction_ref scaled_bias; + if(has_scales) + { + scaled_bias = set_scale_bias(args, 6, scale_a1->get_shape(), a1, has_scale_bias); + } + // Only INT8 or UINT8 type currently supported std::set supported_types = {migraphx::shape::uint8_type, migraphx::shape::int8_type}; @@ -292,9 +339,18 @@ struct parse_matmul : op_parser if(has_scales) { broadcast_dimensions(info, s0_lens, s1_lens, a0, a1, scale_a0, scale_a1); - auto dq_a0 = info.add_instruction(make_op("dot"), ba0, scale_a0); - auto dq_a1 = info.add_instruction(make_op("dot"), ba1, scale_a1); + // Convert if we're half types as dot will scream if we try to multipy half int8 + ba0 = info.add_instruction( + make_op("convert", {{"target_type", scale_a0->get_shape().type()}}), ba0); + ba1 = info.add_instruction( + make_op("convert", {{"target_type", scale_a1->get_shape().type()}}), ba1); + auto dq_a0 = info.add_instruction(make_op("mul"), ba0, scale_a0); + auto dq_a1 = info.add_instruction(make_op("mul"), ba1, scale_a1); dot_res = info.add_instruction(make_op("dot"), dq_a0, dq_a1); + + // Handle case of the bias after scaling + if(has_scale_bias) + dot_res = info.add_common_op("sub", dot_res, scaled_bias); } else { diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index eeb459630a2..1a2ea05ea7b 100644 --- 
a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -6833,6 +6833,235 @@ def matmulinteger_int8_uint8_dual_zero_zp_test(): return ([node], [m1, m2], [y], [zp1, zp2]) +@onnx_test() +def matmulintegertofloat_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], []) + + +@onnx_test() +def matmulintegertofloat_zp_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2]) + + +@onnx_test() +def matmulintegertofloat_zp_bias_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + b1 = helper.make_tensor('7', TensorProto.UINT8, [2], [128, -128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 
2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6', '7'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2, b1]) + + +@onnx_test() +def matmulintegertofloat_bad_scale_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.INT8, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2]) + + +@onnx_test() +def matmulintegertofloat_bad_scale2_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.INT8, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2]) + + +@onnx_test() +def matmulintegertofloat_bad_scale3_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + 
zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2]) + + +@onnx_test() +def matmulintegertofloat_bad_bias_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + b1 = helper.make_tensor('7', TensorProto.UINT8, [2], [128, 128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6', '7'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2, b1]) + + +@onnx_test() +def matmulintegertofloat_bad_bias_test2(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + b1 = helper.make_tensor('7', TensorProto.FLOAT16, [2], [128, -128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6', '7'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2, b1]) + + +@onnx_test() +def matmulintegertofloat_bad_bias_test3(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 
= helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + b1 = helper.make_tensor('7', TensorProto.FLOAT16, [], [128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6', '7'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2, b1]) + + +@onnx_test() +def matmulintegertofloat_half_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [3, 2]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], []) + + +@onnx_test() +def matmulintegertofloat_half_zp_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2]) + + +@onnx_test() +def matmulintegertofloat_half_zp_bias_test(): + m1 = 
helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [3, 2]) + zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) + zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + b1 = helper.make_tensor('7', TensorProto.FLOAT16, [2], [128, -128]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6', '7'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2], [y], [zp1, zp2, b1]) + + @onnx_test() def max_test(): a = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3]) diff --git a/test/onnx/matmulintegertofloat_bad_bias_test.onnx b/test/onnx/matmulintegertofloat_bad_bias_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f733a4642b784b0e8c6b0824550eb4aa611be7bd GIT binary patch literal 270 zcma*gy$ZrG6a`>!QY5{ol+-8am_e|=;MhTM=;q{7Qmj&FE!a48>#O=ECJ{t8hdZ2) z!@11mK_^xBXq%g=uk~H^m$sLxtmL_@8ab4z9@Op$xIx$=ga{ErjF2Ft2pNYlSQkSpWb4 literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_bad_bias_test2.onnx b/test/onnx/matmulintegertofloat_bad_bias_test2.onnx new file mode 100644 index 0000000000000000000000000000000000000000..cd0379b482f4e5fd2ff2802235695eeccd75de6a GIT binary patch literal 274 zcma*gy$ZrG6oz3>A|*YDmeecIErVcx!Lftj(9OxEq*$fUTCj2G)=TO|c_F3|L^p@C zJiL5|*?e6~RX?ioriyEESIwnrq{?!6&hk>WnJQZ4?Vf-ege`)H;3EVGAwq-@bLdvf zRI2n*9o`GaWt6%du&KTtk37hJp eBYFmjL8_6A=o^IJGWbGM=DO?*TAz|6#BpC)Yc((c literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_bad_bias_test3.onnx b/test/onnx/matmulintegertofloat_bad_bias_test3.onnx new file mode 100644 index 0000000000000000000000000000000000000000..67a095396127bece30f5936b121de61d37a209b8 GIT binary patch literal 269 
zcma*gy$ZrG6oz3>B9b0NLh2RhmO-$;;MhTM=;q{7Qmj&FE!cGE)=Tn^ycN?3ikm~= z@bL15IbvNaU3XP^)5W#8>*i85N@uw`XL+gGOc$;8cTd0#!WO|p@DT!p5FtW{1@xLSbU2Y@fCM6m93>qd=8jLU`Bf}%&ZuBW*wj2aqGyp< aqz1`|zD4{k1B>u2b5r&fXP=TJgu5^4uru)h literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_bad_scale2_test.onnx b/test/onnx/matmulintegertofloat_bad_scale2_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d900df4348b0688b5567f0f7c802fefdc0532e9e GIT binary patch literal 256 zcmdNBO;0T<$xqA4Pb`T~N=%6_PEO28HHt4uEiSRT$jGJ5#c0UI zXvD>6%*ANJ#c0aKXePv1sU+f?SmIln;|b9plJ5r6Pn@Y*90JT*j0{eUrdpf=Oj=A0 zj82SZQNlobg!s64IGBYvxR^LtfS4I1Xap8y0t+$&1(`sC#$Z7%RFx)3+QBN5fGR7& Nf^2Y=OioM!!T>)IGE4vf literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_bad_scale3_test.onnx b/test/onnx/matmulintegertofloat_bad_scale3_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..10264b6f7404f79cc735a2f6ac098b5c57003845 GIT binary patch literal 256 zcmdNBO;0T<$xqA4Pb`T~N=%6_PEO28HI6SyEiSRT$jGJ5#c0UI zXvD>6%*ANJ#c0aKXePv1sU+f?SmIln;|b9plJ5r6Pn@Y*90JT*j0{eUrdpf=Oj=A0 zj82SZQNlobg!s64IGBYvxR^LtfS4I1Xap8y0t+$&1(`sC#$Z7%RFx)RK}LkiB%sPl Oupk>;C6g1AfG_|+E;392 literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_bad_scale_test.onnx b/test/onnx/matmulintegertofloat_bad_scale_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..7059938598089bbbd64a84b75b841d16efacd342 GIT binary patch literal 254 zcmdNBO;0T<$xqA4Pb`T~N=%6_PEO28jW0I3DF*s?*`INl%ZN20?b;B3{H%uTATt*T1*X$ zPK;(z!a!?;__%mDn1wjFm^fH~m>DE!1Quih3o-)*nLvWZ*i@Q;1-TF^lYlBK!GdgX KGnt&21cU*?sWIgM literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_half_test.onnx b/test/onnx/matmulintegertofloat_half_test.onnx new file mode 100644 index 00000000000..bd7f74cb641 --- /dev/null +++ b/test/onnx/matmulintegertofloat_half_test.onnx @@ -0,0 +1,28 @@ + matmulintegertofloat_half_test: +% +1 +2 +3 +4y"MatMulIntegerToFloatmatmulintegertofloat_half_testZ +1 +  + +Z +2 
+  + +Z +3 +  + + +Z +4 +  + + +b +y +  + +B \ No newline at end of file diff --git a/test/onnx/matmulintegertofloat_half_zp_bias_test.onnx b/test/onnx/matmulintegertofloat_half_zp_bias_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0b4ffefe0fdccc865aad7b90c37cd1d4b13f0df7 GIT binary patch literal 280 zcma*gzY2m-6vy$-y&=~_lxR=T&~S$!|3aff(BRh8B3IB7G$XHrO+J!u)oY|Th}s&? z^5M(xFx_02LY7UDUuAku@3P8@O33&kiY$KI;v^S!EYn)9fkN?x?iZuWGGvO+NUHTF{~G6E;wNT literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_half_zp_test.onnx b/test/onnx/matmulintegertofloat_half_zp_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..10e47d781e30fdd37aea97f69707b96b6c748285 GIT binary patch literal 250 zcmdNBO;0T<$xqA4Pb`VgNX$u#uPTTyNi8n1I>*SR&BbWQ#c0IE zXw1cE!o_IH#b_qPSg9o9n^@von&S!49Fp$_(oBSrS{wq*T8s=%jHX(g0!&&=4UA5V zW>LaGON98icsQ7aIJlTNSb&%rBxr;r$P5%@0tp&}1-VdFnji^+RVD#dR)PiD;3}D% Hm;{6Ya2qg_ literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_test.onnx b/test/onnx/matmulintegertofloat_test.onnx new file mode 100644 index 00000000000..1eb839d3baa --- /dev/null +++ b/test/onnx/matmulintegertofloat_test.onnx @@ -0,0 +1,26 @@ + matmulintegertofloat_test: +% +1 +2 +3 +4y"MatMulIntegerToFloatmatmulintegertofloat_testZ +1 +  + +Z +2 +  + +Z +3 +  + +Z +4 +  + +b +y +  + +B \ No newline at end of file diff --git a/test/onnx/matmulintegertofloat_zp_bias_test.onnx b/test/onnx/matmulintegertofloat_zp_bias_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..07af336b78b5d1bb9faeacef568beb2a08ab6956 GIT binary patch literal 276 zcmdNBO;0T<$xqA4Pb`VADu_?YOe~HsNi8n1dcnx0$Hi#K#c0IE zXw1cE!o_IH#c0OGXfDK9sU+f?SmIln;|b9plJ5r6PlTyj90JT*j0{eUrdpf=Oj=A0 zj82SZTB00GKn_m>W5a(a01BH&2?MPc;^X4sU>4%wV&Y%{VrGz_5t1M?P>=~EXbcu) aL{({mBnVcS1XNiG7G#5~WO8B>5C#BTr8?~Z literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_zp_test.onnx b/test/onnx/matmulintegertofloat_zp_test.onnx new file mode 100644 
index 0000000000000000000000000000000000000000..7d2b46c74ee1bd91b0d8a6023a59eece01163b31 GIT binary patch literal 240 zcmdNBO;0T<$xqA4Pb`VADu^#hEiSP-$;hS6#c0UIXvD>6%*ANJ z#c0aKXePv1sU+f?SmIln;|b9hlJ5r6hu<_U4gqE@Mg}KFQ!P#bCM~80MkhwIC}E)a zLVR329Lz!-TudA+K+FshG(r+&1`0BP1dYLhjHoJ2kOaXhlYlBK!GdgXl}t`d0>S{p CO)P!@ literal 0 HcmV?d00001 diff --git a/test/onnx/parse/matmulintegerToFloat_bad_bias_test.cpp b/test/onnx/parse/matmulintegerToFloat_bad_bias_test.cpp new file mode 100644 index 00000000000..9527bf8fd03 --- /dev/null +++ b/test/onnx/parse/matmulintegerToFloat_bad_bias_test.cpp @@ -0,0 +1,30 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_bad_boas_test) +{ + EXPECT(test::throws([&] { migraphx::parse_onnx("matmulintegertofloat_bad_bias_test.onnx"); })); +} diff --git a/test/onnx/parse/matmulintegerToFloat_bad_bias_test2.cpp b/test/onnx/parse/matmulintegerToFloat_bad_bias_test2.cpp new file mode 100644 index 00000000000..c7fe8161666 --- /dev/null +++ b/test/onnx/parse/matmulintegerToFloat_bad_bias_test2.cpp @@ -0,0 +1,30 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_bad_bias_test2) +{ + EXPECT(test::throws([&] { migraphx::parse_onnx("matmulintegertofloat_bad_boas_test2.onnx"); })); +} diff --git a/test/onnx/parse/matmulintegerToFloat_bad_bias_test3.cpp b/test/onnx/parse/matmulintegerToFloat_bad_bias_test3.cpp new file mode 100644 index 00000000000..becd4dba1a8 --- /dev/null +++ b/test/onnx/parse/matmulintegerToFloat_bad_bias_test3.cpp @@ -0,0 +1,30 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_bad_bias_test) +{ + EXPECT(test::throws([&] { migraphx::parse_onnx("matmulintegertofloat_bad_bias_test3.onnx"); })); +} diff --git a/test/onnx/parse/matmulintegerToFloat_bad_scale2_test.cpp b/test/onnx/parse/matmulintegerToFloat_bad_scale2_test.cpp new file mode 100644 index 00000000000..f8505c7af7e --- /dev/null +++ b/test/onnx/parse/matmulintegerToFloat_bad_scale2_test.cpp @@ -0,0 +1,31 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_bad_scale2_test) +{ + EXPECT( + test::throws([&] { migraphx::parse_onnx("matmulintegertofloat_bad_scale2_test.onnx"); })); +} diff --git a/test/onnx/parse/matmulintegerToFloat_bad_scale3_test.cpp b/test/onnx/parse/matmulintegerToFloat_bad_scale3_test.cpp new file mode 100644 index 00000000000..f286e875322 --- /dev/null +++ b/test/onnx/parse/matmulintegerToFloat_bad_scale3_test.cpp @@ -0,0 +1,31 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_bad_scale3_test) +{ + EXPECT( + test::throws([&] { migraphx::parse_onnx("matmulintegertofloat_bad_scale3_test.onnx"); })); +} diff --git a/test/onnx/parse/matmulintegerToFloat_bad_scale_test.cpp b/test/onnx/parse/matmulintegerToFloat_bad_scale_test.cpp new file mode 100644 index 00000000000..46663319192 --- /dev/null +++ b/test/onnx/parse/matmulintegerToFloat_bad_scale_test.cpp @@ -0,0 +1,30 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_bad_scale_test) +{ + EXPECT(test::throws([&] { migraphx::parse_onnx("matmulintegertofloat_bad_scale_test.onnx"); })); +} diff --git a/test/onnx/parse/matmulintegertofloat_half_test.cpp b/test/onnx/parse/matmulintegertofloat_half_test.cpp new file mode 100644 index 00000000000..a280aec93d5 --- /dev/null +++ b/test/onnx/parse/matmulintegertofloat_half_test.cpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_half_test) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + auto l0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {3, 6, 16}}); + auto l1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 16, 8}}); + auto s0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::half_type, {3, 6, 16}}); + auto s1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::half_type, {3, 16, 8}}); + s0 = mm->add_instruction( + migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), s0); + s1 = mm->add_instruction( + migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), s1); + + auto r0 = mm->add_instruction(migraphx::make_op("dot"), l0, s0); + auto r1 = mm->add_instruction(migraphx::make_op("dot"), l1, s1); + mm->add_instruction(migraphx::make_op("dot"), r0, r1); + + auto prog = optimize_onnx("matmulintegertofloat_half_test.onnx"); + + EXPECT(p == prog); +} diff --git a/test/onnx/parse/matmulintegertofloat_test.cpp b/test/onnx/parse/matmulintegertofloat_test.cpp new file mode 100644 index 00000000000..6c77656006c --- /dev/null +++ b/test/onnx/parse/matmulintegertofloat_test.cpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include + +TEST_CASE(matmulintegertofloat_test) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + auto l0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {3, 6, 16}}); + auto l1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 16, 8}}); + auto s0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::half_type, {3, 6, 16}}); + auto s1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::half_type, {3, 16, 8}}); + s0 = mm->add_instruction( + migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), s0); + s1 = mm->add_instruction( + migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), s1); + + auto r0 = mm->add_instruction(migraphx::make_op("dot"), l0, s0); + auto r1 = mm->add_instruction(migraphx::make_op("dot"), l1, s1); + mm->add_instruction(migraphx::make_op("dot"), r0, r1); + + auto prog = optimize_onnx("matmulintegertofloat_test.onnx"); + + EXPECT(p == prog); +} From cdb307d2ddada5511b84b03f7ec7d9017fbd22f0 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Tue, 17 Sep 2024 15:48:40 +0000 Subject: [PATCH 05/10] Handle scaled output result better avoid tidy screaming about complexity --- src/onnx/parse_matmul.cpp | 40 +++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index af6ea509cef..01fca0a9ce3 100644 --- a/src/onnx/parse_matmul.cpp +++ 
b/src/onnx/parse_matmul.cpp @@ -199,6 +199,32 @@ struct parse_matmul : op_parser } } + static instruction_ref handle_scaled_output(const onnx_parser::node_info& info, + const instruction_ref& ba0, + const instruction_ref& ba1, + const instruction_ref& scale_a0, + const instruction_ref& scale_a1, + const instruction_ref& scaled_bias, + const bool has_scale_bias) + { + auto bias_a0 = ba0; + auto bias_a1 = ba1; + // Convert if we're half types as dot will scream if we try to multipy half int8 + bias_a0 = info.add_instruction( + make_op("convert", {{"target_type", scale_a0->get_shape().type()}}), bias_a0); + bias_a1 = info.add_instruction( + make_op("convert", {{"target_type", scale_a1->get_shape().type()}}), bias_a1); + auto dq_a0 = info.add_instruction(make_op("dot"), ba0, scale_a0); + auto dq_a1 = info.add_instruction(make_op("dot"), ba1, scale_a1); + auto res = info.add_instruction(make_op("dot"), dq_a0, dq_a1); + + // Handle case of the bias after scaling + if(has_scale_bias) + res = info.add_common_op("sub", res, scaled_bias); + + return res; + } + instruction_ref parse(const op_desc& opd, const onnx_parser& /*parser*/, const onnx_parser::node_info& info, @@ -339,18 +365,8 @@ struct parse_matmul : op_parser if(has_scales) { broadcast_dimensions(info, s0_lens, s1_lens, a0, a1, scale_a0, scale_a1); - // Convert if we're half types as dot will scream if we try to multipy half int8 - ba0 = info.add_instruction( - make_op("convert", {{"target_type", scale_a0->get_shape().type()}}), ba0); - ba1 = info.add_instruction( - make_op("convert", {{"target_type", scale_a1->get_shape().type()}}), ba1); - auto dq_a0 = info.add_instruction(make_op("mul"), ba0, scale_a0); - auto dq_a1 = info.add_instruction(make_op("mul"), ba1, scale_a1); - dot_res = info.add_instruction(make_op("dot"), dq_a0, dq_a1); - - // Handle case of the bias after scaling - if(has_scale_bias) - dot_res = info.add_common_op("sub", dot_res, scaled_bias); + dot_res = handle_scaled_output( + info, ba0, ba1, 
scale_a0, scale_a1, scaled_bias, has_scale_bias); } else { From 3ca3e6afbdb865cc0041d696836fd9a79fb5d6bb Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Fri, 11 Oct 2024 23:24:42 +0000 Subject: [PATCH 06/10] Update fixes for parser tests Use dequantizelinear which elminates the need to add in shifts due to int8/uint8 mismatches still needs parser tests --- src/onnx/parse_matmul.cpp | 46 +++++++++++++------ test/onnx/gen_onnx.py | 24 +++++----- test/onnx/matmulintegertofloat_half_test.onnx | 24 +++++----- test/onnx/matmulintegertofloat_test.onnx | 18 ++++---- .../parse/matmulintegertofloat_half_test.cpp | 16 +++---- test/onnx/parse/matmulintegertofloat_test.cpp | 16 +++---- 6 files changed, 76 insertions(+), 68 deletions(-) diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index 01fca0a9ce3..68b0a2e9e4e 100644 --- a/src/onnx/parse_matmul.cpp +++ b/src/onnx/parse_matmul.cpp @@ -199,23 +199,33 @@ struct parse_matmul : op_parser } } + static instruction_ref handle_dequantized(const onnx_parser::node_info& info, + const instruction_ref& a0, + const instruction_ref& scale_a0, + const instruction_ref& zp_a0) + { + instruction_ref dequantized_op; + + if(a0 == zp_a0) + dequantized_op = info.add_instruction(make_op("dequantizelinear"), a0, scale_a0); + else + dequantized_op = info.add_instruction(make_op("dequantizelinear"), a0, scale_a0, zp_a0); + + return dequantized_op; + } + static instruction_ref handle_scaled_output(const onnx_parser::node_info& info, - const instruction_ref& ba0, - const instruction_ref& ba1, + const instruction_ref& a0, + const instruction_ref& a1, const instruction_ref& scale_a0, const instruction_ref& scale_a1, + const instruction_ref& zp_a0, + const instruction_ref& zp_a1, const instruction_ref& scaled_bias, const bool has_scale_bias) { - auto bias_a0 = ba0; - auto bias_a1 = ba1; - // Convert if we're half types as dot will scream if we try to multipy half int8 - bias_a0 = info.add_instruction( - make_op("convert", 
{{"target_type", scale_a0->get_shape().type()}}), bias_a0); - bias_a1 = info.add_instruction( - make_op("convert", {{"target_type", scale_a1->get_shape().type()}}), bias_a1); - auto dq_a0 = info.add_instruction(make_op("dot"), ba0, scale_a0); - auto dq_a1 = info.add_instruction(make_op("dot"), ba1, scale_a1); + auto dq_a0 = handle_dequantized(info, a0, scale_a0, zp_a0); + auto dq_a1 = handle_dequantized(info, a1, scale_a1, zp_a1); auto res = info.add_instruction(make_op("dot"), dq_a0, dq_a1); // Handle case of the bias after scaling @@ -249,7 +259,7 @@ struct parse_matmul : op_parser a1 = info.add_instruction(make_op("unsqueeze", {{"axes", {1}}}), args[1]); } - auto is_quant_dot = opd.op_name == "quant_dot" or opd.op_name == "quant_dot_scaled"; + auto is_quant_dot = opd.op_name == "quant_dot"; auto has_scales = opd.op_name == "quant_dot_scaled"; if(s0.dynamic() or s1.dynamic()) { @@ -257,6 +267,13 @@ struct parse_matmul : op_parser { MIGRAPHX_THROW("PARSE_MATMUL: dynamic MatMulInteger not supported"); } + + if(has_scales) + { + MIGRAPHX_THROW( + "PARSE_MATMULINTEGERTOFLOAT: dynamic MatMulIntegerToFloat not supported"); + } + auto s0_dds = a0->get_shape().to_dynamic().dyn_dims(); auto s1_dds = a1->get_shape().to_dynamic().dyn_dims(); @@ -321,7 +338,7 @@ struct parse_matmul : op_parser const auto a0_type = a0->get_shape().type(); const auto a1_type = a1->get_shape().type(); - if(is_quant_dot and + if((is_quant_dot or has_scales) and (not contains(supported_types, a0_type) or not contains(supported_types, a1_type))) { MIGRAPHX_THROW("PARSE_MATMULINTEGER: Unsupported type"); @@ -364,9 +381,8 @@ struct parse_matmul : op_parser // equivalent. 
Ensure these are broadcasted accordingly before we perform a dot if(has_scales) { - broadcast_dimensions(info, s0_lens, s1_lens, a0, a1, scale_a0, scale_a1); dot_res = handle_scaled_output( - info, ba0, ba1, scale_a0, scale_a1, scaled_bias, has_scale_bias); + info, a0, a1, scale_a0, scale_a1, ba0, ba1, scaled_bias, has_scale_bias); } else { diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index c058e988a5b..bc743ff2a6b 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -7477,8 +7477,8 @@ def matmulinteger_int8_uint8_dual_zero_zp_test(): def matmulintegertofloat_test(): m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) - s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4, 3]) - s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [2]) y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) node = onnx.helper.make_node( @@ -7494,8 +7494,8 @@ def matmulintegertofloat_test(): def matmulintegertofloat_zp_test(): m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) - s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4, 3]) - s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [2]) zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) @@ -7513,8 +7513,8 @@ def matmulintegertofloat_zp_test(): def matmulintegertofloat_zp_bias_test(): m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) m2 = helper.make_tensor_value_info('2', TensorProto.INT8, 
[3, 2]) - s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4, 3]) - s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [2]) zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) b1 = helper.make_tensor('7', TensorProto.UINT8, [2], [128, -128]) @@ -7650,8 +7650,8 @@ def matmulintegertofloat_bad_bias_test3(): def matmulintegertofloat_half_test(): m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) - s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) - s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [2]) y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) node = onnx.helper.make_node( @@ -7667,8 +7667,8 @@ def matmulintegertofloat_half_test(): def matmulintegertofloat_half_zp_test(): m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) - s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) - s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [2]) zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) @@ -7686,8 +7686,8 @@ def matmulintegertofloat_half_zp_test(): def matmulintegertofloat_half_zp_bias_test(): m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, 
[3, 2]) - s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4, 3]) - s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [2]) zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) b1 = helper.make_tensor('7', TensorProto.FLOAT16, [2], [128, -128]) diff --git a/test/onnx/matmulintegertofloat_half_test.onnx b/test/onnx/matmulintegertofloat_half_test.onnx index bd7f74cb641..d692ab3193e 100644 --- a/test/onnx/matmulintegertofloat_half_test.onnx +++ b/test/onnx/matmulintegertofloat_half_test.onnx @@ -1,4 +1,4 @@ - matmulintegertofloat_half_test: + matmulintegertofloat_half_test: % 1 2 @@ -9,18 +9,18 @@  Z 2 -  - -Z -3 -  - - -Z -4 -  - +   +Z +3 + + + +Z +4 + + + b y  diff --git a/test/onnx/matmulintegertofloat_test.onnx b/test/onnx/matmulintegertofloat_test.onnx index 1eb839d3baa..cd63e7f5ac7 100644 --- a/test/onnx/matmulintegertofloat_test.onnx +++ b/test/onnx/matmulintegertofloat_test.onnx @@ -1,4 +1,4 @@ - matmulintegertofloat_test: + matmulintegertofloat_test: % 1 2 @@ -11,14 +11,14 @@ 2   -Z -3 -  - -Z -4 -  - +Z +3 + + +Z +4 + + b y  diff --git a/test/onnx/parse/matmulintegertofloat_half_test.cpp b/test/onnx/parse/matmulintegertofloat_half_test.cpp index a280aec93d5..44f7ab7db14 100644 --- a/test/onnx/parse/matmulintegertofloat_half_test.cpp +++ b/test/onnx/parse/matmulintegertofloat_half_test.cpp @@ -28,17 +28,13 @@ TEST_CASE(matmulintegertofloat_half_test) { migraphx::program p; auto* mm = p.get_main_module(); - auto l0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {3, 6, 16}}); - auto l1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 16, 8}}); - auto s0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::half_type, {3, 6, 16}}); - auto s1 = mm->add_parameter("2", 
migraphx::shape{migraphx::shape::half_type, {3, 16, 8}}); - s0 = mm->add_instruction( - migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), s0); - s1 = mm->add_instruction( - migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), s1); + auto x0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {4, 3}}); + auto x1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::uint8_type, {3, 2}}); + auto scale_x0 = mm->add_parameter("3", migraphx::shape{migraphx::shape::half_type, {4}}); + auto scale_x1 = mm->add_parameter("4", migraphx::shape{migraphx::shape::half_type, {2}}); - auto r0 = mm->add_instruction(migraphx::make_op("dot"), l0, s0); - auto r1 = mm->add_instruction(migraphx::make_op("dot"), l1, s1); + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, scale_x0); + auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, scale_x1); mm->add_instruction(migraphx::make_op("dot"), r0, r1); auto prog = optimize_onnx("matmulintegertofloat_half_test.onnx"); diff --git a/test/onnx/parse/matmulintegertofloat_test.cpp b/test/onnx/parse/matmulintegertofloat_test.cpp index 6c77656006c..0bdd2e68934 100644 --- a/test/onnx/parse/matmulintegertofloat_test.cpp +++ b/test/onnx/parse/matmulintegertofloat_test.cpp @@ -28,17 +28,13 @@ TEST_CASE(matmulintegertofloat_test) { migraphx::program p; auto* mm = p.get_main_module(); - auto l0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {3, 6, 16}}); - auto l1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 16, 8}}); - auto s0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::half_type, {3, 6, 16}}); - auto s1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::half_type, {3, 16, 8}}); - s0 = mm->add_instruction( - migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), s0); - s1 = mm->add_instruction( - migraphx::make_op("convert", {{"target_type", 
migraphx::shape::float_type}}), s1); + auto x0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {4, 3}}); + auto x1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 2}}); + auto scale_x0 = mm->add_parameter("3", migraphx::shape{migraphx::shape::float_type, {4}}); + auto scale_x1 = mm->add_parameter("4", migraphx::shape{migraphx::shape::float_type, {2}}); - auto r0 = mm->add_instruction(migraphx::make_op("dot"), l0, s0); - auto r1 = mm->add_instruction(migraphx::make_op("dot"), l1, s1); + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, scale_x0); + auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, scale_x1); mm->add_instruction(migraphx::make_op("dot"), r0, r1); auto prog = optimize_onnx("matmulintegertofloat_test.onnx"); From 547826d88db0c2c7ec13e922209a5e22838a30d3 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Wed, 16 Oct 2024 02:46:35 +0000 Subject: [PATCH 07/10] Update parser to use dequantizelinear --- src/onnx/parse_matmul.cpp | 58 ++++++++++++++++--- .../parse/matmulintegertofloat_half_test.cpp | 16 ++++- test/onnx/parse/matmulintegertofloat_test.cpp | 16 ++++- 3 files changed, 79 insertions(+), 11 deletions(-) diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index 68b0a2e9e4e..4600b694c37 100644 --- a/src/onnx/parse_matmul.cpp +++ b/src/onnx/parse_matmul.cpp @@ -199,18 +199,47 @@ struct parse_matmul : op_parser } } + static void handle_scaled_transposes(const onnx_parser::node_info& info, + instruction_ref& scale_a0, + instruction_ref& zp_a0, + bool has_zp) + { + if(has_zp) + { + scale_a0 = + info.add_instruction(make_op("transpose", {{"permutation", {0, 1}}}), scale_a0); + } + else + { + scale_a0 = + info.add_instruction(make_op("transpose", {{"permutation", {0, 1}}}), scale_a0); + zp_a0 = info.add_instruction(make_op("transpose", {{"permutation", {0, 1}}}), zp_a0); + } + } + static instruction_ref handle_dequantized(const 
onnx_parser::node_info& info, const instruction_ref& a0, const instruction_ref& scale_a0, - const instruction_ref& zp_a0) + const instruction_ref& zp_a0, + bool has_zp) { instruction_ref dequantized_op; - if(a0 == zp_a0) - dequantized_op = info.add_instruction(make_op("dequantizelinear"), a0, scale_a0); + if(has_zp) + { + auto bc_scale_a0 = info.add_instruction( + make_op("multibroadcast", {{"out_lens", a0->get_shape().lens()}}), scale_a0); + dequantized_op = info.add_instruction(make_op("dequantizelinear"), a0, bc_scale_a0); + } else - dequantized_op = info.add_instruction(make_op("dequantizelinear"), a0, scale_a0, zp_a0); - + { + auto bc_scale_a0 = info.add_instruction( + make_op("multibroadcast", {{"out_lens", a0->get_shape().lens()}}), scale_a0); + auto bc_zp_a0 = info.add_instruction( + make_op("multibroadcast", {{"out_lens", a0->get_shape().lens()}}), zp_a0); + dequantized_op = + info.add_instruction(make_op("dequantizelinear"), a0, bc_scale_a0, bc_zp_a0); + } return dequantized_op; } @@ -224,8 +253,23 @@ struct parse_matmul : op_parser const instruction_ref& scaled_bias, const bool has_scale_bias) { - auto dq_a0 = handle_dequantized(info, a0, scale_a0, zp_a0); - auto dq_a1 = handle_dequantized(info, a1, scale_a1, zp_a1); + + instruction_ref unsq_zp_a0; + instruction_ref unsq_zp_a1; + + bool a0_has_no_zp = (a0 == zp_a0); + bool a1_has_no_zp = (a1 == zp_a1); + + auto unsq_scale_a0 = info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), scale_a0); + auto dq_a0 = handle_dequantized(info, a0, unsq_scale_a0, unsq_zp_a0, a0_has_no_zp); + + // Transpose second input to get column dims before we broadcast to dequantizelinear + auto unsq_scale_a1 = info.add_instruction(make_op("unsqueeze", {{"axes", {0}}}), scale_a1); + instruction_ref scale_a1_tp = unsq_scale_a1; + instruction_ref zp_a1_tp = unsq_zp_a1; + handle_scaled_transposes(info, scale_a1_tp, zp_a1_tp, a1_has_no_zp); + + auto dq_a1 = handle_dequantized(info, a1, scale_a1_tp, zp_a1_tp, a1_has_no_zp); auto 
res = info.add_instruction(make_op("dot"), dq_a0, dq_a1); // Handle case of the bias after scaling diff --git a/test/onnx/parse/matmulintegertofloat_half_test.cpp b/test/onnx/parse/matmulintegertofloat_half_test.cpp index 44f7ab7db14..654fda27101 100644 --- a/test/onnx/parse/matmulintegertofloat_half_test.cpp +++ b/test/onnx/parse/matmulintegertofloat_half_test.cpp @@ -33,8 +33,20 @@ TEST_CASE(matmulintegertofloat_half_test) auto scale_x0 = mm->add_parameter("3", migraphx::shape{migraphx::shape::half_type, {4}}); auto scale_x1 = mm->add_parameter("4", migraphx::shape{migraphx::shape::half_type, {2}}); - auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, scale_x0); - auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, scale_x1); + auto sq_scale_x0 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), scale_x0); + auto bc_scale_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_scale_x0); + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, bc_scale_x0); + + auto sq_scale_x1 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), scale_x1); + auto t_sq_scale_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1}}}), sq_scale_x1); + auto bc_scale_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_scale_x1); + + auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, bc_scale_x1); mm->add_instruction(migraphx::make_op("dot"), r0, r1); auto prog = optimize_onnx("matmulintegertofloat_half_test.onnx"); diff --git a/test/onnx/parse/matmulintegertofloat_test.cpp b/test/onnx/parse/matmulintegertofloat_test.cpp index 0bdd2e68934..7570d2080f5 100644 --- a/test/onnx/parse/matmulintegertofloat_test.cpp +++ b/test/onnx/parse/matmulintegertofloat_test.cpp @@ -33,8 +33,20 @@ TEST_CASE(matmulintegertofloat_test) auto scale_x0 
= mm->add_parameter("3", migraphx::shape{migraphx::shape::float_type, {4}}); auto scale_x1 = mm->add_parameter("4", migraphx::shape{migraphx::shape::float_type, {2}}); - auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, scale_x0); - auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, scale_x1); + auto sq_scale_x0 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), scale_x0); + auto bc_scale_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_scale_x0); + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, bc_scale_x0); + + auto sq_scale_x1 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), scale_x1); + auto t_sq_scale_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1}}}), sq_scale_x1); + auto bc_scale_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_scale_x1); + + auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, bc_scale_x1); mm->add_instruction(migraphx::make_op("dot"), r0, r1); auto prog = optimize_onnx("matmulintegertofloat_test.onnx"); From c6d8679c68706d3e168b6173729e2087592bb4ec Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Thu, 17 Oct 2024 20:58:02 +0000 Subject: [PATCH 08/10] Add test case for zero point in matmulintegertofloat Needed to update initial tests and gen onnx --- src/onnx/parse_matmul.cpp | 20 +++-- test/onnx/gen_onnx.py | 25 ++++++- .../matmulintegertofloat_half_zp_test.onnx | Bin 250 -> 255 bytes test/onnx/matmulintegertofloat_zp_test.onnx | Bin 240 -> 245 bytes .../matmulintegertofloat_half_zp_test.cpp | 69 ++++++++++++++++++ .../parse/matmulintegertofloat_zp_test.cpp | 69 ++++++++++++++++++ 6 files changed, 174 insertions(+), 9 deletions(-) create mode 100644 test/onnx/parse/matmulintegertofloat_half_zp_test.cpp create mode 100644 
test/onnx/parse/matmulintegertofloat_zp_test.cpp diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index 4600b694c37..24d316aa48c 100644 --- a/src/onnx/parse_matmul.cpp +++ b/src/onnx/parse_matmul.cpp @@ -202,9 +202,9 @@ struct parse_matmul : op_parser static void handle_scaled_transposes(const onnx_parser::node_info& info, instruction_ref& scale_a0, instruction_ref& zp_a0, - bool has_zp) + bool no_zp) { - if(has_zp) + if(no_zp) { scale_a0 = info.add_instruction(make_op("transpose", {{"permutation", {0, 1}}}), scale_a0); @@ -213,7 +213,7 @@ struct parse_matmul : op_parser { scale_a0 = info.add_instruction(make_op("transpose", {{"permutation", {0, 1}}}), scale_a0); - zp_a0 = info.add_instruction(make_op("transpose", {{"permutation", {0, 1}}}), zp_a0); + zp_a0 = info.add_instruction(make_op("transpose", {{"permutation", {1, 0}}}), zp_a0); } } @@ -221,11 +221,11 @@ struct parse_matmul : op_parser const instruction_ref& a0, const instruction_ref& scale_a0, const instruction_ref& zp_a0, - bool has_zp) + bool no_zp) { instruction_ref dequantized_op; - if(has_zp) + if(no_zp) { auto bc_scale_a0 = info.add_instruction( make_op("multibroadcast", {{"out_lens", a0->get_shape().lens()}}), scale_a0); @@ -235,8 +235,10 @@ struct parse_matmul : op_parser { auto bc_scale_a0 = info.add_instruction( make_op("multibroadcast", {{"out_lens", a0->get_shape().lens()}}), scale_a0); + auto bc_zp_a0 = info.add_instruction( make_op("multibroadcast", {{"out_lens", a0->get_shape().lens()}}), zp_a0); + dequantized_op = info.add_instruction(make_op("dequantizelinear"), a0, bc_scale_a0, bc_zp_a0); } @@ -261,7 +263,13 @@ struct parse_matmul : op_parser bool a1_has_no_zp = (a1 == zp_a1); auto unsq_scale_a0 = info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), scale_a0); - auto dq_a0 = handle_dequantized(info, a0, unsq_scale_a0, unsq_zp_a0, a0_has_no_zp); + if(not a0_has_no_zp) + unsq_zp_a0 = info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), zp_a0); + + if(not 
a1_has_no_zp) + unsq_zp_a1 = info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), zp_a1); + + auto dq_a0 = handle_dequantized(info, a0, unsq_scale_a0, unsq_zp_a0, a0_has_no_zp); // Transpose second input to get column dims before we broadcast to dequantizelinear auto unsq_scale_a1 = info.add_instruction(make_op("unsqueeze", {{"axes", {0}}}), scale_a1); diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index bc743ff2a6b..57b02347579 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -7496,8 +7496,8 @@ def matmulintegertofloat_zp_test(): m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4]) s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [2]) - zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) - zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) + zp1 = helper.make_tensor_value_info('5', TensorProto.INT8, [4]) + zp2 = helper.make_tensor_value_info('6', TensorProto.INT8, [2]) y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) node = onnx.helper.make_node( @@ -7506,7 +7506,7 @@ def matmulintegertofloat_zp_test(): outputs=['y'], ) - return ([node], [m1, m2, s1, s2], [y], [zp1, zp2]) + return ([node], [m1, m2, s1, s2, zp1, zp2], [y], []) @onnx_test() @@ -7665,6 +7665,25 @@ def matmulintegertofloat_half_test(): @onnx_test() def matmulintegertofloat_half_zp_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT16, [2]) + zp1 = helper.make_tensor_value_info('5', TensorProto.INT8, [4]) + zp2 = helper.make_tensor_value_info('6', TensorProto.UINT8, [2]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', 
'6'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1, s2, zp1, zp2], [y], []) + + +@onnx_test() +def matmulintegertofloat_half_scalar_zp_test(): m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) m2 = helper.make_tensor_value_info('2', TensorProto.UINT8, [3, 2]) s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT16, [4]) diff --git a/test/onnx/matmulintegertofloat_half_zp_test.onnx b/test/onnx/matmulintegertofloat_half_zp_test.onnx index 10e47d781e30fdd37aea97f69707b96b6c748285..05a324c2aae2dec0e419e5fab731c81a46eae4a7 100644 GIT binary patch delta 93 zcmeyx_@8lt8sp`O>VXq`<-M7NIJlTNn1Prnil2+oScr>@gM&+m1t`D*5->p$0IM(s S3os*8n1KbDgjgotl?4FDfD9Y} delta 115 zcmey*_=|CZ8soW%>VcwK90JT*j0{eUrdpf=Oj=A0j82SZ6D#EXn1wjFm^he$m?=t_ mi_ut!kBf(c3oOV2k^l*sAPIt1CIJO2!GdgXl}t`d0>S|Mr41DT diff --git a/test/onnx/matmulintegertofloat_zp_test.onnx b/test/onnx/matmulintegertofloat_zp_test.onnx index 7d2b46c74ee1bd91b0d8a6023a59eece01163b31..5054fb793a13784800d73e8a48be7e5f4c4eb5bd 100644 GIT binary patch delta 107 zcmeys_?2;jBICJ$LvxHvc%g;= + +TEST_CASE(matmulintegertofloat_half_zp_test) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + auto x0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {4, 3}}); + auto x1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::uint8_type, {3, 2}}); + auto scale_x0 = mm->add_parameter("3", migraphx::shape{migraphx::shape::half_type, {4}}); + auto scale_x1 = mm->add_parameter("4", migraphx::shape{migraphx::shape::half_type, {2}}); + auto zp_x0 = mm->add_parameter("5", migraphx::shape{migraphx::shape::int8_type, {4}}); + auto zp_x1 = mm->add_parameter("6", migraphx::shape{migraphx::shape::uint8_type, {2}}); + + auto sq_scale_x0 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), scale_x0); + auto sq_zp_x0 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), zp_x0); + auto sq_zp_x1 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", 
{-1}}}), zp_x1); + + auto bc_scale_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_scale_x0); + auto bc_zp_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_zp_x0); + + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, bc_scale_x0, bc_zp_x0); + + auto sq_scale_x1 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), scale_x1); + + auto t_sq_scale_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1}}}), sq_scale_x1); + auto t_sq_zp_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {1, 0}}}), sq_zp_x1); + auto bc_scale_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_scale_x1); + + auto bc_zp_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_zp_x1); + + auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, bc_scale_x1, bc_zp_x1); + mm->add_instruction(migraphx::make_op("dot"), r0, r1); + + auto prog = optimize_onnx("matmulintegertofloat_half_zp_test.onnx"); + + EXPECT(p == prog); +} diff --git a/test/onnx/parse/matmulintegertofloat_zp_test.cpp b/test/onnx/parse/matmulintegertofloat_zp_test.cpp new file mode 100644 index 00000000000..26af4b00430 --- /dev/null +++ b/test/onnx/parse/matmulintegertofloat_zp_test.cpp @@ -0,0 +1,69 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_zp_test) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + auto x0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {4, 3}}); + auto x1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 2}}); + auto scale_x0 = mm->add_parameter("3", migraphx::shape{migraphx::shape::float_type, {4}}); + auto scale_x1 = mm->add_parameter("4", migraphx::shape{migraphx::shape::float_type, {2}}); + auto zp_x0 = mm->add_parameter("5", migraphx::shape{migraphx::shape::int8_type, {4}}); + auto zp_x1 = mm->add_parameter("6", migraphx::shape{migraphx::shape::int8_type, {2}}); + + auto sq_scale_x0 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), scale_x0); + auto sq_zp_x0 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), zp_x0); + auto sq_zp_x1 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), zp_x1); + + auto bc_scale_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_scale_x0); + auto bc_zp_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_zp_x0); + + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, bc_scale_x0, bc_zp_x0); + + auto sq_scale_x1 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), scale_x1); + + auto t_sq_scale_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1}}}), sq_scale_x1); + auto t_sq_zp_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {1, 0}}}), sq_zp_x1); + auto bc_scale_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_scale_x1); + + auto bc_zp_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_zp_x1); + + auto r1 = 
mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, bc_scale_x1, bc_zp_x1); + mm->add_instruction(migraphx::make_op("dot"), r0, r1); + + auto prog = optimize_onnx("matmulintegertofloat_zp_test.onnx"); + + EXPECT(p == prog); +} From 6d89fdd628f3bb5979ef2f2f44b6c8e56e9b688c Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Thu, 17 Oct 2024 22:32:34 +0000 Subject: [PATCH 09/10] Add parser tests for scalar inputs in matmulintegertofloat --- src/onnx/parse_matmul.cpp | 27 ++++++- test/onnx/gen_onnx.py | 38 +++++++++- ...atmulintegertofloat_scalar_scale_test.onnx | Bin 0 -> 221 bytes .../matmulintegertofloat_scalar_zp_test.onnx | 32 ++++++++ ...matmulintegertofloat_scalar_scale_test.cpp | 59 +++++++++++++++ .../matmulintegertofloat_scalar_zp_test.cpp | 71 ++++++++++++++++++ 6 files changed, 223 insertions(+), 4 deletions(-) create mode 100644 test/onnx/matmulintegertofloat_scalar_scale_test.onnx create mode 100644 test/onnx/matmulintegertofloat_scalar_zp_test.onnx create mode 100644 test/onnx/parse/matmulintegertofloat_scalar_scale_test.cpp create mode 100644 test/onnx/parse/matmulintegertofloat_scalar_zp_test.cpp diff --git a/src/onnx/parse_matmul.cpp b/src/onnx/parse_matmul.cpp index 24d316aa48c..f62d839c480 100644 --- a/src/onnx/parse_matmul.cpp +++ b/src/onnx/parse_matmul.cpp @@ -108,7 +108,9 @@ struct parse_matmul : op_parser return all_zeros; } - static instruction_ref set_scale_arg(const std::vector& args, const int index) + static instruction_ref set_scale_arg(const onnx_parser::node_info& info, + const std::vector& args, + const int index) { instruction_ref scale_arg = args[index]; std::set supported_dq_types = {migraphx::shape::float_type, @@ -119,6 +121,11 @@ struct parse_matmul : op_parser MIGRAPHX_THROW("PARSE_QUANT_DOT_SCALDED: Scales must be float or half_type"); } + if(scale_arg->get_shape().scalar()) + { + scale_arg = info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), scale_arg); + } + return scale_arg; } @@ -264,10 +271,24 @@ 
struct parse_matmul : op_parser auto unsq_scale_a0 = info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), scale_a0); if(not a0_has_no_zp) + { unsq_zp_a0 = info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), zp_a0); + if(zp_a0->get_shape().scalar()) + { + unsq_zp_a0 = + info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), unsq_zp_a0); + } + } if(not a1_has_no_zp) + { unsq_zp_a1 = info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), zp_a1); + if(zp_a1->get_shape().scalar()) + { + unsq_zp_a1 = + info.add_instruction(make_op("unsqueeze", {{"axes", {-1}}}), unsq_zp_a1); + } + } auto dq_a0 = handle_dequantized(info, a0, unsq_scale_a0, unsq_zp_a0, a0_has_no_zp); @@ -366,8 +387,8 @@ struct parse_matmul : op_parser { a0_zp_index = 4; a1_zp_index = 5; - scale_a0 = set_scale_arg(args, 2); - scale_a1 = set_scale_arg(args, 3); + scale_a0 = set_scale_arg(info, args, 2); + scale_a1 = set_scale_arg(info, args, 3); if(scale_a0->get_shape().type() != scale_a1->get_shape().type()) { MIGRAPHX_THROW("PARSE_MATMULINTEGERTOFLOAT: Scales must be the same type"); diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index 57b02347579..42cd02e05b0 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -7509,6 +7509,42 @@ def matmulintegertofloat_zp_test(): return ([node], [m1, m2, s1, s2, zp1, zp2], [y], []) +@onnx_test() +def matmulintegertofloat_scalar_zp_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4]) + s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [2]) + zp1 = helper.make_tensor_value_info('5', TensorProto.INT8, [4]) + zp2 = helper.make_tensor('6', TensorProto.INT8, [], [129]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4', '5', '6'], + outputs=['y'], + ) + + 
return ([node], [m1, m2, s1, s2, zp1], [y], [zp2]) + + +@onnx_test() +def matmulintegertofloat_scalar_scale_test(): + m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) + m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) + s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4]) + s2 = helper.make_tensor('4', TensorProto.FLOAT, [], [10]) + y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) + + node = onnx.helper.make_node( + 'MatMulIntegerToFloat', + inputs=['1', '2', '3', '4'], + outputs=['y'], + ) + + return ([node], [m1, m2, s1], [y], [s2]) + + @onnx_test() def matmulintegertofloat_zp_bias_test(): m1 = helper.make_tensor_value_info('1', TensorProto.INT8, [4, 3]) @@ -7526,7 +7562,7 @@ def matmulintegertofloat_zp_bias_test(): outputs=['y'], ) - return ([node], [m1, m2, s1, s2], [y], [zp1, zp2, b1]) + return ([node], [m1, m2, s1, s2, zp1, zp2, b1], [y], []) @onnx_test() diff --git a/test/onnx/matmulintegertofloat_scalar_scale_test.onnx b/test/onnx/matmulintegertofloat_scalar_scale_test.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0c7401ea7429c9db956e559bdc22c1795d18dfce GIT binary patch literal 221 zcmdNBO;0T<$xqA4Pb`TqPEO28ECN%h@g=FnC01(~xm39r4Y?SN zxEPJO7)^v2E0sii6H9zcb37q>L-O4~dWkhri(7zEiG_hd!O@A)BuW@)wh$i|4+pal w2Nx3u3lKAd1dWgcnSp{#QT#yrg}AslI2eUMs#ubMsw%;%*x*{3oR|cJ0h0eQYybcN literal 0 HcmV?d00001 diff --git a/test/onnx/matmulintegertofloat_scalar_zp_test.onnx b/test/onnx/matmulintegertofloat_scalar_zp_test.onnx new file mode 100644 index 00000000000..4ae2e16da23 --- /dev/null +++ b/test/onnx/matmulintegertofloat_scalar_zp_test.onnx @@ -0,0 +1,32 @@ + #matmulintegertofloat_scalar_zp_test: ++ +1 +2 +3 +4 +5 +6y"MatMulIntegerToFloat#matmulintegertofloat_scalar_zp_test* *B6Z +1 +  + +Z +2 +  + +Z +3 + + +Z +4 + + +Z +5 + + +b +y +  + +B \ No newline at end of file diff --git a/test/onnx/parse/matmulintegertofloat_scalar_scale_test.cpp 
b/test/onnx/parse/matmulintegertofloat_scalar_scale_test.cpp new file mode 100644 index 00000000000..b4c7be6ff11 --- /dev/null +++ b/test/onnx/parse/matmulintegertofloat_scalar_scale_test.cpp @@ -0,0 +1,59 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_scalar_scale_test) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + auto scale_x1 = mm->add_literal( + migraphx::literal(migraphx::shape{migraphx::shape::float_type, {1}, {0}}, {10})); + auto x0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {4, 3}}); + auto x1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 2}}); + auto scale_x0 = mm->add_parameter("3", migraphx::shape{migraphx::shape::float_type, {4}}); + + auto sq_scale_x1 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), scale_x1); + + auto sq_scale_x0 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), scale_x0); + auto bc_scale_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_scale_x0); + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, bc_scale_x0); + + sq_scale_x1 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), sq_scale_x1); + + auto t_sq_scale_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1}}}), sq_scale_x1); + auto bc_scale_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_scale_x1); + + auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, bc_scale_x1); + mm->add_instruction(migraphx::make_op("dot"), r0, r1); + + auto prog = optimize_onnx("matmulintegertofloat_scalar_scale_test.onnx"); + + EXPECT(p == prog); +} diff --git a/test/onnx/parse/matmulintegertofloat_scalar_zp_test.cpp b/test/onnx/parse/matmulintegertofloat_scalar_zp_test.cpp new file mode 100644 index 00000000000..5ce4240950c --- /dev/null +++ b/test/onnx/parse/matmulintegertofloat_scalar_zp_test.cpp @@ -0,0 +1,71 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_scalar_zp_test) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + auto zp_x1 = mm->add_literal( + migraphx::literal(migraphx::shape{migraphx::shape::int8_type, {1}, {0}}, {129})); + auto x0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {4, 3}}); + auto x1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 2}}); + auto scale_x0 = mm->add_parameter("3", migraphx::shape{migraphx::shape::float_type, {4}}); + auto scale_x1 = mm->add_parameter("4", migraphx::shape{migraphx::shape::float_type, {2}}); + auto zp_x0 = mm->add_parameter("5", migraphx::shape{migraphx::shape::int8_type, {4}}); + + auto sq_scale_x0 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), scale_x0); + auto sq_zp_x0 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), zp_x0); + auto sq_zp_x1 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), zp_x1); + sq_zp_x1 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), sq_zp_x1); + + auto bc_scale_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_scale_x0); + auto bc_zp_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_zp_x0); + + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, bc_scale_x0, bc_zp_x0); + + auto sq_scale_x1 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), scale_x1); + + auto t_sq_scale_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1}}}), sq_scale_x1); + auto t_sq_zp_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {1, 0}}}), sq_zp_x1); + auto bc_scale_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_scale_x1); + + auto bc_zp_x1 = mm->add_instruction( + 
migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_zp_x1); + + auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, bc_scale_x1, bc_zp_x1); + mm->add_instruction(migraphx::make_op("dot"), r0, r1); + + auto prog = optimize_onnx("matmulintegertofloat_scalar_zp_test.onnx"); + + EXPECT(p == prog); +} From c0c8120f47dc739573d7d01d9339aaeb6943e661 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Thu, 17 Oct 2024 23:13:09 +0000 Subject: [PATCH 10/10] Add parser for bias with zero points --- test/onnx/gen_onnx.py | 6 +- .../matmulintegertofloat_zp_bias_test.onnx | Bin 276 -> 275 bytes .../matmulintegertofloat_zp_bias_test.cpp | 75 ++++++++++++++++++ 3 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 test/onnx/parse/matmulintegertofloat_zp_bias_test.cpp diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index 42cd02e05b0..e5692e4e855 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -7551,9 +7551,9 @@ def matmulintegertofloat_zp_bias_test(): m2 = helper.make_tensor_value_info('2', TensorProto.INT8, [3, 2]) s1 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [4]) s2 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [2]) - zp1 = helper.make_tensor('5', TensorProto.INT8, [], [0]) - zp2 = helper.make_tensor('6', TensorProto.UINT8, [], [128]) - b1 = helper.make_tensor('7', TensorProto.UINT8, [2], [128, -128]) + zp1 = helper.make_tensor_value_info('5', TensorProto.INT8, [4]) + zp2 = helper.make_tensor_value_info('6', TensorProto.INT8, [2]) + b1 = helper.make_tensor_value_info('7', TensorProto.FLOAT, [2]) y = helper.make_tensor_value_info('y', TensorProto.INT32, [4, 2]) node = onnx.helper.make_node( diff --git a/test/onnx/matmulintegertofloat_zp_bias_test.onnx b/test/onnx/matmulintegertofloat_zp_bias_test.onnx index 07af336b78b5d1bb9faeacef568beb2a08ab6956..87d5aeab53789fff3e58b7c1072c3ad8dded9cf8 100644 GIT binary patch delta 98 
zcmbQjG?{6F8sqbc>LC-CYX|UiF&YbTadB`k3bAl8aj--I1x%0xm_P!iU;$=?3Ns`D MunKdSmWkhG0n5M)0ssI2 delta 126 zcmbQtG=*t`8sm$J>LEH>90JT*j0{eUrdpf=Oj=A0j82SZTB00GKn_m>W5a(a01BH= ytk!lC=3+D!;^X4sU=-ruV&Y%{V&*8Ipb3&7Gf5C#C${1wXp diff --git a/test/onnx/parse/matmulintegertofloat_zp_bias_test.cpp b/test/onnx/parse/matmulintegertofloat_zp_bias_test.cpp new file mode 100644 index 00000000000..b60ad31a6f5 --- /dev/null +++ b/test/onnx/parse/matmulintegertofloat_zp_bias_test.cpp @@ -0,0 +1,75 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +TEST_CASE(matmulintegertofloat_zp_bias_test) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + auto x0 = mm->add_parameter("1", migraphx::shape{migraphx::shape::int8_type, {4, 3}}); + auto x1 = mm->add_parameter("2", migraphx::shape{migraphx::shape::int8_type, {3, 2}}); + auto scale_x0 = mm->add_parameter("3", migraphx::shape{migraphx::shape::float_type, {4}}); + auto scale_x1 = mm->add_parameter("4", migraphx::shape{migraphx::shape::float_type, {2}}); + auto zp_x0 = mm->add_parameter("5", migraphx::shape{migraphx::shape::int8_type, {4}}); + auto zp_x1 = mm->add_parameter("6", migraphx::shape{migraphx::shape::int8_type, {2}}); + auto bias = mm->add_parameter("7", migraphx::shape{migraphx::shape::float_type, {2}}); + + auto sq_scale_x0 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), scale_x0); + auto sq_zp_x0 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), zp_x0); + auto sq_zp_x1 = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), zp_x1); + + auto bc_scale_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_scale_x0); + auto bc_zp_x0 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x0->get_shape().lens()}}), sq_zp_x0); + + auto r0 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x0, bc_scale_x0, bc_zp_x0); + + auto sq_scale_x1 = + mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), scale_x1); + + auto t_sq_scale_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1}}}), sq_scale_x1); + auto t_sq_zp_x1 = + mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {1, 0}}}), sq_zp_x1); + auto bc_scale_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x1->get_shape().lens()}}), t_sq_scale_x1); + + auto bc_zp_x1 = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", 
x1->get_shape().lens()}}), t_sq_zp_x1); + + auto r1 = mm->add_instruction(migraphx::make_op("dequantizelinear"), x1, bc_scale_x1, bc_zp_x1); + auto dot = mm->add_instruction(migraphx::make_op("dot"), r0, r1); + + auto mb_bias = + mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {4, 2}}}), bias); + + mm->add_instruction(migraphx::make_op("sub"), dot, mb_bias); + + auto prog = optimize_onnx("matmulintegertofloat_zp_bias_test.onnx"); + + EXPECT(p == prog); +}