diff --git a/src/finn/custom_op/fpgadataflow/addstreams.py b/src/finn/custom_op/fpgadataflow/addstreams.py index ac61786ac1..8aaf3522e5 100644 --- a/src/finn/custom_op/fpgadataflow/addstreams.py +++ b/src/finn/custom_op/fpgadataflow/addstreams.py @@ -26,6 +26,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import math import numpy as np import warnings from qonnx.core.datatype import DataType @@ -46,7 +47,7 @@ def get_nodeattr_types(self): "NumChannels": ("i", True, ""), "PE": ("i", True, ""), # FINN DataTypes for inputs; output datatype inferred from input - "inputDataType": ("s", True, ""), + "inputDataTypes": ("strings", True, [""]), # number of input vectors, examples: # [1] is a single vector (like a FC layer with batch=1) # [4] is four vectors (like a FC layer with batch=4) @@ -87,16 +88,19 @@ def make_shape_compatible_op(self, model): return super().make_const_shape_op(oshape) def infer_node_datatype(self, model): - node = self.onnx_node - idt = model.get_tensor_datatype(node.input[0]) - if idt != self.get_input_datatype(): - warn_str = "inputDataType changing for %s: %s -> %s " % ( - node.name, - str(self.get_input_datatype()), - str(idt), - ) - warnings.warn(warn_str) - self.set_nodeattr("inputDataType", idt.name) + # check all input datatypes + for i, inp in enumerate(self.onnx_node.input): + idt = model.get_tensor_datatype(inp) + if idt != self.get_input_datatype(i): + warn_str = "inputDataType changing for %s: %s -> %s " % ( + self.onnx_node.name, + str(self.get_input_datatype(i)), + str(idt), + ) + warnings.warn(warn_str) + old_datatypes_attr = self.get_nodeattr("inputDataTypes") + old_datatypes_attr[i] = idt.name + self.set_nodeattr("inputDataTypes", old_datatypes_attr) # enforce output data type (calculated based on idt) odt = self.get_output_datatype() model.set_tensor_datatype(self.onnx_node.output[0], odt) @@ -106,22 +110,30 @@ def verify_node(self): def get_input_datatype(self, ind=0): """Returns FINN DataType of input.""" - return DataType[self.get_nodeattr("inputDataType")] + return DataType[self.get_nodeattr("inputDataTypes")[ind]] def get_output_datatype(self, ind=0): """Returns FINN DataType of output.""" - # we need to set output datatype to the next larger int or uint - # enhancement: consider specifying w/ explicit outputDataType attribute - # to allow overflow and use the same idt if user wants - idt = DataType[self.get_nodeattr("inputDataType")] - if idt.signed(): - return DataType.get_smallest_possible(2 * idt.min()) + min_input = 0 + max_input = 0 + for i in range(len(self.get_nodeattr("inputDataTypes"))): + idt = self.get_input_datatype(i) + if idt.min() < min_input: + min_input = idt.min() + if idt.max() > max_input: + max_input = idt.max() + if min_input >= 0: + out_bit_width = math.ceil(np.log2(max_input + 1)) + odt = DataType[f"UINT{out_bit_width + 1}"] else: - return DataType.get_smallest_possible(2 * idt.max()) + max_abs_input = max(-min_input, 1 + max_input) + out_bit_width = math.ceil(np.log2(max_abs_input) + 1) + odt = DataType[f"INT{out_bit_width + 1}"] + return odt def get_instream_width(self, ind=0): """Returns input stream width.""" - ibits = self.get_input_datatype().bitwidth() + ibits = self.get_input_datatype(ind).bitwidth() pe = self.get_nodeattr("PE") in_width = pe * ibits return in_width @@ -155,8 +167,10 @@ def execute_node(self, context, graph): def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() sname = self.hls_sname() - swidth = self.get_instream_width_padded() - intf_names["s_axis"] = [(x + "_" + sname, swidth) for x in ["in0", "in1"]] + intf_names["s_axis"] = [] + for i in range(len(self.get_nodeattr("inputDataTypes"))): + swidth = self.get_instream_width_padded(i) + intf_names["s_axis"] += [("in{}_{}".format(i, sname), swidth)] return intf_names def derive_characteristic_fxns(self, period): diff --git a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py index a3f0e043f8..db6d0eca51 100644 --- a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py @@ -61,7 +61,7 @@ def verify_node(self): self.get_nodeattr("executable_path") self.get_nodeattr("NumChannels") self.get_nodeattr("PE") - self.get_nodeattr("inputDataType") + self.get_nodeattr("inputDataTypes") info_messages.append("All necessary attributes exist") except Exception: info_messages.append("""The required LabelSelect_Batch attributes do not exist.""") @@ -90,7 +90,7 @@ def execute_node(self, context, graph): inp = context[node.input[0]] assert str(inp.dtype) == "float32", "Input datatype is not float32" assert inp.shape == exp_ishape, """Input0 shape doesn't match expected shape .""" - export_idt = self.get_input_datatype() + export_idt0 = self.get_input_datatype(0) # reshape input into folded form inp = inp.reshape(folded_ishape) # make copy before saving array @@ -101,7 +101,7 @@ def execute_node(self, context, graph): inp = context[node.input[1]] assert str(inp.dtype) == "float32", "Input datatype is not float32" assert inp.shape == exp_ishape, """Input1 shape doesn't match expected shape .""" - export_idt = self.get_input_datatype() + export_idt1 = self.get_input_datatype(1) # reshape input into folded form inp = inp.reshape(folded_ishape) # make copy before saving array @@ -118,12 +118,11 @@ def execute_node(self, context, graph): ), "cppsim did not produce expected output shape" elif mode == "rtlsim": sim = self.get_rtlsim() - nbits = self.get_instream_width() rtlsim_inp0 = npy_to_rtlsim_input( - "{}/input_0.npy".format(code_gen_dir), export_idt, nbits + "{}/input_0.npy".format(code_gen_dir), export_idt0, self.get_instream_width(0) ) rtlsim_inp1 = npy_to_rtlsim_input( - "{}/input_1.npy".format(code_gen_dir), export_idt, nbits + "{}/input_1.npy".format(code_gen_dir), export_idt1, self.get_instream_width(1) ) super().reset_rtlsim(sim) super().toggle_clk(sim) @@ -160,20 +159,15 @@ def defines(self, var): def read_npy_data(self): code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - dtype = self.get_input_datatype() - elem_bits = dtype.bitwidth() - packed_bits = self.get_instream_width() - packed_hls_type = "ap_uint<%d>" % packed_bits - elem_hls_type = dtype.get_hls_datatype_str() npy_type = "float" self.code_gen_dict["$READNPYDATA$"] = [] npy_in = "%s/input_0.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"].append( 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s);' % ( - packed_hls_type, - elem_hls_type, - elem_bits, + "ap_uint<%d>" % self.get_instream_width(0), + self.get_input_datatype(0).get_hls_datatype_str(), + self.get_input_datatype(0).bitwidth(), npy_type, npy_in, self.hls_sname(), @@ -183,9 +177,9 @@ def read_npy_data(self): self.code_gen_dict["$READNPYDATA$"].append( 'npy2apintstream<%s, %s, %d, %s>("%s", in1_%s);' % ( - packed_hls_type, - elem_hls_type, - elem_bits, + "ap_uint<%d>" % self.get_instream_width(1), + self.get_input_datatype(1).get_hls_datatype_str(), + self.get_input_datatype(1).bitwidth(), npy_type, npy_in, self.hls_sname(), @@ -196,12 +190,12 @@ def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( 'hls::stream> in0_{} ("in0_{}");'.format( - self.get_instream_width(), self.hls_sname(), self.hls_sname() + self.get_instream_width(0), self.hls_sname(), self.hls_sname() ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( 'hls::stream> in1_{} ("in1_{}");'.format( - self.get_instream_width(), self.hls_sname(), self.hls_sname() + self.get_instream_width(1), self.hls_sname(), self.hls_sname() ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( @@ -216,8 +210,8 @@ def docompute(self): """{}<{}, {}, {}, {}, {}> (in0_{}, in1_{}, out_{}, 1);""".format( hls_call, self.get_nodeattr("PE"), - self.get_input_datatype().get_hls_datatype_str(), - self.get_input_datatype().get_hls_datatype_str(), + self.get_input_datatype(0).get_hls_datatype_str(), + self.get_input_datatype(1).get_hls_datatype_str(), self.get_output_datatype().get_hls_datatype_str(), self.get_number_output_values(), self.hls_sname(), @@ -231,9 +225,9 @@ def blackboxfunction(self): """void {}(hls::stream> &in0_{}, hls::stream> &in1_{}, hls::stream> &out_{})""".format( self.onnx_node.name, - self.get_nodeattr("PE") * self.get_input_datatype().bitwidth(), + self.get_nodeattr("PE") * self.get_input_datatype(0).bitwidth(), self.hls_sname(), - self.get_nodeattr("PE") * self.get_input_datatype().bitwidth(), + self.get_nodeattr("PE") * self.get_input_datatype(1).bitwidth(), self.hls_sname(), self.get_nodeattr("PE") * self.get_output_datatype().bitwidth(), self.hls_sname(), diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py index 25a2032aeb..069dc48031 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py @@ -460,14 +460,8 @@ def apply(self, model): idt0 = model.get_tensor_datatype(in0) idt1 = model.get_tensor_datatype(in1) - # skip if different data types on inputs - if idt0 != idt1: - continue - - idt = idt0 - # skip conversion for layers with float input - if not idt.is_integer(): + if not idt0.is_integer() or not idt1.is_integer(): continue # check layout and convert if necessary @@ -507,7 +501,7 @@ def apply(self, model): backend="fpgadataflow", NumChannels=num_channels, PE=pe, - inputDataType=idt.name, + inputDataTypes=[idt0.name, idt1.name], numInputVectors=in0_shape[:-1], name="AddStreams_" + node.name, ) diff --git a/tests/fpgadataflow/test_fpgadataflow_addstreams.py b/tests/fpgadataflow/test_fpgadataflow_addstreams.py index 484cbbe04a..2e5debb3b6 100644 --- a/tests/fpgadataflow/test_fpgadataflow_addstreams.py +++ b/tests/fpgadataflow/test_fpgadataflow_addstreams.py @@ -37,6 +37,7 @@ from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.core.onnx_exec as oxe +import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP @@ -47,24 +48,18 @@ from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers -def make_addstreams_modelwrapper(ch, pe, idt): +def make_addstreams_modelwrapper(ch, idts): inp1 = helper.make_tensor_value_info("inp1", TensorProto.FLOAT, [1, ch]) inp2 = helper.make_tensor_value_info("inp2", TensorProto.FLOAT, [1, ch]) outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, ch]) - addstreams_node = helper.make_node( - "AddStreams", + add_node = helper.make_node( + "Add", ["inp1", "inp2"], ["outp"], - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - NumChannels=ch, - PE=pe, - inputDataType=idt.name, - preferred_impl_style="hls", ) graph = helper.make_graph( - nodes=[addstreams_node], + nodes=[add_node], name="graph", inputs=[inp1, inp2], outputs=[outp], @@ -73,8 +68,8 @@ def make_addstreams_modelwrapper(ch, pe, idt): model = qonnx_make_model(graph, producer_name="addstreams-model") model = ModelWrapper(model) - model.set_tensor_datatype("inp1", idt) - model.set_tensor_datatype("inp2", idt) + model.set_tensor_datatype("inp1", idts[0]) + model.set_tensor_datatype("inp2", idts[1]) return model @@ -84,7 +79,9 @@ def prepare_inputs(input1, input2): # data types -@pytest.mark.parametrize("idt", [DataType["UINT4"], DataType["UINT8"]]) +@pytest.mark.parametrize( + "idts", [(DataType["UINT4"], DataType["UINT5"]), (DataType["UINT8"], DataType["INT7"])] +) # channels @pytest.mark.parametrize("ch", [1, 64]) # folding @@ -93,7 +90,7 @@ def prepare_inputs(input1, input2): @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) @pytest.mark.fpgadataflow @pytest.mark.vivado -def test_fpgadataflow_addstreams(idt, ch, fold, exec_mode): +def test_fpgadataflow_addstreams(idts, ch, fold, exec_mode): if fold == -1: pe = 1 else: @@ -101,10 +98,10 @@ def test_fpgadataflow_addstreams(idt, ch, fold, exec_mode): assert ch % pe == 0 # generate input data - x1 = gen_finn_dt_tensor(idt, (1, ch)) - x2 = gen_finn_dt_tensor(idt, (1, ch)) + x1 = gen_finn_dt_tensor(idts[0], (1, ch)) + x2 = gen_finn_dt_tensor(idts[1], (1, ch)) - model = make_addstreams_modelwrapper(ch, pe, idt) + model = make_addstreams_modelwrapper(ch, idts) # prepare input data input_dict = prepare_inputs(x1, x2) @@ -116,6 +113,10 @@ def test_fpgadataflow_addstreams(idt, ch, fold, exec_mode): y_produced = oxe.execute_onnx(model, input_dict)["outp"] assert (y_produced == y_expected).all(), "Execution of hw layer failed" + model = model.transform(to_hw.InferAddStreamsLayer()) + addstreams_node = model.get_nodes_by_op_type("AddStreams")[0] + addstreams_node = getCustomOp(addstreams_node) + addstreams_node.set_nodeattr("PE", pe) model = model.transform(SpecializeLayers("xc7z020clg400-1")) if exec_mode == "cppsim":