Skip to content

Commit

Permalink
[hardware] 🐛 Parametrize and fix conversion in VMFPU
Browse files Browse the repository at this point in the history
  • Loading branch information
mp-17 committed Aug 22, 2024
1 parent 1fb435e commit 2ddce5e
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 84 deletions.
60 changes: 60 additions & 0 deletions hardware/include/ara_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,66 @@ package ara_pkg;
logic [51:0] m;
} fp64_t;

// Return the number of mantissa bits of the floating-point format selected by
// the element width `fp_dtype` and the `is_alt` flag.
// NOTE(review): `is_alt` presumably selects the alternate encoding of a given
// width (e.g. a bfloat16-style 7-bit mantissa for EW16) - confirm against the
// FPU configuration.
// Combinations that are not listed fall through to the default and return -1,
// which reads as 32'hFFFF_FFFF because the return type is `int unsigned`.
function automatic int unsigned fp_mantissa_bits(rvv_pkg::vew_e fp_dtype, logic is_alt);
  automatic int unsigned n_bits;

  // The selector entries are mutually exclusive, so their order is irrelevant.
  unique case ({fp_dtype, is_alt})
    // Wide formats: only the standard encoding is listed
    {rvv_pkg::EW64, 1'b0}: n_bits = 52;
    {rvv_pkg::EW32, 1'b0}: n_bits = 23;
    // 16-bit formats
    {rvv_pkg::EW16, 1'b1}: n_bits = 7;
    {rvv_pkg::EW16, 1'b0}: n_bits = 10;
    // 8-bit formats
    {rvv_pkg::EW8, 1'b1}: n_bits = 3;
    {rvv_pkg::EW8, 1'b0}: n_bits = 2;
    // Anything else is unsupported
    default: n_bits = -1;
  endcase

  fp_mantissa_bits = n_bits;
endfunction

// Widen a half-precision value to single precision.
//
// Arguments:
//   fp16       - input value (fields s, e, m).
//   fp16_m_lzc - leading-zero count of fp16.m. It only influences the result
//                when the input is subnormal (fp16.e == 0 and fp16.m != 0).
// Returns:
//   The fp32_t with the same sign. Zero maps to zero, subnormals are
//   normalized, and any NaN is replaced by a quiet NaN whose mantissa has only
//   the MSB set.
// NOTE(review): assumes fp16_t = {s, e[4:0], m[9:0]} and
// fp32_t = {s, e[7:0], m[22:0]}; the typedefs are not visible here - confirm.
function automatic fp32_t fp32_from_fp16(fp16_t fp16, logic [$clog2(fp_mantissa_bits(rvv_pkg::EW16, 0)):0] fp16_m_lzc);
  automatic fp16_t fp16_temp;
  automatic fp32_t fp32;

  // Wide sign
  fp32.s = fp16.s;

  // Wide exponent
  // Bias difference: 127 - 15 = 112
  unique case(fp16.e)
    // Zero or Subnormal. A subnormal with `lzc` leading zeros is worth
    // 1.x * 2^(-15 - lzc), hence the biased exponent 112 - lzc.
    // The count is size-cast to the exponent width instead of being padded
    // with a fixed number of zeros, so the subtraction is not wider than the
    // destination regardless of the width of the fp16_m_lzc port.
    '0: fp32.e = (fp16.m == '0) ? '0 : 8'd112 - 8'(fp16_m_lzc);
    '1: fp32.e = '1; // Inf or NaN
    default: fp32.e = 8'd112 + {3'd0, fp16.e}; // Normal
  endcase

  // Wide mantissa
  // Subnormal inputs are shifted left by (lzc + 1) so that the leading one
  // becomes the implicit bit and drops out of the mantissa field.
  fp16_temp.m = ((fp16.e == '0) && (fp16.m != '0)) ? (fp16.m << 1) << fp16_m_lzc : fp16.m;
  // If the input is NaN, output a quiet NaN mantissa.
  // Otherwise, append trailing zeros to the mantissa.
  fp32.m = ((fp16.e == '1) && (fp16.m != '0) ) ? {1'b1, 22'b0} : {fp16_temp.m, 13'b0};

  fp32_from_fp16 = fp32;
endfunction

// Widen a single-precision value to double precision.
//
// Arguments:
//   fp32       - input value (fields s, e, m).
//   fp32_m_lzc - leading-zero count of fp32.m. It only influences the result
//                when the input is subnormal (fp32.e == 0 and fp32.m != 0).
// Returns:
//   The fp64_t with the same sign. Zero maps to zero, subnormals are
//   normalized, and any NaN is replaced by a quiet NaN whose mantissa has only
//   the MSB set.
// NOTE(review): assumes fp32_t = {s, e[7:0], m[22:0]} and an 11-bit fp64_t
// exponent; the full typedefs are not visible here - confirm.
function automatic fp64_t fp64_from_fp32(fp32_t fp32, logic [$clog2(fp_mantissa_bits(rvv_pkg::EW32, 0)):0] fp32_m_lzc);
  automatic fp32_t fp32_temp;
  automatic fp64_t fp64;

  // Wide sign
  fp64.s = fp32.s;

  // Wide exponent
  // Bias difference: 1023 - 127 = 896
  unique case(fp32.e)
    // Zero or Subnormal. A subnormal with `lzc` leading zeros is worth
    // 1.x * 2^(-127 - lzc), hence the biased exponent 896 - lzc.
    // The count is size-cast to the exponent width instead of being padded
    // with a fixed number of zeros, so the subtraction is not wider than the
    // destination regardless of the width of the fp32_m_lzc port.
    '0: fp64.e = (fp32.m == '0) ? '0 : 11'd896 - 11'(fp32_m_lzc);
    '1: fp64.e = '1; // Inf or NaN
    default: fp64.e = 11'd896 + {3'd0, fp32.e}; // Normal
  endcase

  // Wide mantissa
  // Subnormal inputs are shifted left by (lzc + 1) so that the leading one
  // becomes the implicit bit and drops out of the mantissa field.
  fp32_temp.m = ((fp32.e == '0) && (fp32.m != '0)) ? (fp32.m << 1) << fp32_m_lzc : fp32.m;
  // If the input is NaN, output a quiet NaN mantissa.
  // Otherwise, append trailing zeros to the mantissa.
  fp64.m = ((fp32.e == '1) && (fp32.m != '0)) ? {1'b1, 51'b0} : {fp32_temp.m, 29'b0};

  fp64_from_fp32 = fp64;
endfunction

/////////////////////////////
// Accelerator interface //
/////////////////////////////
Expand Down
102 changes: 33 additions & 69 deletions hardware/src/lane/operand_queue.sv
Original file line number Diff line number Diff line change
Expand Up @@ -155,42 +155,46 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
// Helper to fill with neutral values the last packet
logic incomplete_packet, last_packet;

////////////////////////////////
// Floating-point conversion //
////////////////////////////////

logic [3:0] lzc_count16[2];
logic [4:0] lzc_count32;
logic [$clog2(fp_mantissa_bits(EW16, 0))-1:0] fp16_m_lzc[2]; // 4 bits each
logic [$clog2(fp_mantissa_bits(EW32, 0))-1:0] fp32_m_lzc; // 5 bits each

fp16_t fp16[2];
fp32_t fp32;

if (FPUSupport != FPUSupportNone) begin
// To convert subnormal numbers to normalized form in floating-point numbers,
// it is necessary to determine the number of leading zeros in the mantissa.
// This is typically accomplished using a lzc (leading zero count) module,
// which can accurately count the number of leading zeros in a given number.
// By knowing the number of leading zeros in the mantissa, we can properly
// adjust the exponent and shift the binary point to achieve a normalized
// representation of the number.

// To convert subnormal numbers to normalized form in floating-point numbers,
// it is necessary to determine the number of leading zeros in the mantissa.
// This is typically accomplished using a lzc (leading zero count) module,
// which can accurately count the number of leading zeros in a given number.
// By knowing the number of leading zeros in the mantissa, we can properly
// adjust the exponent and shift the binary point to achieve a normalized
// representation of the number.
if ({RVVH(FPUSupport), RVVF(FPUSupport)} == 2'b11) begin
// sew: 16-bit
for (genvar i = 0; i < 2; i = i + 1) begin
for (genvar i = 0; i < 2; i++) begin
lzc #(
.WIDTH(10),
.MODE (1 )
.WIDTH(fp_mantissa_bits(EW16, 0)),
.MODE (1)
) leading_zero_e16_i (
.in_i ( fp16[i].m ),
.cnt_o ( lzc_count16[i] ),
.empty_o ( /*Unused*/ )
.in_i (fp16[i].m ),
.cnt_o (fp16_m_lzc[i]),
.empty_o(/*Unused*/ )
);
end
end

if ({RVVF(FPUSupport), RVVD(FPUSupport)} == 2'b11) begin
// sew: 32-bit
lzc #(
.WIDTH (23),
.MODE (1 )
) leading_zero_e32(
.in_i ( fp32.m ),
.cnt_o ( lzc_count32 ),
.empty_o ( /*Unused*/ )
.WIDTH(fp_mantissa_bits(EW32, 0)),
.MODE (1)
) leading_zero_e32 (
.in_i (fp32.m ),
.cnt_o (fp32_m_lzc),
.empty_o(/*Unused*/)
);
end

Expand All @@ -204,6 +208,9 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
incomplete_packet = 1'b0;
last_packet = 1'b0;

// Default values for the conversion scratch signals, so that they are fully
// assigned on every path through this block.
for (int i = 0; i < 2; i++) fp16[i] = '0;
// fp32 is a single packed fp32_t, not an array: indexing it with [0] would
// clear only its least-significant bit, so assign the whole struct.
fp32 = '0;

// Reductions need to mask away the inactive elements
// A temporary solution is to send a neutral value directly
// from the opqueues
Expand Down Expand Up @@ -376,56 +383,13 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
unique casez ({cmd.eew, RVVH(FPUSupport), RVVF(FPUSupport), RVVD(FPUSupport)})
{EW16, 1'b1, 1'b1, 1'b?}: begin
for (int e = 0; e < 2; e++) begin
automatic fp32_t fp32_o;
automatic fp16_t fp16_temp;
automatic logic [7:0] fp32_exp;

fp16[e] = ibuf_operand[8*select + 32*e +: 16];

fp16_temp.m = ((fp16[e].e == '0) && (fp16[e].m != '0)) ? fp16[e].m << (5'd1 + {1'd0, lzc_count16[e]}) : fp16[e].m;

fp32_exp = (fp16[e].m == '0) ? '0 : 8'd112 - {4'd0, lzc_count16[e]}; //127 - 15 = 112

unique case(fp16[e].e)
'0: fp32_o.e = fp32_exp; // Zero or Subnormal
'1: fp32_o.e = '1; // NaN
default: fp32_o.e = 8'd112 + {3'd0, fp16[e].e}; // Normal ,127 - 15 = 112
endcase

fp32_o.s = fp16[e].s;

// If the input is NaN, output a quiet NaN mantissa.
// Otherwise, append trailing zeros to the mantissa.
fp32_o.m = ((fp16[e].e == '1) && (fp16[e].m != '0) ) ? {1'b1, 22'b0} : {fp16_temp.m, 13'b0};

conv_operand[32*e +: 32] = fp32_o;
conv_operand[32*e +: 32] = fp32_from_fp16(fp16[e], fp16_m_lzc[e]);
end
end
{EW32, 1'b?, 1'b1, 1'b1}: begin
automatic fp64_t fp64;
automatic fp32_t fp32_temp;

automatic logic [10:0] fp64_exp;

fp32 = ibuf_operand[8*select +: 32];

fp32_temp.m = ((fp32.e == '0) && (fp32.m != '0)) ? fp32.m << (8'd1 + {3'd0, lzc_count32}) : fp32.m;

fp64_exp = (fp32.m == '0) ? '0 : 11'd896 - {6'd0, lzc_count32}; //1023 - 127 = 896

unique case(fp32.e)
'0: fp64.e = fp64_exp; // Zero or Subnormal
'1: fp64.e = '1; // NaN
default: fp64.e = 11'd896 + {3'd0, fp32.e}; // Normal , 1023 - 127 = 896
endcase

fp64.s = fp32.s;

// If the input is NaN, output a quiet NaN mantissa.
// Otherwise, append trailing zeros to the mantissa.
fp64.m = ((fp32.e == '1) && (fp32.m != '0)) ? {1'b1, 51'b0} : {fp32_temp.m, 29'b0};

conv_operand = fp64;
fp32 = ibuf_operand[8*select +: 32];
conv_operand = fp64_from_fp32(fp32, fp32_m_lzc);
end
default:;
endcase
Expand Down
68 changes: 53 additions & 15 deletions hardware/src/lane/vmfpu.sv
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,50 @@ module vmfpu import ara_pkg::*; import rvv_pkg::*; import fpnew_pkg::*;
15: reduction_rx_cnt_init = reduction_rx_cnt_t'(4);
endcase
endfunction: reduction_rx_cnt_init

////////////////////////////////
// Floating-point conversion //
////////////////////////////////

logic [$clog2(fp_mantissa_bits(EW16, 0))-1:0] fp16_m_lzc[2]; // 4 bits each
logic [$clog2(fp_mantissa_bits(EW32, 0))-1:0] fp32_m_lzc; // 5 bits each

fp16_t fp16[2];
fp32_t fp32;

// To convert subnormal numbers to normalized form in floating-point numbers,
// it is necessary to determine the number of leading zeros in the mantissa.
// This is typically accomplished using a lzc (leading zero count) module,
// which can accurately count the number of leading zeros in a given number.
// By knowing the number of leading zeros in the mantissa, we can properly
// adjust the exponent and shift the binary point to achieve a normalized
// representation of the number.
if ({RVVH(FPUSupport), RVVF(FPUSupport)} == 2'b11) begin
// sew: 16-bit
for (genvar i = 0; i < 2; i++) begin
lzc #(
.WIDTH(fp_mantissa_bits(EW16, 0)),
.MODE (1)
) leading_zero_e16_i (
.in_i (fp16[i].m ),
.cnt_o (fp16_m_lzc[i]),
.empty_o(/*Unused*/ )
);
end
end

if ({RVVF(FPUSupport), RVVD(FPUSupport)} == 2'b11) begin
// sew: 32-bit
lzc #(
.WIDTH(fp_mantissa_bits(EW32, 0)),
.MODE (1)
) leading_zero_e32 (
.in_i (fp32.m ),
.cnt_o (fp32_m_lzc),
.empty_o(/*Unused*/)
);
end

///////////
// FPU //
///////////
Expand Down Expand Up @@ -1342,6 +1386,9 @@ module vmfpu import ara_pkg::*; import rvv_pkg::*; import fpnew_pkg::*;
? {vinsn_issue_q.use_vs2, vinsn_issue_q.use_vd_op, vinsn_issue_q.use_vs1}
: {vinsn_issue_q.use_vd_op, vinsn_issue_q.use_vs2, vinsn_issue_q.use_vs1};

// Default values for the conversion scratch signals, so that they are fully
// assigned on every path through this block.
for (int i = 0; i < 2; i++) fp16[i] = '0;
// fp32 is a single packed fp32_t, not an array: indexing it with [0] would
// clear only its least-significant bit, so assign the whole struct.
fp32 = '0;

first_op_d = first_op_q;
simd_red_cnt_d = simd_red_cnt_q;
reduction_rx_cnt_d = reduction_rx_cnt_q;
Expand Down Expand Up @@ -2165,24 +2212,15 @@ module vmfpu import ara_pkg::*; import rvv_pkg::*; import fpnew_pkg::*;
RVVD(FPUSupport)})
{EW32, 1'b1, 1'b1, 1'b?}: begin
for (int e = 0; e < 2; e++) begin
automatic fp16_t fp16 =
vinsn_queue_d.vinsn[vinsn_queue_q.accept_pnt].scalar_op[15:0];
automatic fp32_t fp32;
fp32.s = fp16.s;
fp32.e = (fp16.e - 15) + 127;
fp32.m = {fp16.m, 13'b0};

vinsn_queue_d.vinsn[vinsn_queue_q.accept_pnt].scalar_op[32*e +: 32] = fp32;
fp16[e] = vinsn_queue_d.vinsn[vinsn_queue_q.accept_pnt].scalar_op[15:0];
vinsn_queue_d.vinsn[vinsn_queue_q.accept_pnt].scalar_op[32*e +: 32] =
fp32_from_fp16(fp16[e], fp16_m_lzc[e]);
end
end
{EW64, 1'b?, 1'b1, 1'b1}: begin
automatic fp32_t fp32 = vinsn_queue_d.vinsn[vinsn_queue_q.accept_pnt].scalar_op[31:0];
automatic fp64_t fp64;
fp64.s = fp32.s;
fp64.e = (fp32.e - 127) + 1023;
fp64.m = {fp32.m, 29'b0};

vinsn_queue_d.vinsn[vinsn_queue_q.accept_pnt].scalar_op = fp64;
fp32 = vinsn_queue_d.vinsn[vinsn_queue_q.accept_pnt].scalar_op[31:0];
vinsn_queue_d.vinsn[vinsn_queue_q.accept_pnt].scalar_op =
fp64_from_fp32(fp32, fp32_m_lzc);
end
default:;
endcase
Expand Down

0 comments on commit 2ddce5e

Please sign in to comment.