diff options
Diffstat (limited to 'fpga/usrp3/lib/dsp/mult_add_clip.v')
-rw-r--r-- | fpga/usrp3/lib/dsp/mult_add_clip.v | 260 |
1 files changed, 260 insertions, 0 deletions
// Extracted from: diff --git a/fpga/usrp3/lib/dsp/mult_add_clip.v b/fpga/usrp3/lib/dsp/mult_add_clip.v (new file mode 100644, index 000000000..b1b49af8b)
// Copyright 2018 Ettus Research, a National Instruments Company
//
// SPDX-License-Identifier: LGPL-3.0-or-later
// Write xilinx DSP48E1 primitive for mult-add-clip (signed)

`default_nettype none

// mult_add_clip
//
// Signed fixed-point multiply-add with saturation, built around a single
// DSP48E1 instance:
//
//   O = clip(A * B + C)
//
// Each operand has a total width (WIDTH_x) and a binary point position
// (BIN_PT_x = number of bits to the right of the binary point). A and B are
// left-justified into the 25-bit / 18-bit multiplier ports, C is re-aligned
// to the product's binary point, and O is extracted from the DSP result P
// at the requested output binary point. If the result does not fit, O is
// saturated to the most positive or most negative WIDTH_O-bit value.
//
// LATENCY selects which DSP48E1 pipeline registers are enabled:
//   >= 1 : MREG and CREG
//   >= 2 : PREG
//   >= 3 : one A/B input register stage
//   == 4 : two A/B input register stages
// C is delayed in fabric by the same number of stages as the A/B input
// registers so that it meets the product at the adder.
//
// Ports:
//   clk   - clock for all registers
//   reset - drives the DSP48E1 RST* inputs (the fabric C delay line and the
//           sign-tracking registers in this module are not reset)
//   CE    - clock enable for the fabric registers and the DSP48E1 CE* inputs
//           that are tied to it below
//   A,B,C - signed operands
//   O     - clipped result (combinational function of the DSP output and
//           the delayed sign bits)
module mult_add_clip #(
  parameter WIDTH_A=25,   // Max 25
  parameter BIN_PT_A=24,
  parameter WIDTH_B=18,   // Max 18
  parameter BIN_PT_B=17,
  parameter WIDTH_C=43,   // Max 43

  // Min (47-WIDTH_C-1)+BIN_PT_A+BIN_PT_B,
  // Max WIDTH_C-1+BIN_PT_A+BIN_PT_B
  parameter BIN_PT_C=42,

  parameter WIDTH_O=43,   // Max 43-(BIN_PT_A+BIN_PT_B-BIN_PT_O)
  parameter BIN_PT_O=42,
  parameter LATENCY=2     // Maximum is 4
) (
  input wire clk,
  input wire reset,
  input wire CE,          // Ordinarily set to 1'b1
  input wire [WIDTH_A-1:0] A,
  input wire [WIDTH_B-1:0] B,
  input wire [WIDTH_C-1:0] C,
  output reg [WIDTH_O-1:0] O
);
  // DSP operations:
  // O = clip(A * B + C)
  //
  // Mux settings:
  // X,Y (01,01) = M
  // Z (011) = C

  // Pipeline register enables derived from LATENCY (see module header).
  localparam MREG_IN = (LATENCY >= 1) ? 1 : 0;
  localparam CREG_IN = MREG_IN;
  localparam PREG_IN = (LATENCY >= 2) ? 1 : 0;
  localparam A2REG_IN = (LATENCY >= 3) ? 1 : 0;
  localparam A1REG_IN = (LATENCY == 4) ? 1 : 0;
  // Number of A/B input register stages (0, 1 or 2)
  localparam AREG_IN = A1REG_IN + A2REG_IN;
  // See OPMODE Control Bits Settings, Table 2-7,2-8,2-9
  localparam ZMUX_C = 3'b011;
  localparam YMUX_M = 2'b01;
  localparam XMUX_M = 2'b01;
  localparam [6:0] OPMODE = {ZMUX_C, YMUX_M, XMUX_M};

  // A_IN is 25 bits; B_IN is 18 bits. Product M's binary point shifts:
  // left-justifying A and B adds (25-WIDTH_A) and (18-WIDTH_B) fractional
  // bits respectively.
  localparam BIN_PT_M = BIN_PT_A+(25-WIDTH_A) + BIN_PT_B+(18-WIDTH_B);

  // Calculate shift for C to align binary point to A*B product (M)
  // Determine top and bottom indices of C (in C_IN), normalized to M
  // Divide by 2**BIN_PT_C then multiply up by 2**BIN_PT_M
  localparam C_TOP = WIDTH_C-1 - BIN_PT_C + BIN_PT_M;
  localparam C_BOT = 0 - BIN_PT_C + BIN_PT_M;
  // Determine number of sign-extended bits above C_TOP
  localparam C_EXT = 47 - C_TOP;

  // P is a 43-bit fixed point number with bin pt BIN_PT_M
  // O is extracted from those bits
  // Sign extend if more bits to left of bin pt
  localparam O_EXT = ((WIDTH_O-BIN_PT_O) > (43-BIN_PT_M)) ?
                     (WIDTH_O-BIN_PT_O) - (43-BIN_PT_M) : 0;
  // If extending, use highest bit of P, else extract bits based on bin pt
  localparam P_TOP = (O_EXT > 0) ? 42 :
                     (42 + (WIDTH_O-BIN_PT_O) - (43-BIN_PT_M));
  // Pad bottom of O if remaining P not enough bits
  localparam O_PAD = (WIDTH_O > P_TOP+1) ? (WIDTH_O-P_TOP-1) : 0;
  // If padding O, grab lowest bit of P, else determine based on O's width
  localparam P_BOT = (O_PAD > 0) ? 0 : (P_TOP+1-WIDTH_O);

  //------------------------------------------------
  // Normalize C input to A*B product's binary point
  //------------------------------------------------
  // Returns c placed in a 48-bit word at bits [C_TOP:C_BOT]: sign-extended
  // above C_TOP, and either truncated (C_BOT < 0) or zero-padded (C_BOT > 0)
  // at the bottom.
  function automatic [47:0] align_c;
    input [WIDTH_C-1:0] c;
    begin
      // Do sign extension
      if (C_EXT > 0) begin
        align_c[47 -: C_EXT] = {C_EXT{c[WIDTH_C-1]}};
      end
      if (C_BOT < 0) begin
        // Chop off lower bits of C
        align_c[C_TOP:0] = c[WIDTH_C-1:(-C_BOT)];
      end else begin
        // Place C and zero pad if necessary
        align_c[C_TOP:C_BOT] = c;
        if (C_BOT > 0) begin
          align_c[C_BOT-1:0] = {C_BOT{1'b0}};
        end
      end
    end
  endfunction

  // Left-justify A and B in the multiplier ports by zero-padding the LSBs,
  // so the operand sign bit lands on the port's MSB.
  wire [24:0] A_IN = (WIDTH_A < 25) ? { A, {(25-(WIDTH_A)){1'b0}}} : A;
  wire [17:0] B_IN = (WIDTH_B < 18) ? { B, {(18-(WIDTH_B)){1'b0}}} : B;
  wire [47:0] C_IN;
  wire [47:0] P_OUT;

  //--------------------------------------------------
  // C needs more pipeline registers at higher latency
  //--------------------------------------------------
  // One fabric register stage per enabled A/B input register stage; the
  // oldest entry (top WIDTH_C bits of c_r) feeds the DSP C port.
  generate if (AREG_IN > 0) begin
    reg [AREG_IN*WIDTH_C-1:0] c_r;

    if (AREG_IN > 1) begin
      always @ (posedge clk)
      begin
        if (CE) begin
          c_r <= {c_r[0 +: (AREG_IN-1)*WIDTH_C], C};
        end
      end
    end else begin
      always @ (posedge clk)
      begin
        if (CE) begin
          c_r <= C;
        end
      end
    end

    wire [WIDTH_C-1:0] c_pre = c_r[AREG_IN*WIDTH_C-1 -: WIDTH_C];
    assign C_IN = align_c(c_pre);
  end else begin
    assign C_IN = align_c(C);
  end endgenerate

  //----------------------------------------------
  // Track signs for overflow/underflow processing
  //----------------------------------------------
  // Shift registers carrying the expected product sign (sign(A) ^ sign(B))
  // and the sign of C; bit [LATENCY-1] is the value shifted in LATENCY
  // enabled clocks earlier.
  reg [LATENCY-1:0] mult_sign;
  reg [LATENCY-1:0] c_sign;
  wire bin_pt_overflow;
  wire adder_overflow;
  wire [WIDTH_O-1:0] p_extract;

  generate if (LATENCY > 1) begin
    always @ (posedge clk)
    begin
      if (CE) begin
        mult_sign <= {mult_sign[0 +: LATENCY-1], A[WIDTH_A-1] ^ B[WIDTH_B-1]};
        c_sign <= {c_sign[0 +: LATENCY-1], C[WIDTH_C-1]};
      end
    end
  end else begin
    always @ (posedge clk)
    begin
      if (CE) begin
        mult_sign <= A[WIDTH_A-1] ^ B[WIDTH_B-1];
        c_sign <= C[WIDTH_C-1];
      end
    end
  end endgenerate

  // Adder overflow: both addends had the same sign but bit 42 of the sum
  // has the opposite sign.
  assign adder_overflow = (mult_sign[LATENCY-1] == c_sign[LATENCY-1]) &&
                          (P_OUT[42] != c_sign[LATENCY-1]);

  //----------------------------------------------
  // Extract renormalized bits from P_OUT
  //----------------------------------------------
  generate
    if (P_TOP < 42) begin
      // Bits above the extracted field must all equal its MSB; a mix of
      // ones and zeros in P_OUT[42:P_TOP] means the value does not fit in O.
      assign bin_pt_overflow = (|P_OUT[42:P_TOP]) != (&P_OUT[42:P_TOP]);
    end else begin
      assign bin_pt_overflow = 1'b0;
    end

    if (O_EXT > 0) begin
      // O has more integer bits than P: sign-extend from P_OUT[42]
      assign p_extract[WIDTH_O-1 -: O_EXT] = {O_EXT{P_OUT[42]}};
    end

    if (O_PAD > 0) begin
      // O has more fractional bits than P: zero-pad the LSBs
      assign p_extract[O_PAD-1:0] = {O_PAD{1'b0}};
    end
  endgenerate

  assign p_extract[WIDTH_O-1-O_EXT:O_PAD] = P_OUT[P_TOP:P_BOT];

  //----------------------------------
  // Clip if underflowed or overflowed
  //----------------------------------
  // Direction to saturate in = sign of the true (unclipped) sum.
  //  - Product and C have the same sign: the sum has that sign even if the
  //    adder wrapped, so use c_sign.
  //  - Signs differ: adding opposite-signed operands (or C and a zero
  //    product) cannot overflow the 48-bit accumulator, so the MSB of P is
  //    the sign of the sum. Using c_sign here would saturate to the wrong
  //    rail whenever the product dominates C.
  wire clip_sign = (mult_sign[LATENCY-1] == c_sign[LATENCY-1]) ?
                   c_sign[LATENCY-1] : P_OUT[47];

  // Purely combinational: blocking assignments.
  always @ (*)
  begin
    if (bin_pt_overflow || adder_overflow) begin
      // Most negative (1000...0) or most positive (0111...1) value
      O = {clip_sign, {(WIDTH_O-1){!clip_sign}}};
    end else begin
      O = p_extract;
    end
  end


  DSP48E1 #(
    .ACASCREG(AREG_IN),
    .AREG(AREG_IN),
    .ADREG(0),
    .DREG(0),
    .BCASCREG(AREG_IN),
    .BREG(AREG_IN),
    .MREG(MREG_IN),
    .CREG(CREG_IN),
    .PREG(PREG_IN)
  ) DSP48_inst (
    // Outputs
    .ACOUT(),
    .BCOUT(),
    .CARRYCASCOUT(),
    .CARRYOUT(),
    .MULTSIGNOUT(),
    .OVERFLOW(),
    .P(P_OUT),
    .PATTERNBDETECT(),
    .PATTERNDETECT(),
    .PCOUT(),
    .UNDERFLOW(),

    // Inputs
    .A({5'b0,A_IN}),
    .ACIN(30'b0),
    .ALUMODE(4'b0000),
    .B(B_IN),
    .BCIN(18'b0),
    .C(C_IN),
    .CARRYCASCIN(1'b0),
    .CARRYIN(1'b0),
    .CARRYINSEL(3'b0),
    .CEA1(CE),
    .CEA2(CE),
    .CEAD(1'b0),
    .CEALUMODE(1'b1),
    .CEB1(CE),
    .CEB2(CE),
    .CEC(CE),
    .CECARRYIN(CE),
    .CECTRL(CE),
    .CED(1'b0),
    .CEINMODE(CE),
    .CEM(CE),
    .CEP(CE),
    .CLK(clk),
    .D({25{1'b1}}),
    .INMODE(5'b0),
    .MULTSIGNIN(1'b0),
    .OPMODE(OPMODE),
    .PCIN(48'b0),
    .RSTA(reset),
    .RSTALLCARRYIN(reset),
    .RSTALUMODE(reset),
    .RSTB(reset),
    .RSTC(reset),
    .RSTD(reset),
    .RSTCTRL(reset),
    .RSTINMODE(reset),
    .RSTM(reset),
    .RSTP(reset)
  );

endmodule // mult_add_clip
`default_nettype wire