aboutsummaryrefslogtreecommitdiffstats
path: root/fpga/usrp3/lib/rfnoc/multiply.v
diff options
context:
space:
mode:
author Martin Braun <martin.braun@ettus.com> 2020-01-23 16:10:22 -0800
committer Martin Braun <martin.braun@ettus.com> 2020-01-28 09:35:36 -0800
commit bafa9d95453387814ef25e6b6256ba8db2df612f (patch)
tree 39ba24b5b67072d354775272e687796bb511848d /fpga/usrp3/lib/rfnoc/multiply.v
parent 3075b981503002df3115d5f1d0b97d2619ba30f2 (diff)
download uhd-bafa9d95453387814ef25e6b6256ba8db2df612f.tar.gz
uhd-bafa9d95453387814ef25e6b6256ba8db2df612f.tar.bz2
uhd-bafa9d95453387814ef25e6b6256ba8db2df612f.zip
Merge FPGA repository back into UHD repository
The FPGA codebase was removed from the UHD repository in 2014 to reduce the size of the repository. However, over the last half-decade, the split between the repositories has proven more burdensome than it has been helpful. By merging the FPGA code back, it will be possible to create atomic commits that touch both FPGA and UHD codebases. Continuous integration testing is also simplified by merging the repositories, because it was previously difficult to automatically derive the correct UHD branch when testing a feature branch on the FPGA repository. This commit also updates the license files and paths therein. We are therefore merging the repositories again. Future development for FPGA code will happen in the same repository as the UHD host code and MPM code. == Original Codebase and Rebasing == The original FPGA repository will be hosted for the foreseeable future at its original local location: https://github.com/EttusResearch/fpga/ It can be used for bisecting, reference, and a more detailed history. The final commit from said repository to be merged here is 05003794e2da61cabf64dd278c45685a7abad7ec. This commit is tagged as v4.0.0.0-pre-uhd-merge. If you have changes in the FPGA repository that you want to rebase onto the UHD repository, simply run the following commands: - Create a directory to store patches (this should be an empty directory): mkdir ~/patches - Now make sure that your FPGA codebase is based on the same state as the code that was merged: cd src/fpga # Or wherever your FPGA code is stored git rebase v4.0.0.0-pre-uhd-merge Note: The rebase command may look slightly different depending on what exactly you're trying to rebase. - Create a patch set for your changes versus v4.0.0.0-pre-uhd-merge: git format-patch v4.0.0.0-pre-uhd-merge -o ~/patches Note: Make sure that only patches are stored in your output directory. It should otherwise be empty. 
Make sure that you picked the correct range of commits, and only commits you wanted to rebase were exported as patch files. - Go to the UHD repository and apply the patches: cd src/uhd # Or wherever your UHD repository is stored git am --directory fpga ~/patches/* rm -rf ~/patches # This is for cleanup == Contributors == The following people have contributed mainly to these files (this list is not complete): Co-authored-by: Alex Williams <alex.williams@ni.com> Co-authored-by: Andrej Rode <andrej.rode@ettus.com> Co-authored-by: Ashish Chaudhari <ashish@ettus.com> Co-authored-by: Ben Hilburn <ben.hilburn@ettus.com> Co-authored-by: Ciro Nishiguchi <ciro.nishiguchi@ni.com> Co-authored-by: Daniel Jepson <daniel.jepson@ni.com> Co-authored-by: Derek Kozel <derek.kozel@ettus.com> Co-authored-by: EJ Kreinar <ej@he360.com> Co-authored-by: Humberto Jimenez <humberto.jimenez@ni.com> Co-authored-by: Ian Buckley <ian.buckley@gmail.com> Co-authored-by: Jörg Hofrichter <joerg.hofrichter@ni.com> Co-authored-by: Jon Kiser <jon.kiser@ni.com> Co-authored-by: Josh Blum <josh@joshknows.com> Co-authored-by: Jonathon Pendlum <jonathan.pendlum@ettus.com> Co-authored-by: Martin Braun <martin.braun@ettus.com> Co-authored-by: Matt Ettus <matt@ettus.com> Co-authored-by: Michael West <michael.west@ettus.com> Co-authored-by: Moritz Fischer <moritz.fischer@ettus.com> Co-authored-by: Nick Foster <nick@ettus.com> Co-authored-by: Nicolas Cuervo <nicolas.cuervo@ettus.com> Co-authored-by: Paul Butler <paul.butler@ni.com> Co-authored-by: Paul David <paul.david@ettus.com> Co-authored-by: Ryan Marlow <ryan.marlow@ettus.com> Co-authored-by: Sugandha Gupta <sugandha.gupta@ettus.com> Co-authored-by: Sylvain Munaut <tnt@246tNt.com> Co-authored-by: Trung Tran <trung.tran@ettus.com> Co-authored-by: Vidush Vishwanath <vidush.vishwanath@ettus.com> Co-authored-by: Wade Fife <wade.fife@ettus.com>
Diffstat (limited to 'fpga/usrp3/lib/rfnoc/multiply.v')
-rw-r--r-- fpga/usrp3/lib/rfnoc/multiply.v 138
1 files changed, 138 insertions, 0 deletions
diff --git a/fpga/usrp3/lib/rfnoc/multiply.v b/fpga/usrp3/lib/rfnoc/multiply.v
new file mode 100644
index 000000000..ad0353c66
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/multiply.v
@@ -0,0 +1,138 @@
+//
+// Copyright 2015 Ettus Research
+//
+// AXI Stream multiplier. Relies on synthesis engine for proper DSP inference.
+
+module multiply #( // AXI-Stream multiplier: multiplies stream a by stream b and emits a WIDTH_P-bit slice (optionally rounded/saturated) of the product on stream p
+ parameter WIDTH_A = 16, // Bit width of a_tdata
+ parameter WIDTH_B = 16, // Bit width of b_tdata
+ parameter WIDTH_P = 32, // Bit width of p_tdata
+ parameter DROP_TOP_P = 1, // Default drops extra bit (16-bit signed x 16-bit signed => 31-bits signed); number of product MSBs discarded before the output bits are selected
+ parameter LATENCY = 3, // multiplier pipeline latency, 0 - 4; NOTE(review): 0 and 1 fall through to default stage counts in the localparams below -- confirm
+ parameter EN_SATURATE = 0, // Enable saturating output to avoid overflow (adds +1 to latency)
+ parameter EN_ROUND = 0, // Enable rounding dropped LSBs (adds +1 to latency, total of +2 if used with EN_SATURATE)
+ parameter SIGNED = 1) // Signed multiply; non-zero selects the $signed product, zero selects the unsigned product
+(
+ input clk, input reset, // reset is sampled inside the posedge-clk block below (synchronous clear of the data registers)
+ input [WIDTH_A-1:0] a_tdata, input a_tlast, input a_tvalid, output a_tready, // Operand A input stream
+ input [WIDTH_B-1:0] b_tdata, input b_tlast, input b_tvalid, output b_tready, // Operand B input stream
+ output [WIDTH_P-1:0] p_tdata, output p_tlast, output p_tvalid, input p_tready // Product output stream
+);
+
+ localparam A_LATENCY = (LATENCY == 1) ? 1 : // Number of register stages on operand A before the multiply
+ (LATENCY == 2) ? 1 :
+ (LATENCY == 3) ? 2 :
+ (LATENCY == 4) ? 2 : 2; // Any LATENCY outside 1-4 (including 0) also yields 2 stages
+ localparam B_LATENCY = A_LATENCY; // Operand B always uses the same number of stages as operand A
+ localparam P_LATENCY = (LATENCY == 2) ? 1 : // Number of register stages after the multiply
+ (LATENCY == 3) ? 1 :
+ (LATENCY == 4) ? 2 : 2; // LATENCY 0 and 1 both fall through to 2 here -- NOTE(review): confirm this is intended for LATENCY == 1
+
+ reg [WIDTH_A-1:0] a_reg[A_LATENCY-1:0]; // Operand A pipeline; index 0 is the stage fed by a_tdata
+ reg [WIDTH_B-1:0] b_reg[B_LATENCY-1:0]; // Operand B pipeline; index 0 is the stage fed by b_tdata
+ reg [WIDTH_A+WIDTH_B-1:0] p_reg[P_LATENCY-1:0]; // Full-width product pipeline; index 0 is the stage fed by the multiplier
+
+ wire [A_LATENCY-1:0] en_a_reg; // Per-stage load enables for a_reg, driven by axi_pipe_join
+ wire [B_LATENCY-1:0] en_b_reg; // Per-stage load enables for b_reg, driven by axi_pipe_join
+ wire [P_LATENCY-1:0] en_p_reg; // Per-stage load enables for p_reg, driven by axi_pipe_join
+ wire p_int_tlast, p_int_tvalid, p_int_tready; // Handshake signals between the multiplier pipeline and the output (round/saturate) stage
+ axi_pipe_join #( // Defined outside this file; presumably joins the two input streams and tracks tvalid/tlast through the stages -- confirm against its source
+ .PRE_JOIN_STAGES0(A_LATENCY),
+ .PRE_JOIN_STAGES1(B_LATENCY),
+ .POST_JOIN_STAGES(P_LATENCY))
+ axi_pipe_join (
+ .clk(clk), .reset(reset), .clear(1'b0), // clear is tied off; only reset is used
+ .i0_tlast(a_tlast), .i0_tvalid(a_tvalid), .i0_tready(a_tready),
+ .i1_tlast(b_tlast), .i1_tvalid(b_tvalid), .i1_tready(b_tready),
+ .o_tlast(p_int_tlast), .o_tvalid(p_int_tvalid), .o_tready(p_int_tready),
+ .enables0(en_a_reg), .enables1(en_b_reg), .enables_post(en_p_reg));
+
+ // Multiply: both signed and unsigned products are formed; operands come straight from the inputs when LATENCY == 0, otherwise from the last a_reg/b_reg stage
+ wire [WIDTH_A+WIDTH_B-1:0] p_mult_signed = (LATENCY == 0) ? $signed(a_tdata) * $signed(b_tdata) : $signed(a_reg[A_LATENCY-1]) * $signed(b_reg[B_LATENCY-1]);
+ wire [WIDTH_A+WIDTH_B-1:0] p_mult_unsigned = (LATENCY == 0) ? a_tdata * b_tdata : a_reg[A_LATENCY-1] * b_reg[B_LATENCY-1];
+ wire [WIDTH_A+WIDTH_B-1:0] p_int_tdata = (LATENCY == 0) ? (SIGNED ? p_mult_signed : p_mult_unsigned) : p_reg[P_LATENCY-1]; // LATENCY == 0 bypasses p_reg; NOTE(review): axi_pipe_join is still given 2+2 stages in that case -- confirm data and handshake stay aligned
+
+ // Register pipeline: synchronous reset clears every stage; otherwise each stage loads only when its enable bit is set
+ integer i; // Loop index shared by the per-stage loops below
+ always @(posedge clk) begin
+ if (reset) begin
+ for (i = 0; i < A_LATENCY; i = i + 1) begin
+ a_reg[i] <= 'd0;
+ end
+ for (i = 0; i < B_LATENCY; i = i + 1) begin
+ b_reg[i] <= 'd0;
+ end
+ for (i = 0; i < P_LATENCY; i = i + 1) begin
+ p_reg[i] <= 'd0;
+ end
+ end else begin
+ for (i = 0; i < A_LATENCY; i = i + 1) begin // Operand A stages
+ if (en_a_reg[i]) begin
+ if (i == 0) begin
+ a_reg[i] <= $signed(a_tdata); // Stage 0 captures the input; source and destination have the same width, so the cast does not change the stored bits
+ end else begin
+ a_reg[i] <= a_reg[i-1]; // Later stages shift from the previous stage
+ end
+ end
+ end
+ for (i = 0; i < B_LATENCY; i = i + 1) begin // Operand B stages, same structure as operand A
+ if (en_b_reg[i]) begin
+ if (i == 0) begin
+ b_reg[i] <= $signed(b_tdata);
+ end else begin
+ b_reg[i] <= b_reg[i-1];
+ end
+ end
+ end
+ for (i = 0; i < P_LATENCY; i = i + 1) begin // Product stages
+ if (en_p_reg[i]) begin
+ if (i == 0) begin
+ p_reg[i] <= SIGNED ? p_mult_signed : p_mult_unsigned; // Stage 0 captures the product selected by the SIGNED parameter
+ end else begin
+ p_reg[i] <= p_reg[i-1];
+ end
+ end
+ end
+ end
+ end
+
+ // Saturate & Round: exactly one of the four output stages below is elaborated, chosen by EN_SATURATE and EN_ROUND
+ // TODO: Might be able to replace axi_round with DSP's built in rounding
+ generate
+ if ((EN_SATURATE == 1) && (EN_ROUND == 1)) begin // Round and saturate: full-width product goes to axi_round_and_clip with CLIP_BITS = DROP_TOP_P
+ axi_round_and_clip #(
+ .WIDTH_IN(WIDTH_A+WIDTH_B),
+ .WIDTH_OUT(WIDTH_P),
+ .CLIP_BITS(DROP_TOP_P))
+ axi_round_and_clip (
+ .clk(clk), .reset(reset),
+ .i_tdata(p_int_tdata), .i_tlast(p_int_tlast), .i_tvalid(p_int_tvalid), .i_tready(p_int_tready),
+ .o_tdata(p_tdata), .o_tlast(p_tlast), .o_tvalid(p_tvalid), .o_tready(p_tready));
+ end else if ((EN_SATURATE == 0) && (EN_ROUND == 1)) begin // Round only: the top DROP_TOP_P bits are sliced off (no saturation) before axi_round
+ axi_round #(
+ .WIDTH_IN(WIDTH_A+WIDTH_B-DROP_TOP_P),
+ .WIDTH_OUT(WIDTH_P))
+ axi_round (
+ .clk(clk), .reset(reset),
+ .i_tdata(p_int_tdata[WIDTH_A+WIDTH_B-DROP_TOP_P-1:0]), .i_tlast(p_int_tlast), .i_tvalid(p_int_tvalid), .i_tready(p_int_tready),
+ .o_tdata(p_tdata), .o_tlast(p_tlast), .o_tvalid(p_tvalid), .o_tready(p_tready));
+ end else if ((EN_SATURATE == 1) && (EN_ROUND == 0)) begin // Saturate only: axi_clip output is then truncated to its top WIDTH_P bits
+ wire [WIDTH_A+WIDTH_B-DROP_TOP_P-1:0] p_clip_tdata; // axi_clip output, DROP_TOP_P bits narrower than the full product
+ axi_clip #(
+ .WIDTH_IN(WIDTH_A+WIDTH_B),
+ .WIDTH_OUT(WIDTH_A+WIDTH_B-DROP_TOP_P),
+ .CLIP_BITS(DROP_TOP_P))
+ axi_clip (
+ .clk(clk), .reset(reset),
+ .i_tdata(p_int_tdata), .i_tlast(p_int_tlast), .i_tvalid(p_int_tvalid), .i_tready(p_int_tready),
+ .o_tdata(p_clip_tdata), .o_tlast(p_tlast), .o_tvalid(p_tvalid), .o_tready(p_tready));
+ assign p_tdata = p_clip_tdata[WIDTH_A+WIDTH_B-DROP_TOP_P-1:WIDTH_A+WIDTH_B-DROP_TOP_P-WIDTH_P]; // Keep the top WIDTH_P bits; the lower index goes negative if WIDTH_P > WIDTH_A+WIDTH_B-DROP_TOP_P
+ end else begin // Neither: plain truncation with the handshake passed straight through (no added stage)
+ assign p_tdata = p_int_tdata[WIDTH_A+WIDTH_B-DROP_TOP_P-1:WIDTH_A+WIDTH_B-DROP_TOP_P-WIDTH_P]; // Drop the top DROP_TOP_P bits, keep the next WIDTH_P bits; the lower index goes negative if WIDTH_P > WIDTH_A+WIDTH_B-DROP_TOP_P
+ assign p_tlast = p_int_tlast;
+ assign p_tvalid = p_int_tvalid;
+ assign p_int_tready = p_tready;
+ end
+ endgenerate
+
+endmodule