1 files changed, 244 insertions, 0 deletions
diff --git a/fpga/usrp3/lib/dsp/duc_chain.v b/fpga/usrp3/lib/dsp/duc_chain.v
new file mode 100644
index 000000000..44931318c
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/duc_chain.v
@@ -0,0 +1,244 @@
+//
+// Copyright 2011-2013 Ettus Research LLC
+// Copyright 2018 Ettus Research, a National Instruments Company
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+
+
+//! The USRP digital up-conversion chain
+//! Data path: sample -> halfband 1 -> halfband 2 -> CIC interpolator -> CORDIC -> scale multiplier -> clip -> tx_fe_i/tx_fe_q
+module duc_chain
+  #(
+    parameter BASE = 0,  // settings-bus address of the first register; BASE+0 .. BASE+2 are used
+    parameter DSPNO = 0,  // not referenced inside this module
+    parameter WIDTH = 24,  // width of tx_fe_i/tx_fe_q; also the CORDIC bitwidth and hb_interp ACCWIDTH
+    parameter NEW_HB_INTERP = 0,  // 1: hb47_int for both halfband stages; otherwise hb_interp + small_hb_int
+    parameter DEVICE = "7SERIES"  // forwarded to hb47_int and MULT_MACRO
+  )
+  (input clk, input rst, input clr,  // clr is not referenced inside this module
+   input set_stb, input [7:0] set_addr, input [31:0] set_data,  // settings bus, shared by all three setting_reg instances
+
+   // To TX frontend
+   output [WIDTH-1:0] tx_fe_i,
+   output [WIDTH-1:0] tx_fe_q,
+
+   // From TX control
+   input [31:0] sample,  // [31:16] feeds the I rail, [15:0] feeds the Q rail
+   input run,  // when low the NCO phase is held at 0; also gates the strober, CIC and CORDIC enables
+   output strobe,  // copy of the registered strobe_hb1
+   output [31:0] debug  // bits [3:0] = {strobe_cic, strobe_hb1, strobe_hb2, run}; upper bits zero-extended
+   );
+
+   genvar 	 i;  // not referenced below
+
+   // Settings-register outputs and the control nets derived from them
+   wire [17:0] scale_factor;  // sr_1 output; B operand of both output multipliers
+   wire [31:0] phase_inc;  // sr_0 output; added to phase every clock while run is high
+   reg [31:0]  phase;  // NCO phase accumulator
+   wire [7:0]  interp_rate;  // sr_2 out[7:0]; rate input of cic_strober and cic_interp
+   wire [3:0]  tx_femux_a, tx_femux_b;  // not referenced below
+   wire        enable_hb1, enable_hb2;  // sr_2 out[9] and out[8]; inverted to drive the filter bypass inputs
+   wire        rate_change;  // sr_2 .changed output; masks the strober and CIC enables
+   // BASE+0: NCO phase increment
+   setting_reg #(.my_addr(BASE+0)) sr_0
+     (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr),
+      .in(set_data),.out(phase_inc),.changed());
+   // BASE+1: 18-bit scale factor
+   setting_reg #(.my_addr(BASE+1), .width(18)) sr_1
+     (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr),
+      .in(set_data),.out(scale_factor),.changed());
+   // BASE+2: {enable_hb1, enable_hb2, interp_rate}; .changed drives rate_change
+   setting_reg #(.my_addr(BASE+2), .width(10)) sr_2
+     (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr),
+      .in(set_data),.out({enable_hb1, enable_hb2, interp_rate}),.changed(rate_change));
+
+   // Strobes are all now delayed by 1 cycle for timing reasons
+   wire        strobe_cic_pre, strobe_hb1_pre, strobe_hb2_pre;
+   reg 	       strobe_cic = 1;
+   reg 	       strobe_hb1 = 1;
+   reg 	       strobe_hb2 = 1;
+
+  assign strobe = strobe_hb1;
+   // Strober cascade: each stage's strobe_slow is the next stage's strobe_fast (cic -> hb2 -> hb1)
+   cic_strober #(.WIDTH(8))
+     cic_strober(.clock(clk),.reset(rst),.enable(run & ~rate_change),.rate(interp_rate),
+		 .strobe_fast(1'b1),.strobe_slow(strobe_cic_pre) );
+   cic_strober #(.WIDTH(2))
+     hb2_strober(.clock(clk),.reset(rst),.enable(run & ~rate_change),.rate(enable_hb2 ? 2'd2 : 2'd1),
+		 .strobe_fast(strobe_cic_pre),.strobe_slow(strobe_hb2_pre) );
+   cic_strober #(.WIDTH(2))
+     hb1_strober(.clock(clk),.reset(rst),.enable(run & ~rate_change),.rate(enable_hb1 ? 2'd2 : 2'd1),
+		 .strobe_fast(strobe_hb2_pre),.strobe_slow(strobe_hb1_pre) );
+   // One-cycle delay registers for the three strobes
+   always @(posedge clk) strobe_hb1 <= strobe_hb1_pre;
+   always @(posedge clk) strobe_hb2 <= strobe_hb2_pre;
+   always @(posedge clk) strobe_cic <= strobe_cic_pre;
+
+   // NCO: phase accumulator, forced to 0 by rst or while run is low
+   always @(posedge clk)
+     if(rst)
+       phase <= 0;
+     else if(~run)
+       phase <= 0;
+     else
+       phase <= phase + phase_inc;
+
+   wire        signed [17:0] da, db;  // not referenced below
+   wire        signed [35:0] prod_i, prod_q;  // outputs of the 18x18 scale multipliers
+
+   wire [17:0] i_interp, q_interp;  // cic_interp outputs, fed to the CORDIC
+
+   wire [17:0] hb1_i, hb1_q, hb2_i, hb2_q;  // outputs of halfband stages 1 and 2
+
+   wire [7:0]  cpo = enable_hb2 ? ({interp_rate,1'b0}) : interp_rate;  // the 9-bit {interp_rate,1'b0} is truncated to 8 bits
+   // Note that max CIC rate is 128, which would give an overflow on cpo if enable_hb2 is true,
+   //   but the default case inside hb_interp handles this
+   generate
+      if (NEW_HB_INTERP == 1) begin: new_hb
+	 // First stage of halfband interpolation filters. These run at a max CPO of 2 when CIC is bypassed and HB2 enabled.
+	 hb47_int
+	   #(.WIDTH(18),
+	     .DEVICE(DEVICE))
+	     hb1_i0
+	       (
+		.clk(clk),
+		.rst(rst),
+		.bypass(~enable_hb1),
+		.stb_in(strobe_hb1),
+		.data_in({sample[31:16],2'b00}),
+		.output_rate(cpo),
+		.stb_out(strobe_hb2),
+		.data_out(hb1_i)
+		);
+	 // Q rail: same configuration as hb1_i0, fed from sample[15:0]
+	 hb47_int
+	   #(.WIDTH(18),
+	     .DEVICE(DEVICE))
+	     hb1_q0
+	       (
+		.clk(clk),
+		.rst(rst),
+		.bypass(~enable_hb1),
+		.stb_in(strobe_hb1),
+		.data_in({sample[15:0],2'b00}),
+		.output_rate(cpo),
+		.stb_out(strobe_hb2),
+		.data_out(hb1_q)
+		);
+
+	 // Second stage of halfband interpolation filters. These run at a max CPO of 1 when CIC is bypassed.
+	 hb47_int
+	   #(.WIDTH(18),
+	     .DEVICE(DEVICE))
+	     hb2_i0
+	       (
+		.clk(clk),
+		.rst(rst),
+		.bypass(~enable_hb2),
+		.stb_in(strobe_hb2),
+		.data_in(hb1_i),
+		.output_rate(interp_rate),
+		.stb_out(strobe_cic),
+		.data_out(hb2_i)
+		);
+	 // Q rail: same configuration as hb2_i0, fed from hb1_q
+	 hb47_int
+	   #(.WIDTH(18),
+	     .DEVICE(DEVICE))
+	     hb2_q0
+	       (
+		.clk(clk),
+		.rst(rst),
+		.bypass(~enable_hb2),
+		.stb_in(strobe_hb2),
+		.data_in(hb1_q),
+		.output_rate(interp_rate),
+		.stb_out(strobe_cic),
+		.data_out(hb2_q)
+		);
+	 // NOTE(review): every stb_out above is tied to a strobe register of this module, so it is presumably an input on the filters -- confirm
+      end else begin: old_hb
+	 // NEW_HB_INTERP != 1: hb_interp for stage 1, small_hb_int for stage 2; same strobe and data wiring as the new_hb branch
+	 hb_interp #(.IWIDTH(18),.OWIDTH(18),.ACCWIDTH(WIDTH)) hb_interp_i
+	   (.clk(clk),.rst(rst),.bypass(~enable_hb1),.cpo(cpo),.stb_in(strobe_hb1),.data_in({sample[31:16], 2'b0}),.stb_out(strobe_hb2),.data_out(hb1_i));
+	 hb_interp #(.IWIDTH(18),.OWIDTH(18),.ACCWIDTH(WIDTH)) hb_interp_q
+	   (.clk(clk),.rst(rst),.bypass(~enable_hb1),.cpo(cpo),.stb_in(strobe_hb1),.data_in({sample[15:0], 2'b0}),.stb_out(strobe_hb2),.data_out(hb1_q));
+
+	 small_hb_int #(.WIDTH(18)) small_hb_interp_i
+	   (.clk(clk),.rst(rst),.bypass(~enable_hb2),.stb_in(strobe_hb2),.data_in(hb1_i),
+	    .output_rate(interp_rate),.stb_out(strobe_cic),.data_out(hb2_i));
+	 small_hb_int #(.WIDTH(18)) small_hb_interp_q
+	   (.clk(clk),.rst(rst),.bypass(~enable_hb2),.stb_in(strobe_hb2),.data_in(hb1_q),
+	    .output_rate(interp_rate),.stb_out(strobe_cic),.data_out(hb2_q));
+
+      end // block: old_hb
+   endgenerate
+   // CIC interpolators, one per rail: rate = interp_rate, input strobe = strobe_cic, strobe_out tied high
+   cic_interp  #(.bw(18),.N(4),.log2_of_max_rate(7))
+     cic_interp_i(.clock(clk),.reset(rst),.enable(run & ~rate_change),.rate(interp_rate),
+		  .strobe_in(strobe_cic),.strobe_out(1'd1),
+		  .signal_in(hb2_i),.signal_out(i_interp));
+
+   cic_interp  #(.bw(18),.N(4),.log2_of_max_rate(7))
+     cic_interp_q(.clock(clk),.reset(rst),.enable(run & ~rate_change),.rate(interp_rate),
+		  .strobe_in(strobe_cic),.strobe_out(1'd1),
+		  .signal_in(hb2_q),.signal_out(q_interp));
+
+   localparam  cwidth = WIDTH;  // was 18
+   localparam  zwidth = 24;  // was 16
+
+   wire [cwidth-1:0] da_c, db_c;  // CORDIC x/y outputs
+   //
+   // Note. No head room has been added to the CORDIC to accommodate gain in excess of the input signal's dynamic range.
+   // The CORDIC has algorithmic gain of 1.647, implementation gain of 0.5 and potential gain associated with rotation of 1.414.
+   // Thus the CORDIC will overflow when rotating and an input CW with (clipped) effective amplitude of 1.22 is applied.
+   //
+   cordic_z24 #(.bitwidth(cwidth))
+     cordic(.clock(clk), .reset(rst), .enable(run),
+	    .xi({i_interp,{(cwidth-18){1'b0}}}),.yi({q_interp,{(cwidth-18){1'b0}}}),  // 18-bit samples in the MSBs, zero-padded to cwidth bits
+	    .zi(phase[31:32-zwidth]),  // top zwidth bits of the NCO phase
+	    .xo(da_c),.yo(db_c),.zo() );
+   // Scale the top 18 bits of each CORDIC output by scale_factor
+   MULT_MACRO #(.DEVICE(DEVICE),  // Target Device: "VIRTEX5", "VIRTEX6", "SPARTAN6","7SERIES"
+		.LATENCY(1),         // Desired clock cycle latency, 0-4
+		.WIDTH_A(18),        // Multiplier A-input bus width, 1-25
+		.WIDTH_B(18))        // Multiplier B-input bus width, 1-18
+   mult_i (.P(prod_i),             // Multiplier output bus, width determined by WIDTH_P parameter
+	   .A(da_c[cwidth-1:cwidth-18]),// Multiplier input A bus, width determined by WIDTH_A parameter
+	   .B(scale_factor),       // Multiplier input B bus, width determined by WIDTH_B parameter
+	   .CE(1'b1),              // 1-bit active high input clock enable
+	   .CLK(clk),              // 1-bit positive edge clock input
+	   .RST(rst));             // 1-bit input active high reset
+
+   MULT_MACRO #(.DEVICE(DEVICE),  // Target Device: "VIRTEX5", "VIRTEX6", "SPARTAN6","7SERIES"
+		.LATENCY(1),         // Desired clock cycle latency, 0-4
+		.WIDTH_A(18),        // Multiplier A-input bus width, 1-25
+		.WIDTH_B(18))        // Multiplier B-input bus width, 1-18
+   mult_q (.P(prod_q),             // Multiplier output bus, width determined by WIDTH_P parameter
+	   .A(db_c[cwidth-1:cwidth-18]),// Multiplier input A bus, width determined by WIDTH_A parameter
+	   .B(scale_factor),       // Multiplier input B bus, width determined by WIDTH_B parameter
+	   .CE(1'b1),              // 1-bit active high input clock enable
+	   .CLK(clk),              // 1-bit positive edge clock input
+	   .RST(rst));             // 1-bit input active high reset
+
+
+   wire [32:0] 	     i_clip, q_clip;
+
+   // Cordic rotation coupled with a saturated input signal can cause overflow
+   // so we clip here rather than allow a wrap.
+   clip_reg #(.bits_in(36), .bits_out(33), .STROBED(1)) clip_i
+     (.clk(clk), .in(prod_i[35:0]), .strobe_in(1'b1), .out(i_clip), .strobe_out());
+   clip_reg #(.bits_in(36), .bits_out(33), .STROBED(1)) clip_q
+     (.clk(clk), .in(prod_q[35:0]), .strobe_in(1'b1), .out(q_clip), .strobe_out());
+   // Output the top WIDTH bits of each clipped 33-bit value
+   assign tx_fe_i = i_clip[32:33-WIDTH];
+   assign tx_fe_q = q_clip[32:33-WIDTH];
+
+
+   //
+   // Debug
+   //
+   assign 	     debug = {strobe_cic, strobe_hb1, strobe_hb2,run};
+
+endmodule // duc_chain