Squashed B200 FPGA Source. Code from Josh Blum, Ian Buckley, and Matt Ettus.

author: Ben Hilburn <ben.hilburn@ettus.com> 2013-10-10 10:17:27 -0700
committer: Ben Hilburn <ben.hilburn@ettus.com> 2013-10-10 10:17:27 -0700
commit: 0df4b801a34697f2058b4a7b95e08d2a0576c9db (patch)
tree: be10e78d1a97c037a9e7492360a178d1873b9c09 /fpga/usrp3/lib/dsp
parent: 6e7bc850b66e8188718248b76b729c7cf9c89700 (diff)
download: uhd-0df4b801a34697f2058b4a7b95e08d2a0576c9db.tar.gz
uhd-0df4b801a34697f2058b4a7b95e08d2a0576c9db.tar.bz2
uhd-0df4b801a34697f2058b4a7b95e08d2a0576c9db.zip
32 files changed, 1963 insertions, 0 deletions
diff --git a/fpga/usrp3/lib/dsp/Makefile.srcs b/fpga/usrp3/lib/dsp/Makefile.srcs
new file mode 100644
index 000000000..3896ac5b5
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/Makefile.srcs
@@ -0,0 +1,38 @@
+#
+# Copyright 2013 Ettus Research LLC
+#
+
+##################################################
+# DSP Sources
+##################################################
+DSP_SRCS = $(abspath $(addprefix $(BASE_DIR)/../lib/dsp/, \
+ddc_chain.v \
+duc_chain.v \
+sign_extend.v \
+cordic_z24.v \
+clip_reg.v \
+cordic_stage.v \
+clip.v \
+cic_strober.v \
+cic_decim.v \
+cic_interp.v \
+cic_dec_shifter.v \
+cic_int_shifter.v \
+small_hb_dec.v \
+small_hb_int.v \
+hb_dec.v \
+hb_interp.v \
+round_sd.v \
+add2_and_clip_reg.v \
+add2_and_clip.v \
+add2.v \
+add2_reg.v \
+add2_and_round_reg.v \
+add2_and_round.v \
+round_reg.v \
+round.v \
+srl.v \
+acc.v \
+rx_frontend.v \
+rx_dcoffset.v \
+))
diff --git a/fpga/usrp3/lib/dsp/README.txt b/fpga/usrp3/lib/dsp/README.txt
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/README.txt
diff --git a/fpga/usrp3/lib/dsp/acc.v b/fpga/usrp3/lib/dsp/acc.v
new file mode 100644
index 000000000..86b68512f
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/acc.v
@@ -0,0 +1,33 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+
+
+
+module acc
+  #(parameter IWIDTH=16, OWIDTH=30)
+    (input clk,
+     input clear,
+     input acc,
+     input [IWIDTH-1:0] in,
+     output reg [OWIDTH-1:0] out);
+
+   wire [OWIDTH-1:0] in_signext;
+   sign_extend #(.bits_in(IWIDTH),.bits_out(OWIDTH)) 
+     acc_signext (.in(in),.out(in_signext));
+   
+   //  CLEAR & ~ACC  -->  clears the accumulator
+   //  CLEAR & ACC -->    loads the accumulator
+   //  ~CLEAR & ACC -->   accumulates
+   //  ~CLEAR & ~ACC -->  hold
+   
+   wire [OWIDTH-1:0] addend1 = clear ? 0 : out;
+   wire [OWIDTH-1:0] addend2 = ~acc ? 0 : in_signext;
+   wire [OWIDTH-1:0] sum_int = addend1 + addend2;
+
+   always @(posedge clk)
+     out <= sum_int;
+   
+endmodule // acc
+
+
diff --git a/fpga/usrp3/lib/dsp/add2.v b/fpga/usrp3/lib/dsp/add2.v
new file mode 100644
index 000000000..124f9d6ca
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/add2.v
@@ -0,0 +1,16 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+
+
+
+module add2
+  #(parameter WIDTH=16)
+    (input [WIDTH-1:0] in1,
+     input [WIDTH-1:0] in2,
+     output [WIDTH-1:0] sum);
+
+   wire [WIDTH:0] 	sum_int = {in1[WIDTH-1],in1} + {in2[WIDTH-1],in2};
+   assign 		sum = sum_int[WIDTH:1];  // Note -- will have some bias
+   
+endmodule // add2
diff --git a/fpga/usrp3/lib/dsp/add2_and_clip.v b/fpga/usrp3/lib/dsp/add2_and_clip.v
new file mode 100644
index 000000000..663f5d004
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/add2_and_clip.v
@@ -0,0 +1,12 @@
+
+module add2_and_clip
+  #(parameter WIDTH=16)
+    (input [WIDTH-1:0] in1,
+     input [WIDTH-1:0] in2,
+     output [WIDTH-1:0] sum);
+
+   wire [WIDTH:0] 	sum_int = {in1[WIDTH-1],in1} + {in2[WIDTH-1],in2};
+   clip #(.bits_in(WIDTH+1),.bits_out(WIDTH)) clip
+     (.in(sum_int),.out(sum));
+   
+endmodule // add2_and_clip
diff --git a/fpga/usrp3/lib/dsp/add2_and_clip_reg.v b/fpga/usrp3/lib/dsp/add2_and_clip_reg.v
new file mode 100644
index 000000000..8073b3b54
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/add2_and_clip_reg.v
@@ -0,0 +1,25 @@
+
+module add2_and_clip_reg
+  #(parameter WIDTH=16)
+    (input clk,
+     input rst,
+     input [WIDTH-1:0] in1,
+     input [WIDTH-1:0] in2,
+     input strobe_in,
+     output reg [WIDTH-1:0] sum,
+     output reg strobe_out);
+
+   wire [WIDTH-1:0] sum_int;
+   
+   add2_and_clip #(.WIDTH(WIDTH)) add2_and_clip (.in1(in1),.in2(in2),.sum(sum_int));
+
+   always @(posedge clk)
+     if(rst)
+       sum <= 0;
+     else if(strobe_in)
+       sum <= sum_int;
+
+   always @(posedge clk)
+     strobe_out <= strobe_in;
+   
+endmodule // add2_and_clip_reg
diff --git a/fpga/usrp3/lib/dsp/add2_and_round.v b/fpga/usrp3/lib/dsp/add2_and_round.v
new file mode 100644
index 000000000..9d0914414
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/add2_and_round.v
@@ -0,0 +1,16 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+
+
+
+module add2_and_round
+  #(parameter WIDTH=16)
+    (input [WIDTH-1:0] in1,
+     input [WIDTH-1:0] in2,
+     output [WIDTH-1:0] sum);
+
+   wire [WIDTH:0] 	sum_int = {in1[WIDTH-1],in1} + {in2[WIDTH-1],in2};
+   assign 		sum = sum_int[WIDTH:1] + (sum_int[WIDTH] & sum_int[0]);
+   
+endmodule // add2_and_round
diff --git a/fpga/usrp3/lib/dsp/add2_and_round_reg.v b/fpga/usrp3/lib/dsp/add2_and_round_reg.v
new file mode 100644
index 000000000..cb20a3c1b
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/add2_and_round_reg.v
@@ -0,0 +1,21 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+
+
+
+module add2_and_round_reg
+  #(parameter WIDTH=16)
+    (input clk,
+     input [WIDTH-1:0] in1,
+     input [WIDTH-1:0] in2,
+     output reg [WIDTH-1:0] sum);
+
+   wire [WIDTH-1:0] sum_int;
+   
+   add2_and_round #(.WIDTH(WIDTH)) add2_n_rnd (.in1(in1),.in2(in2),.sum(sum_int));
+
+   always @(posedge clk)
+     sum <= sum_int;
+   
+endmodule // add2_and_round_reg
diff --git a/fpga/usrp3/lib/dsp/add2_reg.v b/fpga/usrp3/lib/dsp/add2_reg.v
new file mode 100644
index 000000000..3ac93ae2e
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/add2_reg.v
@@ -0,0 +1,22 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+
+
+
+module add2_reg
+  #(parameter WIDTH=16)
+    (input clk,
+     input [WIDTH-1:0] in1,
+     input [WIDTH-1:0] in2,
+     output reg [WIDTH-1:0] sum);
+
+   wire [WIDTH-1:0] sum_int;
+   
+   add2 #(.WIDTH(WIDTH)) add2 (.in1(in1),.in2(in2),.sum(sum_int));
+
+   always @(posedge clk)
+     sum <= sum_int;
+   
+endmodule // add2_reg
+
diff --git a/fpga/usrp3/lib/dsp/cic_dec_shifter.v b/fpga/usrp3/lib/dsp/cic_dec_shifter.v
new file mode 100644
index 000000000..efc54c106
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/cic_dec_shifter.v
@@ -0,0 +1,94 @@
+// -*- verilog -*-
+//
+//  USRP - Universal Software Radio Peripheral
+//
+//  Copyright (C) 2003 Matt Ettus
+//
+
+//
+
+
+// NOTE   This only works for N=4, max decim rate of 128
+// NOTE   signal "rate" is EQUAL TO the actual rate, no more -1 BS
+
+module cic_dec_shifter(rate,signal_in,signal_out);
+   parameter bw = 16;
+   parameter maxbitgain = 28;
+
+   input [7:0] rate;
+   input       wire [bw+maxbitgain-1:0] signal_in;
+   output      reg [bw-1:0] signal_out;
+
+   function [4:0] bitgain;
+      input [7:0] rate;
+      case(rate)
+	// Exact Cases -- N*log2(rate)
+	8'd1 : bitgain = 0;
+	8'd2 : bitgain = 4;
+	8'd4 : bitgain = 8;
+	8'd8 : bitgain = 12;
+	8'd16 : bitgain = 16;
+	8'd32 : bitgain = 20;
+	8'd64 : bitgain = 24;
+	8'd128 : bitgain = 28;
+	
+	// Nearest without overflow -- ceil(N*log2(rate))
+	8'd3 : bitgain = 7;
+	8'd5 : bitgain = 10;
+	8'd6 : bitgain = 11;
+	8'd7 : bitgain = 12;
+	8'd9 : bitgain = 13;
+	8'd10,8'd11 : bitgain = 14;
+	8'd12,8'd13 : bitgain = 15;
+	8'd14,8'd15 : bitgain = 16;
+	8'd17,8'd18,8'd19 : bitgain = 17;
+	8'd20,8'd21,8'd22 : bitgain = 18;
+	8'd23,8'd24,8'd25,8'd26 : bitgain = 19;
+	8'd27,8'd28,8'd29,8'd30,8'd31 : bitgain = 20;
+	8'd33,8'd34,8'd35,8'd36,8'd37,8'd38 : bitgain = 21;
+	8'd39,8'd40,8'd41,8'd42,8'd43,8'd44,8'd45 : bitgain = 22;
+	8'd46,8'd47,8'd48,8'd49,8'd50,8'd51,8'd52,8'd53 : bitgain = 23;
+	8'd54,8'd55,8'd56,8'd57,8'd58,8'd59,8'd60,8'd61,8'd62,8'd63 : bitgain = 24;
+	8'd65,8'd66,8'd67,8'd68,8'd69,8'd70,8'd71,8'd72,8'd73,8'd74,8'd75,8'd76 : bitgain = 25;
+	8'd77,8'd78,8'd79,8'd80,8'd81,8'd82,8'd83,8'd84,8'd85,8'd86,8'd87,8'd88,8'd89,8'd90 : bitgain = 26;
+	8'd91,8'd92,8'd93,8'd94,8'd95,8'd96,8'd97,8'd98,8'd99,8'd100,8'd101,8'd102,8'd103,8'd104,8'd105,8'd106,8'd107 : bitgain = 27;
+	default : bitgain = 28;
+      endcase // case(rate)
+   endfunction // bitgain
+   
+   wire [4:0] 	  shift = bitgain(rate);
+   
+   // We should be able to do this, but can't ....
+   // assign 	  signal_out = signal_in[shift+bw-1:shift];
+   
+   always @*
+     case(shift)
+       5'd0  : signal_out = signal_in[0+bw-1:0];
+       5'd4  : signal_out = signal_in[4+bw-1:4];
+       5'd7  : signal_out = signal_in[7+bw-1:7];
+       5'd8  : signal_out = signal_in[8+bw-1:8];
+       5'd10 : signal_out = signal_in[10+bw-1:10];
+       5'd11 : signal_out = signal_in[11+bw-1:11];
+       5'd12 : signal_out = signal_in[12+bw-1:12];
+       5'd13 : signal_out = signal_in[13+bw-1:13];
+       5'd14 : signal_out = signal_in[14+bw-1:14];
+       5'd15 : signal_out = signal_in[15+bw-1:15];
+       5'd16 : signal_out = signal_in[16+bw-1:16];
+       5'd17 : signal_out = signal_in[17+bw-1:17];
+       5'd18 : signal_out = signal_in[18+bw-1:18];
+       5'd19 : signal_out = signal_in[19+bw-1:19];
+       5'd20 : signal_out = signal_in[20+bw-1:20];
+       5'd21 : signal_out = signal_in[21+bw-1:21];
+       5'd22 : signal_out = signal_in[22+bw-1:22];
+       5'd23 : signal_out = signal_in[23+bw-1:23];
+       5'd24 : signal_out = signal_in[24+bw-1:24];
+       5'd25 : signal_out = signal_in[25+bw-1:25];
+       5'd26 : signal_out = signal_in[26+bw-1:26];
+       5'd27 : signal_out = signal_in[27+bw-1:27];
+       5'd28 : signal_out = signal_in[28+bw-1:28];
+       
+       default : signal_out = signal_in[28+bw-1:28];
+     endcase // case(shift)
+
+endmodule // cic_dec_shifter
+
diff --git a/fpga/usrp3/lib/dsp/cic_decim.v b/fpga/usrp3/lib/dsp/cic_decim.v
new file mode 100644
index 000000000..feb785de8
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/cic_decim.v
@@ -0,0 +1,76 @@
+// -*- verilog -*-
+//
+//  USRP - Universal Software Radio Peripheral
+//
+//  Copyright (C) 2003 Matt Ettus
+//
+
+//
+
+
+module cic_decim
+  #(parameter bw = 16, parameter N = 4, parameter log2_of_max_rate = 7)
+    (input clock,
+     input reset,
+     input enable,
+     input [7:0] rate,
+     input strobe_in,
+     input strobe_out,
+     input [bw-1:0] signal_in,
+     output reg [bw-1:0] signal_out);
+
+   localparam 	     maxbitgain = N * log2_of_max_rate;
+   
+   wire [bw+maxbitgain-1:0] signal_in_ext;
+   reg [bw+maxbitgain-1:0]  integrator [0:N-1];
+   reg [bw+maxbitgain-1:0]  differentiator [0:N-1];
+   reg [bw+maxbitgain-1:0]  pipeline [0:N-1];
+   reg [bw+maxbitgain-1:0]  sampler;
+   
+   integer 		    i;
+   
+   sign_extend #(bw,bw+maxbitgain) 
+     ext_input (.in(signal_in),.out(signal_in_ext));
+   
+   always @(posedge clock)
+     if(~enable)
+       for(i=0;i<N;i=i+1)
+	 integrator[i] <= 0;
+     else if (strobe_in)
+       begin
+	  integrator[0] <= integrator[0] + signal_in_ext;
+	  for(i=1;i<N;i=i+1)
+	    integrator[i] <= integrator[i] + integrator[i-1];
+       end	
+   
+   always @(posedge clock)
+     if(~enable)
+       begin
+	  sampler <= 0;
+	  for(i=0;i<N;i=i+1)
+	    begin
+	       pipeline[i] <= 0;
+	       differentiator[i] <= 0;
+	    end
+       end
+     else if (strobe_out)
+       begin
+	  sampler <= integrator[N-1];
+	  differentiator[0] <= sampler;
+	  pipeline[0] <= sampler - differentiator[0];
+	  for(i=1;i<N;i=i+1)
+	    begin
+	       differentiator[i] <= pipeline[i-1];
+	       pipeline[i] <= pipeline[i-1] - differentiator[i];
+	    end
+       end // if (enable && strobe_out)
+   
+   wire [bw-1:0] signal_out_unreg;
+   
+   cic_dec_shifter #(bw)
+     cic_dec_shifter(rate,pipeline[N-1],signal_out_unreg);
+
+   always @(posedge clock)
+     signal_out <= signal_out_unreg;
+   
+endmodule // cic_decim
diff --git a/fpga/usrp3/lib/dsp/cic_int_shifter.v b/fpga/usrp3/lib/dsp/cic_int_shifter.v
new file mode 100644
index 000000000..ff5a30d6a
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/cic_int_shifter.v
@@ -0,0 +1,88 @@
+// -*- verilog -*-
+//
+//  USRP - Universal Software Radio Peripheral
+//
+//  Copyright (C) 2003 Matt Ettus
+//
+
+//
+
+
+// NOTE   This only works for N=4, max interp rate of 128
+// NOTE   signal "rate" is EQUAL TO the actual rate (no more -1 BS)
+
+module cic_int_shifter(rate,signal_in,signal_out);
+   parameter bw = 16;
+   parameter maxbitgain = 21;
+   
+   input [7:0] rate;
+   input       wire [bw+maxbitgain-1:0] signal_in;
+   output      reg [bw-1:0] signal_out;
+
+   function [4:0] bitgain;
+      input [7:0] rate;
+      case(rate)
+	// Exact Cases
+	8'd1 : bitgain = 0;
+	8'd2 : bitgain = 3;
+	8'd4 : bitgain = 6;
+	8'd8 : bitgain = 9;
+	8'd16 : bitgain = 12;
+	8'd32 : bitgain = 15;
+	8'd64 : bitgain = 18;
+	8'd128 : bitgain = 21;
+	
+	// Nearest without overflow	
+	8'd3 : bitgain = 5;
+	8'd5 : bitgain = 7;
+	8'd6 : bitgain = 8;
+	8'd7 : bitgain = 9;
+	8'd9,8'd10 : bitgain = 10;
+	8'd11,8'd12 : bitgain = 11;
+	8'd13,8'd14,8'd15 : bitgain = 12;
+	8'd17,8'd18,8'd19,8'd20 : bitgain = 13;
+	8'd21,8'd22,8'd23,8'd24,8'd25 : bitgain = 14;
+	8'd26,8'd27,8'd28,8'd29,8'd30,8'd31 : bitgain = 15;
+	8'd33,8'd34,8'd35,8'd36,8'd37,8'd38,8'd39,8'd40 : bitgain = 16;
+	8'd41,8'd42,8'd43,8'd44,8'd45,8'd46,8'd47,8'd48,8'd49,8'd50 : bitgain = 17;
+	8'd51,8'd52,8'd53,8'd54,8'd55,8'd56,8'd57,8'd58,8'd59,8'd60,8'd61,8'd62,8'd63 : bitgain = 18;
+	8'd65,8'd66,8'd67,8'd68,8'd69,8'd70,8'd71,8'd72,8'd73,8'd74,8'd75,8'd76,8'd77,8'd78,8'd79,8'd80 : bitgain = 19;
+	8'd81,8'd82,8'd83,8'd84,8'd85,8'd86,8'd87,8'd88,8'd89,8'd90,8'd91,8'd92,8'd93,8'd94,8'd95,8'd96,8'd97,8'd98,8'd99,8'd100,8'd101 : bitgain = 20;
+	
+	default : bitgain = 21;
+      endcase // case(rate)
+   endfunction // bitgain
+   
+   wire [4:0] 	  shift = bitgain(rate);
+   
+   // We should be able to do this, but can't ....
+   // assign 	  signal_out = signal_in[shift+bw-1:shift];
+   
+   always @*
+     case(shift)
+       5'd0  : signal_out = signal_in[0+bw-1:0];
+       5'd3  : signal_out = signal_in[3+bw-1:3];
+       5'd6  : signal_out = signal_in[6+bw-1:6];
+       5'd9  : signal_out = signal_in[9+bw-1:9];
+       5'd12 : signal_out = signal_in[12+bw-1:12];
+       5'd15 : signal_out = signal_in[15+bw-1:15];
+       5'd18 : signal_out = signal_in[18+bw-1:18];
+       5'd21 : signal_out = signal_in[21+bw-1:21];
+       
+       5'd5  : signal_out = signal_in[5+bw-1:5];
+       5'd7  : signal_out = signal_in[7+bw-1:7];
+       5'd8  : signal_out = signal_in[8+bw-1:8];
+       5'd10 : signal_out = signal_in[10+bw-1:10];
+       5'd11 : signal_out = signal_in[11+bw-1:11];
+       5'd13 : signal_out = signal_in[13+bw-1:13];
+       5'd14 : signal_out = signal_in[14+bw-1:14];
+       5'd16 : signal_out = signal_in[16+bw-1:16];
+       5'd17 : signal_out = signal_in[17+bw-1:17];
+       5'd19 : signal_out = signal_in[19+bw-1:19];
+       5'd20 : signal_out = signal_in[20+bw-1:20];
+       
+       default : signal_out = signal_in[21+bw-1:21];
+     endcase // case(shift)
+
+endmodule // cic_int_shifter
+
diff --git a/fpga/usrp3/lib/dsp/cic_interp.v b/fpga/usrp3/lib/dsp/cic_interp.v
new file mode 100644
index 000000000..608c2d448
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/cic_interp.v
@@ -0,0 +1,75 @@
+// -*- verilog -*-
+//
+//  USRP - Universal Software Radio Peripheral
+//
+//  Copyright (C) 2003 Matt Ettus
+//
+
+//
+
+
+module cic_interp
+  #(parameter bw = 16, parameter N = 4, parameter log2_of_max_rate = 7)
+    (input clock,
+     input reset,
+     input enable,
+     input [7:0] rate,
+     input strobe_in,
+     input strobe_out,
+     input [bw-1:0] signal_in,
+     output reg [bw-1:0] signal_out);
+   
+   integer 	     i;
+   localparam 	     maxbitgain = (N-1)*log2_of_max_rate;
+
+   wire [bw+maxbitgain-1:0] signal_in_ext;
+   reg [bw+maxbitgain-1:0]  integrator [0:N-1];
+   reg [bw+maxbitgain-1:0]  differentiator [0:N-1];
+   reg [bw+maxbitgain-1:0]  pipeline [0:N-1];
+   
+   sign_extend #(bw,bw+maxbitgain) 
+     ext_input (.in(signal_in),.out(signal_in_ext));
+   
+   //FIXME Note that this section has pipe and diff reversed
+   // It still works, but is confusing
+   always @(posedge clock)
+     if(reset | ~enable)
+       for(i=0;i<N;i=i+1)
+	 integrator[i] <= 0;
+     else if (enable & strobe_out)
+       begin
+	  if(strobe_in)
+	    integrator[0] <= integrator[0] + pipeline[N-1];
+	  for(i=1;i<N;i=i+1)
+	    integrator[i] <= integrator[i] + integrator[i-1];
+       end
+   
+   always @(posedge clock)
+     if(reset | ~enable)
+       begin
+	  for(i=0;i<N;i=i+1)
+	    begin
+	       differentiator[i] <= 0;
+	       pipeline[i] <= 0;
+ 	    end
+       end
+     else if (enable && strobe_in)
+       begin
+	  differentiator[0] <= signal_in_ext;
+	  pipeline[0] <= signal_in_ext - differentiator[0];
+	  for(i=1;i<N;i=i+1)
+	    begin
+	       differentiator[i] <= pipeline[i-1];
+	       pipeline[i] <= pipeline[i-1] - differentiator[i];
+	    end
+       end
+
+   wire [bw-1:0] signal_out_unreg;
+   cic_int_shifter #(bw)
+     cic_int_shifter(rate,integrator[N-1],signal_out_unreg);
+   
+   always @(posedge clock)
+     signal_out <= signal_out_unreg;
+   
+endmodule // cic_interp
+
diff --git a/fpga/usrp3/lib/dsp/cic_strober.v b/fpga/usrp3/lib/dsp/cic_strober.v
new file mode 100644
index 000000000..269b85f75
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/cic_strober.v
@@ -0,0 +1,33 @@
+//
+//  USRP2 - Universal Software Radio Peripheral Mk II
+//
+//  Copyright (C) 2008 Matt Ettus
+//
+
+//
+
+module cic_strober
+  #(parameter WIDTH=8)
+    ( input clock,
+      input reset,
+      input enable,
+      input [WIDTH-1:0] rate, // Rate should EQUAL to your desired divide ratio, no more -1 BS
+      input strobe_fast,
+      output wire strobe_slow );
+   
+   reg [WIDTH-1:0] counter;
+   wire      now = (counter==1);
+   assign    strobe_slow = now && enable && strobe_fast;
+   
+   always @(posedge clock)
+     if(reset)
+       counter <= 0; 
+     else if (~enable)
+       counter <= rate;
+     else if(strobe_fast)
+       if(now)
+	 counter <= rate;
+       else 
+	 counter <= counter - 1;
+   
+endmodule // cic_strober
diff --git a/fpga/usrp3/lib/dsp/clip.v b/fpga/usrp3/lib/dsp/clip.v
new file mode 100644
index 000000000..294c5e8ba
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/clip.v
@@ -0,0 +1,24 @@
+// -*- verilog -*-
+//
+//  USRP - Universal Software Radio Peripheral
+//
+//  Copyright (C) 2008 Matt Ettus
+//
+
+//
+
+// Clipping "macro", keeps the bottom bits
+
+module clip
+  #(parameter bits_in=0,
+    parameter bits_out=0)
+    (input [bits_in-1:0] in,
+     output [bits_out-1:0] out);
+   
+   wire 		   overflow = |in[bits_in-1:bits_out-1] & ~(&in[bits_in-1:bits_out-1]);   
+   assign 		   out = overflow ? 
+			   (in[bits_in-1] ? {1'b1,{(bits_out-1){1'b0}}} : {1'b0,{(bits_out-1){1'b1}}}) :
+			   in[bits_out-1:0];
+   
+endmodule // clip
+
diff --git a/fpga/usrp3/lib/dsp/clip_reg.v b/fpga/usrp3/lib/dsp/clip_reg.v
new file mode 100644
index 000000000..ab9a5b79d
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/clip_reg.v
@@ -0,0 +1,34 @@
+// -*- verilog -*-
+//
+//  USRP - Universal Software Radio Peripheral
+//
+//  Copyright (C) 2008 Matt Ettus
+//
+
+//
+
+// Clipping "macro", keeps the bottom bits
+
+module clip_reg
+  #(parameter bits_in=0,
+    parameter bits_out=0,
+    parameter STROBED=1'b0)
+    (input clk,
+     input [bits_in-1:0] in,
+     output reg [bits_out-1:0] out,
+     input strobe_in,
+     output reg strobe_out);
+   
+   wire [bits_out-1:0] temp;
+
+   clip #(.bits_in(bits_in),.bits_out(bits_out)) clip (.in(in),.out(temp));
+
+   always @(posedge clk)
+     strobe_out <= strobe_in;
+   
+   always @(posedge clk)
+     if(strobe_in | ~STROBED)
+       out <= temp;
+   
+endmodule // clip_reg
+
diff --git a/fpga/usrp3/lib/dsp/cordic_stage.v b/fpga/usrp3/lib/dsp/cordic_stage.v
new file mode 100644
index 000000000..7019ead52
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/cordic_stage.v
@@ -0,0 +1,48 @@
+// -*- verilog -*-
+//
+//  USRP - Universal Software Radio Peripheral
+//
+//  Copyright (C) 2003 Matt Ettus
+//
+
+//
+
+module cordic_stage( clock, reset, enable, xi,yi,zi,constant,xo,yo,zo);
+   parameter bitwidth = 16;
+   parameter zwidth = 16;
+   parameter shift = 1;
+   
+   input     clock;
+   input     reset;
+   input     enable;
+   input [bitwidth-1:0] xi,yi;
+   input [zwidth-1:0] zi;
+   input [zwidth-1:0] constant;
+   output [bitwidth-1:0] xo,yo;
+   output [zwidth-1:0] zo;
+   
+   wire z_is_pos = ~zi[zwidth-1];
+
+   reg [bitwidth-1:0] 	 xo,yo;
+   reg [zwidth-1:0] zo;
+   
+   always @(posedge clock)
+     if(reset)
+       begin
+	  xo <= 0;
+	  yo <= 0;
+	  zo <= 0;
+       end
+     else //if(enable)
+       begin
+	  xo <= z_is_pos ?   
+		xi - {{shift+1{yi[bitwidth-1]}},yi[bitwidth-2:shift]} :
+		xi + {{shift+1{yi[bitwidth-1]}},yi[bitwidth-2:shift]};
+	  yo <= z_is_pos ?   
+		yi + {{shift+1{xi[bitwidth-1]}},xi[bitwidth-2:shift]} :
+		yi - {{shift+1{xi[bitwidth-1]}},xi[bitwidth-2:shift]};
+	  zo <= z_is_pos ?   
+		zi - constant :
+		zi + constant;
+       end
+endmodule
diff --git a/fpga/usrp3/lib/dsp/cordic_z24.v b/fpga/usrp3/lib/dsp/cordic_z24.v
new file mode 100644
index 000000000..d9e983ccd
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/cordic_z24.v
@@ -0,0 +1,112 @@
+// -*- verilog -*-
+//
+//  USRP - Universal Software Radio Peripheral
+//
+//  Copyright (C) 2003, 2007 Matt Ettus
+//
+
+//
+
+module cordic_z24(clock, reset, enable, xi, yi, zi, xo, yo, zo );
+   parameter bitwidth = 16;
+   parameter stages = 19;
+   localparam zwidth = 24;
+   
+   input clock;
+   input reset;
+   input enable;
+   input [bitwidth-1:0] xi, yi;
+   output [bitwidth-1:0] xo, yo;
+   input [zwidth-1:0] zi;
+   output [zwidth-1:0] zo;
+   
+   reg [bitwidth+1:0] 	 x0,y0;
+   reg [zwidth-2:0] 	 z0;
+   wire [bitwidth+1:0] 	 x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16,x17,x18,x19,x20;
+   wire [bitwidth+1:0] 	 y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15,y16,y17,y18,y19,y20;
+   wire [zwidth-2:0] 	 z1,z2,z3,z4,z5,z6,z7,z8,z9,z10,z11,z12,z13,z14,z15,z16,z17,z18,z19,z20;
+   
+   wire [bitwidth+1:0] xi_ext = {{2{xi[bitwidth-1]}},xi};
+   wire [bitwidth+1:0] yi_ext = {{2{yi[bitwidth-1]}},yi};
+
+   // Compute consts.  Would be easier if vlog had atan...
+   // see gen_cordic_consts.py
+
+   // constants for 24 bit wide phase
+   localparam 	       c00 = 23'd2097152;
+   localparam 	       c01 = 23'd1238021;
+   localparam 	       c02 = 23'd654136;
+   localparam 	       c03 = 23'd332050;
+   localparam 	       c04 = 23'd166669;
+   localparam 	       c05 = 23'd83416;
+   localparam 	       c06 = 23'd41718;
+   localparam 	       c07 = 23'd20860;
+   localparam 	       c08 = 23'd10430;
+   localparam 	       c09 = 23'd5215;
+   localparam 	       c10 = 23'd2608;
+   localparam 	       c11 = 23'd1304;
+   localparam 	       c12 = 23'd652;
+   localparam 	       c13 = 23'd326;
+   localparam 	       c14 = 23'd163;
+   localparam 	       c15 = 23'd81;
+   localparam 	       c16 = 23'd41;
+   localparam 	       c17 = 23'd20;
+   localparam 	       c18 = 23'd10;
+   localparam 	       c19 = 23'd5;
+   localparam 	       c20 = 23'd3;
+   localparam 	       c21 = 23'd1;
+   localparam 	       c22 = 23'd1;
+   localparam 	       c23 = 23'd0;
+
+   always @(posedge clock)
+     if(reset)
+       begin
+	  x0   <= 0; y0   <= 0;  z0   <= 0;
+       end
+     else// if(enable)
+       begin
+	  z0 <= zi[zwidth-2:0];
+	  case (zi[zwidth-1:zwidth-2])
+	    2'b00, 2'b11 : 
+	      begin
+		 x0 <= xi_ext;
+		 y0 <= yi_ext;
+	      end
+	    2'b01, 2'b10 :
+	      begin
+		 x0 <= -xi_ext;
+		 y0 <= -yi_ext;
+	      end
+	  endcase // case(zi[zwidth-1:zwidth-2])
+       end // else: !if(reset)
+   
+   // FIXME need to handle variable number of stages
+   // This would be easier if arrays worked better in vlog...
+   
+   cordic_stage #(bitwidth+2,zwidth-1,0) cordic_stage0 (clock,reset,enable,x0,y0,z0,c00,x1,y1,z1);
+   cordic_stage #(bitwidth+2,zwidth-1,1) cordic_stage1 (clock,reset,enable,x1,y1,z1,c01,x2,y2,z2);
+   cordic_stage #(bitwidth+2,zwidth-1,2) cordic_stage2 (clock,reset,enable,x2,y2,z2,c02,x3,y3,z3);
+   cordic_stage #(bitwidth+2,zwidth-1,3) cordic_stage3 (clock,reset,enable,x3,y3,z3,c03,x4,y4,z4);
+   cordic_stage #(bitwidth+2,zwidth-1,4) cordic_stage4 (clock,reset,enable,x4,y4,z4,c04,x5,y5,z5);
+   cordic_stage #(bitwidth+2,zwidth-1,5) cordic_stage5 (clock,reset,enable,x5,y5,z5,c05,x6,y6,z6);
+   cordic_stage #(bitwidth+2,zwidth-1,6) cordic_stage6 (clock,reset,enable,x6,y6,z6,c06,x7,y7,z7);
+   cordic_stage #(bitwidth+2,zwidth-1,7) cordic_stage7 (clock,reset,enable,x7,y7,z7,c07,x8,y8,z8);
+   cordic_stage #(bitwidth+2,zwidth-1,8) cordic_stage8 (clock,reset,enable,x8,y8,z8,c08,x9,y9,z9);
+   cordic_stage #(bitwidth+2,zwidth-1,9) cordic_stage9 (clock,reset,enable,x9,y9,z9,c09,x10,y10,z10);
+   cordic_stage #(bitwidth+2,zwidth-1,10) cordic_stage10 (clock,reset,enable,x10,y10,z10,c10,x11,y11,z11);
+   cordic_stage #(bitwidth+2,zwidth-1,11) cordic_stage11 (clock,reset,enable,x11,y11,z11,c11,x12,y12,z12);
+   cordic_stage #(bitwidth+2,zwidth-1,12) cordic_stage12 (clock,reset,enable,x12,y12,z12,c12,x13,y13,z13);
+   cordic_stage #(bitwidth+2,zwidth-1,13) cordic_stage13 (clock,reset,enable,x13,y13,z13,c13,x14,y14,z14);
+   cordic_stage #(bitwidth+2,zwidth-1,14) cordic_stage14 (clock,reset,enable,x14,y14,z14,c14,x15,y15,z15);
+   cordic_stage #(bitwidth+2,zwidth-1,15) cordic_stage15 (clock,reset,enable,x15,y15,z15,c15,x16,y16,z16);
+   cordic_stage #(bitwidth+2,zwidth-1,16) cordic_stage16 (clock,reset,enable,x16,y16,z16,c16,x17,y17,z17);
+   cordic_stage #(bitwidth+2,zwidth-1,17) cordic_stage17 (clock,reset,enable,x17,y17,z17,c17,x18,y18,z18);
+   cordic_stage #(bitwidth+2,zwidth-1,18) cordic_stage18 (clock,reset,enable,x18,y18,z18,c18,x19,y19,z19);
+   cordic_stage #(bitwidth+2,zwidth-1,19) cordic_stage19 (clock,reset,enable,x19,y19,z19,c19,x20,y20,z20);
+
+   assign xo = x20[bitwidth:1];  
+   assign yo = y20[bitwidth:1];
+   assign zo = z20;		  
+
+endmodule // cordic
+
diff --git a/fpga/usrp3/lib/dsp/ddc_chain.v b/fpga/usrp3/lib/dsp/ddc_chain.v
new file mode 100644
index 000000000..365175bf1
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/ddc_chain.v
@@ -0,0 +1,164 @@
+//
+// Copyright 2011-2013 Ettus Research LLC
+//
+
+
+//! The USRP digital down-conversion chain
+
+module ddc_chain
+  #(
+    parameter BASE = 0,
+    parameter DSPNO = 0,
+    parameter WIDTH = 24
+  )
+  (input clk, input rst, input clr,
+   input set_stb, input [7:0] set_addr, input [31:0] set_data,
+
+   // From RX frontend
+   input [WIDTH-1:0] rx_fe_i,
+   input [WIDTH-1:0] rx_fe_q,
+
+   // To RX control
+   output [31:0] sample,
+   input run,
+   output strobe,
+   output [31:0] debug
+   );
+
+   localparam  cwidth = 25;
+   localparam  zwidth = 24;
+
+   wire [31:0] phase_inc;
+   reg [31:0]  phase;
+
+   wire [17:0] scale_factor;
+   wire [cwidth-1:0] i_cordic, q_cordic;
+   wire [WIDTH-1:0] i_cordic_clip, q_cordic_clip;
+   wire [WIDTH-1:0] i_cic, q_cic;
+   wire [WIDTH-1:0] i_hb1, q_hb1;
+   wire [WIDTH-1:0] i_hb2, q_hb2;
+   
+   wire        strobe_cic, strobe_hb1, strobe_hb2;
+   wire        enable_hb1, enable_hb2;
+   wire [7:0]  cic_decim_rate;
+
+   reg [WIDTH-1:0]  rx_fe_i_mux, rx_fe_q_mux;
+   wire        realmode;
+   wire        swap_iq;
+   
+   setting_reg #(.my_addr(BASE+0)) sr_0
+     (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr),
+      .in(set_data),.out(phase_inc),.changed());
+
+   setting_reg #(.my_addr(BASE+1), .width(18)) sr_1
+     (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr),
+      .in(set_data),.out(scale_factor),.changed());
+
+   setting_reg #(.my_addr(BASE+2), .width(10)) sr_2
+     (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr),
+      .in(set_data),.out({enable_hb1, enable_hb2, cic_decim_rate}),.changed());
+
+   setting_reg #(.my_addr(BASE+3), .width(2)) sr_3
+     (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr),
+      .in(set_data),.out({realmode,swap_iq}),.changed());
+
+   // MUX so we can do realmode signals on either input
+   
+   always @(posedge clk)
+     if(swap_iq)
+       begin
+	  rx_fe_i_mux <= rx_fe_q;
+	  rx_fe_q_mux <= realmode ? 0 : rx_fe_i;
+       end
+     else
+       begin
+	  rx_fe_i_mux <= rx_fe_i;
+	  rx_fe_q_mux <= realmode ? 0 : rx_fe_q;
+       end
+
+   // NCO
+   always @(posedge clk)
+     if(rst)
+       phase <= 0;
+     else if(~run)
+       phase <= 0;
+     else
+       phase <= phase + phase_inc;
+
+   //sign extension of cordic input
+   wire [cwidth-1:0] to_cordic_i, to_cordic_q;
+   sign_extend #(.bits_in(WIDTH), .bits_out(cwidth)) sign_extend_cordic_i (.in(rx_fe_i_mux), .out(to_cordic_i));
+   sign_extend #(.bits_in(WIDTH), .bits_out(cwidth)) sign_extend_cordic_q (.in(rx_fe_q_mux), .out(to_cordic_q));
+
+   // CORDIC  24-bit I/O
+   cordic_z24 #(.bitwidth(cwidth))
+     cordic(.clock(clk), .reset(rst), .enable(run),
+	    .xi(to_cordic_i),. yi(to_cordic_q), .zi(phase[31:32-zwidth]),
+	    .xo(i_cordic),.yo(q_cordic),.zo() );
+
+   clip_reg #(.bits_in(cwidth), .bits_out(WIDTH)) clip_i
+     (.clk(clk), .in(i_cordic), .strobe_in(1'b1), .out(i_cordic_clip));
+   clip_reg #(.bits_in(cwidth), .bits_out(WIDTH)) clip_q
+     (.clk(clk), .in(q_cordic), .strobe_in(1'b1), .out(q_cordic_clip));
+
+   // CIC decimator  24 bit I/O
+   cic_strober cic_strober(.clock(clk),.reset(rst),.enable(run),.rate(cic_decim_rate),
+			   .strobe_fast(1),.strobe_slow(strobe_cic) );
+
+   cic_decim #(.bw(WIDTH))
+     decim_i (.clock(clk),.reset(rst),.enable(run),
+	      .rate(cic_decim_rate),.strobe_in(1'b1),.strobe_out(strobe_cic),
+	      .signal_in(i_cordic_clip),.signal_out(i_cic));
+   
+   cic_decim #(.bw(WIDTH))
+     decim_q (.clock(clk),.reset(rst),.enable(run),
+	      .rate(cic_decim_rate),.strobe_in(1'b1),.strobe_out(strobe_cic),
+	      .signal_in(q_cordic_clip),.signal_out(q_cic));
+
+   // First (small) halfband  24 bit I/O
+   small_hb_dec #(.WIDTH(WIDTH)) small_hb_i
+     (.clk(clk),.rst(rst),.bypass(~enable_hb1),.run(run),
+      .stb_in(strobe_cic),.data_in(i_cic),.stb_out(strobe_hb1),.data_out(i_hb1));
+   
+   small_hb_dec #(.WIDTH(WIDTH)) small_hb_q
+     (.clk(clk),.rst(rst),.bypass(~enable_hb1),.run(run),
+      .stb_in(strobe_cic),.data_in(q_cic),.stb_out(),.data_out(q_hb1));
+
+   // Second (large) halfband  24 bit I/O
+   wire [8:0]  cpi_hb = enable_hb1 ? {cic_decim_rate,1'b0} : {1'b0,cic_decim_rate};
+   hb_dec #(.WIDTH(WIDTH)) hb_i
+     (.clk(clk),.rst(rst),.bypass(~enable_hb2),.run(run),.cpi(cpi_hb),
+      .stb_in(strobe_hb1),.data_in(i_hb1),.stb_out(strobe_hb2),.data_out(i_hb2));
+
+   hb_dec #(.WIDTH(WIDTH)) hb_q
+     (.clk(clk),.rst(rst),.bypass(~enable_hb2),.run(run),.cpi(cpi_hb),
+      .stb_in(strobe_hb1),.data_in(q_hb1),.stb_out(),.data_out(q_hb2));
+
+   //scalar operation (gain of 6 bits)
+   wire [35:0] prod_i, prod_q;
+
+   MULT18X18S mult_i
+     (.P(prod_i), .A(i_hb2[WIDTH-1:WIDTH-18]), .B(scale_factor), .C(clk), .CE(strobe_hb2), .R(rst) );
+   MULT18X18S mult_q
+     (.P(prod_q), .A(q_hb2[WIDTH-1:WIDTH-18]), .B(scale_factor), .C(clk), .CE(strobe_hb2), .R(rst) );
+
+   //pipeline for the multiplier (gain of 10 bits)
+   reg [WIDTH-1:0] prod_reg_i, prod_reg_q;
+   reg strobe_mult;
+
+   always @(posedge clk) begin
+       strobe_mult <= strobe_hb2;
+       prod_reg_i <= prod_i[33:34-WIDTH];
+       prod_reg_q <= prod_q[33:34-WIDTH];
+   end
+
+   // Round final answer to 16 bits
+   round_sd #(.WIDTH_IN(WIDTH),.WIDTH_OUT(16)) round_i
+     (.clk(clk),.reset(rst), .in(prod_reg_i),.strobe_in(strobe_mult), .out(sample[31:16]), .strobe_out(strobe));
+
+   round_sd #(.WIDTH_IN(WIDTH),.WIDTH_OUT(16)) round_q
+     (.clk(clk),.reset(rst), .in(prod_reg_q),.strobe_in(strobe_mult), .out(sample[15:0]), .strobe_out());
+
+   assign debug = {enable_hb1, enable_hb2, run, strobe, strobe_cic, strobe_hb1, strobe_hb2};
+   
+endmodule // ddc_chain
diff --git a/fpga/usrp3/lib/dsp/duc_chain.v b/fpga/usrp3/lib/dsp/duc_chain.v
new file mode 100644
index 000000000..06c87bd34
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/duc_chain.v
@@ -0,0 +1,146 @@
+//
+// Copyright 2011-2013 Ettus Research LLC
+//
+
+
+//! The USRP digital up-conversion chain
+
+module duc_chain
+  #(
+    parameter BASE = 0,
+    parameter DSPNO = 0,
+    parameter WIDTH = 24
+  )
+  (input clk, input rst, input clr,
+   input set_stb, input [7:0] set_addr, input [31:0] set_data,
+
+   // To TX frontend
+   output [WIDTH-1:0] tx_fe_i,
+   output [WIDTH-1:0] tx_fe_q,
+
+   // From TX control
+   input [31:0] sample,
+   input run,
+   output strobe,
+   output [31:0] debug
+   );
+
+   wire [17:0] scale_factor;
+   wire [31:0] phase_inc;
+   reg [31:0]  phase;
+   wire [7:0]  interp_rate;
+   wire [3:0]  tx_femux_a, tx_femux_b;
+   wire        enable_hb1, enable_hb2;
+   wire        rate_change;
+   
+   setting_reg #(.my_addr(BASE+0)) sr_0
+     (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr),
+      .in(set_data),.out(phase_inc),.changed());
+
+   setting_reg #(.my_addr(BASE+1), .width(18)) sr_1
+     (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr),
+      .in(set_data),.out(scale_factor),.changed());
+   
+   setting_reg #(.my_addr(BASE+2), .width(10)) sr_2
+     (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr),
+      .in(set_data),.out({enable_hb1, enable_hb2, interp_rate}),.changed(rate_change));
+
+   // Strobes are all now delayed by 1 cycle for timing reasons
+   wire        strobe_cic_pre, strobe_hb1_pre, strobe_hb2_pre;
+   reg 	       strobe_cic = 1;
+   reg 	       strobe_hb1 = 1;
+   reg 	       strobe_hb2 = 1;
+
+  assign strobe = strobe_hb1;
+   
+   cic_strober #(.WIDTH(8))
+     cic_strober(.clock(clk),.reset(rst),.enable(run & ~rate_change),.rate(interp_rate),
+		 .strobe_fast(1),.strobe_slow(strobe_cic_pre) );
+   cic_strober #(.WIDTH(2))
+     hb2_strober(.clock(clk),.reset(rst),.enable(run & ~rate_change),.rate(enable_hb2 ? 2 : 1),
+		 .strobe_fast(strobe_cic_pre),.strobe_slow(strobe_hb2_pre) );
+   cic_strober #(.WIDTH(2))
+     hb1_strober(.clock(clk),.reset(rst),.enable(run & ~rate_change),.rate(enable_hb1 ? 2 : 1),
+		 .strobe_fast(strobe_hb2_pre),.strobe_slow(strobe_hb1_pre) );
+   
+   always @(posedge clk) strobe_hb1 <= strobe_hb1_pre;
+   always @(posedge clk) strobe_hb2 <= strobe_hb2_pre;
+   always @(posedge clk) strobe_cic <= strobe_cic_pre;
+
+   // NCO
+   always @(posedge clk)
+     if(rst)
+       phase <= 0;
+     else if(~run)
+       phase <= 0;
+     else
+       phase <= phase + phase_inc;
+   
+   wire        signed [17:0] da, db;
+   wire        signed [35:0] prod_i, prod_q;
+
+   assign tx_fe_i = prod_i[33:34-WIDTH];
+   assign tx_fe_q = prod_q[33:34-WIDTH];
+
+   wire [17:0] i_interp, q_interp;
+
+   wire [17:0] hb1_i, hb1_q, hb2_i, hb2_q;
+
+   wire [7:0]  cpo = enable_hb2 ? ({interp_rate,1'b0}) : interp_rate;
+   // Note that max CIC rate is 128, which would give an overflow on cpo if enable_hb2 is true,
+   //   but the default case inside hb_interp handles this
+   
+   hb_interp #(.IWIDTH(18),.OWIDTH(18),.ACCWIDTH(WIDTH)) hb_interp_i
+     (.clk(clk),.rst(rst),.bypass(~enable_hb1),.cpo(cpo),.stb_in(strobe_hb1),.data_in({sample[31:16], 2'b0}),.stb_out(strobe_hb2),.data_out(hb1_i));
+   hb_interp #(.IWIDTH(18),.OWIDTH(18),.ACCWIDTH(WIDTH)) hb_interp_q
+     (.clk(clk),.rst(rst),.bypass(~enable_hb1),.cpo(cpo),.stb_in(strobe_hb1),.data_in({sample[15:0], 2'b0}),.stb_out(strobe_hb2),.data_out(hb1_q));
+   
+   small_hb_int #(.WIDTH(18)) small_hb_interp_i
+     (.clk(clk),.rst(rst),.bypass(~enable_hb2),.stb_in(strobe_hb2),.data_in(hb1_i),
+      .output_rate(interp_rate),.stb_out(strobe_cic),.data_out(hb2_i));
+   small_hb_int #(.WIDTH(18)) small_hb_interp_q
+     (.clk(clk),.rst(rst),.bypass(~enable_hb2),.stb_in(strobe_hb2),.data_in(hb1_q),
+      .output_rate(interp_rate),.stb_out(strobe_cic),.data_out(hb2_q));
+   
+   cic_interp  #(.bw(18),.N(4),.log2_of_max_rate(7))
+     cic_interp_i(.clock(clk),.reset(rst),.enable(run & ~rate_change),.rate(interp_rate),
+		  .strobe_in(strobe_cic),.strobe_out(1),
+		  .signal_in(hb2_i),.signal_out(i_interp));
+   
+   cic_interp  #(.bw(18),.N(4),.log2_of_max_rate(7))
+     cic_interp_q(.clock(clk),.reset(rst),.enable(run & ~rate_change),.rate(interp_rate),
+		  .strobe_in(strobe_cic),.strobe_out(1),
+		  .signal_in(hb2_q),.signal_out(q_interp));
+
+   localparam  cwidth = WIDTH;  // was 18
+   localparam  zwidth = 24;  // was 16
+
+   wire [cwidth-1:0] da_c, db_c;
+   
+   cordic_z24 #(.bitwidth(cwidth))
+     cordic(.clock(clk), .reset(rst), .enable(run),
+	    .xi({i_interp,{(cwidth-18){1'b0}}}),.yi({q_interp,{(cwidth-18){1'b0}}}),
+	    .zi(phase[31:32-zwidth]),
+	    .xo(da_c),.yo(db_c),.zo() );
+   
+   MULT18X18S MULT18X18S_inst 
+     (.P(prod_i),    // 36-bit multiplier output
+      .A(da_c[cwidth-1:cwidth-18]),    // 18-bit multiplier input
+      .B(scale_factor),    // 18-bit multiplier input
+      .C(clk),    // Clock input
+      .CE(1),  // Clock enable input
+      .R(rst)     // Synchronous reset input
+      );
+   
+   MULT18X18S MULT18X18S_inst_2 
+     (.P(prod_q),    // 36-bit multiplier output
+      .A(db_c[cwidth-1:cwidth-18]),    // 18-bit multiplier input
+      .B(scale_factor),    // 18-bit multiplier input
+      .C(clk),    // Clock input
+      .CE(1),  // Clock enable input
+      .R(rst)     // Synchronous reset input
+      );
+
+   assign      debug = {strobe_cic, strobe_hb1, strobe_hb2,run};
+
+endmodule // duc_chain
diff --git a/fpga/usrp3/lib/dsp/hb_dec.v b/fpga/usrp3/lib/dsp/hb_dec.v
new file mode 100644
index 000000000..65c771ca7
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/hb_dec.v
@@ -0,0 +1,177 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+
+
+// Final halfband decimator 
+// Implements impulse responses of the form [A 0 B 0 C .. 0 H 0.5 H 0 .. C 0 B 0 A]
+// Strobe in cannot come faster than every 2nd clock cycle
+// These taps designed by halfgen4 from ldoolittle
+// myfilt = round(2^18 * halfgen4(.7/4,8))
+
+module hb_dec
+  #(parameter WIDTH=24)
+    (input clk,
+     input rst,
+     input bypass,
+     input run,
+     input [8:0] cpi,  // Clocks per input -- equal to the decimation ratio ahead of this block
+     input stb_in,
+     input [WIDTH-1:0] data_in,
+     output reg stb_out,
+     output reg [WIDTH-1:0] data_out);
+
+   localparam INTWIDTH = 17;
+   localparam ACCWIDTH = WIDTH + 3;
+   
+   // Round off inputs to 17 bits because of 18 bit multipliers
+   wire [INTWIDTH-1:0] 	     data_rnd;
+   wire 		     stb_rnd;
+   
+   round_sd #(.WIDTH_IN(WIDTH),.WIDTH_OUT(INTWIDTH)) round_in
+     (.clk(clk),.reset(rst),.in(data_in),.strobe_in(stb_in),.out(data_rnd),.strobe_out(stb_rnd));
+   
+   // Control
+   reg [3:0] 		addr_odd_a, addr_odd_b, addr_odd_c, addr_odd_d;
+   wire 		write_odd, write_even, do_mult;
+   reg 			odd;
+   reg [2:0] 		phase, phase_d1;
+   reg 			stb_out_int;
+   wire 		clear, do_acc;
+   assign 		do_mult = 1;
+   
+   always @(posedge clk)
+     if(rst | ~run)
+       odd <= 0;
+     else if(stb_rnd)
+       odd <= ~odd;
+
+   assign 		write_odd = stb_rnd & odd;
+   assign 		write_even = stb_rnd & ~odd;
+
+   always @(posedge clk)
+     if(rst | ~run)
+       phase <= 0;
+     else if(stb_rnd & odd)
+       phase <= 1;
+     else if(phase == 4)
+       phase <= 0;
+     else if(phase != 0)
+       phase <= phase + 1;
+
+   always @(posedge clk)
+     phase_d1 <= phase;
+   
+   reg [15:0] 		stb_out_pre;
+   always @(posedge clk)
+     if(rst)
+       stb_out_pre <= 0;
+     else
+       stb_out_pre <= {stb_out_pre[14:0],(stb_rnd & odd)};
+
+   always @*
+     case(phase)
+       1 : begin addr_odd_a = 0; addr_odd_b = 15; end
+       2 : begin addr_odd_a = 1; addr_odd_b = 14; end
+       3 : begin addr_odd_a = 2; addr_odd_b = 13; end
+       4 : begin addr_odd_a = 3; addr_odd_b = 12; end
+       default : begin addr_odd_a = 0; addr_odd_b = 15; end
+     endcase // case(phase)
+
+   always @*
+     case(phase)
+       1 : begin addr_odd_c = 4; addr_odd_d = 11; end
+       2 : begin addr_odd_c = 5; addr_odd_d = 10; end
+       3 : begin addr_odd_c = 6; addr_odd_d = 9; end
+       4 : begin addr_odd_c = 7; addr_odd_d = 8; end
+       default : begin addr_odd_c = 4; addr_odd_d = 11; end
+     endcase // case(phase)
+
+   assign do_acc = |stb_out_pre[6:3];
+   assign clear = stb_out_pre[3];
+   
+   // Data
+   wire [INTWIDTH-1:0] data_odd_a, data_odd_b, data_odd_c, data_odd_d;
+   reg [INTWIDTH:0]    sum1, sum2;    // these are 18-bit inputs to mult
+   reg [WIDTH:0]       final_sum;
+   wire [WIDTH-1:0]    final_sum_clip;
+   reg [17:0] 	       coeff1, coeff2;
+   wire [35:0] 	       prod1, prod2;
+
+   always @*            // Outer coeffs
+     case(phase_d1)
+       1 : coeff1 = -107;
+       2 : coeff1 = 445;
+       3 : coeff1 = -1271;
+       4 : coeff1 = 2959;
+       default : coeff1 = -107;
+     endcase // case(phase)
+   
+   always @*            //  Inner coeffs
+     case(phase_d1)
+       1 : coeff2 = -6107;
+       2 : coeff2 = 11953;
+       3 : coeff2 = -24706;
+       4 : coeff2 = 82359;
+       default : coeff2 = -6107;
+     endcase // case(phase)
+   
+   srl #(.WIDTH(INTWIDTH)) srl_odd_a
+     (.clk(clk),.write(write_odd),.in(data_rnd),.addr(addr_odd_a),.out(data_odd_a));
+   srl #(.WIDTH(INTWIDTH)) srl_odd_b
+     (.clk(clk),.write(write_odd),.in(data_rnd),.addr(addr_odd_b),.out(data_odd_b));
+   srl #(.WIDTH(INTWIDTH)) srl_odd_c
+     (.clk(clk),.write(write_odd),.in(data_rnd),.addr(addr_odd_c),.out(data_odd_c));
+   srl #(.WIDTH(INTWIDTH)) srl_odd_d
+     (.clk(clk),.write(write_odd),.in(data_rnd),.addr(addr_odd_d),.out(data_odd_d));
+
+   always @(posedge clk) sum1 <= {data_odd_a[INTWIDTH-1],data_odd_a} + {data_odd_b[INTWIDTH-1],data_odd_b};
+   always @(posedge clk) sum2 <= {data_odd_c[INTWIDTH-1],data_odd_c} + {data_odd_d[INTWIDTH-1],data_odd_d};
+   
+   wire [INTWIDTH-1:0] data_even;
+   reg [3:0] 	     addr_even;
+
+   always @(posedge clk)
+     case(cpi)
+       //  1 is an error
+       2 : addr_even <= 9;  // Maximum speed (overall decim by 4)
+       3, 4, 5, 6, 7 : addr_even <= 8;
+       default : addr_even <= 7;
+     endcase // case(cpi)
+   
+   srl #(.WIDTH(INTWIDTH)) srl_even
+     (.clk(clk),.write(write_even),.in(data_rnd),.addr(addr_even),.out(data_even));
+
+   MULT18X18S mult1(.C(clk), .CE(do_mult), .R(rst), .P(prod1), .A(coeff1), .B(sum1) );
+   MULT18X18S mult2(.C(clk), .CE(do_mult), .R(rst), .P(prod2), .A(coeff2), .B(sum2) );
+
+   reg [35:0] 	     sum_of_prod;
+   always @(posedge clk) sum_of_prod <= prod1 + prod2;   // Can't overflow
+   
+   wire [ACCWIDTH-1:0] 	acc_out;
+   acc #(.IWIDTH(ACCWIDTH-2),.OWIDTH(ACCWIDTH))
+     acc (.clk(clk),.clear(clear),.acc(do_acc),.in(sum_of_prod[35:38-ACCWIDTH]),.out(acc_out));
+
+   wire [ACCWIDTH-1:0] 	data_even_signext;
+
+   localparam SHIFT_FACTOR = 6;
+   
+   sign_extend #(.bits_in(INTWIDTH),.bits_out(ACCWIDTH-SHIFT_FACTOR)) signext_data_even 
+     (.in(data_even),.out(data_even_signext[ACCWIDTH-1:SHIFT_FACTOR]));
+   assign 		data_even_signext[SHIFT_FACTOR-1:0] = 0;
+
+   always @(posedge clk) final_sum <= acc_out + data_even_signext;
+   
+   clip #(.bits_in(WIDTH+1), .bits_out(WIDTH)) clip (.in(final_sum), .out(final_sum_clip));
+   
+   // Output MUX to allow for bypass
+   wire 		selected_stb = bypass ? stb_in : stb_out_pre[8];
+   
+   always @(posedge clk)
+     begin
+	stb_out  <= selected_stb;
+	if(selected_stb)
+	  data_out <= bypass ? data_in : final_sum_clip;
+     end
+   
+endmodule // hb_dec
diff --git a/fpga/usrp3/lib/dsp/hb_interp.v b/fpga/usrp3/lib/dsp/hb_interp.v
new file mode 100644
index 000000000..c0914255c
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/hb_interp.v
@@ -0,0 +1,165 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+
+
+// First halfband iterpolator 
+// Implements impulse responses of the form [A 0 B 0 C .. 0 H 0.5 H 0 .. C 0 B 0 A]
+// Strobe in cannot come faster than every 4th clock cycle, 
+// Strobe out cannot come faster than every 2nd clock cycle
+
+// These taps designed by halfgen4 from ldoolittle
+// myfilt = round(2^18 * halfgen4(.7/4,8))
+
+module hb_interp
+  #(parameter IWIDTH=18, OWIDTH=18, ACCWIDTH=24)
+    (input clk,
+     input rst,
+     input bypass,
+     input [7:0] cpo, //  Clocks per output, must be at least 2
+     input stb_in,
+     input [IWIDTH-1:0] data_in,
+     input stb_out,
+     output reg [OWIDTH-1:0] data_out);
+
+   localparam MWIDTH = ACCWIDTH-2;
+   localparam CWIDTH = 18;
+
+   reg [CWIDTH-1:0] coeff1, coeff2;
+   reg [3:0] 	    addr_a, addr_b, addr_c, addr_d, addr_e;
+   wire [IWIDTH-1:0] data_a, data_b, data_c, data_d, data_e, sum1, sum2;
+   wire [35:0] 	     prod1, prod2;
+
+   reg [2:0] 	     phase, phase_d1, phase_d2, phase_d3, phase_d4, phase_d5;
+
+   always @(posedge clk)
+     if(rst)
+       phase <= 0;
+     else
+       if(stb_in)
+	 phase <= 1;
+       else if(phase==4)
+	 phase <= 0;
+       else if(phase!=0)
+	 phase <= phase + 1;
+   always @(posedge clk) phase_d1 <= phase;
+   always @(posedge clk) phase_d2 <= phase_d1;
+   always @(posedge clk) phase_d3 <= phase_d2;
+   always @(posedge clk) phase_d4 <= phase_d3;
+   always @(posedge clk) phase_d5 <= phase_d4;     
+   
+   srl #(.WIDTH(IWIDTH)) srl_a
+     (.clk(clk),.rst(rst),.write(stb_in),.in(data_in),.addr(addr_a),.out(data_a));
+   srl #(.WIDTH(IWIDTH)) srl_b
+     (.clk(clk),.rst(rst),.write(stb_in),.in(data_in),.addr(addr_b),.out(data_b));
+   srl #(.WIDTH(IWIDTH)) srl_c
+     (.clk(clk),.rst(rst),.write(stb_in),.in(data_in),.addr(addr_c),.out(data_c));
+   srl #(.WIDTH(IWIDTH)) srl_d
+     (.clk(clk),.rst(rst),.write(stb_in),.in(data_in),.addr(addr_d),.out(data_d));
+   srl #(.WIDTH(IWIDTH)) srl_e
+     (.clk(clk),.rst(rst),.write(stb_in),.in(data_in),.addr(addr_e),.out(data_e));
+
+   always @*
+     case(phase)
+       1 : begin addr_a = 0; addr_b = 15; end
+       2 : begin addr_a = 1; addr_b = 14; end
+       3 : begin addr_a = 2; addr_b = 13; end
+       4 : begin addr_a = 3; addr_b = 12; end
+       default : begin addr_a = 0; addr_b = 15; end
+     endcase // case(phase)
+
+   always @*
+     case(phase)
+       1 : begin addr_c = 4; addr_d = 11; end
+       2 : begin addr_c = 5; addr_d = 10; end
+       3 : begin addr_c = 6; addr_d = 9; end
+       4 : begin addr_c = 7; addr_d = 8; end
+       default : begin addr_c = 4; addr_d = 11; end
+     endcase // case(phase)
+
+   always @*
+     case(cpo)
+       2 : addr_e <= 9;
+       3,4,5,6,7,8 : addr_e <= 8;
+       default : addr_e <= 7;  // This case works for 256, which = 0 due to overflow outside this block
+     endcase // case(cpo)
+   
+   always @*            // Outer coeffs
+     case(phase_d1)
+       1 : coeff1 = -107;
+       2 : coeff1 = 445;
+       3 : coeff1 = -1271;
+       4 : coeff1 = 2959;
+       default : coeff1 = -107;
+     endcase // case(phase)
+   
+   always @*            //  Inner coeffs
+     case(phase_d1)
+       1 : coeff2 = -6107;
+       2 : coeff2 = 11953;
+       3 : coeff2 = -24706;
+       4 : coeff2 = 82359;
+       default : coeff2 = -6107;
+     endcase // case(phase)
+
+   add2_reg /*_and_round_reg*/ #(.WIDTH(IWIDTH)) add1 (.clk(clk),.in1(data_a),.in2(data_b),.sum(sum1));
+   add2_reg /*_and_round_reg*/ #(.WIDTH(IWIDTH)) add2 (.clk(clk),.in1(data_c),.in2(data_d),.sum(sum2));
+   // sum1, sum2 available on phase_d1
+
+   wire do_mult = 1;
+   MULT18X18S mult1(.C(clk), .CE(do_mult), .R(rst), .P(prod1), .A(coeff1), .B(sum1) );
+   MULT18X18S mult2(.C(clk), .CE(do_mult), .R(rst), .P(prod2), .A(coeff2), .B(sum2) );
+   // prod1, prod2 available on phase_d2
+   
+   wire [MWIDTH-1:0] sum_of_prod;
+   
+   add2_and_round_reg #(.WIDTH(MWIDTH)) 
+     add3 (.clk(clk),.in1(prod1[35:36-MWIDTH]),.in2(prod2[35:36-MWIDTH]),.sum(sum_of_prod));
+   // sum_of_prod available on phase_d3
+   
+   wire [ACCWIDTH-1:0] acc_out;
+ 
+   wire 	       clear = (phase_d3 == 1);
+   wire 	       do_acc = (phase_d3 != 0);
+   
+   acc #(.IWIDTH(MWIDTH),.OWIDTH(ACCWIDTH)) //IJB rst
+     acc (.clk(clk),.clear(rst|clear),.acc(do_acc),.in(sum_of_prod),.out(acc_out));   
+   // acc_out available on phase_d4
+   
+   wire [ACCWIDTH-6:0] clipped_acc;
+   clip #(.bits_in(ACCWIDTH),.bits_out(ACCWIDTH-5)) final_clip(.in(acc_out),.out(clipped_acc));
+   
+   reg [ACCWIDTH-6:0]   clipped_reg;
+   always @(posedge clk)
+     if (rst)
+       clipped_reg <= 0;
+     else  if(phase_d4 == 4)
+       clipped_reg <= clipped_acc;
+   // clipped_reg available on phase_d5
+   
+   wire [OWIDTH-1:0]   data_out_round;
+   round #(.bits_in(ACCWIDTH-5),.bits_out(OWIDTH)) final_round (.in(clipped_reg),.out(data_out_round));
+
+   reg 		      odd;
+   always @(posedge clk)
+     if(rst)
+       odd <= 0;
+     else if(stb_in)
+       odd <= 0;
+     else if(stb_out)
+       odd <= 1;
+
+   always @(posedge clk)
+     if (rst)
+       data_out <= 0;
+     else if(bypass)
+       data_out <= data_in;
+     else if(stb_out)
+       if(odd)
+	 data_out <= data_e;
+       else
+	 data_out <= data_out_round;
+
+   // data_out available on phase_d6
+   
+endmodule // hb_interp
diff --git a/fpga/usrp3/lib/dsp/round.v b/fpga/usrp3/lib/dsp/round.v
new file mode 100644
index 000000000..7ecc10ff7
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/round.v
@@ -0,0 +1,47 @@
+// -*- verilog -*-
+//
+//  USRP - Universal Software Radio Peripheral
+//
+//  Copyright (C) 2011 Matt Ettus
+//
+
+//
+
+// Rounding "macro"
+// Keeps the topmost bits, does proper 2s comp round to zero (unbiased truncation)
+
+module round
+  #(parameter bits_in=0,
+    parameter bits_out=0,
+    parameter round_to_zero=0,       // original behavior
+    parameter round_to_nearest=1,    // lowest noise
+    parameter trunc=0)               // round to negative infinity
+    (input [bits_in-1:0] in,
+     output [bits_out-1:0] out,
+     output [bits_in-bits_out:0] err);
+
+   wire 			 round_corr,round_corr_trunc,round_corr_rtz,round_corr_nearest,round_corr_nearest_safe;
+   
+   assign 			 round_corr_trunc = 0;
+   assign 			 round_corr_rtz = (in[bits_in-1] & |in[bits_in-bits_out-1:0]);
+   assign 			 round_corr_nearest = in[bits_in-bits_out-1];
+
+   generate
+      if(bits_in-bits_out > 1)
+	assign 			 round_corr_nearest_safe = (~in[bits_in-1] & (&in[bits_in-2:bits_out])) ? 0 :
+				 round_corr_nearest;
+      else
+	assign round_corr_nearest_safe = round_corr_nearest;
+   endgenerate
+   
+      
+   assign round_corr = round_to_nearest ? round_corr_nearest_safe :
+		       trunc ? round_corr_trunc : 
+		       round_to_zero ? round_corr_rtz :
+		       0;  // default to trunc
+      
+   assign out = in[bits_in-1:bits_in-bits_out] + round_corr;
+   
+   assign err = in - {out,{(bits_in-bits_out){1'b0}}};
+   
+endmodule // round
diff --git a/fpga/usrp3/lib/dsp/round_reg.v b/fpga/usrp3/lib/dsp/round_reg.v
new file mode 100644
index 000000000..c8c77f518
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/round_reg.v
@@ -0,0 +1,32 @@
+// -*- verilog -*-
+//
+//  USRP - Universal Software Radio Peripheral
+//
+//  Copyright (C) 2008 Matt Ettus
+//
+
+//
+
+// Rounding "macro"
+// Keeps the topmost bits, does proper 2s comp rounding (round-to-zero)
+
+module round_reg
+  #(parameter bits_in=0,
+    parameter bits_out=0)
+    (input clk,
+     input [bits_in-1:0] in,
+     output reg [bits_out-1:0] out,
+     output reg [bits_in-bits_out:0] err);
+
+   wire [bits_out-1:0] temp;
+   wire [bits_in-bits_out:0] err_temp;
+   
+   round #(.bits_in(bits_in),.bits_out(bits_out)) round (.in(in),.out(temp), .err(err_temp));
+   
+   always @(posedge clk)
+     out <= temp;
+
+   always @(posedge clk)
+     err <= err_temp;
+   
+endmodule // round_reg
diff --git a/fpga/usrp3/lib/dsp/round_sd.v b/fpga/usrp3/lib/dsp/round_sd.v
new file mode 100644
index 000000000..94584f6ef
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/round_sd.v
@@ -0,0 +1,23 @@
+
+
+module round_sd
+  #(parameter WIDTH_IN=18,
+    parameter WIDTH_OUT=16,
+    parameter DISABLE_SD=0)
+   (input clk, input reset,
+    input [WIDTH_IN-1:0] in, input strobe_in,
+    output [WIDTH_OUT-1:0] out, output strobe_out);
+
+   localparam ERR_WIDTH = WIDTH_IN - WIDTH_OUT + 1;
+
+   wire [ERR_WIDTH-1:0]  err;
+   wire [WIDTH_IN-1:0] 	 err_ext, sum;
+  
+   sign_extend #(.bits_in(ERR_WIDTH),.bits_out(WIDTH_IN)) ext_err (.in(err), .out(err_ext));
+   
+   add2_and_clip_reg #(.WIDTH(WIDTH_IN)) add2_and_clip_reg
+     (.clk(clk), .rst(reset), .in1(in), .in2((DISABLE_SD == 0) ? err_ext : 0), .strobe_in(strobe_in), .sum(sum), .strobe_out(strobe_out));
+   
+   round #(.bits_in(WIDTH_IN),.bits_out(WIDTH_OUT)) round_sum (.in(sum), .out(out), .err(err));
+   
+endmodule // round_sd
diff --git a/fpga/usrp3/lib/dsp/rx_dcoffset.v b/fpga/usrp3/lib/dsp/rx_dcoffset.v
new file mode 100644
index 000000000..f74b0f1a0
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/rx_dcoffset.v
@@ -0,0 +1,46 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+
+
+
+
+module rx_dcoffset 
+  #(parameter WIDTH=16,
+    parameter ADDR=8'd0,
+    parameter alpha_shift=20)
+   (input clk, input rst, 
+    input set_stb, input [7:0] set_addr, input [31:0] set_data,
+    input [WIDTH-1:0] in, output [WIDTH-1:0] out);
+   
+   wire 	      set_now = set_stb & (ADDR == set_addr);
+   
+   reg 		      fixed;  // uses fixed offset
+   wire [WIDTH-1:0]   fixed_dco;
+
+   localparam int_width = WIDTH + alpha_shift;
+   reg [int_width-1:0] integrator;
+   wire [WIDTH-1:0]    quantized;
+
+   always @(posedge clk)
+     if(rst)
+       begin
+	  fixed <= 0;
+	  integrator <= {int_width{1'b0}};
+       end
+     else if(set_now)
+       begin
+	  fixed <= set_data[31];
+	  if(set_data[30])
+	    integrator <= {set_data[29:0],{(int_width-30){1'b0}}};
+       end
+     else if(~fixed)
+       integrator <= integrator +  {{(alpha_shift){out[WIDTH-1]}},out};
+
+   round_sd #(.WIDTH_IN(int_width),.WIDTH_OUT(WIDTH)) round_sd
+     (.clk(clk), .reset(rst), .in(integrator), .strobe_in(1'b1), .out(quantized), .strobe_out());
+   
+   add2_and_clip_reg #(.WIDTH(WIDTH)) add2_and_clip_reg
+     (.clk(clk), .rst(rst), .in1(in), .in2(-quantized), .strobe_in(1'b1), .sum(out), .strobe_out());
+
+endmodule // rx_dcoffset
diff --git a/fpga/usrp3/lib/dsp/rx_frontend.v b/fpga/usrp3/lib/dsp/rx_frontend.v
new file mode 100644
index 000000000..ebe19240c
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/rx_frontend.v
@@ -0,0 +1,78 @@
+
+module rx_frontend
+  #(parameter BASE = 0,
+    parameter IQCOMP_EN = 1)
+   (input clk, input rst,
+    input set_stb, input [7:0] set_addr, input [31:0] set_data,
+
+    input [15:0] adc_a, input adc_ovf_a,
+    input [15:0] adc_b, input adc_ovf_b,
+
+    output [23:0] i_out, output [23:0] q_out,
+    input run,
+    output [31:0] debug
+    );
+   
+   reg [15:0] 	  adc_i, adc_q;
+   wire [17:0] 	  adc_i_ofs, adc_q_ofs;
+   wire [35:0] 	  corr_i, corr_q; wire [17:0] 	  mag_corr,phase_corr;
+   wire 	  swap_iq;
+   
+   setting_reg #(.my_addr(BASE), .width(1)) sr_8
+     (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr),
+      .in(set_data),.out(swap_iq),.changed());
+
+   always @(posedge clk)
+     if(swap_iq) // Swap
+       {adc_i,adc_q} <= {adc_b,adc_a};
+     else
+       {adc_i,adc_q} <= {adc_a,adc_b};
+       
+   setting_reg #(.my_addr(BASE+1),.width(18)) sr_1
+     (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr),
+      .in(set_data),.out(mag_corr),.changed());
+   
+   setting_reg #(.my_addr(BASE+2),.width(18)) sr_2
+     (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr),
+      .in(set_data),.out(phase_corr),.changed());
+
+   generate
+      if(IQCOMP_EN == 1)
+	begin
+	   rx_dcoffset #(.WIDTH(18),.ADDR(BASE+3)) rx_dcoffset_i
+	     (.clk(clk),.rst(rst),.set_stb(set_stb),.set_addr(set_addr),.set_data(set_data),
+	      .in({adc_i,2'b00}),.out(adc_i_ofs));
+	   
+	   rx_dcoffset #(.WIDTH(18),.ADDR(BASE+4)) rx_dcoffset_q
+	     (.clk(clk),.rst(rst),.set_stb(set_stb),.set_addr(set_addr),.set_data(set_data),
+	      .in({adc_q,2'b00}),.out(adc_q_ofs));
+	   
+	   MULT18X18S mult_mag_corr
+	     (.P(corr_i), .A(adc_i_ofs), .B(mag_corr), .C(clk), .CE(1), .R(rst) ); 
+	   
+	   MULT18X18S mult_phase_corr
+	     (.P(corr_q), .A(adc_i_ofs), .B(phase_corr), .C(clk), .CE(1), .R(rst) );
+	   
+	   add2_and_clip_reg #(.WIDTH(24)) add_clip_i
+	     (.clk(clk), .rst(rst), 
+	      .in1({adc_i_ofs,6'd0}), .in2(corr_i[35:12]), .strobe_in(1'b1),
+	      .sum(i_out), .strobe_out());
+	   
+	   add2_and_clip_reg #(.WIDTH(24)) add_clip_q
+	     (.clk(clk), .rst(rst), 
+	      .in1({adc_q_ofs,6'd0}), .in2(corr_q[35:12]), .strobe_in(1'b1),
+	      .sum(q_out), .strobe_out());
+	end // if (IQCOMP_EN == 1)
+      else
+	begin
+	   rx_dcoffset #(.WIDTH(24),.ADDR(BASE+3)) rx_dcoffset_i
+	     (.clk(clk),.rst(rst),.set_stb(set_stb),.set_addr(set_addr),.set_data(set_data),
+	      .in({adc_i,8'b00}),.out(i_out));
+	   
+	   rx_dcoffset #(.WIDTH(24),.ADDR(BASE+4)) rx_dcoffset_q
+	     (.clk(clk),.rst(rst),.set_stb(set_stb),.set_addr(set_addr),.set_data(set_data),
+	      .in({adc_q,8'b00}),.out(q_out));
+	end // else: !if(IQCOMP_EN == 1)
+      endgenerate
+   
+endmodule // rx_frontend
diff --git a/fpga/usrp3/lib/dsp/rx_frontend_tb.v b/fpga/usrp3/lib/dsp/rx_frontend_tb.v
new file mode 100644
index 000000000..487b72687
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/rx_frontend_tb.v
@@ -0,0 +1,45 @@
+
+`timescale 1ns/1ns
+module rx_frontend_tb();
+   
+   reg clk, rst;
+
+   initial rst = 1;
+   initial #1000 rst = 0;
+   initial clk = 0;
+   always #5 clk = ~clk;
+   
+   initial $dumpfile("rx_frontend_tb.vcd");
+   initial $dumpvars(0,rx_frontend_tb);
+
+   reg [15:0] adc_in;
+   wire [17:0] adc_out;
+
+   always @(posedge clk)
+     begin
+	if(adc_in[13])
+	  $write("-%d,",-adc_in);
+	else
+	  $write("%d,",adc_in);
+	if(adc_out[13])
+	  $write("-%d\n",-adc_out);
+	else
+	  $write("%d\n",adc_out);
+     end	
+   
+   rx_frontend #(.BASE(0)) rx_frontend
+     (.clk(clk),.rst(rst),
+      .set_stb(0),.set_addr(0),.set_data(0),
+      .adc_a(adc_in), .adc_ovf_a(0),
+      .adc_b(0), .adc_ovf_b(0),
+      .i_out(adc_out),.q_out(),
+      .run(), .debug());
+
+   always @(posedge clk)
+     if(rst)
+       adc_in <= 0;
+     else
+       adc_in <= adc_in + 4;
+   //adc_in <= (($random % 473) + 23)/4;
+   
+endmodule // rx_frontend_tb
diff --git a/fpga/usrp3/lib/dsp/sign_extend.v b/fpga/usrp3/lib/dsp/sign_extend.v
new file mode 100644
index 000000000..7c85920aa
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/sign_extend.v
@@ -0,0 +1,23 @@
+// -*- verilog -*-
+//
+//  USRP - Universal Software Radio Peripheral
+//
+//  Copyright (C) 2003 Matt Ettus
+//
+
+//
+
+
+// Sign extension "macro"
+// bits_out should be greater than bits_in
+
+module sign_extend (in,out);
+	parameter bits_in=0;  // FIXME Quartus insists on a default
+	parameter bits_out=0;
+	
+	input [bits_in-1:0] in;
+	output [bits_out-1:0] out;
+	
+	assign out = {{(bits_out-bits_in){in[bits_in-1]}},in};
+	
+endmodule
diff --git a/fpga/usrp3/lib/dsp/small_hb_dec.v b/fpga/usrp3/lib/dsp/small_hb_dec.v
new file mode 100644
index 000000000..fc776c2d7
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/small_hb_dec.v
@@ -0,0 +1,124 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+
+
+// Short halfband decimator (intended to be followed by another stage)
+// Implements impulse responses of the form [A 0 B 0.5 B 0 A]
+//
+// These taps designed by halfgen4 from ldoolittle:
+//   2 * 131072 * halfgen4(.75/8,2)
+module small_hb_dec
+  #(parameter WIDTH=18)
+    (input clk,
+     input rst,
+     input bypass,
+     input run,
+     input stb_in,
+     input [WIDTH-1:0] data_in,
+     output reg stb_out,
+     output reg [WIDTH-1:0] data_out);
+
+   // Round off inputs to 17 bits because of 18 bit multipliers
+   localparam INTWIDTH = 17;
+   wire [INTWIDTH-1:0] 	data_rnd;
+   wire 		stb_rnd;
+   
+   round_sd #(.WIDTH_IN(WIDTH),.WIDTH_OUT(INTWIDTH)) round_in
+     (.clk(clk),.reset(rst),.in(data_in),.strobe_in(stb_in),.out(data_rnd),.strobe_out(stb_rnd));
+   	
+   
+   reg 			stb_rnd_d1;
+   reg [INTWIDTH-1:0] 	data_rnd_d1;
+   always @(posedge clk) stb_rnd_d1 <= stb_rnd;
+   always @(posedge clk) data_rnd_d1 <= data_rnd;
+   
+   wire 		go;
+   reg 			phase, go_d1, go_d2, go_d3, go_d4;
+   always @(posedge clk)
+     if(rst | ~run)
+       phase <= 0;
+     else if(stb_rnd_d1)
+       phase <= ~phase;
+   assign 		go = stb_rnd_d1 & phase;
+   always @(posedge clk) 
+     if(rst | ~run)
+       begin
+	  go_d1 <= 0;
+	  go_d2 <= 0;
+	  go_d3 <= 0;
+	  go_d4 <= 0;
+       end
+     else
+       begin
+	  go_d1 <= go;
+	  go_d2 <= go_d1;
+	  go_d3 <= go_d2;
+	  go_d4 <= go_d3;
+       end
+
+   wire [17:0] 		coeff_a = -10690;
+   wire [17:0] 		coeff_b = 75809;
+   
+   reg [INTWIDTH-1:0] 	d1, d2, d3, d4 , d5, d6;
+   always @(posedge clk)
+     if(stb_rnd_d1 | rst)
+       begin
+	  d1 <= data_rnd_d1;
+	  d2 <= d1;
+	  d3 <= d2;
+	  d4 <= d3;
+	  d5 <= d4;
+	  d6 <= d5;
+       end
+
+   reg [17:0] sum_a, sum_b, middle, middle_d1;
+
+   always @(posedge clk)
+     if(go)
+       begin
+	  sum_a <= {data_rnd_d1[INTWIDTH-1],data_rnd_d1} + {d6[INTWIDTH-1],d6};
+	  sum_b <= {d2[INTWIDTH-1],d2} + {d4[INTWIDTH-1],d4};
+	  //middle <= {d3[INTWIDTH-1],d3};
+	  middle <= {d3,1'b0};
+       end
+
+   always @(posedge clk)
+     if(go_d1)
+       middle_d1 <= middle;
+   
+   wire [17:0] sum = go_d1 ? sum_b : sum_a;
+   wire [17:0] coeff = go_d1 ? coeff_b : coeff_a;
+   wire [35:0] 	 prod;   
+   MULT18X18S mult(.C(clk), .CE(go_d1 | go_d2), .R(rst), .P(prod), .A(coeff), .B(sum) );
+
+   localparam ACCWIDTH = 30;
+   reg [ACCWIDTH-1:0] 	 accum;
+   
+   always @(posedge clk)
+     if(rst)
+       accum <= 0;
+     else if(go_d2)
+       accum <= {middle_d1[17],middle_d1[17],middle_d1,{(16+ACCWIDTH-36){1'b0}}} + {prod[35:36-ACCWIDTH]};
+     else if(go_d3)
+       accum <= accum + {prod[35:36-ACCWIDTH]};
+   
+   wire [WIDTH:0] 	 accum_rnd;
+   wire [WIDTH-1:0] 	 accum_rnd_clip;
+   
+   wire 	 stb_round;
+   
+   round_sd #(.WIDTH_IN(ACCWIDTH),.WIDTH_OUT(WIDTH+1)) round_acc 
+     (.clk(clk), .reset(rst), .in(accum), .strobe_in(go_d4), .out(accum_rnd), .strobe_out(stb_round));
+
+   clip #(.bits_in(WIDTH+1),.bits_out(WIDTH)) clip (.in(accum_rnd), .out(accum_rnd_clip));
+   
+   // Output
+   always @(posedge clk)
+     begin
+	stb_out  <= bypass ? stb_in : stb_round;
+	data_out <= bypass ? data_in : accum_rnd_clip;
+     end
+
+
+endmodule // small_hb_dec
diff --git a/fpga/usrp3/lib/dsp/small_hb_int.v b/fpga/usrp3/lib/dsp/small_hb_int.v
new file mode 100644
index 000000000..4b03b5d0c
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/small_hb_int.v
@@ -0,0 +1,99 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+
+
+// Short halfband decimator (intended to be followed by another stage)
+// Implements impulse responses of the form [A 0 B 0.5 B 0 A]
+//
+// These taps designed by halfgen4 from ldoolittle:
+//   2 * 131072 * halfgen4(.75/8,2)
+
+module small_hb_int
+  #(parameter WIDTH=18)
+    (input clk,
+     input rst,
+     input bypass,
+     input stb_in,
+     input [WIDTH-1:0] data_in,
+     input [7:0] output_rate,
+     input stb_out,
+     output reg [WIDTH-1:0] data_out);
+
+  
+   reg [WIDTH-1:0] d1, d2, d3, d4, d5, d6;
+
+   localparam 	   MWIDTH = 36;
+   wire [MWIDTH-1:0] prod;
+
+   reg [6:0] 	     stbin_d;
+   
+   always @(posedge clk)
+     stbin_d <= {stbin_d[5:0],stb_in};
+   
+   always @(posedge clk)
+     if (rst) 
+       begin
+	  d1 <= 0;
+	  d2 <= 0;
+	  d3 <= 0;
+	  d4 <= 0;
+	  d5 <= 0;
+	  d6 <= 0;
+       end
+     else if(stb_in)
+       begin
+	  d1 <= data_in;
+	  d2 <= d1;
+	  d3 <= d2;
+	  d4 <= d3;
+	  d5 <= d4;
+	  d6 <= d5;
+       end
+
+   wire [WIDTH-1:0] sum_outer, sum_inner;
+   add2_and_round_reg #(.WIDTH(WIDTH)) add_outer (.clk(clk),.in1(d1),.in2(d4),.sum(sum_outer));
+   add2_and_round_reg #(.WIDTH(WIDTH)) add_inner (.clk(clk),.in1(d2),.in2(d3),.sum(sum_inner));
+
+   wire [17:0] 	   coeff_outer = -10690;
+   wire [17:0] 	   coeff_inner = 75809;
+
+   MULT18X18S mult(.C(clk), .CE(1), .R(rst), .P(prod), .A(stbin_d[1] ? coeff_outer : coeff_inner), 
+		   .B(stbin_d[1] ? sum_outer : sum_inner) );
+
+   wire [MWIDTH:0] accum;
+   acc #(.IWIDTH(MWIDTH),.OWIDTH(MWIDTH+1)) 
+     acc (.clk(clk),.clear(stbin_d[2]),.acc(|stbin_d[3:2]),.in(prod),.out(accum));
+   
+   wire [WIDTH+2:0] 	 accum_rnd;
+   round_reg #(.bits_in(MWIDTH+1),.bits_out(WIDTH+3))
+     final_round (.clk(clk),.in(accum),.out(accum_rnd));
+
+   wire [WIDTH-1:0] 	 clipped;
+   clip_reg #(.bits_in(WIDTH+3),.bits_out(WIDTH)) final_clip
+     (.clk(clk),.in(accum_rnd),.strobe_in(1'b1), .out(clipped));
+
+   reg [WIDTH-1:0] 	 saved, saved_d3;
+   always @(posedge clk)
+     if(stbin_d[6])
+       saved <= clipped;
+
+   always @(posedge clk)
+     if(stbin_d[3])
+       saved_d3 <= d3;
+	   
+   always @(posedge clk)
+     if(bypass)
+       data_out <= data_in;
+     else if(stb_in & stb_out)
+       case(output_rate)
+	 1 : data_out <= d6; 
+	 2 : data_out <= d4;
+	 3, 4, 5, 6, 7 : data_out <= d3;
+	 default : data_out <= d2;
+       endcase // case(output_rate)
+     else if(stb_out)
+       data_out <= saved;
+
+endmodule // small_hb_int
+
diff --git a/fpga/usrp3/lib/dsp/srl.v b/fpga/usrp3/lib/dsp/srl.v
new file mode 100644
index 000000000..bbd8ac1c9
--- /dev/null
+++ b/fpga/usrp3/lib/dsp/srl.v
@@ -0,0 +1,27 @@
+//
+// Copyright 2011 Ettus Research LLC
+//
+
+
+
+module srl
+  #(parameter WIDTH=18)
+    (input clk,
+     input rst,
+     input write,
+     input [WIDTH-1:0] in,
+     input [3:0] addr,
+     output [WIDTH-1:0] out);
+   
+   genvar 		i;
+   generate
+      for (i=0;i<WIDTH;i=i+1)
+	begin : gen_srl
+	   SRL16E
+	     srl16e(.Q(out[i]),
+		    .A0(addr[0]),.A1(addr[1]),.A2(addr[2]),.A3(addr[3]),
+		    .CE(write|rst),.CLK(clk),.D(in[i]));
+	end
+   endgenerate
+
+endmodule // srl
author	Ben Hilburn <ben.hilburn@ettus.com>	2013-10-10 10:17:27 -0700
committer	Ben Hilburn <ben.hilburn@ettus.com>	2013-10-10 10:17:27 -0700
commit	0df4b801a34697f2058b4a7b95e08d2a0576c9db (patch)
tree	be10e78d1a97c037a9e7492360a178d1873b9c09 /fpga/usrp3/lib/dsp
parent	6e7bc850b66e8188718248b76b729c7cf9c89700 (diff)
download	uhd-0df4b801a34697f2058b4a7b95e08d2a0576c9db.tar.gz uhd-0df4b801a34697f2058b4a7b95e08d2a0576c9db.tar.bz2 uhd-0df4b801a34697f2058b4a7b95e08d2a0576c9db.zip