diff options
Diffstat (limited to 'fpga/usrp3/lib/dsp')
32 files changed, 1963 insertions, 0 deletions
diff --git a/fpga/usrp3/lib/dsp/Makefile.srcs b/fpga/usrp3/lib/dsp/Makefile.srcs new file mode 100644 index 000000000..3896ac5b5 --- /dev/null +++ b/fpga/usrp3/lib/dsp/Makefile.srcs @@ -0,0 +1,38 @@ +# +# Copyright 2013 Ettus Research LLC +# + +################################################## +# DSP Sources +################################################## +DSP_SRCS = $(abspath $(addprefix $(BASE_DIR)/../lib/dsp/, \ +ddc_chain.v \ +duc_chain.v \ +sign_extend.v \ +cordic_z24.v \ +clip_reg.v \ +cordic_stage.v \ +clip.v \ +cic_strober.v \ +cic_decim.v \ +cic_interp.v \ +cic_dec_shifter.v \ +cic_int_shifter.v \ +small_hb_dec.v \ +small_hb_int.v \ +hb_dec.v \ +hb_interp.v \ +round_sd.v \ +add2_and_clip_reg.v \ +add2_and_clip.v \ +add2.v \ +add2_reg.v \ +add2_and_round_reg.v \ +add2_and_round.v \ +round_reg.v \ +round.v \ +srl.v \ +acc.v \ +rx_frontend.v \ +rx_dcoffset.v \ +)) diff --git a/fpga/usrp3/lib/dsp/README.txt b/fpga/usrp3/lib/dsp/README.txt new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/fpga/usrp3/lib/dsp/README.txt diff --git a/fpga/usrp3/lib/dsp/acc.v b/fpga/usrp3/lib/dsp/acc.v new file mode 100644 index 000000000..86b68512f --- /dev/null +++ b/fpga/usrp3/lib/dsp/acc.v @@ -0,0 +1,33 @@ +// +// Copyright 2011 Ettus Research LLC +// + + + +module acc + #(parameter IWIDTH=16, OWIDTH=30) + (input clk, + input clear, + input acc, + input [IWIDTH-1:0] in, + output reg [OWIDTH-1:0] out); + + wire [OWIDTH-1:0] in_signext; + sign_extend #(.bits_in(IWIDTH),.bits_out(OWIDTH)) + acc_signext (.in(in),.out(in_signext)); + + // CLEAR & ~ACC --> clears the accumulator + // CLEAR & ACC --> loads the accumulator + // ~CLEAR & ACC --> accumulates + // ~CLEAR & ~ACC --> hold + + wire [OWIDTH-1:0] addend1 = clear ? 0 : out; + wire [OWIDTH-1:0] addend2 = ~acc ? 0 : in_signext; + wire [OWIDTH-1:0] sum_int = addend1 + addend2; + + always @(posedge clk) + out <= sum_int; + +endmodule // acc + + diff --git a/fpga/usrp3/lib/dsp/add2.v b/fpga/usrp3/lib/dsp/add2.v new file mode 100644 index 000000000..124f9d6ca --- /dev/null +++ b/fpga/usrp3/lib/dsp/add2.v @@ -0,0 +1,16 @@ +// +// Copyright 2011 Ettus Research LLC +// + + + +module add2 + #(parameter WIDTH=16) + (input [WIDTH-1:0] in1, + input [WIDTH-1:0] in2, + output [WIDTH-1:0] sum); + + wire [WIDTH:0] sum_int = {in1[WIDTH-1],in1} + {in2[WIDTH-1],in2}; + assign sum = sum_int[WIDTH:1]; // Note -- will have some bias + +endmodule // add2 diff --git a/fpga/usrp3/lib/dsp/add2_and_clip.v b/fpga/usrp3/lib/dsp/add2_and_clip.v new file mode 100644 index 000000000..663f5d004 --- /dev/null +++ b/fpga/usrp3/lib/dsp/add2_and_clip.v @@ -0,0 +1,12 @@ + +module add2_and_clip + #(parameter WIDTH=16) + (input [WIDTH-1:0] in1, + input [WIDTH-1:0] in2, + output [WIDTH-1:0] sum); + + wire [WIDTH:0] sum_int = {in1[WIDTH-1],in1} + {in2[WIDTH-1],in2}; + clip #(.bits_in(WIDTH+1),.bits_out(WIDTH)) clip + (.in(sum_int),.out(sum)); + +endmodule // add2_and_clip diff --git a/fpga/usrp3/lib/dsp/add2_and_clip_reg.v b/fpga/usrp3/lib/dsp/add2_and_clip_reg.v new file mode 100644 index 000000000..8073b3b54 --- /dev/null +++ b/fpga/usrp3/lib/dsp/add2_and_clip_reg.v @@ -0,0 +1,25 @@ + +module add2_and_clip_reg + #(parameter WIDTH=16) + (input clk, + input rst, + input [WIDTH-1:0] in1, + input [WIDTH-1:0] in2, + input strobe_in, + output reg [WIDTH-1:0] sum, + output reg strobe_out); + + wire [WIDTH-1:0] sum_int; + + add2_and_clip #(.WIDTH(WIDTH)) add2_and_clip (.in1(in1),.in2(in2),.sum(sum_int)); + + always @(posedge clk) + if(rst) + sum <= 0; + else if(strobe_in) + sum <= sum_int; + + always @(posedge clk) + strobe_out <= strobe_in; + +endmodule // add2_and_clip_reg diff --git a/fpga/usrp3/lib/dsp/add2_and_round.v b/fpga/usrp3/lib/dsp/add2_and_round.v new file mode 100644 index 000000000..9d0914414 --- /dev/null +++ b/fpga/usrp3/lib/dsp/add2_and_round.v @@ -0,0 +1,16 @@ +// +// Copyright 2011 Ettus Research LLC +// + + + +module add2_and_round + #(parameter WIDTH=16) + (input [WIDTH-1:0] in1, + input [WIDTH-1:0] in2, + output [WIDTH-1:0] sum); + + wire [WIDTH:0] sum_int = {in1[WIDTH-1],in1} + {in2[WIDTH-1],in2}; + assign sum = sum_int[WIDTH:1] + (sum_int[WIDTH] & sum_int[0]); + +endmodule // add2_and_round diff --git a/fpga/usrp3/lib/dsp/add2_and_round_reg.v b/fpga/usrp3/lib/dsp/add2_and_round_reg.v new file mode 100644 index 000000000..cb20a3c1b --- /dev/null +++ b/fpga/usrp3/lib/dsp/add2_and_round_reg.v @@ -0,0 +1,21 @@ +// +// Copyright 2011 Ettus Research LLC +// + + + +module add2_and_round_reg + #(parameter WIDTH=16) + (input clk, + input [WIDTH-1:0] in1, + input [WIDTH-1:0] in2, + output reg [WIDTH-1:0] sum); + + wire [WIDTH-1:0] sum_int; + + add2_and_round #(.WIDTH(WIDTH)) add2_n_rnd (.in1(in1),.in2(in2),.sum(sum_int)); + + always @(posedge clk) + sum <= sum_int; + +endmodule // add2_and_round_reg diff --git a/fpga/usrp3/lib/dsp/add2_reg.v b/fpga/usrp3/lib/dsp/add2_reg.v new file mode 100644 index 000000000..3ac93ae2e --- /dev/null +++ b/fpga/usrp3/lib/dsp/add2_reg.v @@ -0,0 +1,22 @@ +// +// Copyright 2011 Ettus Research LLC +// + + + +module add2_reg + #(parameter WIDTH=16) + (input clk, + input [WIDTH-1:0] in1, + input [WIDTH-1:0] in2, + output reg [WIDTH-1:0] sum); + + wire [WIDTH-1:0] sum_int; + + add2 #(.WIDTH(WIDTH)) add2 (.in1(in1),.in2(in2),.sum(sum_int)); + + always @(posedge clk) + sum <= sum_int; + +endmodule // add2_reg + diff --git a/fpga/usrp3/lib/dsp/cic_dec_shifter.v b/fpga/usrp3/lib/dsp/cic_dec_shifter.v new file mode 100644 index 000000000..efc54c106 --- /dev/null +++ b/fpga/usrp3/lib/dsp/cic_dec_shifter.v @@ -0,0 +1,94 @@ +// -*- verilog -*- +// +// USRP - Universal Software Radio Peripheral +// +// Copyright (C) 2003 Matt Ettus +// + +// + + +// NOTE This only works for N=4, max decim rate of 128 +// NOTE signal "rate" is EQUAL TO the actual rate, no more -1 BS + +module cic_dec_shifter(rate,signal_in,signal_out); + parameter bw = 16; + parameter maxbitgain = 28; + + input [7:0] rate; + input wire [bw+maxbitgain-1:0] signal_in; + output reg [bw-1:0] signal_out; + + function [4:0] bitgain; + input [7:0] rate; + case(rate) + // Exact Cases -- N*log2(rate) + 8'd1 : bitgain = 0; + 8'd2 : bitgain = 4; + 8'd4 : bitgain = 8; + 8'd8 : bitgain = 12; + 8'd16 : bitgain = 16; + 8'd32 : bitgain = 20; + 8'd64 : bitgain = 24; + 8'd128 : bitgain = 28; + + // Nearest without overflow -- ceil(N*log2(rate)) + 8'd3 : bitgain = 7; + 8'd5 : bitgain = 10; + 8'd6 : bitgain = 11; + 8'd7 : bitgain = 12; + 8'd9 : bitgain = 13; + 8'd10,8'd11 : bitgain = 14; + 8'd12,8'd13 : bitgain = 15; + 8'd14,8'd15 : bitgain = 16; + 8'd17,8'd18,8'd19 : bitgain = 17; + 8'd20,8'd21,8'd22 : bitgain = 18; + 8'd23,8'd24,8'd25,8'd26 : bitgain = 19; + 8'd27,8'd28,8'd29,8'd30,8'd31 : bitgain = 20; + 8'd33,8'd34,8'd35,8'd36,8'd37,8'd38 : bitgain = 21; + 8'd39,8'd40,8'd41,8'd42,8'd43,8'd44,8'd45 : bitgain = 22; + 8'd46,8'd47,8'd48,8'd49,8'd50,8'd51,8'd52,8'd53 : bitgain = 23; + 8'd54,8'd55,8'd56,8'd57,8'd58,8'd59,8'd60,8'd61,8'd62,8'd63 : bitgain = 24; + 8'd65,8'd66,8'd67,8'd68,8'd69,8'd70,8'd71,8'd72,8'd73,8'd74,8'd75,8'd76 : bitgain = 25; + 8'd77,8'd78,8'd79,8'd80,8'd81,8'd82,8'd83,8'd84,8'd85,8'd86,8'd87,8'd88,8'd89,8'd90 : bitgain = 26; + 8'd91,8'd92,8'd93,8'd94,8'd95,8'd96,8'd97,8'd98,8'd99,8'd100,8'd101,8'd102,8'd103,8'd104,8'd105,8'd106,8'd107 : bitgain = 27; + default : bitgain = 28; + endcase // case(rate) + endfunction // bitgain + + wire [4:0] shift = bitgain(rate); + + // We should be able to do this, but can't .... + // assign signal_out = signal_in[shift+bw-1:shift]; + + always @* + case(shift) + 5'd0 : signal_out = signal_in[0+bw-1:0]; + 5'd4 : signal_out = signal_in[4+bw-1:4]; + 5'd7 : signal_out = signal_in[7+bw-1:7]; + 5'd8 : signal_out = signal_in[8+bw-1:8]; + 5'd10 : signal_out = signal_in[10+bw-1:10]; + 5'd11 : signal_out = signal_in[11+bw-1:11]; + 5'd12 : signal_out = signal_in[12+bw-1:12]; + 5'd13 : signal_out = signal_in[13+bw-1:13]; + 5'd14 : signal_out = signal_in[14+bw-1:14]; + 5'd15 : signal_out = signal_in[15+bw-1:15]; + 5'd16 : signal_out = signal_in[16+bw-1:16]; + 5'd17 : signal_out = signal_in[17+bw-1:17]; + 5'd18 : signal_out = signal_in[18+bw-1:18]; + 5'd19 : signal_out = signal_in[19+bw-1:19]; + 5'd20 : signal_out = signal_in[20+bw-1:20]; + 5'd21 : signal_out = signal_in[21+bw-1:21]; + 5'd22 : signal_out = signal_in[22+bw-1:22]; + 5'd23 : signal_out = signal_in[23+bw-1:23]; + 5'd24 : signal_out = signal_in[24+bw-1:24]; + 5'd25 : signal_out = signal_in[25+bw-1:25]; + 5'd26 : signal_out = signal_in[26+bw-1:26]; + 5'd27 : signal_out = signal_in[27+bw-1:27]; + 5'd28 : signal_out = signal_in[28+bw-1:28]; + + default : signal_out = signal_in[28+bw-1:28]; + endcase // case(shift) + +endmodule // cic_dec_shifter + diff --git a/fpga/usrp3/lib/dsp/cic_decim.v b/fpga/usrp3/lib/dsp/cic_decim.v new file mode 100644 index 000000000..feb785de8 --- /dev/null +++ b/fpga/usrp3/lib/dsp/cic_decim.v @@ -0,0 +1,76 @@ +// -*- verilog -*- +// +// USRP - Universal Software Radio Peripheral +// +// Copyright (C) 2003 Matt Ettus +// + +// + + +module cic_decim + #(parameter bw = 16, parameter N = 4, parameter log2_of_max_rate = 7) + (input clock, + input reset, + input enable, + input [7:0] rate, + input strobe_in, + input strobe_out, + input [bw-1:0] signal_in, + output reg [bw-1:0] signal_out); + + localparam maxbitgain = N * log2_of_max_rate; + + wire [bw+maxbitgain-1:0] signal_in_ext; + reg [bw+maxbitgain-1:0] integrator [0:N-1]; + reg [bw+maxbitgain-1:0] differentiator [0:N-1]; + reg [bw+maxbitgain-1:0] pipeline [0:N-1]; + reg [bw+maxbitgain-1:0] sampler; + + integer i; + + sign_extend #(bw,bw+maxbitgain) + ext_input (.in(signal_in),.out(signal_in_ext)); + + always @(posedge clock) + if(~enable) + for(i=0;i<N;i=i+1) + integrator[i] <= 0; + else if (strobe_in) + begin + integrator[0] <= integrator[0] + signal_in_ext; + for(i=1;i<N;i=i+1) + integrator[i] <= integrator[i] + integrator[i-1]; + end + + always @(posedge clock) + if(~enable) + begin + sampler <= 0; + for(i=0;i<N;i=i+1) + begin + pipeline[i] <= 0; + differentiator[i] <= 0; + end + end + else if (strobe_out) + begin + sampler <= integrator[N-1]; + differentiator[0] <= sampler; + pipeline[0] <= sampler - differentiator[0]; + for(i=1;i<N;i=i+1) + begin + differentiator[i] <= pipeline[i-1]; + pipeline[i] <= pipeline[i-1] - differentiator[i]; + end + end // if (enable && strobe_out) + + wire [bw-1:0] signal_out_unreg; + + cic_dec_shifter #(bw) + cic_dec_shifter(rate,pipeline[N-1],signal_out_unreg); + + always @(posedge clock) + signal_out <= signal_out_unreg; + +endmodule // cic_decim diff --git a/fpga/usrp3/lib/dsp/cic_int_shifter.v b/fpga/usrp3/lib/dsp/cic_int_shifter.v new file mode 100644 index 000000000..ff5a30d6a --- /dev/null +++ b/fpga/usrp3/lib/dsp/cic_int_shifter.v @@ -0,0 +1,88 @@ +// -*- verilog -*- +// +// USRP - Universal Software Radio Peripheral +// +// Copyright (C) 2003 Matt Ettus +// + +// + + +// NOTE This only works for N=4, max interp rate of 128 +// NOTE signal "rate" is EQUAL TO the actual rate (no more -1 BS) + +module cic_int_shifter(rate,signal_in,signal_out); + parameter bw = 16; + parameter maxbitgain = 21; + + input [7:0] rate; + input wire [bw+maxbitgain-1:0] signal_in; + output reg [bw-1:0] signal_out; + + function [4:0] bitgain; + input [7:0] rate; + case(rate) + // Exact Cases + 8'd1 : bitgain = 0; + 8'd2 : bitgain = 3; + 8'd4 : bitgain = 6; + 8'd8 : bitgain = 9; + 8'd16 : bitgain = 12; + 8'd32 : bitgain = 15; + 8'd64 : bitgain = 18; + 8'd128 : bitgain = 21; + + // Nearest without overflow + 8'd3 : bitgain = 5; + 8'd5 : bitgain = 7; + 8'd6 : bitgain = 8; + 8'd7 : bitgain = 9; + 8'd9,8'd10 : bitgain = 10; + 8'd11,8'd12 : bitgain = 11; + 8'd13,8'd14,8'd15 : bitgain = 12; + 8'd17,8'd18,8'd19,8'd20 : bitgain = 13; + 8'd21,8'd22,8'd23,8'd24,8'd25 : bitgain = 14; + 8'd26,8'd27,8'd28,8'd29,8'd30,8'd31 : bitgain = 15; + 8'd33,8'd34,8'd35,8'd36,8'd37,8'd38,8'd39,8'd40 : bitgain = 16; + 8'd41,8'd42,8'd43,8'd44,8'd45,8'd46,8'd47,8'd48,8'd49,8'd50 : bitgain = 17; + 8'd51,8'd52,8'd53,8'd54,8'd55,8'd56,8'd57,8'd58,8'd59,8'd60,8'd61,8'd62,8'd63 : bitgain = 18; + 8'd65,8'd66,8'd67,8'd68,8'd69,8'd70,8'd71,8'd72,8'd73,8'd74,8'd75,8'd76,8'd77,8'd78,8'd79,8'd80 : bitgain = 19; + 8'd81,8'd82,8'd83,8'd84,8'd85,8'd86,8'd87,8'd88,8'd89,8'd90,8'd91,8'd92,8'd93,8'd94,8'd95,8'd96,8'd97,8'd98,8'd99,8'd100,8'd101 : bitgain = 20; + + default : bitgain = 21; + endcase // case(rate) + endfunction // bitgain + + wire [4:0] shift = bitgain(rate); + + // We should be able to do this, but can't .... + // assign signal_out = signal_in[shift+bw-1:shift]; + + always @* + case(shift) + 5'd0 : signal_out = signal_in[0+bw-1:0]; + 5'd3 : signal_out = signal_in[3+bw-1:3]; + 5'd6 : signal_out = signal_in[6+bw-1:6]; + 5'd9 : signal_out = signal_in[9+bw-1:9]; + 5'd12 : signal_out = signal_in[12+bw-1:12]; + 5'd15 : signal_out = signal_in[15+bw-1:15]; + 5'd18 : signal_out = signal_in[18+bw-1:18]; + 5'd21 : signal_out = signal_in[21+bw-1:21]; + + 5'd5 : signal_out = signal_in[5+bw-1:5]; + 5'd7 : signal_out = signal_in[7+bw-1:7]; + 5'd8 : signal_out = signal_in[8+bw-1:8]; + 5'd10 : signal_out = signal_in[10+bw-1:10]; + 5'd11 : signal_out = signal_in[11+bw-1:11]; + 5'd13 : signal_out = signal_in[13+bw-1:13]; + 5'd14 : signal_out = signal_in[14+bw-1:14]; + 5'd16 : signal_out = signal_in[16+bw-1:16]; + 5'd17 : signal_out = signal_in[17+bw-1:17]; + 5'd19 : signal_out = signal_in[19+bw-1:19]; + 5'd20 : signal_out = signal_in[20+bw-1:20]; + + default : signal_out = signal_in[21+bw-1:21]; + endcase // case(shift) + +endmodule // cic_int_shifter + diff --git a/fpga/usrp3/lib/dsp/cic_interp.v b/fpga/usrp3/lib/dsp/cic_interp.v new file mode 100644 index 000000000..608c2d448 --- /dev/null +++ b/fpga/usrp3/lib/dsp/cic_interp.v @@ -0,0 +1,75 @@ +// -*- verilog -*- +// +// USRP - Universal Software Radio Peripheral +// +// Copyright (C) 2003 Matt Ettus +// + +// + + +module cic_interp + #(parameter bw = 16, parameter N = 4, parameter log2_of_max_rate = 7) + (input clock, + input reset, + input enable, + input [7:0] rate, + input strobe_in, + input strobe_out, + input [bw-1:0] signal_in, + output reg [bw-1:0] signal_out); + + integer i; + localparam maxbitgain = (N-1)*log2_of_max_rate; + + wire [bw+maxbitgain-1:0] signal_in_ext; + reg [bw+maxbitgain-1:0] integrator [0:N-1]; + reg [bw+maxbitgain-1:0] differentiator [0:N-1]; + reg [bw+maxbitgain-1:0] pipeline [0:N-1]; + + sign_extend #(bw,bw+maxbitgain) + ext_input (.in(signal_in),.out(signal_in_ext)); + + //FIXME Note that this section has pipe and diff reversed + // It still works, but is confusing + always @(posedge clock) + if(reset | ~enable) + for(i=0;i<N;i=i+1) + integrator[i] <= 0; + else if (enable & strobe_out) + begin + if(strobe_in) + integrator[0] <= integrator[0] + pipeline[N-1]; + for(i=1;i<N;i=i+1) + integrator[i] <= integrator[i] + integrator[i-1]; + end + + always @(posedge clock) + if(reset | ~enable) + begin + for(i=0;i<N;i=i+1) + begin + differentiator[i] <= 0; + pipeline[i] <= 0; + end + end + else if (enable && strobe_in) + begin + differentiator[0] <= signal_in_ext; + pipeline[0] <= signal_in_ext - differentiator[0]; + for(i=1;i<N;i=i+1) + begin + differentiator[i] <= pipeline[i-1]; + pipeline[i] <= pipeline[i-1] - differentiator[i]; + end + end + + wire [bw-1:0] signal_out_unreg; + cic_int_shifter #(bw) + cic_int_shifter(rate,integrator[N-1],signal_out_unreg); + + always @(posedge clock) + signal_out <= signal_out_unreg; + +endmodule // cic_interp + diff --git a/fpga/usrp3/lib/dsp/cic_strober.v b/fpga/usrp3/lib/dsp/cic_strober.v new file mode 100644 index 000000000..269b85f75 --- /dev/null +++ b/fpga/usrp3/lib/dsp/cic_strober.v @@ -0,0 +1,33 @@ +// +// USRP2 - Universal Software Radio Peripheral Mk II +// +// Copyright (C) 2008 Matt Ettus +// + +// + +module cic_strober + #(parameter WIDTH=8) + ( input clock, + input reset, + input enable, + input [WIDTH-1:0] rate, // Rate should EQUAL to your desired divide ratio, no more -1 BS + input strobe_fast, + output wire strobe_slow ); + + reg [WIDTH-1:0] counter; + wire now = (counter==1); + assign strobe_slow = now && enable && strobe_fast; + + always @(posedge clock) + if(reset) + counter <= 0; + else if (~enable) + counter <= rate; + else if(strobe_fast) + if(now) + counter <= rate; + else + counter <= counter - 1; + +endmodule // cic_strober diff --git a/fpga/usrp3/lib/dsp/clip.v b/fpga/usrp3/lib/dsp/clip.v new file mode 100644 index 000000000..294c5e8ba --- /dev/null +++ b/fpga/usrp3/lib/dsp/clip.v @@ -0,0 +1,24 @@ +// -*- verilog -*- +// +// USRP - Universal Software Radio Peripheral +// +// Copyright (C) 2008 Matt Ettus +// + +// + +// Clipping "macro", keeps the bottom bits + +module clip + #(parameter bits_in=0, + parameter bits_out=0) + (input [bits_in-1:0] in, + output [bits_out-1:0] out); + + wire overflow = |in[bits_in-1:bits_out-1] & ~(&in[bits_in-1:bits_out-1]); + assign out = overflow ? + (in[bits_in-1] ? {1'b1,{(bits_out-1){1'b0}}} : {1'b0,{(bits_out-1){1'b1}}}) : + in[bits_out-1:0]; + +endmodule // clip + diff --git a/fpga/usrp3/lib/dsp/clip_reg.v b/fpga/usrp3/lib/dsp/clip_reg.v new file mode 100644 index 000000000..ab9a5b79d --- /dev/null +++ b/fpga/usrp3/lib/dsp/clip_reg.v @@ -0,0 +1,34 @@ +// -*- verilog -*- +// +// USRP - Universal Software Radio Peripheral +// +// Copyright (C) 2008 Matt Ettus +// + +// + +// Clipping "macro", keeps the bottom bits + +module clip_reg + #(parameter bits_in=0, + parameter bits_out=0, + parameter STROBED=1'b0) + (input clk, + input [bits_in-1:0] in, + output reg [bits_out-1:0] out, + input strobe_in, + output reg strobe_out); + + wire [bits_out-1:0] temp; + + clip #(.bits_in(bits_in),.bits_out(bits_out)) clip (.in(in),.out(temp)); + + always @(posedge clk) + strobe_out <= strobe_in; + + always @(posedge clk) + if(strobe_in | ~STROBED) + out <= temp; + +endmodule // clip_reg + diff --git a/fpga/usrp3/lib/dsp/cordic_stage.v b/fpga/usrp3/lib/dsp/cordic_stage.v new file mode 100644 index 000000000..7019ead52 --- /dev/null +++ b/fpga/usrp3/lib/dsp/cordic_stage.v @@ -0,0 +1,48 @@ +// -*- verilog -*- +// +// USRP - Universal Software Radio Peripheral +// +// Copyright (C) 2003 Matt Ettus +// + +// + +module cordic_stage( clock, reset, enable, xi,yi,zi,constant,xo,yo,zo); + parameter bitwidth = 16; + parameter zwidth = 16; + parameter shift = 1; + + input clock; + input reset; + input enable; + input [bitwidth-1:0] xi,yi; + input [zwidth-1:0] zi; + input [zwidth-1:0] constant; + output [bitwidth-1:0] xo,yo; + output [zwidth-1:0] zo; + + wire z_is_pos = ~zi[zwidth-1]; + + reg [bitwidth-1:0] xo,yo; + reg [zwidth-1:0] zo; + + always @(posedge clock) + if(reset) + begin + xo <= 0; + yo <= 0; + zo <= 0; + end + else //if(enable) + begin + xo <= z_is_pos ? + xi - {{shift+1{yi[bitwidth-1]}},yi[bitwidth-2:shift]} : + xi + {{shift+1{yi[bitwidth-1]}},yi[bitwidth-2:shift]}; + yo <= z_is_pos ? + yi + {{shift+1{xi[bitwidth-1]}},xi[bitwidth-2:shift]} : + yi - {{shift+1{xi[bitwidth-1]}},xi[bitwidth-2:shift]}; + zo <= z_is_pos ? + zi - constant : + zi + constant; + end +endmodule diff --git a/fpga/usrp3/lib/dsp/cordic_z24.v b/fpga/usrp3/lib/dsp/cordic_z24.v new file mode 100644 index 000000000..d9e983ccd --- /dev/null +++ b/fpga/usrp3/lib/dsp/cordic_z24.v @@ -0,0 +1,112 @@ +// -*- verilog -*- +// +// USRP - Universal Software Radio Peripheral +// +// Copyright (C) 2003, 2007 Matt Ettus +// + +// + +module cordic_z24(clock, reset, enable, xi, yi, zi, xo, yo, zo ); + parameter bitwidth = 16; + parameter stages = 19; + localparam zwidth = 24; + + input clock; + input reset; + input enable; + input [bitwidth-1:0] xi, yi; + output [bitwidth-1:0] xo, yo; + input [zwidth-1:0] zi; + output [zwidth-1:0] zo; + + reg [bitwidth+1:0] x0,y0; + reg [zwidth-2:0] z0; + wire [bitwidth+1:0] x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16,x17,x18,x19,x20; + wire [bitwidth+1:0] y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15,y16,y17,y18,y19,y20; + wire [zwidth-2:0] z1,z2,z3,z4,z5,z6,z7,z8,z9,z10,z11,z12,z13,z14,z15,z16,z17,z18,z19,z20; + + wire [bitwidth+1:0] xi_ext = {{2{xi[bitwidth-1]}},xi}; + wire [bitwidth+1:0] yi_ext = {{2{yi[bitwidth-1]}},yi}; + + // Compute consts. Would be easier if vlog had atan... + // see gen_cordic_consts.py + + // constants for 24 bit wide phase + localparam c00 = 23'd2097152; + localparam c01 = 23'd1238021; + localparam c02 = 23'd654136; + localparam c03 = 23'd332050; + localparam c04 = 23'd166669; + localparam c05 = 23'd83416; + localparam c06 = 23'd41718; + localparam c07 = 23'd20860; + localparam c08 = 23'd10430; + localparam c09 = 23'd5215; + localparam c10 = 23'd2608; + localparam c11 = 23'd1304; + localparam c12 = 23'd652; + localparam c13 = 23'd326; + localparam c14 = 23'd163; + localparam c15 = 23'd81; + localparam c16 = 23'd41; + localparam c17 = 23'd20; + localparam c18 = 23'd10; + localparam c19 = 23'd5; + localparam c20 = 23'd3; + localparam c21 = 23'd1; + localparam c22 = 23'd1; + localparam c23 = 23'd0; + + always @(posedge clock) + if(reset) + begin + x0 <= 0; y0 <= 0; z0 <= 0; + end + else// if(enable) + begin + z0 <= zi[zwidth-2:0]; + case (zi[zwidth-1:zwidth-2]) + 2'b00, 2'b11 : + begin + x0 <= xi_ext; + y0 <= yi_ext; + end + 2'b01, 2'b10 : + begin + x0 <= -xi_ext; + y0 <= -yi_ext; + end + endcase // case(zi[zwidth-1:zwidth-2]) + end // else: !if(reset) + + // FIXME need to handle variable number of stages + // This would be easier if arrays worked better in vlog... + + cordic_stage #(bitwidth+2,zwidth-1,0) cordic_stage0 (clock,reset,enable,x0,y0,z0,c00,x1,y1,z1); + cordic_stage #(bitwidth+2,zwidth-1,1) cordic_stage1 (clock,reset,enable,x1,y1,z1,c01,x2,y2,z2); + cordic_stage #(bitwidth+2,zwidth-1,2) cordic_stage2 (clock,reset,enable,x2,y2,z2,c02,x3,y3,z3); + cordic_stage #(bitwidth+2,zwidth-1,3) cordic_stage3 (clock,reset,enable,x3,y3,z3,c03,x4,y4,z4); + cordic_stage #(bitwidth+2,zwidth-1,4) cordic_stage4 (clock,reset,enable,x4,y4,z4,c04,x5,y5,z5); + cordic_stage #(bitwidth+2,zwidth-1,5) cordic_stage5 (clock,reset,enable,x5,y5,z5,c05,x6,y6,z6); + cordic_stage #(bitwidth+2,zwidth-1,6) cordic_stage6 (clock,reset,enable,x6,y6,z6,c06,x7,y7,z7); + cordic_stage #(bitwidth+2,zwidth-1,7) cordic_stage7 (clock,reset,enable,x7,y7,z7,c07,x8,y8,z8); + cordic_stage #(bitwidth+2,zwidth-1,8) cordic_stage8 (clock,reset,enable,x8,y8,z8,c08,x9,y9,z9); + cordic_stage #(bitwidth+2,zwidth-1,9) cordic_stage9 (clock,reset,enable,x9,y9,z9,c09,x10,y10,z10); + cordic_stage #(bitwidth+2,zwidth-1,10) cordic_stage10 (clock,reset,enable,x10,y10,z10,c10,x11,y11,z11); + cordic_stage #(bitwidth+2,zwidth-1,11) cordic_stage11 (clock,reset,enable,x11,y11,z11,c11,x12,y12,z12); + cordic_stage #(bitwidth+2,zwidth-1,12) cordic_stage12 (clock,reset,enable,x12,y12,z12,c12,x13,y13,z13); + cordic_stage #(bitwidth+2,zwidth-1,13) cordic_stage13 (clock,reset,enable,x13,y13,z13,c13,x14,y14,z14); + cordic_stage #(bitwidth+2,zwidth-1,14) cordic_stage14 (clock,reset,enable,x14,y14,z14,c14,x15,y15,z15); + cordic_stage #(bitwidth+2,zwidth-1,15) cordic_stage15 (clock,reset,enable,x15,y15,z15,c15,x16,y16,z16); + cordic_stage #(bitwidth+2,zwidth-1,16) cordic_stage16 (clock,reset,enable,x16,y16,z16,c16,x17,y17,z17); + cordic_stage #(bitwidth+2,zwidth-1,17) cordic_stage17 (clock,reset,enable,x17,y17,z17,c17,x18,y18,z18); + cordic_stage #(bitwidth+2,zwidth-1,18) cordic_stage18 (clock,reset,enable,x18,y18,z18,c18,x19,y19,z19); + cordic_stage #(bitwidth+2,zwidth-1,19) cordic_stage19 (clock,reset,enable,x19,y19,z19,c19,x20,y20,z20); + + assign xo = x20[bitwidth:1]; + assign yo = y20[bitwidth:1]; + assign zo = z20; + +endmodule // cordic + diff --git a/fpga/usrp3/lib/dsp/ddc_chain.v b/fpga/usrp3/lib/dsp/ddc_chain.v new file mode 100644 index 000000000..365175bf1 --- /dev/null +++ b/fpga/usrp3/lib/dsp/ddc_chain.v @@ -0,0 +1,164 @@ +// +// Copyright 2011-2013 Ettus Research LLC +// + + +//! The USRP digital down-conversion chain + +module ddc_chain + #( + parameter BASE = 0, + parameter DSPNO = 0, + parameter WIDTH = 24 + ) + (input clk, input rst, input clr, + input set_stb, input [7:0] set_addr, input [31:0] set_data, + + // From RX frontend + input [WIDTH-1:0] rx_fe_i, + input [WIDTH-1:0] rx_fe_q, + + // To RX control + output [31:0] sample, + input run, + output strobe, + output [31:0] debug + ); + + localparam cwidth = 25; + localparam zwidth = 24; + + wire [31:0] phase_inc; + reg [31:0] phase; + + wire [17:0] scale_factor; + wire [cwidth-1:0] i_cordic, q_cordic; + wire [WIDTH-1:0] i_cordic_clip, q_cordic_clip; + wire [WIDTH-1:0] i_cic, q_cic; + wire [WIDTH-1:0] i_hb1, q_hb1; + wire [WIDTH-1:0] i_hb2, q_hb2; + + wire strobe_cic, strobe_hb1, strobe_hb2; + wire enable_hb1, enable_hb2; + wire [7:0] cic_decim_rate; + + reg [WIDTH-1:0] rx_fe_i_mux, rx_fe_q_mux; + wire realmode; + wire swap_iq; + + setting_reg #(.my_addr(BASE+0)) sr_0 + (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(phase_inc),.changed()); + + setting_reg #(.my_addr(BASE+1), .width(18)) sr_1 + (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(scale_factor),.changed()); + + setting_reg #(.my_addr(BASE+2), .width(10)) sr_2 + (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out({enable_hb1, enable_hb2, cic_decim_rate}),.changed()); + + setting_reg #(.my_addr(BASE+3), .width(2)) sr_3 + (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out({realmode,swap_iq}),.changed()); + + // MUX so we can do realmode signals on either input + + always @(posedge clk) + if(swap_iq) + begin + rx_fe_i_mux <= rx_fe_q; + rx_fe_q_mux <= realmode ? 0 : rx_fe_i; + end + else + begin + rx_fe_i_mux <= rx_fe_i; + rx_fe_q_mux <= realmode ? 0 : rx_fe_q; + end + + // NCO + always @(posedge clk) + if(rst) + phase <= 0; + else if(~run) + phase <= 0; + else + phase <= phase + phase_inc; + + //sign extension of cordic input + wire [cwidth-1:0] to_cordic_i, to_cordic_q; + sign_extend #(.bits_in(WIDTH), .bits_out(cwidth)) sign_extend_cordic_i (.in(rx_fe_i_mux), .out(to_cordic_i)); + sign_extend #(.bits_in(WIDTH), .bits_out(cwidth)) sign_extend_cordic_q (.in(rx_fe_q_mux), .out(to_cordic_q)); + + // CORDIC 24-bit I/O + cordic_z24 #(.bitwidth(cwidth)) + cordic(.clock(clk), .reset(rst), .enable(run), + .xi(to_cordic_i),. yi(to_cordic_q), .zi(phase[31:32-zwidth]), + .xo(i_cordic),.yo(q_cordic),.zo() ); + + clip_reg #(.bits_in(cwidth), .bits_out(WIDTH)) clip_i + (.clk(clk), .in(i_cordic), .strobe_in(1'b1), .out(i_cordic_clip)); + clip_reg #(.bits_in(cwidth), .bits_out(WIDTH)) clip_q + (.clk(clk), .in(q_cordic), .strobe_in(1'b1), .out(q_cordic_clip)); + + // CIC decimator 24 bit I/O + cic_strober cic_strober(.clock(clk),.reset(rst),.enable(run),.rate(cic_decim_rate), + .strobe_fast(1),.strobe_slow(strobe_cic) ); + + cic_decim #(.bw(WIDTH)) + decim_i (.clock(clk),.reset(rst),.enable(run), + .rate(cic_decim_rate),.strobe_in(1'b1),.strobe_out(strobe_cic), + .signal_in(i_cordic_clip),.signal_out(i_cic)); + + cic_decim #(.bw(WIDTH)) + decim_q (.clock(clk),.reset(rst),.enable(run), + .rate(cic_decim_rate),.strobe_in(1'b1),.strobe_out(strobe_cic), + .signal_in(q_cordic_clip),.signal_out(q_cic)); + + // First (small) halfband 24 bit I/O + small_hb_dec #(.WIDTH(WIDTH)) small_hb_i + (.clk(clk),.rst(rst),.bypass(~enable_hb1),.run(run), + .stb_in(strobe_cic),.data_in(i_cic),.stb_out(strobe_hb1),.data_out(i_hb1)); + + small_hb_dec #(.WIDTH(WIDTH)) small_hb_q + (.clk(clk),.rst(rst),.bypass(~enable_hb1),.run(run), + .stb_in(strobe_cic),.data_in(q_cic),.stb_out(),.data_out(q_hb1)); + + // Second (large) halfband 24 bit I/O + wire [8:0] cpi_hb = enable_hb1 ? {cic_decim_rate,1'b0} : {1'b0,cic_decim_rate}; + hb_dec #(.WIDTH(WIDTH)) hb_i + (.clk(clk),.rst(rst),.bypass(~enable_hb2),.run(run),.cpi(cpi_hb), + .stb_in(strobe_hb1),.data_in(i_hb1),.stb_out(strobe_hb2),.data_out(i_hb2)); + + hb_dec #(.WIDTH(WIDTH)) hb_q + (.clk(clk),.rst(rst),.bypass(~enable_hb2),.run(run),.cpi(cpi_hb), + .stb_in(strobe_hb1),.data_in(q_hb1),.stb_out(),.data_out(q_hb2)); + + //scalar operation (gain of 6 bits) + wire [35:0] prod_i, prod_q; + + MULT18X18S mult_i + (.P(prod_i), .A(i_hb2[WIDTH-1:WIDTH-18]), .B(scale_factor), .C(clk), .CE(strobe_hb2), .R(rst) ); + MULT18X18S mult_q + (.P(prod_q), .A(q_hb2[WIDTH-1:WIDTH-18]), .B(scale_factor), .C(clk), .CE(strobe_hb2), .R(rst) ); + + //pipeline for the multiplier (gain of 10 bits) + reg [WIDTH-1:0] prod_reg_i, prod_reg_q; + reg strobe_mult; + + always @(posedge clk) begin + strobe_mult <= strobe_hb2; + prod_reg_i <= prod_i[33:34-WIDTH]; + prod_reg_q <= prod_q[33:34-WIDTH]; + end + + // Round final answer to 16 bits + round_sd #(.WIDTH_IN(WIDTH),.WIDTH_OUT(16)) round_i + (.clk(clk),.reset(rst), .in(prod_reg_i),.strobe_in(strobe_mult), .out(sample[31:16]), .strobe_out(strobe)); + + round_sd #(.WIDTH_IN(WIDTH),.WIDTH_OUT(16)) round_q + (.clk(clk),.reset(rst), .in(prod_reg_q),.strobe_in(strobe_mult), .out(sample[15:0]), .strobe_out()); + + assign debug = {enable_hb1, enable_hb2, run, strobe, strobe_cic, strobe_hb1, strobe_hb2}; + +endmodule // ddc_chain diff --git a/fpga/usrp3/lib/dsp/duc_chain.v b/fpga/usrp3/lib/dsp/duc_chain.v new file mode 100644 index 000000000..06c87bd34 --- /dev/null +++ b/fpga/usrp3/lib/dsp/duc_chain.v @@ -0,0 +1,146 @@ +// +// Copyright 2011-2013 Ettus Research LLC +// + + +//! The USRP digital up-conversion chain + +module duc_chain + #( + parameter BASE = 0, + parameter DSPNO = 0, + parameter WIDTH = 24 + ) + (input clk, input rst, input clr, + input set_stb, input [7:0] set_addr, input [31:0] set_data, + + // To TX frontend + output [WIDTH-1:0] tx_fe_i, + output [WIDTH-1:0] tx_fe_q, + + // From TX control + input [31:0] sample, + input run, + output strobe, + output [31:0] debug + ); + + wire [17:0] scale_factor; + wire [31:0] phase_inc; + reg [31:0] phase; + wire [7:0] interp_rate; + wire [3:0] tx_femux_a, tx_femux_b; + wire enable_hb1, enable_hb2; + wire rate_change; + + setting_reg #(.my_addr(BASE+0)) sr_0 + (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(phase_inc),.changed()); + + setting_reg #(.my_addr(BASE+1), .width(18)) sr_1 + (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(scale_factor),.changed()); + + setting_reg #(.my_addr(BASE+2), .width(10)) sr_2 + (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out({enable_hb1, enable_hb2, interp_rate}),.changed(rate_change)); + + // Strobes are all now delayed by 1 cycle for timing reasons + wire strobe_cic_pre, strobe_hb1_pre, strobe_hb2_pre; + reg strobe_cic = 1; + reg strobe_hb1 = 1; + reg strobe_hb2 = 1; + + assign strobe = strobe_hb1; + + cic_strober #(.WIDTH(8)) + cic_strober(.clock(clk),.reset(rst),.enable(run & ~rate_change),.rate(interp_rate), + .strobe_fast(1),.strobe_slow(strobe_cic_pre) ); + cic_strober #(.WIDTH(2)) + hb2_strober(.clock(clk),.reset(rst),.enable(run & ~rate_change),.rate(enable_hb2 ? 2 : 1), + .strobe_fast(strobe_cic_pre),.strobe_slow(strobe_hb2_pre) ); + cic_strober #(.WIDTH(2)) + hb1_strober(.clock(clk),.reset(rst),.enable(run & ~rate_change),.rate(enable_hb1 ? 2 : 1), + .strobe_fast(strobe_hb2_pre),.strobe_slow(strobe_hb1_pre) ); + + always @(posedge clk) strobe_hb1 <= strobe_hb1_pre; + always @(posedge clk) strobe_hb2 <= strobe_hb2_pre; + always @(posedge clk) strobe_cic <= strobe_cic_pre; + + // NCO + always @(posedge clk) + if(rst) + phase <= 0; + else if(~run) + phase <= 0; + else + phase <= phase + phase_inc; + + wire signed [17:0] da, db; + wire signed [35:0] prod_i, prod_q; + + assign tx_fe_i = prod_i[33:34-WIDTH]; + assign tx_fe_q = prod_q[33:34-WIDTH]; + + wire [17:0] i_interp, q_interp; + + wire [17:0] hb1_i, hb1_q, hb2_i, hb2_q; + + wire [7:0] cpo = enable_hb2 ? ({interp_rate,1'b0}) : interp_rate; + // Note that max CIC rate is 128, which would give an overflow on cpo if enable_hb2 is true, + // but the default case inside hb_interp handles this + + hb_interp #(.IWIDTH(18),.OWIDTH(18),.ACCWIDTH(WIDTH)) hb_interp_i + (.clk(clk),.rst(rst),.bypass(~enable_hb1),.cpo(cpo),.stb_in(strobe_hb1),.data_in({sample[31:16], 2'b0}),.stb_out(strobe_hb2),.data_out(hb1_i)); + hb_interp #(.IWIDTH(18),.OWIDTH(18),.ACCWIDTH(WIDTH)) hb_interp_q + (.clk(clk),.rst(rst),.bypass(~enable_hb1),.cpo(cpo),.stb_in(strobe_hb1),.data_in({sample[15:0], 2'b0}),.stb_out(strobe_hb2),.data_out(hb1_q)); + + small_hb_int #(.WIDTH(18)) small_hb_interp_i + (.clk(clk),.rst(rst),.bypass(~enable_hb2),.stb_in(strobe_hb2),.data_in(hb1_i), + .output_rate(interp_rate),.stb_out(strobe_cic),.data_out(hb2_i)); + small_hb_int #(.WIDTH(18)) small_hb_interp_q + (.clk(clk),.rst(rst),.bypass(~enable_hb2),.stb_in(strobe_hb2),.data_in(hb1_q), + .output_rate(interp_rate),.stb_out(strobe_cic),.data_out(hb2_q)); + + cic_interp #(.bw(18),.N(4),.log2_of_max_rate(7)) + cic_interp_i(.clock(clk),.reset(rst),.enable(run & ~rate_change),.rate(interp_rate), + .strobe_in(strobe_cic),.strobe_out(1), + .signal_in(hb2_i),.signal_out(i_interp)); + + cic_interp #(.bw(18),.N(4),.log2_of_max_rate(7)) + cic_interp_q(.clock(clk),.reset(rst),.enable(run & ~rate_change),.rate(interp_rate), + .strobe_in(strobe_cic),.strobe_out(1), + .signal_in(hb2_q),.signal_out(q_interp)); + + localparam cwidth = WIDTH; // was 18 + localparam zwidth = 24; // was 16 + + wire [cwidth-1:0] da_c, db_c; + + cordic_z24 #(.bitwidth(cwidth)) + cordic(.clock(clk), .reset(rst), .enable(run), + .xi({i_interp,{(cwidth-18){1'b0}}}),.yi({q_interp,{(cwidth-18){1'b0}}}), + .zi(phase[31:32-zwidth]), + .xo(da_c),.yo(db_c),.zo() ); + + MULT18X18S MULT18X18S_inst + (.P(prod_i), // 36-bit multiplier output + .A(da_c[cwidth-1:cwidth-18]), // 18-bit multiplier input + .B(scale_factor), // 18-bit multiplier input + .C(clk), // Clock input + .CE(1), // Clock enable input + .R(rst) // Synchronous reset input + ); + + MULT18X18S MULT18X18S_inst_2 + (.P(prod_q), // 36-bit multiplier output + .A(db_c[cwidth-1:cwidth-18]), // 18-bit multiplier input + .B(scale_factor), // 18-bit multiplier input + .C(clk), // Clock input + .CE(1), // Clock enable input + .R(rst) // Synchronous reset input + ); + + assign debug = {strobe_cic, strobe_hb1, strobe_hb2,run}; + +endmodule // duc_chain diff --git a/fpga/usrp3/lib/dsp/hb_dec.v b/fpga/usrp3/lib/dsp/hb_dec.v new file mode 100644 index 000000000..65c771ca7 --- /dev/null +++ b/fpga/usrp3/lib/dsp/hb_dec.v @@ -0,0 +1,177 @@ +// +// Copyright 2011 Ettus Research LLC +// + + +// Final halfband decimator +// Implements impulse responses of the form [A 0 B 0 C .. 0 H 0.5 H 0 .. C 0 B 0 A] +// Strobe in cannot come faster than every 2nd clock cycle +// These taps designed by halfgen4 from ldoolittle +// myfilt = round(2^18 * halfgen4(.7/4,8)) + +module hb_dec + #(parameter WIDTH=24) + (input clk, + input rst, + input bypass, + input run, + input [8:0] cpi, // Clocks per input -- equal to the decimation ratio ahead of this block + input stb_in, + input [WIDTH-1:0] data_in, + output reg stb_out, + output reg [WIDTH-1:0] data_out); + + localparam INTWIDTH = 17; + localparam ACCWIDTH = WIDTH + 3; + + // Round off inputs to 17 bits because of 18 bit multipliers + wire [INTWIDTH-1:0] data_rnd; + wire stb_rnd; + + round_sd #(.WIDTH_IN(WIDTH),.WIDTH_OUT(INTWIDTH)) round_in + (.clk(clk),.reset(rst),.in(data_in),.strobe_in(stb_in),.out(data_rnd),.strobe_out(stb_rnd)); + + // Control + reg [3:0] addr_odd_a, addr_odd_b, addr_odd_c, addr_odd_d; + wire write_odd, write_even, do_mult; + reg odd; + reg [2:0] phase, phase_d1; + reg stb_out_int; + wire clear, do_acc; + assign do_mult = 1; + + always @(posedge clk) + if(rst | ~run) + odd <= 0; + else if(stb_rnd) + odd <= ~odd; + + assign write_odd = stb_rnd & odd; + assign write_even = stb_rnd & ~odd; + + always @(posedge clk) + if(rst | ~run) + phase <= 0; + else if(stb_rnd & odd) + phase <= 1; + else if(phase == 4) + phase <= 0; + else if(phase != 0) + phase <= phase + 1; + + always @(posedge clk) + phase_d1 <= phase; + + reg [15:0] stb_out_pre; + always @(posedge clk) + if(rst) + stb_out_pre <= 0; + else + stb_out_pre <= {stb_out_pre[14:0],(stb_rnd & odd)}; + + always @* + case(phase) + 1 : begin addr_odd_a = 0; addr_odd_b = 15; end + 2 : begin addr_odd_a = 1; addr_odd_b = 14; end + 3 : begin addr_odd_a = 2; addr_odd_b = 13; end + 4 : begin addr_odd_a = 3; addr_odd_b = 12; end + default : begin addr_odd_a = 0; addr_odd_b = 15; end + endcase // case(phase) + + always @* + case(phase) + 1 : begin addr_odd_c = 4; addr_odd_d = 11; end + 2 : begin addr_odd_c = 5; addr_odd_d = 10; end + 3 : begin addr_odd_c = 6; addr_odd_d = 9; end + 4 : begin addr_odd_c = 7; addr_odd_d = 8; end + default : begin addr_odd_c = 4; addr_odd_d = 11; end + endcase // case(phase) + + assign do_acc = |stb_out_pre[6:3]; + assign clear = stb_out_pre[3]; + + // Data + wire [INTWIDTH-1:0] data_odd_a, data_odd_b, data_odd_c, data_odd_d; + reg [INTWIDTH:0] sum1, sum2; // these are 18-bit inputs to mult + reg [WIDTH:0] final_sum; + wire [WIDTH-1:0] final_sum_clip; + reg [17:0] coeff1, coeff2; + wire [35:0] prod1, prod2; + + always @* // Outer coeffs + case(phase_d1) + 1 : coeff1 = -107; + 2 : coeff1 = 445; + 3 : coeff1 = -1271; + 4 : coeff1 = 2959; + default : coeff1 = -107; + endcase // case(phase) + + always @* // Inner coeffs + case(phase_d1) + 1 : coeff2 = -6107; + 2 : coeff2 = 11953; + 3 : coeff2 = -24706; + 4 : coeff2 = 82359; + default : coeff2 = -6107; + endcase // case(phase) + + srl #(.WIDTH(INTWIDTH)) srl_odd_a + (.clk(clk),.write(write_odd),.in(data_rnd),.addr(addr_odd_a),.out(data_odd_a)); + srl #(.WIDTH(INTWIDTH)) srl_odd_b + (.clk(clk),.write(write_odd),.in(data_rnd),.addr(addr_odd_b),.out(data_odd_b)); + srl #(.WIDTH(INTWIDTH)) srl_odd_c + (.clk(clk),.write(write_odd),.in(data_rnd),.addr(addr_odd_c),.out(data_odd_c)); + srl #(.WIDTH(INTWIDTH)) srl_odd_d + (.clk(clk),.write(write_odd),.in(data_rnd),.addr(addr_odd_d),.out(data_odd_d)); + + always @(posedge clk) sum1 <= {data_odd_a[INTWIDTH-1],data_odd_a} + {data_odd_b[INTWIDTH-1],data_odd_b}; + always @(posedge clk) sum2 <= {data_odd_c[INTWIDTH-1],data_odd_c} + {data_odd_d[INTWIDTH-1],data_odd_d}; + + wire [INTWIDTH-1:0] data_even; + reg [3:0] addr_even; + + always @(posedge clk) + case(cpi) + // 1 is an error + 2 : addr_even <= 9; // Maximum speed (overall decim by 4) + 3, 4, 5, 6, 7 : addr_even <= 8; + default : addr_even <= 7; + endcase // case(cpi) + + srl #(.WIDTH(INTWIDTH)) srl_even + (.clk(clk),.write(write_even),.in(data_rnd),.addr(addr_even),.out(data_even)); + + MULT18X18S mult1(.C(clk), .CE(do_mult), .R(rst), .P(prod1), .A(coeff1), .B(sum1) ); + MULT18X18S mult2(.C(clk), .CE(do_mult), .R(rst), .P(prod2), .A(coeff2), .B(sum2) ); + + reg [35:0] sum_of_prod; + always @(posedge clk) sum_of_prod <= prod1 + prod2; // Can't overflow + + wire [ACCWIDTH-1:0] acc_out; + acc #(.IWIDTH(ACCWIDTH-2),.OWIDTH(ACCWIDTH)) + acc (.clk(clk),.clear(clear),.acc(do_acc),.in(sum_of_prod[35:38-ACCWIDTH]),.out(acc_out)); + + wire [ACCWIDTH-1:0] data_even_signext; + + localparam SHIFT_FACTOR = 6; + + sign_extend #(.bits_in(INTWIDTH),.bits_out(ACCWIDTH-SHIFT_FACTOR)) signext_data_even + (.in(data_even),.out(data_even_signext[ACCWIDTH-1:SHIFT_FACTOR])); + assign data_even_signext[SHIFT_FACTOR-1:0] = 0; + + always @(posedge clk) final_sum <= acc_out + data_even_signext; + + clip #(.bits_in(WIDTH+1), .bits_out(WIDTH)) clip (.in(final_sum), .out(final_sum_clip)); + + // Output MUX to allow for bypass + wire selected_stb = bypass ? stb_in : stb_out_pre[8]; + + always @(posedge clk) + begin + stb_out <= selected_stb; + if(selected_stb) + data_out <= bypass ? data_in : final_sum_clip; + end + +endmodule // hb_dec diff --git a/fpga/usrp3/lib/dsp/hb_interp.v b/fpga/usrp3/lib/dsp/hb_interp.v new file mode 100644 index 000000000..c0914255c --- /dev/null +++ b/fpga/usrp3/lib/dsp/hb_interp.v @@ -0,0 +1,165 @@ +// +// Copyright 2011 Ettus Research LLC +// + + +// First halfband iterpolator +// Implements impulse responses of the form [A 0 B 0 C .. 0 H 0.5 H 0 .. C 0 B 0 A] +// Strobe in cannot come faster than every 4th clock cycle, +// Strobe out cannot come faster than every 2nd clock cycle + +// These taps designed by halfgen4 from ldoolittle +// myfilt = round(2^18 * halfgen4(.7/4,8)) + +module hb_interp + #(parameter IWIDTH=18, OWIDTH=18, ACCWIDTH=24) + (input clk, + input rst, + input bypass, + input [7:0] cpo, // Clocks per output, must be at least 2 + input stb_in, + input [IWIDTH-1:0] data_in, + input stb_out, + output reg [OWIDTH-1:0] data_out); + + localparam MWIDTH = ACCWIDTH-2; + localparam CWIDTH = 18; + + reg [CWIDTH-1:0] coeff1, coeff2; + reg [3:0] addr_a, addr_b, addr_c, addr_d, addr_e; + wire [IWIDTH-1:0] data_a, data_b, data_c, data_d, data_e, sum1, sum2; + wire [35:0] prod1, prod2; + + reg [2:0] phase, phase_d1, phase_d2, phase_d3, phase_d4, phase_d5; + + always @(posedge clk) + if(rst) + phase <= 0; + else + if(stb_in) + phase <= 1; + else if(phase==4) + phase <= 0; + else if(phase!=0) + phase <= phase + 1; + always @(posedge clk) phase_d1 <= phase; + always @(posedge clk) phase_d2 <= phase_d1; + always @(posedge clk) phase_d3 <= phase_d2; + always @(posedge clk) phase_d4 <= phase_d3; + always @(posedge clk) phase_d5 <= phase_d4; + + srl #(.WIDTH(IWIDTH)) srl_a + (.clk(clk),.rst(rst),.write(stb_in),.in(data_in),.addr(addr_a),.out(data_a)); + srl #(.WIDTH(IWIDTH)) srl_b + (.clk(clk),.rst(rst),.write(stb_in),.in(data_in),.addr(addr_b),.out(data_b)); + srl #(.WIDTH(IWIDTH)) srl_c + (.clk(clk),.rst(rst),.write(stb_in),.in(data_in),.addr(addr_c),.out(data_c)); + srl #(.WIDTH(IWIDTH)) srl_d + (.clk(clk),.rst(rst),.write(stb_in),.in(data_in),.addr(addr_d),.out(data_d)); + srl #(.WIDTH(IWIDTH)) srl_e + (.clk(clk),.rst(rst),.write(stb_in),.in(data_in),.addr(addr_e),.out(data_e)); + + always @* + case(phase) + 1 : begin addr_a = 0; addr_b = 15; end + 2 : begin addr_a = 1; addr_b = 14; end + 3 : begin addr_a = 2; addr_b = 13; end + 4 : begin addr_a = 3; addr_b = 12; end + default : begin addr_a = 0; addr_b = 15; end + endcase // case(phase) + + always @* + case(phase) + 1 : begin addr_c = 4; addr_d = 11; end + 2 : begin addr_c = 5; addr_d = 10; end + 3 : begin addr_c = 6; addr_d = 9; end + 4 : begin addr_c = 7; addr_d = 8; end + default : begin addr_c = 4; addr_d = 11; end + endcase // case(phase) + + always @* + case(cpo) + 2 : addr_e <= 9; + 3,4,5,6,7,8 : addr_e <= 8; + default : addr_e <= 7; // This case works for 256, which = 0 due to overflow outside this block + endcase // case(cpo) + + always @* // Outer coeffs + case(phase_d1) + 1 : coeff1 = -107; + 2 : coeff1 = 445; + 3 : coeff1 = -1271; + 4 : coeff1 = 2959; + default : coeff1 = -107; + endcase // case(phase) + + always @* // Inner coeffs + case(phase_d1) + 1 : coeff2 = -6107; + 2 : coeff2 = 11953; + 3 : coeff2 = -24706; + 4 : coeff2 = 82359; + default : coeff2 = -6107; + endcase // case(phase) + + add2_reg /*_and_round_reg*/ #(.WIDTH(IWIDTH)) add1 (.clk(clk),.in1(data_a),.in2(data_b),.sum(sum1)); + add2_reg /*_and_round_reg*/ #(.WIDTH(IWIDTH)) add2 (.clk(clk),.in1(data_c),.in2(data_d),.sum(sum2)); + // sum1, sum2 available on phase_d1 + + wire do_mult = 1; + MULT18X18S mult1(.C(clk), .CE(do_mult), .R(rst), .P(prod1), .A(coeff1), .B(sum1) ); + MULT18X18S mult2(.C(clk), .CE(do_mult), .R(rst), .P(prod2), .A(coeff2), .B(sum2) ); + // prod1, prod2 available on phase_d2 + + wire [MWIDTH-1:0] sum_of_prod; + + add2_and_round_reg #(.WIDTH(MWIDTH)) + add3 (.clk(clk),.in1(prod1[35:36-MWIDTH]),.in2(prod2[35:36-MWIDTH]),.sum(sum_of_prod)); + // sum_of_prod available on phase_d3 + + wire [ACCWIDTH-1:0] acc_out; + + wire clear = (phase_d3 == 1); + wire do_acc = (phase_d3 != 0); + + acc #(.IWIDTH(MWIDTH),.OWIDTH(ACCWIDTH)) //IJB rst + acc (.clk(clk),.clear(rst|clear),.acc(do_acc),.in(sum_of_prod),.out(acc_out)); + // acc_out available on phase_d4 + + wire [ACCWIDTH-6:0] clipped_acc; + clip #(.bits_in(ACCWIDTH),.bits_out(ACCWIDTH-5)) final_clip(.in(acc_out),.out(clipped_acc)); + + reg [ACCWIDTH-6:0] clipped_reg; + always @(posedge clk) + if (rst) + clipped_reg <= 0; + else if(phase_d4 == 4) + clipped_reg <= clipped_acc; + // clipped_reg available on phase_d5 + + wire [OWIDTH-1:0] data_out_round; + round #(.bits_in(ACCWIDTH-5),.bits_out(OWIDTH)) final_round (.in(clipped_reg),.out(data_out_round)); + + reg odd; + always @(posedge clk) + if(rst) + odd <= 0; + else if(stb_in) + odd <= 0; + else if(stb_out) + odd <= 1; + + always @(posedge clk) + if (rst) + data_out <= 0; + else if(bypass) + data_out <= data_in; + else if(stb_out) + if(odd) + data_out <= data_e; + else + data_out <= data_out_round; + + // data_out available on phase_d6 + +endmodule // hb_interp diff --git a/fpga/usrp3/lib/dsp/round.v b/fpga/usrp3/lib/dsp/round.v new file mode 100644 index 000000000..7ecc10ff7 --- /dev/null +++ b/fpga/usrp3/lib/dsp/round.v @@ -0,0 +1,47 @@ +// -*- verilog -*- +// +// USRP - Universal Software Radio Peripheral +// +// Copyright (C) 2011 Matt Ettus +// + +// + +// Rounding "macro" +// Keeps the topmost bits, does proper 2s comp round to zero (unbiased truncation) + +module round + #(parameter bits_in=0, + parameter bits_out=0, + parameter round_to_zero=0, // original behavior + parameter round_to_nearest=1, // lowest noise + parameter trunc=0) // round to negative infinity + (input [bits_in-1:0] in, + output [bits_out-1:0] out, + output [bits_in-bits_out:0] err); + + wire round_corr,round_corr_trunc,round_corr_rtz,round_corr_nearest,round_corr_nearest_safe; + + assign round_corr_trunc = 0; + assign round_corr_rtz = (in[bits_in-1] & |in[bits_in-bits_out-1:0]); + assign round_corr_nearest = in[bits_in-bits_out-1]; + + generate + if(bits_in-bits_out > 1) + assign round_corr_nearest_safe = (~in[bits_in-1] & (&in[bits_in-2:bits_out])) ? 0 : + round_corr_nearest; + else + assign round_corr_nearest_safe = round_corr_nearest; + endgenerate + + + assign round_corr = round_to_nearest ? round_corr_nearest_safe : + trunc ? round_corr_trunc : + round_to_zero ? round_corr_rtz : + 0; // default to trunc + + assign out = in[bits_in-1:bits_in-bits_out] + round_corr; + + assign err = in - {out,{(bits_in-bits_out){1'b0}}}; + +endmodule // round diff --git a/fpga/usrp3/lib/dsp/round_reg.v b/fpga/usrp3/lib/dsp/round_reg.v new file mode 100644 index 000000000..c8c77f518 --- /dev/null +++ b/fpga/usrp3/lib/dsp/round_reg.v @@ -0,0 +1,32 @@ +// -*- verilog -*- +// +// USRP - Universal Software Radio Peripheral +// +// Copyright (C) 2008 Matt Ettus +// + +// + +// Rounding "macro" +// Keeps the topmost bits, does proper 2s comp rounding (round-to-zero) + +module round_reg + #(parameter bits_in=0, + parameter bits_out=0) + (input clk, + input [bits_in-1:0] in, + output reg [bits_out-1:0] out, + output reg [bits_in-bits_out:0] err); + + wire [bits_out-1:0] temp; + wire [bits_in-bits_out:0] err_temp; + + round #(.bits_in(bits_in),.bits_out(bits_out)) round (.in(in),.out(temp), .err(err_temp)); + + always @(posedge clk) + out <= temp; + + always @(posedge clk) + err <= err_temp; + +endmodule // round_reg diff --git a/fpga/usrp3/lib/dsp/round_sd.v b/fpga/usrp3/lib/dsp/round_sd.v new file mode 100644 index 000000000..94584f6ef --- /dev/null +++ b/fpga/usrp3/lib/dsp/round_sd.v @@ -0,0 +1,23 @@ + + +module round_sd + #(parameter WIDTH_IN=18, + parameter WIDTH_OUT=16, + parameter DISABLE_SD=0) + (input clk, input reset, + input [WIDTH_IN-1:0] in, input strobe_in, + output [WIDTH_OUT-1:0] out, output strobe_out); + + localparam ERR_WIDTH = WIDTH_IN - WIDTH_OUT + 1; + + wire [ERR_WIDTH-1:0] err; + wire [WIDTH_IN-1:0] err_ext, sum; + + sign_extend #(.bits_in(ERR_WIDTH),.bits_out(WIDTH_IN)) ext_err (.in(err), .out(err_ext)); + + add2_and_clip_reg #(.WIDTH(WIDTH_IN)) add2_and_clip_reg + (.clk(clk), .rst(reset), .in1(in), .in2((DISABLE_SD == 0) ? err_ext : 0), .strobe_in(strobe_in), .sum(sum), .strobe_out(strobe_out)); + + round #(.bits_in(WIDTH_IN),.bits_out(WIDTH_OUT)) round_sum (.in(sum), .out(out), .err(err)); + +endmodule // round_sd diff --git a/fpga/usrp3/lib/dsp/rx_dcoffset.v b/fpga/usrp3/lib/dsp/rx_dcoffset.v new file mode 100644 index 000000000..f74b0f1a0 --- /dev/null +++ b/fpga/usrp3/lib/dsp/rx_dcoffset.v @@ -0,0 +1,46 @@ +// +// Copyright 2011 Ettus Research LLC +// + + + + +module rx_dcoffset + #(parameter WIDTH=16, + parameter ADDR=8'd0, + parameter alpha_shift=20) + (input clk, input rst, + input set_stb, input [7:0] set_addr, input [31:0] set_data, + input [WIDTH-1:0] in, output [WIDTH-1:0] out); + + wire set_now = set_stb & (ADDR == set_addr); + + reg fixed; // uses fixed offset + wire [WIDTH-1:0] fixed_dco; + + localparam int_width = WIDTH + alpha_shift; + reg [int_width-1:0] integrator; + wire [WIDTH-1:0] quantized; + + always @(posedge clk) + if(rst) + begin + fixed <= 0; + integrator <= {int_width{1'b0}}; + end + else if(set_now) + begin + fixed <= set_data[31]; + if(set_data[30]) + integrator <= {set_data[29:0],{(int_width-30){1'b0}}}; + end + else if(~fixed) + integrator <= integrator + {{(alpha_shift){out[WIDTH-1]}},out}; + + round_sd #(.WIDTH_IN(int_width),.WIDTH_OUT(WIDTH)) round_sd + (.clk(clk), .reset(rst), .in(integrator), .strobe_in(1'b1), .out(quantized), .strobe_out()); + + add2_and_clip_reg #(.WIDTH(WIDTH)) add2_and_clip_reg + (.clk(clk), .rst(rst), .in1(in), .in2(-quantized), .strobe_in(1'b1), .sum(out), .strobe_out()); + +endmodule // rx_dcoffset diff --git a/fpga/usrp3/lib/dsp/rx_frontend.v b/fpga/usrp3/lib/dsp/rx_frontend.v new file mode 100644 index 000000000..ebe19240c --- /dev/null +++ b/fpga/usrp3/lib/dsp/rx_frontend.v @@ -0,0 +1,78 @@ + +module rx_frontend + #(parameter BASE = 0, + parameter IQCOMP_EN = 1) + (input clk, input rst, + input set_stb, input [7:0] set_addr, input [31:0] set_data, + + input [15:0] adc_a, input adc_ovf_a, + input [15:0] adc_b, input adc_ovf_b, + + output [23:0] i_out, output [23:0] q_out, + input run, + output [31:0] debug + ); + + reg [15:0] adc_i, adc_q; + wire [17:0] adc_i_ofs, adc_q_ofs; + wire [35:0] corr_i, corr_q; wire [17:0] mag_corr,phase_corr; + wire swap_iq; + + setting_reg #(.my_addr(BASE), .width(1)) sr_8 + (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(swap_iq),.changed()); + + always @(posedge clk) + if(swap_iq) // Swap + {adc_i,adc_q} <= {adc_b,adc_a}; + else + {adc_i,adc_q} <= {adc_a,adc_b}; + + setting_reg #(.my_addr(BASE+1),.width(18)) sr_1 + (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(mag_corr),.changed()); + + setting_reg #(.my_addr(BASE+2),.width(18)) sr_2 + (.clk(clk),.rst(rst),.strobe(set_stb),.addr(set_addr), + .in(set_data),.out(phase_corr),.changed()); + + generate + if(IQCOMP_EN == 1) + begin + rx_dcoffset #(.WIDTH(18),.ADDR(BASE+3)) rx_dcoffset_i + (.clk(clk),.rst(rst),.set_stb(set_stb),.set_addr(set_addr),.set_data(set_data), + .in({adc_i,2'b00}),.out(adc_i_ofs)); + + rx_dcoffset #(.WIDTH(18),.ADDR(BASE+4)) rx_dcoffset_q + (.clk(clk),.rst(rst),.set_stb(set_stb),.set_addr(set_addr),.set_data(set_data), + .in({adc_q,2'b00}),.out(adc_q_ofs)); + + MULT18X18S mult_mag_corr + (.P(corr_i), .A(adc_i_ofs), .B(mag_corr), .C(clk), .CE(1), .R(rst) ); + + MULT18X18S mult_phase_corr + (.P(corr_q), .A(adc_i_ofs), .B(phase_corr), .C(clk), .CE(1), .R(rst) ); + + add2_and_clip_reg #(.WIDTH(24)) add_clip_i + (.clk(clk), .rst(rst), + .in1({adc_i_ofs,6'd0}), .in2(corr_i[35:12]), .strobe_in(1'b1), + .sum(i_out), .strobe_out()); + + add2_and_clip_reg #(.WIDTH(24)) add_clip_q + (.clk(clk), .rst(rst), + .in1({adc_q_ofs,6'd0}), .in2(corr_q[35:12]), .strobe_in(1'b1), + .sum(q_out), .strobe_out()); + end // if (IQCOMP_EN == 1) + else + begin + rx_dcoffset #(.WIDTH(24),.ADDR(BASE+3)) rx_dcoffset_i + (.clk(clk),.rst(rst),.set_stb(set_stb),.set_addr(set_addr),.set_data(set_data), + .in({adc_i,8'b00}),.out(i_out)); + + rx_dcoffset #(.WIDTH(24),.ADDR(BASE+4)) rx_dcoffset_q + (.clk(clk),.rst(rst),.set_stb(set_stb),.set_addr(set_addr),.set_data(set_data), + .in({adc_q,8'b00}),.out(q_out)); + end // else: !if(IQCOMP_EN == 1) + endgenerate + +endmodule // rx_frontend diff --git a/fpga/usrp3/lib/dsp/rx_frontend_tb.v b/fpga/usrp3/lib/dsp/rx_frontend_tb.v new file mode 100644 index 000000000..487b72687 --- /dev/null +++ b/fpga/usrp3/lib/dsp/rx_frontend_tb.v @@ -0,0 +1,45 @@ + +`timescale 1ns/1ns +module rx_frontend_tb(); + + reg clk, rst; + + initial rst = 1; + initial #1000 rst = 0; + initial clk = 0; + always #5 clk = ~clk; + + initial $dumpfile("rx_frontend_tb.vcd"); + initial $dumpvars(0,rx_frontend_tb); + + reg [15:0] adc_in; + wire [17:0] adc_out; + + always @(posedge clk) + begin + if(adc_in[13]) + $write("-%d,",-adc_in); + else + $write("%d,",adc_in); + if(adc_out[13]) + $write("-%d\n",-adc_out); + else + $write("%d\n",adc_out); + end + + rx_frontend #(.BASE(0)) rx_frontend + (.clk(clk),.rst(rst), + .set_stb(0),.set_addr(0),.set_data(0), + .adc_a(adc_in), .adc_ovf_a(0), + .adc_b(0), .adc_ovf_b(0), + .i_out(adc_out),.q_out(), + .run(), .debug()); + + always @(posedge clk) + if(rst) + adc_in <= 0; + else + adc_in <= adc_in + 4; + //adc_in <= (($random % 473) + 23)/4; + +endmodule // rx_frontend_tb diff --git a/fpga/usrp3/lib/dsp/sign_extend.v b/fpga/usrp3/lib/dsp/sign_extend.v new file mode 100644 index 000000000..7c85920aa --- /dev/null +++ b/fpga/usrp3/lib/dsp/sign_extend.v @@ -0,0 +1,23 @@ +// -*- verilog -*- +// +// USRP - Universal Software Radio Peripheral +// +// Copyright (C) 2003 Matt Ettus +// + +// + + +// Sign extension "macro" +// bits_out should be greater than bits_in + +module sign_extend (in,out); + parameter bits_in=0; // FIXME Quartus insists on a default + parameter bits_out=0; + + input [bits_in-1:0] in; + output [bits_out-1:0] out; + + assign out = {{(bits_out-bits_in){in[bits_in-1]}},in}; + +endmodule diff --git a/fpga/usrp3/lib/dsp/small_hb_dec.v b/fpga/usrp3/lib/dsp/small_hb_dec.v new file mode 100644 index 000000000..fc776c2d7 --- /dev/null +++ b/fpga/usrp3/lib/dsp/small_hb_dec.v @@ -0,0 +1,124 @@ +// +// Copyright 2011 Ettus Research LLC +// + + +// Short halfband decimator (intended to be followed by another stage) +// Implements impulse responses of the form [A 0 B 0.5 B 0 A] +// +// These taps designed by halfgen4 from ldoolittle: +// 2 * 131072 * halfgen4(.75/8,2) +module small_hb_dec + #(parameter WIDTH=18) + (input clk, + input rst, + input bypass, + input run, + input stb_in, + input [WIDTH-1:0] data_in, + output reg stb_out, + output reg [WIDTH-1:0] data_out); + + // Round off inputs to 17 bits because of 18 bit multipliers + localparam INTWIDTH = 17; + wire [INTWIDTH-1:0] data_rnd; + wire stb_rnd; + + round_sd #(.WIDTH_IN(WIDTH),.WIDTH_OUT(INTWIDTH)) round_in + (.clk(clk),.reset(rst),.in(data_in),.strobe_in(stb_in),.out(data_rnd),.strobe_out(stb_rnd)); + + + reg stb_rnd_d1; + reg [INTWIDTH-1:0] data_rnd_d1; + always @(posedge clk) stb_rnd_d1 <= stb_rnd; + always @(posedge clk) data_rnd_d1 <= data_rnd; + + wire go; + reg phase, go_d1, go_d2, go_d3, go_d4; + always @(posedge clk) + if(rst | ~run) + phase <= 0; + else if(stb_rnd_d1) + phase <= ~phase; + assign go = stb_rnd_d1 & phase; + always @(posedge clk) + if(rst | ~run) + begin + go_d1 <= 0; + go_d2 <= 0; + go_d3 <= 0; + go_d4 <= 0; + end + else + begin + go_d1 <= go; + go_d2 <= go_d1; + go_d3 <= go_d2; + go_d4 <= go_d3; + end + + wire [17:0] coeff_a = -10690; + wire [17:0] coeff_b = 75809; + + reg [INTWIDTH-1:0] d1, d2, d3, d4 , d5, d6; + always @(posedge clk) + if(stb_rnd_d1 | rst) + begin + d1 <= data_rnd_d1; + d2 <= d1; + d3 <= d2; + d4 <= d3; + d5 <= d4; + d6 <= d5; + end + + reg [17:0] sum_a, sum_b, middle, middle_d1; + + always @(posedge clk) + if(go) + begin + sum_a <= {data_rnd_d1[INTWIDTH-1],data_rnd_d1} + {d6[INTWIDTH-1],d6}; + sum_b <= {d2[INTWIDTH-1],d2} + {d4[INTWIDTH-1],d4}; + //middle <= {d3[INTWIDTH-1],d3}; + middle <= {d3,1'b0}; + end + + always @(posedge clk) + if(go_d1) + middle_d1 <= middle; + + wire [17:0] sum = go_d1 ? sum_b : sum_a; + wire [17:0] coeff = go_d1 ? coeff_b : coeff_a; + wire [35:0] prod; + MULT18X18S mult(.C(clk), .CE(go_d1 | go_d2), .R(rst), .P(prod), .A(coeff), .B(sum) ); + + localparam ACCWIDTH = 30; + reg [ACCWIDTH-1:0] accum; + + always @(posedge clk) + if(rst) + accum <= 0; + else if(go_d2) + accum <= {middle_d1[17],middle_d1[17],middle_d1,{(16+ACCWIDTH-36){1'b0}}} + {prod[35:36-ACCWIDTH]}; + else if(go_d3) + accum <= accum + {prod[35:36-ACCWIDTH]}; + + wire [WIDTH:0] accum_rnd; + wire [WIDTH-1:0] accum_rnd_clip; + + wire stb_round; + + round_sd #(.WIDTH_IN(ACCWIDTH),.WIDTH_OUT(WIDTH+1)) round_acc + (.clk(clk), .reset(rst), .in(accum), .strobe_in(go_d4), .out(accum_rnd), .strobe_out(stb_round)); + + clip #(.bits_in(WIDTH+1),.bits_out(WIDTH)) clip (.in(accum_rnd), .out(accum_rnd_clip)); + + // Output + always @(posedge clk) + begin + stb_out <= bypass ? stb_in : stb_round; + data_out <= bypass ? data_in : accum_rnd_clip; + end + + +endmodule // small_hb_dec diff --git a/fpga/usrp3/lib/dsp/small_hb_int.v b/fpga/usrp3/lib/dsp/small_hb_int.v new file mode 100644 index 000000000..4b03b5d0c --- /dev/null +++ b/fpga/usrp3/lib/dsp/small_hb_int.v @@ -0,0 +1,99 @@ +// +// Copyright 2011 Ettus Research LLC +// + + +// Short halfband decimator (intended to be followed by another stage) +// Implements impulse responses of the form [A 0 B 0.5 B 0 A] +// +// These taps designed by halfgen4 from ldoolittle: +// 2 * 131072 * halfgen4(.75/8,2) + +module small_hb_int + #(parameter WIDTH=18) + (input clk, + input rst, + input bypass, + input stb_in, + input [WIDTH-1:0] data_in, + input [7:0] output_rate, + input stb_out, + output reg [WIDTH-1:0] data_out); + + + reg [WIDTH-1:0] d1, d2, d3, d4, d5, d6; + + localparam MWIDTH = 36; + wire [MWIDTH-1:0] prod; + + reg [6:0] stbin_d; + + always @(posedge clk) + stbin_d <= {stbin_d[5:0],stb_in}; + + always @(posedge clk) + if (rst) + begin + d1 <= 0; + d2 <= 0; + d3 <= 0; + d4 <= 0; + d5 <= 0; + d6 <= 0; + end + else if(stb_in) + begin + d1 <= data_in; + d2 <= d1; + d3 <= d2; + d4 <= d3; + d5 <= d4; + d6 <= d5; + end + + wire [WIDTH-1:0] sum_outer, sum_inner; + add2_and_round_reg #(.WIDTH(WIDTH)) add_outer (.clk(clk),.in1(d1),.in2(d4),.sum(sum_outer)); + add2_and_round_reg #(.WIDTH(WIDTH)) add_inner (.clk(clk),.in1(d2),.in2(d3),.sum(sum_inner)); + + wire [17:0] coeff_outer = -10690; + wire [17:0] coeff_inner = 75809; + + MULT18X18S mult(.C(clk), .CE(1), .R(rst), .P(prod), .A(stbin_d[1] ? coeff_outer : coeff_inner), + .B(stbin_d[1] ? sum_outer : sum_inner) ); + + wire [MWIDTH:0] accum; + acc #(.IWIDTH(MWIDTH),.OWIDTH(MWIDTH+1)) + acc (.clk(clk),.clear(stbin_d[2]),.acc(|stbin_d[3:2]),.in(prod),.out(accum)); + + wire [WIDTH+2:0] accum_rnd; + round_reg #(.bits_in(MWIDTH+1),.bits_out(WIDTH+3)) + final_round (.clk(clk),.in(accum),.out(accum_rnd)); + + wire [WIDTH-1:0] clipped; + clip_reg #(.bits_in(WIDTH+3),.bits_out(WIDTH)) final_clip + (.clk(clk),.in(accum_rnd),.strobe_in(1'b1), .out(clipped)); + + reg [WIDTH-1:0] saved, saved_d3; + always @(posedge clk) + if(stbin_d[6]) + saved <= clipped; + + always @(posedge clk) + if(stbin_d[3]) + saved_d3 <= d3; + + always @(posedge clk) + if(bypass) + data_out <= data_in; + else if(stb_in & stb_out) + case(output_rate) + 1 : data_out <= d6; + 2 : data_out <= d4; + 3, 4, 5, 6, 7 : data_out <= d3; + default : data_out <= d2; + endcase // case(output_rate) + else if(stb_out) + data_out <= saved; + +endmodule // small_hb_int + diff --git a/fpga/usrp3/lib/dsp/srl.v b/fpga/usrp3/lib/dsp/srl.v new file mode 100644 index 000000000..bbd8ac1c9 --- /dev/null +++ b/fpga/usrp3/lib/dsp/srl.v @@ -0,0 +1,27 @@ +// +// Copyright 2011 Ettus Research LLC +// + + + +module srl + #(parameter WIDTH=18) + (input clk, + input rst, + input write, + input [WIDTH-1:0] in, + input [3:0] addr, + output [WIDTH-1:0] out); + + genvar i; + generate + for (i=0;i<WIDTH;i=i+1) + begin : gen_srl + SRL16E + srl16e(.Q(out[i]), + .A0(addr[0]),.A1(addr[1]),.A2(addr[2]),.A3(addr[3]), + .CE(write|rst),.CLK(clk),.D(in[i])); + end + endgenerate + +endmodule // srl |