diff options
author | Wade Fife <wade.fife@ettus.com> | 2021-06-15 14:14:14 -0500 |
---|---|---|
committer | Wade Fife <wade.fife@ettus.com> | 2021-08-08 14:59:26 -0500 |
commit | 77975d108a704ce18ec52b4ee1764381b1893752 (patch) | |
tree | 21df90b7b78b67e8f28c6dc0a1de8a8e23d9e8fa /fpga/usrp3/lib/rfnoc | |
parent | da4202e6f74796603072aa14544581604e81df02 (diff) | |
download | uhd-77975d108a704ce18ec52b4ee1764381b1893752.tar.gz uhd-77975d108a704ce18ec52b4ee1764381b1893752.tar.bz2 uhd-77975d108a704ce18ec52b4ee1764381b1893752.zip |
fpga: rfnoc: Fix EOB loss in DUC
There were some rare corner cases where the EOB could get lost in the
DUC due to the dds_timed logic not always passing it through as it
should. This resulted in an underflow error message at the end of
transmission.
This commit also fixes an issue where part of the last packet
used a frequency shift of 0 instead of the requested frequency
shift, and an issue where the first few samples of a burst used the
wrong frequency shift value.
Part of the fix includes adding a TUSER port to dds_sin_cos_lut_only.
The TUSER port is built into the IP but was disabled. It is now
enabled and set to 1 bit wide. This has a very small effect on
resource usage and can be left unconnected when not needed.
The dds_freq_tune block was shared by the DUC and DDC. To avoid
affecting the DDC, a new version, dds_freq_tune_duc, is being
added for the DUC to use that has the necessary fixes.
The new dds_wrapper.v is a wrapper for the dds_sin_cos_lut_only IP.
This IP has the undesirable behavior that new inputs must be provided
to push previous outputs through the IP. This wrapper hides that
complexity by adding some logic to ensure all data gets pushed through
automatically. This logic uses the TUSER port on the IP.
Finally, a testbench for dds_timed was added.
Diffstat (limited to 'fpga/usrp3/lib/rfnoc')
-rw-r--r-- | fpga/usrp3/lib/rfnoc/Makefile.srcs | 2 | ||||
-rw-r--r-- | fpga/usrp3/lib/rfnoc/dds_freq_tune_duc.v | 228 | ||||
-rw-r--r-- | fpga/usrp3/lib/rfnoc/dds_timed.v | 718 | ||||
-rw-r--r-- | fpga/usrp3/lib/rfnoc/dds_wrapper.v | 160 | ||||
-rw-r--r-- | fpga/usrp3/lib/rfnoc/sim/dds_timed_tb/Makefile | 68 | ||||
-rw-r--r-- | fpga/usrp3/lib/rfnoc/sim/dds_timed_tb/dds_timed_tb.sv | 886 |
6 files changed, 1850 insertions, 212 deletions
diff --git a/fpga/usrp3/lib/rfnoc/Makefile.srcs b/fpga/usrp3/lib/rfnoc/Makefile.srcs
index ab4a9e179..512569748 100644
--- a/fpga/usrp3/lib/rfnoc/Makefile.srcs
+++ b/fpga/usrp3/lib/rfnoc/Makefile.srcs
@@ -101,6 +101,8 @@ fir_filter_slice.v \
 axi_fir_filter_dec.v \
 addsub.vhd \
 dds_freq_tune.v \
+dds_freq_tune_duc.v \
 dds_timed.v \
+dds_wrapper.v \
 datapath_gatekeeper.v \
 ))
diff --git a/fpga/usrp3/lib/rfnoc/dds_freq_tune_duc.v b/fpga/usrp3/lib/rfnoc/dds_freq_tune_duc.v
new file mode 100644
index 000000000..4b8a299b8
--- /dev/null
+++ b/fpga/usrp3/lib/rfnoc/dds_freq_tune_duc.v
@@ -0,0 +1,228 @@
+//
+// Copyright 2021 Ettus Research, a National Instruments Brand
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+//
+// Module: dds_freq_tune_duc
+//
+// Description:
+//
+//   Performs a frequency shift on a signal by multiplying it with a complex
+//   sinusoid synthesized from a DDS. This module expects sample data to be in
+//   {Q,I} order.
+//
+//   The din input is expected to contain complex 24-bit signed fixed-point
+//   values with 15 fractional bits. The phase input is expected to contain an
+//   unsigned 24-bit fixed-point value with 24 fractional bits, and therefore
+//   represents the range [0,1), which corresponds to the range [0,2π) radians.
+//   The output will then be a complex 24-bit signed fixed-point value with 15
+//   fractional bits.
+//
+//   This version does the same thing as dds_freq_tune, but does not
+//   reset/flush the DDS between packets or when an EOB occurs, and it includes
+//   a FIFO on the din data path. This separate version was created to avoid
+//   affecting the behavior of the DDC.
+//
+//            ┌───┐
+//   phase >──┤DDS├──┐ ┌───────┐
+//            └───┘  └─┤Complex│  ┌─────┐
+//                     │ Mult  ├──┤Round├───> dout
+//            ┌────┐ ┌─┤       │  └─────┘
+//     din >──┤FIFO├─┘ └───────┘
+//            └────┘
+//
+// Parameters:
+//
+//   Note: The parameters should NOT be changed, since they depend on the IP
+//   configurations.
+//
+//   INPUT_W  : Width of each component of din.
+//   PHASE_W  : Width of the phase input.
+//   OUTPUT_W : Width of each component of dout.
+//
+
+`default_nettype none
+
+
+module dds_freq_tune_duc #(
+  parameter INPUT_W  = 24,
+  parameter PHASE_W  = 24,
+  parameter OUTPUT_W = 24
+) (
+  input wire clk,
+  input wire reset,
+
+  // IQ input (Q in the upper, I in the lower bits)
+  input  wire [INPUT_W*2-1:0] s_axis_din_tdata,
+  input  wire                 s_axis_din_tlast,
+  input  wire                 s_axis_din_tvalid,
+  output wire                 s_axis_din_tready,
+
+  // Phase input from NCO
+  input  wire [PHASE_W-1:0] s_axis_phase_tdata,
+  input  wire               s_axis_phase_tlast,
+  input  wire               s_axis_phase_tvalid,
+  output wire               s_axis_phase_tready,
+
+  // IQ output (Q in the upper, I in the lower bits)
+  output wire [OUTPUT_W*2-1:0] m_axis_dout_tdata,
+  output wire                  m_axis_dout_tlast,
+  output wire                  m_axis_dout_tvalid,
+  input  wire                  m_axis_dout_tready
+);
+
+  //---------------------------------------------------------------------------
+  // Reset Generation
+  //---------------------------------------------------------------------------
+
+  reg reset_d1, reset_int;
+
+  // Create a local reset, named reset_int, which will always be asserted for
+  // at least 2 clock cycles, which is required by Xilinx DDS and complex
+  // multiplier IP.
+  always @(posedge clk) begin
+    reset_d1  <= reset;
+    reset_int <= reset | reset_d1;
+  end
+
+
+  //---------------------------------------------------------------------------
+  // Data Input FIFO
+  //---------------------------------------------------------------------------
+  //
+  // We want the din and phase input paths to be balanced, so that a new
+  // data/phase pair can be input on each clock cycle. This FIFO allows the
+  // din data path to queue up samples while the DDS is processing.
+  //
+  //---------------------------------------------------------------------------
+
+  wire [INPUT_W*2-1:0] s_axis_fifo_tdata;
+  wire                 s_axis_fifo_tlast;
+  wire                 s_axis_fifo_tvalid;
+  wire                 s_axis_fifo_tready;
+
+  axi_fifo #(
+    .WIDTH (2*INPUT_W+1),
+    .SIZE  (5)
+  ) axi_fifo_i (
+    .clk      (clk),
+    .reset    (reset),
+    .clear    (1'b0),
+    .i_tdata  ({ s_axis_din_tlast, s_axis_din_tdata }),
+    .i_tvalid (s_axis_din_tvalid),
+    .i_tready (s_axis_din_tready),
+    .o_tdata  ({ s_axis_fifo_tlast, s_axis_fifo_tdata }),
+    .o_tvalid (s_axis_fifo_tvalid),
+    .o_tready (s_axis_fifo_tready),
+    .space    (),
+    .occupied ()
+  );
+
+
+  //---------------------------------------------------------------------------
+  // DDS/NCO
+  //---------------------------------------------------------------------------
+
+  // Width of each component of the DDS output. This width is fixed by the IP
+  // configuration.
+  localparam DDS_W = 16;
+
+  wire               m_axis_dds_tlast;
+  wire               m_axis_dds_tvalid;
+  wire               m_axis_dds_tready;
+  wire [DDS_W*2-1:0] m_axis_dds_tdata;
+
+  // DDS to convert the phase input to a unit-length complex number with that
+  // phase. It takes in an unsigned 24-bit phase with 24 fractional bits and
+  // outputs two signed 16-bit fixed point values with 14 fractional bits. The
+  // output has sin(2*pi*phase) in the upper 16 bits and cos(2*pi*phase) in the
+  // lower 16 bits.
+  dds_wrapper dds_wrapper_i (
+    .clk                 (clk),
+    .rst                 (reset_int),
+    .s_axis_phase_tdata  (s_axis_phase_tdata),
+    .s_axis_phase_tvalid (s_axis_phase_tvalid),
+    .s_axis_phase_tlast  (s_axis_phase_tlast),
+    .s_axis_phase_tready (s_axis_phase_tready),
+    .m_axis_data_tdata   (m_axis_dds_tdata),
+    .m_axis_data_tvalid  (m_axis_dds_tvalid),
+    .m_axis_data_tlast   (m_axis_dds_tlast),
+    .m_axis_data_tready  (m_axis_dds_tready)
+  );
+
+
+  //---------------------------------------------------------------------------
+  // Complex Multiplier
+  //---------------------------------------------------------------------------
+  //
+  // Use a complex multiplier to multiply the DDS complex sinusoid by the input
+  // data samples.
+  //
+  //---------------------------------------------------------------------------
+
+  // Width of each component on the output of the complex_multiplier_dds IP.
+  // This width is fixed by the IP configuration.
+  localparam MULT_OUT_W = 32;
+
+  // Width is set by the IP
+  wire [2*MULT_OUT_W-1:0] mult_out_tdata;
+  wire                    mult_out_tvalid;
+  wire                    mult_out_tready;
+  wire                    mult_out_tlast;
+
+  // The complex multiplier IP is configured so that the A input is 21 bits
+  // with 15 fractional bits, and the B input (dds) is 16 bits with 14
+  // fractional bits. Due to AXI-Stream requirements, A is rounded up to 24
+  // bits in width. The full multiplier output result would be 21+16+1 = 38
+  // bits, but the output is configured for 32, dropping the lower 6 bits.
+  // Therefore, the result has 15+14-6 = 23 fractional bits.
+  //
+  // The IP is configured to pass the TLAST from port A through, but we connect
+  // the B path anyway for completeness.
+  complex_multiplier_dds complex_multiplier_dds_i (
+    .aclk               (clk),
+    .aresetn            (~reset_int),
+    .s_axis_a_tvalid    (s_axis_fifo_tvalid),
+    .s_axis_a_tready    (s_axis_fifo_tready),
+    .s_axis_a_tlast     (s_axis_fifo_tlast),
+    .s_axis_a_tdata     (s_axis_fifo_tdata),
+    .s_axis_b_tvalid    (m_axis_dds_tvalid),
+    .s_axis_b_tready    (m_axis_dds_tready),
+    .s_axis_b_tlast     (m_axis_dds_tlast),
+    .s_axis_b_tdata     (m_axis_dds_tdata),
+    .m_axis_dout_tvalid (mult_out_tvalid),
+    .m_axis_dout_tready (mult_out_tready),
+    .m_axis_dout_tlast  (mult_out_tlast),
+    .m_axis_dout_tdata  (mult_out_tdata)
+  );
+
+
+  //---------------------------------------------------------------------------
+  // Round
+  //---------------------------------------------------------------------------
+  //
+  // Round the 32-bit multiplier result down to 24 bits. This moves the binary
+  // point so that we go from 23 fractional bits down to 15 fractional bits.
+  //
+  //---------------------------------------------------------------------------
+
+  axi_round_complex #(
+    .WIDTH_IN  (MULT_OUT_W),
+    .WIDTH_OUT (OUTPUT_W)
+  ) axi_round_complex_i (
+    .clk      (clk),
+    .reset    (reset_int),
+    .i_tdata  (mult_out_tdata),
+    .i_tlast  (mult_out_tlast),
+    .i_tvalid (mult_out_tvalid),
+    .i_tready (mult_out_tready),
+    .o_tdata  (m_axis_dout_tdata),
+    .o_tlast  (m_axis_dout_tlast),
+    .o_tvalid (m_axis_dout_tvalid),
+    .o_tready (m_axis_dout_tready)
+  );
+
+endmodule
+
+
+`default_nettype wire
diff --git a/fpga/usrp3/lib/rfnoc/dds_timed.v b/fpga/usrp3/lib/rfnoc/dds_timed.v
index fd03f6a23..1af82683d 100644
--- a/fpga/usrp3/lib/rfnoc/dds_timed.v
+++ b/fpga/usrp3/lib/rfnoc/dds_timed.v
@@ -1,9 +1,75 @@
 //
-// Copyright 2016 Ettus Research, a National Instruments Company
+// Copyright 2021 Ettus Research, a National Instruments Brand
 //
 // SPDX-License-Identifier: LGPL-3.0-or-later
 //
-// DDS that supports timed commands via the settings bus
+// Module: dds_timed
+//
+// Description:
+//
+//   DDS (direct digital synthesis) and frequency shift block that
supports +// timed commands via the settings bus. +// +// This block takes in samples on i_t* and performs a complex multiplication +// with a digitally synthesized oscillator to implement a digital RF mixer. +// The output is then scaled (optionally), rounded, and clipped if necessary, +// then output on o_t*. +// +// Timed commands allow you to update the SR_FREQ register (the phase +// increment) at the desired time. +// +// The TUSER port contains the packet header information: +// +// tuser[125] : Has timestamp +// tuser[124] : End of burst (EOB) +// tuser[63:0] : Timestamp +// +// For the input, i_tuser should be valid for the duration of the packet. For +// the output, o_tuser is only guaranteed to be valid for the last sample of +// the packet. +// +// Registers: +// +// SR_FREQ : Frequency shift to apply to the input signal. This can be +// thought of as an unsigned PHASE_ACCUM_WIDTH-bit register +// with PHASE_ACCUM_WIDTH fractional bits. That is, the range +// of this register maps to the real values [0,1). This +// register controls the amount by which the phase accumulator +// for the DDS is incremented each clock cycle. It can +// therefore be thought of as a phase angle corresponding to +// the range [0,2π) radians. +// SR_SCALE_IQ : Scaler by which to multiply the IQ outputs. This is a +// SCALING_WIDTH-bit signed fixed-point register with 15 +// fractional bits. If SCALING_WIDTH is 18, then it has the +// range [-4,4). +// +// Parameters: +// +// Note: Care must be used when overriding these parameters because there are +// many dependencies on them. For example, the DDS_WIDTH and +// PHASE_WIDTH depend on the configuration of the underlying DDS IP and +// should only be modified to match that IP. +// +// SR_FREQ_ADDR : Register offset to assign to the SR_FREQ register, +// which contains the phase increment per sample needed +// to achieve the desired DDS frequency. +// SR_SCALE_IQ_ADDR : Register offset to assign to the SR_SCALE_IQ register. 
+// CMD_FIFO_SIZE : Log2 of the size of the timed command FIFO to use. +// WIDTH : Data width of the I/Q components of the input/output +// samples, typically 16. +// DDS_WIDTH : Bit width to use for the DDS and complex multiplier. +// PHASE_WIDTH : Bit width to use for the phase provided to the DDS IP. +// PHASE_ACCUM_WIDTH : Bit width to use for the phase increment values. +// SCALING_WIDTH : Bit width to use for the IQ scale registers. +// HEADER_WIDTH : Width of the header info (tuser). +// HEADER_FIFO_SIZE : Log2 of the size of the header FIFO. +// SR_AWIDTH : Settings bus address width. +// SR_DWIDTH : Settings bus data width. +// SR_TWIDTH : Settings bus time width. +// + +`default_nettype none + module dds_timed #( parameter SR_FREQ_ADDR = 0, @@ -19,142 +85,280 @@ module dds_timed #( parameter SR_AWIDTH = 8, parameter SR_DWIDTH = 32, parameter SR_TWIDTH = 64 -)( - input clk, input reset, input clear, - output timed_cmd_fifo_full, - input set_stb, input [SR_AWIDTH-1:0] set_addr, input [SR_DWIDTH-1:0] set_data, - input [SR_TWIDTH-1:0] set_time, input set_has_time, - input [2*WIDTH-1:0] i_tdata, input i_tlast, input i_tvalid, output i_tready, input [HEADER_WIDTH-1:0] i_tuser, - output [2*WIDTH-1:0] o_tdata, output o_tlast, output o_tvalid, input o_tready, output [HEADER_WIDTH-1:0] o_tuser +) ( + input wire clk, + input wire reset, + input wire clear, + + // Indicates if the timed command FIFO is full + output wire timed_cmd_fifo_full, + + // Settings bus for register access + input wire set_stb, + input wire [SR_AWIDTH-1:0] set_addr, + input wire [SR_DWIDTH-1:0] set_data, + input wire [SR_TWIDTH-1:0] set_time, + input wire set_has_time, + + // Input sample stream + input wire [ 2*WIDTH-1:0] i_tdata, + input wire i_tlast, + input wire i_tvalid, + output wire i_tready, + input wire [HEADER_WIDTH-1:0] i_tuser, + + // Output sample stream + output wire [ 2*WIDTH-1:0] o_tdata, + output wire o_tlast, + output wire o_tvalid, + input wire o_tready, + output wire 
[HEADER_WIDTH-1:0] o_tuser ); - /************************************************************************** - * Track VITA time - *************************************************************************/ - wire [2*WIDTH-1:0] int_tdata; + //--------------------------------------------------------------------------- + // Time Tracking + //--------------------------------------------------------------------------- + + wire [ 2*WIDTH-1:0] int_tdata; wire [HEADER_WIDTH-1:0] int_tuser; - wire int_tlast, int_tvalid, int_tready, int_tag; - wire [SR_AWIDTH-1:0] out_set_addr, timed_set_addr; - wire [SR_DWIDTH-1:0] out_set_data, timed_set_data; - wire out_set_stb, timed_set_stb; - wire eob; + wire int_tlast; + wire int_tvalid; + wire int_tready; + wire int_tag; + wire [ SR_AWIDTH-1:0] out_set_addr; + wire [ SR_AWIDTH-1:0] timed_set_addr; + wire [ SR_DWIDTH-1:0] out_set_data; + wire [ SR_DWIDTH-1:0] timed_set_data; + wire out_set_stb; + wire timed_set_stb; + // This module checks for timed writes to SR_FREQ_ADDR and outputs the + // register write on timed_set_* (if it was timed) or set_* (if it was not + // timed). It then tags the sample for which the timed command to + // SR_FREQ_ADDR should occur by asserting m_axis_data_tag when that sample is + // output. 
axi_tag_time #( - .WIDTH(2*WIDTH), - .NUM_TAGS(1), - .SR_TAG_ADDRS(SR_FREQ_ADDR)) - axi_tag_time ( - .clk(clk), - .reset(reset), - .clear(clear), - .tick_rate(16'd1), - .timed_cmd_fifo_full(timed_cmd_fifo_full), - .s_axis_data_tdata(i_tdata), .s_axis_data_tlast(i_tlast), - .s_axis_data_tvalid(i_tvalid), .s_axis_data_tready(i_tready), - .s_axis_data_tuser(i_tuser), - .m_axis_data_tdata(int_tdata), .m_axis_data_tlast(int_tlast), - .m_axis_data_tvalid(int_tvalid), .m_axis_data_tready(int_tready), - .m_axis_data_tuser(int_tuser), .m_axis_data_tag(int_tag), - .in_set_stb(set_stb), .in_set_addr(set_addr), .in_set_data(set_data), - .in_set_time(set_time), .in_set_has_time(set_has_time), - .out_set_stb(out_set_stb), .out_set_addr(out_set_addr), .out_set_data(out_set_data), - .timed_set_stb(timed_set_stb), .timed_set_addr(timed_set_addr), .timed_set_data(timed_set_data)); - - wire [2*WIDTH-1:0] dds_in_tdata, unused_tdata; - wire [HEADER_WIDTH-1:0] header_in_tdata, header_out_tdata, unused_tuser; - wire dds_in_tlast, dds_in_tvalid, dds_in_tready, dds_in_tag; - wire header_in_tvalid, header_in_tready, header_in_tlast, unused_tag; - wire header_out_tvalid, header_out_tready; + .WIDTH (2*WIDTH), + .NUM_TAGS (1), + .SR_TAG_ADDRS (SR_FREQ_ADDR) + ) axi_tag_time ( + .clk (clk), + .reset (reset), + .clear (clear), + .tick_rate (16'd1), + .timed_cmd_fifo_full (timed_cmd_fifo_full), + .s_axis_data_tdata (i_tdata), + .s_axis_data_tlast (i_tlast), + .s_axis_data_tvalid (i_tvalid), + .s_axis_data_tready (i_tready), + .s_axis_data_tuser (i_tuser), + .m_axis_data_tdata (int_tdata), + .m_axis_data_tlast (int_tlast), + .m_axis_data_tvalid (int_tvalid), + .m_axis_data_tready (int_tready), + .m_axis_data_tuser (int_tuser), + .m_axis_data_tag (int_tag), + .in_set_stb (set_stb), + .in_set_addr (set_addr), + .in_set_data (set_data), + .in_set_time (set_time), + .in_set_has_time (set_has_time), + .out_set_stb (out_set_stb), + .out_set_addr (out_set_addr), + .out_set_data (out_set_data), + 
.timed_set_stb (timed_set_stb), + .timed_set_addr (timed_set_addr), + .timed_set_data (timed_set_data) + ); + + wire [ 2*WIDTH-1:0] dds_in_tdata; + wire [ 2*WIDTH-1:0] unused_tdata; + wire [HEADER_WIDTH-1:0] header_in_tdata; + wire [HEADER_WIDTH-1:0] header_out_tdata; + wire [HEADER_WIDTH-1:0] dds_in_tuser; + wire dds_in_tlast; + wire dds_in_tvalid; + wire dds_in_tready; + wire dds_in_tag; + wire header_in_tvalid; + wire header_in_tready; + wire header_in_tlast; + wire unused_tag; + wire header_out_tvalid; + wire header_out_tready; + + + //--------------------------------------------------------------------------- + // Split Stream + //--------------------------------------------------------------------------- + // + // Split the data stream into two streams, one with the data/tag (dds_in_t*) + // and the other with the header (header_in_t*). + // + //--------------------------------------------------------------------------- split_stream #( - .WIDTH(2*WIDTH+HEADER_WIDTH+1), .ACTIVE_MASK(4'b0011)) - split_head ( - .clk(clk), .reset(reset), .clear(clear), - .i_tdata({int_tdata,int_tuser,int_tag}), .i_tlast(int_tlast), - .i_tvalid(int_tvalid), .i_tready(int_tready), - .o0_tdata({dds_in_tdata,unused_tuser,dds_in_tag}), .o0_tlast(dds_in_tlast), - .o0_tvalid(dds_in_tvalid), .o0_tready(dds_in_tready), - .o1_tdata({unused_tdata,header_in_tdata,unused_tag}), .o1_tlast(header_in_tlast), - .o1_tvalid(header_in_tvalid), .o1_tready(header_in_tready), - .o2_tready(1'b0), .o3_tready(1'b0)); + .WIDTH (2*WIDTH+HEADER_WIDTH+1), + .ACTIVE_MASK (4'b0011) + ) split_head ( + .clk (clk), + .reset (reset), + .clear (clear), + .i_tdata ({ int_tdata, int_tuser, int_tag }), + .i_tlast (int_tlast), + .i_tvalid (int_tvalid), + .i_tready (int_tready), + .o0_tdata ({ dds_in_tdata, dds_in_tuser, dds_in_tag }), + .o0_tlast (dds_in_tlast), + .o0_tvalid (dds_in_tvalid), + .o0_tready (dds_in_tready), + .o1_tdata ({ unused_tdata, header_in_tdata, unused_tag }), + .o1_tlast (header_in_tlast), + 
.o1_tvalid (header_in_tvalid), + .o1_tready (header_in_tready), + .o2_tdata (), + .o2_tlast (), + .o2_tvalid (), + .o2_tready (1'b0), + .o3_tdata (), + .o3_tlast (), + .o3_tvalid (), + .o3_tready (1'b0) + ); + + + //--------------------------------------------------------------------------- + // Header FIFO + //--------------------------------------------------------------------------- + // + // Store each packet header in a FIFO to be read out when the packet is + // output. + // + //--------------------------------------------------------------------------- axi_fifo #( - .WIDTH(HEADER_WIDTH), .SIZE(HEADER_FIFO_SIZE)) - axi_fifo_header ( - .clk(clk), .reset(reset), .clear(clear), - .i_tdata(header_in_tdata), .i_tvalid(header_in_tvalid & header_in_tlast), .i_tready(header_in_tready), - .o_tdata(header_out_tdata), .o_tvalid(header_out_tvalid), - .o_tready(header_out_tready), // Consume header on last output sample - .space(), .occupied()); - - assign eob = header_in_tdata[124]; - - /************************************************************************** - * Settings Regs - *************************************************************************/ - wire [PHASE_ACCUM_WIDTH-1:0] phase_inc_tdata, phase_inc_timed_tdata; - wire phase_inc_tlast, phase_inc_tvalid, phase_inc_tready; - wire phase_inc_timed_tlast, phase_inc_timed_tready , phase_inc_timed_tvalid; + .WIDTH (HEADER_WIDTH), + .SIZE (HEADER_FIFO_SIZE) + ) axi_fifo_header ( + .clk (clk), + .reset (reset), + .clear (clear), + .i_tdata (header_in_tdata), + .i_tvalid (header_in_tvalid & header_in_tlast), + .i_tready (header_in_tready), + .o_tdata (header_out_tdata), + .o_tvalid (header_out_tvalid), + .o_tready (header_out_tready), // Consume header on last output sample + .space (), + .occupied () + ); + + + //--------------------------------------------------------------------------- + // Settings Bus Registers + //--------------------------------------------------------------------------- + wire 
[PHASE_ACCUM_WIDTH-1:0] phase_inc_tdata; + wire [PHASE_ACCUM_WIDTH-1:0] phase_inc_timed_tdata; + wire phase_inc_tlast; + wire phase_inc_tvalid; + wire phase_inc_tready; + wire phase_inc_timed_tlast; + wire phase_inc_timed_tready; + wire phase_inc_timed_tvalid; + + // Frequency register (phase increment) used for *un-timed* commands axi_setting_reg #( - .ADDR(SR_FREQ_ADDR), .AWIDTH(SR_AWIDTH), .WIDTH(PHASE_ACCUM_WIDTH), .STROBE_LAST(1)) - set_freq ( - .clk(clk), .reset(reset), - .set_stb(out_set_stb), .set_addr(out_set_addr), .set_data(out_set_data), - .o_tdata(phase_inc_tdata), .o_tlast(phase_inc_tlast), .o_tvalid(phase_inc_tvalid), .o_tready(phase_inc_tready)); + .ADDR (SR_FREQ_ADDR), + .AWIDTH (SR_AWIDTH), + .WIDTH (PHASE_ACCUM_WIDTH), + .STROBE_LAST (1) + ) set_freq ( + .clk (clk), + .reset (reset), + .set_stb (out_set_stb), + .set_addr (out_set_addr), + .set_data (out_set_data), + .o_tdata (phase_inc_tdata), + .o_tlast (phase_inc_tlast), + .o_tvalid (phase_inc_tvalid), + .o_tready (phase_inc_tready) + ); + // Frequency register (phase increment) used for *timed* commands axi_setting_reg #( - .ADDR(SR_FREQ_ADDR), .USE_FIFO(1), .FIFO_SIZE(CMD_FIFO_SIZE), .AWIDTH(SR_AWIDTH), .WIDTH(PHASE_ACCUM_WIDTH), .STROBE_LAST(1)) - set_freq_timed ( - .clk(clk), .reset(reset), - .set_stb(timed_set_stb), .set_addr(timed_set_addr), .set_data(timed_set_data), - .o_tdata(phase_inc_timed_tdata), .o_tlast(phase_inc_timed_tlast), .o_tvalid(phase_inc_timed_tvalid), .o_tready(phase_inc_timed_tready)); + .ADDR (SR_FREQ_ADDR), + .USE_FIFO (1), + .FIFO_SIZE (CMD_FIFO_SIZE), + .AWIDTH (SR_AWIDTH), + .WIDTH (PHASE_ACCUM_WIDTH), + .STROBE_LAST (1) + ) set_freq_timed ( + .clk (clk), + .reset (reset), + .set_stb (timed_set_stb), + .set_addr (timed_set_addr), + .set_data (timed_set_data), + .o_tdata (phase_inc_timed_tdata), + .o_tlast (phase_inc_timed_tlast), + .o_tvalid (phase_inc_timed_tvalid), + .o_tready (phase_inc_timed_tready) + ); wire [SCALING_WIDTH-1:0] scaling_tdata; - wire 
scaling_tvalid, scaling_tready; + wire scaling_tready; + // Scale value register axi_setting_reg #( - .ADDR(SR_SCALE_IQ_ADDR), .AWIDTH(SR_AWIDTH), .WIDTH(SCALING_WIDTH), .REPEATS(1)) - set_scale ( - .clk(clk), .reset(reset), - .set_stb(out_set_stb), .set_addr(out_set_addr), .set_data(out_set_data), - .o_tdata(scaling_tdata), .o_tlast(), .o_tvalid(scaling_tvalid), .o_tready(scaling_tready)); - - /************************************************************************** - * DDS + Complex Mult + Phase Accumulator - *************************************************************************/ + .ADDR (SR_SCALE_IQ_ADDR), + .AWIDTH (SR_AWIDTH), + .WIDTH (SCALING_WIDTH), + .REPEATS (1) + ) set_scale ( + .clk (clk), + .reset (reset), + .set_stb (out_set_stb), + .set_addr (out_set_addr), + .set_data (out_set_data), + .o_tdata (scaling_tdata), + .o_tlast (), + .o_tvalid (), + .o_tready (scaling_tready) + ); + + + //--------------------------------------------------------------------------- + // Phase Accumulator for DDS + //--------------------------------------------------------------------------- + wire [PHASE_ACCUM_WIDTH-1:0] phase_inc_mux_tdata; - reg [PHASE_ACCUM_WIDTH-1:0] phase_inc; - wire phase_inc_mux_tlast, phase_inc_mux_tvalid, phase_inc_mux_tready; - reg [PHASE_ACCUM_WIDTH-1:0] phase; - - wire [PHASE_WIDTH-1:0] phase_tdata = phase[PHASE_ACCUM_WIDTH-1:PHASE_ACCUM_WIDTH-PHASE_WIDTH]; - wire phase_tvalid, phase_tready, phase_tlast; - - wire [WIDTH*2-1:0] dds_in_fifo_tdata; - wire dds_in_fifo_tvalid, dds_in_fifo_tready, dds_in_fifo_tlast; - wire dds_out_tlast, dds_out_tvalid, dds_out_tready; - - wire [DDS_WIDTH-1:0] dds_in_i_tdata, dds_in_q_tdata; - wire [DDS_WIDTH-1:0] dds_out_i_tdata, dds_out_q_tdata; - wire [15:0] dds_input_fifo_space, dds_input_fifo_occupied; - - wire [WIDTH*2-1:0] dds_in_sync_tdata; - wire dds_in_sync_tvalid, dds_in_sync_tready, dds_in_sync_tlast; - wire [PHASE_WIDTH-1:0] phase_sync_tdata; - wire phase_sync_tvalid, phase_sync_tready, 
phase_sync_tlast; - - assign phase_inc_mux_tdata = phase_inc_timed_tready ? phase_inc_timed_tdata : phase_inc_tdata; - assign phase_inc_mux_tlast = phase_inc_timed_tready ? phase_inc_timed_tlast : phase_inc_tlast; + reg [PHASE_ACCUM_WIDTH-1:0] phase_inc; + wire phase_inc_mux_tlast; + wire phase_inc_mux_tvalid; + wire phase_inc_mux_tready; + reg [PHASE_ACCUM_WIDTH-1:0] phase; + + wire [PHASE_WIDTH-1:0] phase_tdata = phase[PHASE_ACCUM_WIDTH-1:PHASE_ACCUM_WIDTH-PHASE_WIDTH]; + wire phase_tvalid; + wire phase_tready; + wire phase_tlast; + + wire dds_in_teob = dds_in_tuser[124]; + + // Multiplexer to select between the timed and un-timed phase registers. + assign phase_inc_mux_tdata = phase_inc_timed_tready ? phase_inc_timed_tdata : phase_inc_tdata; + assign phase_inc_mux_tlast = phase_inc_timed_tready ? phase_inc_timed_tlast : phase_inc_tlast; assign phase_inc_mux_tvalid = phase_inc_timed_tready ? phase_inc_timed_tvalid : phase_inc_tvalid; assign phase_inc_tready = phase_inc_mux_tready; assign phase_inc_timed_tready = phase_inc_mux_tready & dds_in_tag; - assign phase_inc_mux_tready = phase_tready; + assign phase_inc_mux_tready = phase_tready; - // phase is only valid when input i/q data stream is valid + // Phase is only valid when input IQ data stream is valid assign phase_tvalid = dds_in_tvalid; - assign phase_tlast = dds_in_tlast; + assign phase_tlast = dds_in_tlast; + // Phase increment register, sourced from either the timed or un-timed + // SR_FREQ register. always @(posedge clk) begin if (reset | clear) begin phase_inc <= 0; @@ -162,129 +366,219 @@ module dds_timed #( phase_inc <= phase_inc_mux_tdata; end end - - // NCO, increment phase input to DDS SIN/COS LUT + + // Phase accumulator for DDS. This increments the "phase" input provided to + // the DDS IP. 
always @(posedge clk) begin - if (reset | clear | (phase_inc_mux_tvalid & phase_inc_mux_tready) | eob) begin + if (reset | clear | (phase_inc_mux_tvalid & phase_inc_mux_tready)) begin + // Reset the phase on reset or clear, but also whenever the phase + // increment is updated. phase <= 0; - end else if (dds_in_tvalid & dds_in_tready) begin //only increment phase when data into dds is valid and data fifo is ready - phase <= phase + phase_inc; + end else if (dds_in_tvalid & dds_in_tready) begin + if (dds_in_tlast & dds_in_teob) begin + // Reset the phase at the end of each burst so we get predictable + // output. + phase <= 0; + end else begin + // Increment the phase for each new sample. + phase <= phase + phase_inc; + end end end + //--------------------------------------------------------------------------- + // AXI Sync + //--------------------------------------------------------------------------- + // + // Sync the IQ and phase paths' pipeline delay. This is needed to ensure that + // applying the phase update happens on the correct sample regardless of + // differences in path delays. + // + //--------------------------------------------------------------------------- + + + wire [PHASE_WIDTH-1:0] phase_sync_tdata; + wire phase_sync_tvalid; + wire phase_sync_tready; + wire phase_sync_tlast; + + wire [ WIDTH*2-1:0] dds_in_sync_tdata; + wire dds_in_sync_tvalid; + wire dds_in_sync_tready; + wire dds_in_sync_tlast; - // Sync the two path's pipeline delay. - // This is needed to ensure that applying the phase update happens on the - // correct sample regardless of differing downstream path delays. 
axi_sync #( - .SIZE(2), - .WIDTH_VEC({PHASE_WIDTH,2*WIDTH}), // Vector of widths, each width is defined by a 32-bit value - .FIFO_SIZE(0)) - axi_sync ( - .clk(clk), .reset(reset), .clear(clear), - .i_tdata({phase_tdata,dds_in_tdata}), - .i_tlast({phase_tlast,dds_in_tlast}), - .i_tvalid({phase_tvalid,dds_in_tvalid}), - .i_tready({phase_tready,dds_in_tready}), - .o_tdata({phase_sync_tdata,dds_in_sync_tdata}), - .o_tlast({phase_sync_tlast,dds_in_sync_tlast}), - .o_tvalid({phase_sync_tvalid,dds_in_sync_tvalid}), - .o_tready({phase_sync_tready,dds_in_sync_tready})); - - // fifo to hold input data while pipeline catches up in dds - // this is blocked by the axi_sync following the dds - axi_fifo #(.WIDTH(2*WIDTH+1), .SIZE(5)) dds_input_fifo( - .clk(clk), .reset(reset), .clear(clear), - .i_tdata({dds_in_sync_tlast,dds_in_sync_tdata}), .i_tvalid(dds_in_sync_tvalid), .i_tready(dds_in_sync_tready), - .o_tdata({dds_in_fifo_tlast,dds_in_fifo_tdata}), .o_tvalid(dds_in_fifo_tvalid), .o_tready(dds_in_fifo_tready), - .space(dds_input_fifo_space), .occupied(dds_input_fifo_occupied) + .SIZE (2), + .WIDTH_VEC ({ PHASE_WIDTH, 2*WIDTH }), // Vector of 32-bit width values + .FIFO_SIZE (0) + ) axi_sync_i ( + .clk (clk), + .reset (reset), + .clear (clear), + .i_tdata ({ phase_tdata, dds_in_tdata }), + .i_tlast ({ phase_tlast, dds_in_tlast }), + .i_tvalid ({ phase_tvalid, dds_in_tvalid }), + .i_tready ({ phase_tready, dds_in_tready }), + .o_tdata ({ phase_sync_tdata, dds_in_sync_tdata }), + .o_tlast ({ phase_sync_tlast, dds_in_sync_tlast }), + .o_tvalid ({ phase_sync_tvalid, dds_in_sync_tvalid }), + .o_tready ({ phase_sync_tready, dds_in_sync_tready }) ); - - // after fifo, do q quick sign extend op to get up to 24 bits. to match how the cordic deals with the data path. 
- sign_extend #( - .bits_in(WIDTH), .bits_out(DDS_WIDTH)) - sign_extend_dds_i ( - .in(dds_in_fifo_tdata[2*WIDTH-1:WIDTH]), .out(dds_in_i_tdata)); + + //--------------------------------------------------------------------------- + // DDS and Complex Multiplier + //--------------------------------------------------------------------------- + + wire [DDS_WIDTH-1:0] dds_in_i_tdata; + wire [DDS_WIDTH-1:0] dds_in_q_tdata; + + wire [DDS_WIDTH-1:0] dds_out_i_tdata; + wire [DDS_WIDTH-1:0] dds_out_q_tdata; + wire dds_out_tlast; + wire dds_out_tvalid; + wire dds_out_tready; + + // Sign extend I and Q to get up to 24 bits. sign_extend #( - .bits_in(WIDTH), .bits_out(DDS_WIDTH)) - sign_extend_dds_q ( - .in(dds_in_fifo_tdata[WIDTH-1:0]), .out(dds_in_q_tdata)); - - - // Wrapper for Xilinx IP AXI DDS + Complex Multiply - // NOTE: Seems Xilinx IP expects opposite I/Q combined complex data buses, so they are swapped here. - dds_freq_tune dds_freq_tune_inst ( - .clk(clk), - .reset(reset | clear), - .eob(eob), - .rate_changed(1'b0), - .dds_input_fifo_occupied(dds_input_fifo_occupied), - /* IQ input */ - .s_axis_din_tlast(dds_in_fifo_tlast), - .s_axis_din_tvalid(dds_in_fifo_tvalid), - .s_axis_din_tready(dds_in_fifo_tready), - .s_axis_din_tdata({dds_in_q_tdata, dds_in_i_tdata}), - /* Phase input from NCO */ - .s_axis_phase_tlast(phase_sync_tlast), + .bits_in (WIDTH), + .bits_out (DDS_WIDTH) + ) sign_extend_i ( + .in (dds_in_sync_tdata[2*WIDTH-1:WIDTH]), + .out (dds_in_i_tdata) + ); + sign_extend #( + .bits_in (WIDTH), + .bits_out (DDS_WIDTH) + ) sign_extend_q ( + .in (dds_in_sync_tdata[WIDTH-1:0]), + .out (dds_in_q_tdata) + ); + + // Wrapper for DDS + Complex Multiply. This block expects {q,i} instead of + // {i,q} data ordering. 
+ dds_freq_tune_duc dds_freq_tune_duc_i ( + .clk (clk), + .reset (reset | clear), + // IQ input (signed 24-bit number with 15 fractional bits) + .s_axis_din_tlast (dds_in_sync_tlast), + .s_axis_din_tvalid (dds_in_sync_tvalid), + .s_axis_din_tready (dds_in_sync_tready), + .s_axis_din_tdata ({ dds_in_q_tdata, dds_in_i_tdata }), + // Phase input from DDS (unsigned 24-bit number with 24 fractional bits) + .s_axis_phase_tlast (phase_sync_tlast), .s_axis_phase_tvalid(phase_sync_tvalid), .s_axis_phase_tready(phase_sync_tready), - .s_axis_phase_tdata(phase_sync_tdata), //24 bit - /* IQ output */ - .m_axis_dout_tlast(dds_out_tlast), - .m_axis_dout_tvalid(dds_out_tvalid), - .m_axis_dout_tready(dds_out_tready), - .m_axis_dout_tdata({dds_out_q_tdata, dds_out_i_tdata}) - //debug signals + .s_axis_phase_tdata (phase_sync_tdata), // 24-bit + // IQ output (signed 24-bit number with 15 fractional bits) + .m_axis_dout_tlast (dds_out_tlast), + .m_axis_dout_tvalid (dds_out_tvalid), + .m_axis_dout_tready (dds_out_tready), + .m_axis_dout_tdata ({dds_out_q_tdata, dds_out_i_tdata}) ); - /************************************************************************ - * Perform scaling on the IQ output - ************************************************************************/ - wire [DDS_WIDTH+SCALING_WIDTH-1:0] scaled_i_tdata, scaled_q_tdata; - wire scaled_tlast, scaled_tvalid, scaled_tready; + + + //--------------------------------------------------------------------------- + // Scale the IQ Output + //--------------------------------------------------------------------------- + + wire [DDS_WIDTH+SCALING_WIDTH-1:0] scaled_i_tdata; + wire [DDS_WIDTH+SCALING_WIDTH-1:0] scaled_q_tdata; + wire scaled_tlast; + wire scaled_tvalid; + wire scaled_tready; mult #( - .WIDTH_A(DDS_WIDTH), - .WIDTH_B(SCALING_WIDTH), - .WIDTH_P(DDS_WIDTH+SCALING_WIDTH), - .DROP_TOP_P(4), - .LATENCY(3), - .CASCADE_OUT(0)) - i_mult ( - .clk(clk), .reset(reset | clear), - .a_tdata(dds_out_i_tdata), .a_tlast(dds_out_tlast), 
.a_tvalid(dds_out_tvalid), .a_tready(dds_out_tready), - .b_tdata(scaling_tdata), .b_tlast(1'b0), .b_tvalid(dds_out_tvalid /* aligning scaling_tdata with dds_tdata */), .b_tready(scaling_tready), - .p_tdata(scaled_i_tdata), .p_tlast(scaled_tlast), .p_tvalid(scaled_tvalid), .p_tready(scaled_tready)); + .WIDTH_A (DDS_WIDTH), + .WIDTH_B (SCALING_WIDTH), + .WIDTH_P (DDS_WIDTH+SCALING_WIDTH), + .DROP_TOP_P (4), + .LATENCY (3), + .CASCADE_OUT (0) + ) mult_i ( + .clk (clk), + .reset (reset | clear), + .a_tdata (dds_out_i_tdata), + .a_tlast (dds_out_tlast), + .a_tvalid (dds_out_tvalid), + .a_tready (dds_out_tready), + .b_tdata (scaling_tdata), + .b_tlast (1'b0), + .b_tvalid (dds_out_tvalid), // Align scaling_tdata with dds_out_tdata + .b_tready (scaling_tready), + .p_tdata (scaled_i_tdata), + .p_tlast (scaled_tlast), + .p_tvalid (scaled_tvalid), + .p_tready (scaled_tready) + ); mult #( - .WIDTH_A(DDS_WIDTH), - .WIDTH_B(SCALING_WIDTH), - .WIDTH_P(DDS_WIDTH+SCALING_WIDTH), - .DROP_TOP_P(4), - .LATENCY(3), - .CASCADE_OUT(0)) - q_mult ( - .clk(clk), .reset(reset | clear), - .a_tdata(dds_out_q_tdata), .a_tlast(), .a_tvalid(dds_out_tvalid), .a_tready(), - .b_tdata(scaling_tdata), .b_tlast(1'b0), .b_tvalid(dds_out_tvalid /* aligning scaling_tdata with dds_tdata */), .b_tready(), - .p_tdata(scaled_q_tdata), .p_tlast(), .p_tvalid(), .p_tready(scaled_tready)); + .WIDTH_A (DDS_WIDTH), + .WIDTH_B (SCALING_WIDTH), + .WIDTH_P (DDS_WIDTH+SCALING_WIDTH), + .DROP_TOP_P (4), + .LATENCY (3), + .CASCADE_OUT (0) + ) mult_q ( + .clk (clk), + .reset (reset | clear), + .a_tdata (dds_out_q_tdata), + .a_tlast (), + .a_tvalid (dds_out_tvalid), + .a_tready (), + .b_tdata (scaling_tdata), + .b_tlast (1'b0), + .b_tvalid (dds_out_tvalid), // Align scaling_tdata with dds_out_tdata + .b_tready (), + .p_tdata (scaled_q_tdata), + .p_tlast (), + .p_tvalid (), + .p_tready (scaled_tready) + ); + + + //--------------------------------------------------------------------------- + // Round + 
//--------------------------------------------------------------------------- wire [2*WIDTH-1:0] sample_tdata; - wire sample_tlast, sample_tvalid, sample_tready; + wire sample_tlast; + wire sample_tvalid; + wire sample_tready; axi_round_and_clip_complex #( - .WIDTH_IN(DDS_WIDTH+SCALING_WIDTH), .WIDTH_OUT(WIDTH), .CLIP_BITS(12)) - axi_round_and_clip_complex ( - .clk(clk), .reset(reset | clear), - .i_tdata({scaled_i_tdata, scaled_q_tdata}), .i_tlast(scaled_tlast), .i_tvalid(scaled_tvalid), .i_tready(scaled_tready), - .o_tdata(sample_tdata), .o_tlast(sample_tlast), .o_tvalid(sample_tvalid), .o_tready(sample_tready)); + .WIDTH_IN (DDS_WIDTH+SCALING_WIDTH), + .WIDTH_OUT (WIDTH), + .CLIP_BITS (12) + ) axi_round_and_clip_complex_i ( + .clk (clk), + .reset (reset | clear), + .i_tdata ({scaled_i_tdata, scaled_q_tdata}), + .i_tlast (scaled_tlast), + .i_tvalid (scaled_tvalid), + .i_tready (scaled_tready), + .o_tdata (sample_tdata), + .o_tlast (sample_tlast), + .o_tvalid (sample_tvalid), + .o_tready (sample_tready) + ); + + + //--------------------------------------------------------------------------- + // Output Logic + //--------------------------------------------------------------------------- // Throttle output on last sample if header is not valid assign header_out_tready = sample_tlast & sample_tvalid & o_tready; - assign sample_tready = (sample_tvalid & sample_tlast) ? (header_out_tvalid & o_tready) : o_tready; - assign o_tvalid = (sample_tvalid & sample_tlast) ? header_out_tvalid : sample_tvalid; + assign sample_tready = (sample_tvalid & sample_tlast) ? + (header_out_tvalid & o_tready) : o_tready; + assign o_tvalid = (sample_tvalid & sample_tlast) ? 
+ header_out_tvalid : sample_tvalid; assign o_tlast = sample_tlast; assign o_tdata = sample_tdata; assign o_tuser = header_out_tdata; - + endmodule + + +`default_nettype wire diff --git a/fpga/usrp3/lib/rfnoc/dds_wrapper.v b/fpga/usrp3/lib/rfnoc/dds_wrapper.v new file mode 100644 index 000000000..d9151afa8 --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/dds_wrapper.v @@ -0,0 +1,160 @@ +// +// Copyright 2021 Ettus Research, a National Instruments Brand +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: dds_wrapper +// +// Description: +// +// This module computes the complex number e^(j*2*pi*phase). The phase input +// can be thought of as a 24-bit unsigned fixed-point value with 24 +// fractional bits. In other words, the integer range of the input maps to a +// phase in the range [0, 1.0). The output consists of two 16-bit signed +// fixed-point values with 14 fractional bits. The value sin(2*pi*phase) is +// in the upper 16 bits and cos(2*pi*phase) is in the lower 16-bits. This +// puts the output in {Q,I} order. +// +// This is a wrapper for the dds_sin_cos_lut_only IP, which is based on the +// Xilinx DDS Compiler. This IP has the undesirable behavior that input must +// be provided to flush out any data stuck in its pipeline. This wrapper +// hides that behavior so that every input causes a corresponding output, +// even if the input stops. +// +// NOTE: The DDS IP requires at least 2 cycles of reset. +// +// Parameters: +// +// The parameters in this module should not be modified. They match the IP +// configuration. 
+// + + +module dds_wrapper #( + parameter PHASE_W = 24, + parameter OUTPUT_W = 32 +) ( + input wire clk, + input wire rst, + + // Phase input + input wire [ PHASE_W-1:0] s_axis_phase_tdata, + input wire s_axis_phase_tvalid, + input wire s_axis_phase_tlast, + output wire s_axis_phase_tready, + + // IQ output (Q in the upper, I in the lower bits) + output wire [OUTPUT_W-1:0] m_axis_data_tdata, + output wire m_axis_data_tvalid, + output wire m_axis_data_tlast, + input wire m_axis_data_tready +); + + // Width of number needed to represent the DDS fullness. This value was + // determined experimentally. The max fullness was 33. + localparam FULLNESS_W = 6; + + wire [PHASE_W-1:0] phase_tdata; + wire phase_tvalid; + wire phase_tlast; + wire phase_tready; + + wire [OUTPUT_W-1:0] dds_tdata; + wire dds_tvalid; + wire dds_tlast; + wire dds_tready; + + + //--------------------------------------------------------------------------- + // DDS Fullness Counter + //--------------------------------------------------------------------------- + // + // Count the number of valid samples in the DDS's data pipeline. + // + //--------------------------------------------------------------------------- + + // The fullness counter must be large enough for DDS's latency. 
+ reg [FULLNESS_W-1:0] fullness = 0; + reg dds_has_data = 0; + + wire increment = s_axis_phase_tvalid & s_axis_phase_tready; + wire decrement = m_axis_data_tvalid & m_axis_data_tready; + + always @(posedge clk) begin + if (rst) begin + fullness <= 0; + dds_has_data <= 0; + end else begin + if (increment && !decrement) begin + //synthesis translate_off + if (fullness+1'b1 == 1'b0) begin + $display("ERROR: Fullness overflowed!"); + end + //synthesis translate_on + fullness <= fullness + 1; + dds_has_data <= 1; + end else if (decrement && !increment) begin + //synthesis translate_off + if (fullness-1'b1 > fullness) begin + $display("ERROR: Fullness underflowed!"); + end + //synthesis translate_on + fullness <= fullness - 1; + dds_has_data <= (fullness > 1); + end else begin + dds_has_data <= (fullness > 0); + end + end + end + + + //--------------------------------------------------------------------------- + // Input Logic + //--------------------------------------------------------------------------- + + assign s_axis_phase_tready = phase_tready; + assign phase_tlast = s_axis_phase_tlast; + assign phase_tdata = s_axis_phase_tdata; + + // Always input something when the DDS has data stuck inside it so that all + // data gets flushed out automatically. + assign phase_tvalid = s_axis_phase_tvalid || dds_has_data; + + + //--------------------------------------------------------------------------- + // DDS IP + //--------------------------------------------------------------------------- + + // Use the TUSER path on the DDS IP to indicate if the sample is empty and is + // just to flush the output. 
+ wire flush_in = ~s_axis_phase_tvalid; // It's a flush if input is not valid + wire flush_out; + + dds_sin_cos_lut_only dds_sin_cos_lut_only_i ( + .aclk (clk), + .aresetn (~rst), + .s_axis_phase_tvalid (phase_tvalid), + .s_axis_phase_tready (phase_tready), + .s_axis_phase_tdata (phase_tdata), + .s_axis_phase_tlast (phase_tlast), + .s_axis_phase_tuser (flush_in), + .m_axis_data_tvalid (dds_tvalid), + .m_axis_data_tready (dds_tready), + .m_axis_data_tdata (dds_tdata), + .m_axis_data_tlast (dds_tlast), + .m_axis_data_tuser (flush_out) + ); + + + //--------------------------------------------------------------------------- + // Output Logic + //--------------------------------------------------------------------------- + + assign m_axis_data_tdata = dds_tdata; + assign m_axis_data_tlast = dds_tlast; + + // Discard the current sample if it was for flushing. + assign m_axis_data_tvalid = dds_tvalid & ~flush_out; + assign dds_tready = m_axis_data_tready | flush_out; + +endmodule diff --git a/fpga/usrp3/lib/rfnoc/sim/dds_timed_tb/Makefile b/fpga/usrp3/lib/rfnoc/sim/dds_timed_tb/Makefile new file mode 100644 index 000000000..22dd93ecb --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/sim/dds_timed_tb/Makefile @@ -0,0 +1,68 @@ +# +# Copyright 2021 Ettus Research, A National Instruments Brand +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +#------------------------------------------------- +# Top-of-Makefile +#------------------------------------------------- +# Define BASE_DIR to point to the "top" dir +BASE_DIR = $(abspath ../../../../top) +# Include viv_sim_preamble after defining BASE_DIR +include $(BASE_DIR)/../tools/make/viv_sim_preamble.mak + +#------------------------------------------------- +# Design Specific +#------------------------------------------------- +# Include makefiles and sources for the DUT and its dependencies +include $(BASE_DIR)/../lib/rfnoc/Makefile.srcs + +DESIGN_SRCS += $(abspath \ +$(RFNOC_SRCS) \ +) + 
+#------------------------------------------------- +# IP Specific +#------------------------------------------------- +# If simulation contains IP, define the IP_DIR and point +# it to the base level IP directory. +IP_DIR = $(BASE_DIR)/x300/ip + +# Include makefiles and sources for all IP components +# *after* defining the IP_DIR +include $(LIB_IP_DIR)/complex_multiplier_dds/Makefile.inc +include $(LIB_IP_DIR)/dds_sin_cos_lut_only/Makefile.inc + +DESIGN_SRCS += $(abspath \ +$(LIB_IP_COMPLEX_MULTIPLIER_DDS_SRCS) \ +$(LIB_IP_DDS_SIN_COS_LUT_ONLY_SRCS) \ +) + +#------------------------------------------------- +# ModelSim Specific +#------------------------------------------------- + +modelsim vlint : DESIGN_SRCS += $(abspath \ +$(IP_BUILD_DIR)/dds_sin_cos_lut_only/sim/dds_sin_cos_lut_only.vhd \ +$(IP_BUILD_DIR)/complex_multiplier_dds/sim/complex_multiplier_dds.vhd \ +) + +MODELSIM_ARGS = glbl + +#------------------------------------------------- +# Testbench Specific +#------------------------------------------------- +SIM_TOP ?= dds_timed_tb + +SIM_SRCS = \ +$(abspath $(SIM_TOP).sv) \ +$(VIVADO_PATH)/data/verilog/src/glbl.v \ + +#------------------------------------------------- +# Bottom-of-Makefile +#------------------------------------------------- +# Include all simulator specific makefiles here +# Each should define a unique target to simulate +# e.g. 
xsim, vsim, etc and a common "clean" target +include $(BASE_DIR)/../tools/make/viv_simulator.mak diff --git a/fpga/usrp3/lib/rfnoc/sim/dds_timed_tb/dds_timed_tb.sv b/fpga/usrp3/lib/rfnoc/sim/dds_timed_tb/dds_timed_tb.sv new file mode 100644 index 000000000..c9cc29edd --- /dev/null +++ b/fpga/usrp3/lib/rfnoc/sim/dds_timed_tb/dds_timed_tb.sv @@ -0,0 +1,886 @@ +// +// Copyright 2021 Ettus Research, A National Instruments Brand +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// Module: dds_timed_tb +// + +`default_nettype none + + +module dds_timed_tb; + + // Include macros and time declarations for use with PkgTestExec + `include "test_exec.svh" + + import PkgTestExec::*; + import PkgAxiStreamBfm::*; + import PkgComplex::*; + import PkgMath::*; + import PkgRandom::*; + + //--------------------------------------------------------------------------- + // Testbench Configuration + //--------------------------------------------------------------------------- + + localparam real CLK_PERIOD = 10.0; + + // Values needed by the DUT (use the same values as the DUC) + localparam int SR_FREQ_ADDR = 132; + localparam int SR_SCALE_IQ_ADDR = 133; + localparam int SR_AWIDTH = 8; + localparam int SR_DWIDTH = 32; + localparam int SR_TWIDTH = 64; + localparam int PHASE_ACCUM_WIDTH = 32; + localparam int SCALING_WIDTH = 18; + + // Bit widths for our sample size + localparam int FRAC_W = 15; // Number of fixed point fractional bits + localparam int COMP_W = 16; // Width of just the imag/real part + localparam int SAMPLE_W = 2*COMP_W; // Width of a complex sample + + // Max min possible values for the components of a sample + localparam bit signed [COMP_W-1:0] MAX_COMP = 2**(COMP_W-1) - 1; + localparam bit signed [COMP_W-1:0] MIN_COMP = -2**(COMP_W-1); + + // Max/min possible values for the scale register + localparam real MAX_SCALE = +(2**(SCALING_WIDTH-1) - 1) / (2.0**FRAC_W); + localparam real MIN_SCALE = -(2**(SCALING_WIDTH-1) ) / (2.0**FRAC_W); + + // TUSER bit positions + 
localparam int HAS_TIME_POS = 125; + localparam int EOB_POS = 124; + localparam int TIMESTAMP_POS = 0; + + // AXI-Stream data bus parameters + localparam int DATA_W = SAMPLE_W; + localparam int USER_W = 128; + + // Amount of rounding error to allow (in ULPs). We generally expect the error + // in computation to be +/- 1 ULP, but since the DUT performs several + // computations, error can accumulate. The testbench computations also + // introduce some error. In particular, using the scale register in the DUT + // also scales the error. The MAX_ERROR can be reduced to 2 if you keep the + // scale register < 1.0. + localparam MAX_ERROR = 8; + + + //--------------------------------------------------------------------------- + // Type Definitions + //--------------------------------------------------------------------------- + + // Burst test configuration + typedef struct { + int spp; // Samples per packet to generate. + int spp_last; // Length of last packet, if different from SPP. + // Set to -1 to use spp value. + int num_packets; // Number of packets in each burst. + int num_bursts; // Number of bursts to send. + real amp; // Amplitude of the test signal to generate. + real freq; // Normalized frequency of test signal to generate. + bit timed; // Set to 1 for timed packet, 0 for non-timed. If + // doing a timed tune, this must be 1. + real scale; // Scale value to use, in the range [-4,4). + real freq_shift; // Initial frequency shift to use. + real tune_freq_shift; // New frequency shift to tune to. + longint tune_time; // Time after which to tune the frequency (set a + // new frequency shift). Set to -1 to disable. + } burst_cfg_t; + + typedef AxiStreamPacket #(.DATA_WIDTH(DATA_W), .USER_WIDTH(USER_W)) axis_pkt_t; + typedef axis_pkt_t axis_pkt_queue_t[$]; + + // Default settings to use for burst_cfg_t. This creates a nice complex + // sinusoid and the output should match the input, unchanged. 
+ localparam burst_cfg_t DEFAULT_BURST_CFG = '{ + spp : 256, + spp_last : -1, + num_packets : 1, + num_bursts : 1, + freq : 1.0/16.0, + amp : 0.75, + timed : 1, + scale : 1.0, + freq_shift : 0.0, + tune_freq_shift : 0.0, + tune_time : -1.0 + }; + + + //--------------------------------------------------------------------------- + // Clocks and Resets + //--------------------------------------------------------------------------- + + bit clk, rst; + + sim_clock_gen #(CLK_PERIOD) clk_gen (clk, rst); + + + //--------------------------------------------------------------------------- + // AXI-Stream BFM + //--------------------------------------------------------------------------- + + // AXI-Stream interfaces to/from DUT + AxiStreamIf #(.DATA_WIDTH(DATA_W), .USER_WIDTH(USER_W), .TKEEP(0)) + to_dut (clk, rst); + AxiStreamIf #(.DATA_WIDTH(DATA_W), .USER_WIDTH(USER_W), .TKEEP(0)) + from_dut (clk, rst); + + // BFM for the AXI-Stream interface to DUT + AxiStreamBfm #(.DATA_WIDTH(DATA_W), .USER_WIDTH(USER_W), .TKEEP(0)) + axis_bfm = new(to_dut, from_dut); + + + //--------------------------------------------------------------------------- + // DUT + //--------------------------------------------------------------------------- + + logic clear = 1'b0; + logic timed_cmd_fifo_full; + logic set_stb = 1'b0; + logic [SR_AWIDTH-1:0] set_addr; + logic [SR_DWIDTH-1:0] set_data; + logic [SR_TWIDTH-1:0] set_time; + logic set_has_time; + logic [ SAMPLE_W-1:0] i_tdata; + logic i_tlast; + logic i_tvalid; + logic i_tready; + logic [ USER_W-1:0] i_tuser; + logic [ SAMPLE_W-1:0] o_tdata; + logic o_tlast; + logic o_tvalid; + logic o_tready; + logic [ USER_W-1:0] o_tuser; + + dds_timed #( + .SR_FREQ_ADDR (SR_FREQ_ADDR), + .SR_SCALE_IQ_ADDR (SR_SCALE_IQ_ADDR), + .PHASE_ACCUM_WIDTH (PHASE_ACCUM_WIDTH), + .SCALING_WIDTH (SCALING_WIDTH), + .SR_AWIDTH (SR_AWIDTH), + .SR_DWIDTH (SR_DWIDTH), + .SR_TWIDTH (SR_TWIDTH) + ) dds_timed_i ( + .clk (clk), + .reset (rst), + .clear (clear), + 
.timed_cmd_fifo_full (timed_cmd_fifo_full), + .set_stb (set_stb), + .set_addr (set_addr), + .set_data (set_data), + .set_time (set_time), + .set_has_time (set_has_time), + .i_tdata (to_dut.tdata), + .i_tlast (to_dut.tlast), + .i_tvalid (to_dut.tvalid), + .i_tready (to_dut.tready), + .i_tuser (to_dut.tuser), + .o_tdata (from_dut.tdata), + .o_tlast (from_dut.tlast), + .o_tvalid (from_dut.tvalid), + .o_tready (from_dut.tready), + .o_tuser (from_dut.tuser) + ); + + + //--------------------------------------------------------------------------- + // Timer + //--------------------------------------------------------------------------- + // + // Count the samples going into the DUT so we have something that tracks + // packet timestamp in the testbench. + // + //--------------------------------------------------------------------------- + + longint current_time = 0; + + always @(posedge clk) begin + if (to_dut.tvalid && to_dut.tready) begin + current_time <= current_time + 1; + end + end + + + //--------------------------------------------------------------------------- + // Expected Output + //--------------------------------------------------------------------------- + // + // This assigns the expected output to a signal so we can visualize what the + // testbench is expecting. Error checking isn't done here. This is only to + // aid in debug. + // + //--------------------------------------------------------------------------- + + mailbox #(axis_pkt_t) exp_pkts_mb = new(); + bit exp_data_mismatch = 0; + bit exp_user_mismatch = 0; + + logic [SAMPLE_W-1:0] exp_tdata; + logic [ USER_W-1:0] exp_tuser; + + always @(posedge clk) begin + if (rst) begin + exp_tdata = 'X; + exp_tuser = 'X; + end else begin + static axis_pkt_t exp_pkt = null; + static bit out_valid = 0; + + // Give time for the DUT to update its status, so we know what to do. 
+ #(0.01ns); + + // Output the next expected sample if we haven't done so already + if (from_dut.tvalid && !out_valid) begin + int rval; + + // Get the next packet from the mailbox if needed + if (exp_pkt == null) begin + rval = exp_pkts_mb.try_get(exp_pkt); + `ASSERT_ERROR(rval, "Couldn't get first packet from exp_pkts_mb."); + end else if (exp_pkt.data.size() == 0) begin + rval = exp_pkts_mb.try_get(exp_pkt); + `ASSERT_ERROR(rval, "Couldn't get next packet from exp_pkts_mb."); + end + + // Output the next sample + `ASSERT_ERROR(exp_pkt.data.size(), "exp_pkt.data is empty"); + exp_tdata = exp_pkt.data.pop_front(); + `ASSERT_ERROR(exp_pkt.user.size(), "exp_pkt.user is empty"); + exp_tuser = exp_pkt.user.pop_front(); + out_valid = 1; + end + + exp_data_mismatch = compare_samples(exp_tdata, from_dut.tdata); + exp_user_mismatch = compare_samples(exp_tuser, from_dut.tuser); + + // Check if the output has been accepted and needs to update + if (from_dut.tvalid && from_dut.tready) begin + out_valid = 0; + end + end + end + + + //--------------------------------------------------------------------------- + // Helper Functions + //--------------------------------------------------------------------------- + + // Round a floating point number to num_bits bits of precision. + function automatic real round_bits(real num, int num_bits); + return real'(longint'(num * (2.0**num_bits))) / (2.0**num_bits); + endfunction : round_bits + + + // Compare the samples a and b to see if either component differs by more + // than MAX_ERROR. + function automatic bit compare_samples(sc16_t a, sc16_t b); + Math #(s16_t) m; + sc16_t diff; + diff = sub_sc16(a, b); + if (m.abs(diff.re) > MAX_ERROR || m.abs(diff.im) > MAX_ERROR) return 1; + return 0; + endfunction : compare_samples + + + // Compare the packets, sample by sample. Returns a string error message + // explaining the nature of the mismatch. If packets match, an empty string + // is returned. 
+ function automatic string compare_packets(axis_pkt_t actual, axis_pkt_t expected); + if (actual.data.size() != expected.data.size()) begin + return $sformatf("Packet lengths do not match. Actual is %0d, expected is %0d.", + actual.data.size(), expected.data.size()); + end + + foreach(actual.data[i]) begin + sc16_t a, b; + + // Check the samples in TDATA. + // Calculate the difference between the actual and expected values. + a = actual.data[i]; + b = expected.data[i]; + if (compare_samples(a, b)) begin + `ASSERT_WARNING(0, "compare_packets: Skipping rest of packet due to mismatch.") + return $sformatf("Word %0d in packet TDATA does not match. Actual is 0x%X, expected is 0x%X.", + i, actual.data[i], expected.data[i]); + end + + // Check TUSER. This is only guaranteed to be valid on the last sample of + // each packet due to the way it's currently implemented. + if (i == actual.data.size()-1 && actual.user[i] != expected.user[i]) begin + string fields; + if (actual.user[i][EOB_POS] != expected.user[i][EOB_POS]) begin + fields = {fields, "(EOB)"}; + end + if (actual.user[i][HAS_TIME_POS] != expected.user[i][HAS_TIME_POS]) begin + fields = {fields, "(HAS_TIME)"}; + end + if (actual.user[i][TIMESTAMP_POS+:64] != expected.user[i][TIMESTAMP_POS+:64]) begin + fields = {fields, "(TIMESTAMP)"}; + end + if (fields == "") fields = "<None>"; + `ASSERT_WARNING(0, "compare_packets: Skipping rest of packet due to mismatch.") + return $sformatf({ + "Word %0d in packet TUSER does not match. ", + "Fields not matching: %s. ", + "Actual is %X, expected is %X."}, + i, fields, actual.user[i], expected.user[i]); + end + end + // Return empty string if all is well + return ""; + endfunction : compare_packets + + + // Generate a test packet containing a complex sinusoid signal e^(j∙2π∙f∙t) + // and return it. + // + // length: The length of the packet to generate in samples. + // freq: Normalized frequency of the signal to generate. + // eob: EOB flag for the packet. 
+ // timed: Set to 1 for a timed packet, 0 for non-timed. Timed is the + // default. + // timestamp: Timestamp for the first packet. Leave at the default value + // to continue from the time of the previous packet. + // init: Initial phase value to use (t in e^jt). Leave at the default + // value to use the last value of the previous packet. Must be + // in the range [0,1), where 1.0 corresponds to 2*pi radians. + // + function automatic axis_pkt_t gen_test_packet( + int length, + real freq, + real amp = 0.75, + bit eob = 0, + longint timed = 1, + longint timestamp = -1, + real init = -1.0 + ); + static real phase; + static longint next_time = 0; + bit signed [COMP_W-1:0] re, im; + int re_int, im_int; + logic [USER_W-1:0] user; + axis_pkt_t packet; + + if (init != -1.0) begin + phase = init; + end + + if (timestamp >= 0) begin + next_time = timestamp; + end + + packet = new(); + for (int sample_num = 0; sample_num < length; sample_num++) begin + // Calculate I/Q + re_int = $cos(phase*TAU) * amp * 2**FRAC_W; + im_int = $sin(phase*TAU) * amp * 2**FRAC_W; + + // Saturate + if(re_int > MAX_COMP) re = MAX_COMP; + else if(re_int < MIN_COMP) re = MIN_COMP; + else re = re_int; + if(im_int > MAX_COMP) im = MAX_COMP; + else if(im_int < MIN_COMP) im = MIN_COMP; + else im = im_int; + + // Calculate TUSER (header) + user = '0; + user[EOB_POS] = eob; + user[HAS_TIME_POS] = timed; + user[TIMESTAMP_POS +: 64] = timed ? next_time : 'X; + + // Enqueue the sample + packet.data.push_back({re, im}); + packet.user.push_back(user); + phase += freq; + end + + // Calculate the timestamp for the next packet + next_time += length; + + return packet; + endfunction : gen_test_packet + + + // Apply a frequency shift to the packet data, by multiplying each sample by + // the output of a complex NCO. The implementation here models the HDL so + // that we don't accumulate error over time. + // + // packet : Input packet with the samples to frequency shift. 
+ // freq : Normalized frequency shift to apply. + // reset_nco : If 1, reset the NCO to 0 before beginning. Otherwise + // continue from previous value. + // first_sample : First sample to frequency shift + // last_sample : Last sample to frequency shift (inclusive) + // + // Returns: A new packet with the frequency-shifted data. + // + function automatic axis_pkt_t freq_shift_pkt( + axis_pkt_t packet, + real freq, + bit reset_nco = 0, + int first_sample = 0, + int last_sample = -1 + ); + // Normalized phase angle in the range [0,1), corresponding to [0,2π) + // radians. + static bit [PHASE_ACCUM_WIDTH-1:0] phase = 0; + bit [PHASE_ACCUM_WIDTH-1:0] phase_inc; + axis_pkt_t new_packet; + + new_packet = packet.copy(); + + phase_inc = freq * (2.0**PHASE_ACCUM_WIDTH); + if (reset_nco) begin + phase = 0; + end + + if (packet == null) return null; + + last_sample = last_sample < 0 ? packet.data.size()-1 : last_sample; + for (int i = first_sample; i <= last_sample; i++) begin + // There are a lot of redundant variables in this loop. This was done to + // aid in debugging so we can correlate what's calculated here to what + // the DUT computes, and to have both fixed-point and floating point + // values. + sc16_t in_sc16, out_sc16; + complex_t nco; + complex_t in_c, out_c; + real phase_real; + + // Get the next input sample and convert it + in_sc16 = packet.data[i]; + in_c = sc16_to_complex(in_sc16); + + // Convert the phase + phase_real = real'(phase) / (2.0**PHASE_ACCUM_WIDTH); + + // Compute the new NCO value: nco = exp(j∙2π∙phase) + nco = polar_to_complex(1.0, TAU * phase_real); + + // Compute the new data output: sample_out = nco * sample_in + out_c = mul(nco, in_c); + out_sc16 = complex_to_sc16(out_c); + new_packet.data[i] = out_sc16; + + // Update the phase for the next iteration + phase = phase + phase_inc; + end + return new_packet; + endfunction : freq_shift_pkt + + // Return a scaled version of the input data packet. 
That is, where each + // sample is multiplied by scale. This models the precision provided by the + // scaler in the DUT. + function automatic axis_pkt_t scale_packet(axis_pkt_t packet, real scale); + bit [SAMPLE_W-1:0] sample; + bit signed [COMP_W-1:0] re, im, a, b; + int re_tmp, im_tmp; + axis_pkt_t new_packet; + + // Make sure scale is in the range supported by hardware + if (scale > MAX_SCALE) scale = MAX_SCALE; + else if (scale < MIN_SCALE) scale = MIN_SCALE; + + new_packet = packet.copy(); + foreach (packet.data[i]) begin + sample = packet.data[i]; + re = sample[1*COMP_W +: COMP_W]; + im = sample[0*COMP_W +: COMP_W]; + // Scale with full precision + re_tmp = re * scale; + im_tmp = im * scale; + // Saturate the values + if (re_tmp > MAX_COMP) re = MAX_COMP; + else if (re_tmp < MIN_COMP) re = MIN_COMP; + else re = re_tmp; + if (im_tmp > MAX_COMP) im = MAX_COMP; + else if (im_tmp < MIN_COMP) im = MIN_COMP; + else im = im_tmp; + new_packet.data[i] = { re, im }; + end + return new_packet; + endfunction : scale_packet + + // Generate the output packets we expect from the DUT given the provided + // burst of packets and configuration. 
+ // + // cfg : Burst test configuration used + // packets : Queue of packets that were input to the DUT + // + // returns : Expected packets from DUT + // + function automatic axis_pkt_queue_t generate_expected( + burst_cfg_t cfg, + axis_pkt_queue_t packets + ); + static longint timestamp = 0; + axis_pkt_t expected[$]; + axis_pkt_t packet; + bit reset_nco; + int first_sample; + int last_sample; + real freq_shift; + + freq_shift = cfg.freq_shift; + + foreach(packets[i]) begin + // Make a copy of the input + packet = packets[i].copy(); + + // Check if we're supposed to tune the frequency in this packet + first_sample = 0; + if (cfg.timed && timestamp <= cfg.tune_time && + timestamp + packet.data.size() > cfg.tune_time) begin + last_sample = cfg.tune_time - timestamp; + end else begin + last_sample = -1; + end + + // Apply a frequency shift (reset the NCO before each burst) + reset_nco = i % cfg.num_packets == 0; + packet = freq_shift_pkt(packet, freq_shift, reset_nco, first_sample, last_sample); + + // If there was a tune, shift the rest of the packet differently + if (last_sample >= 0 && last_sample < packet.data.size()) begin + freq_shift = cfg.tune_freq_shift; + reset_nco = 1; + first_sample = last_sample + 1; + last_sample = -1; + packet = freq_shift_pkt(packet, freq_shift, reset_nco, first_sample, last_sample); + end + + // Multiply packet samples by a scaler + packet = scale_packet(packet, cfg.scale); + + // Add this packet to the queue + expected.push_back(packet); + + // Send this packet to the expected packets mailbox, for debug + `ASSERT_ERROR(exp_pkts_mb.try_put(packet.copy()), "Unable to put expected packet"); + + // Calculate new timestamp + timestamp += packet.data.size(); + end + + return expected; + endfunction : generate_expected + + + // Generate a queue of packets modeled after the burst test configuration + // defined by cfg. 
//
//   cfg     : Burst test configuration describing the packets to build
//
//   returns : Queue with the packets of all bursts, in the order generated
//
function automatic axis_pkt_queue_t generate_bursts(burst_cfg_t cfg);
  axis_pkt_t packets[$];

  // Reset initial phase and time to 0 in generated packets by calling the
  // generator with init set to 0. The returned packet is discarded.
  void'(gen_test_packet(.length(0), .freq(0), .init(0)));

  // Build the packets to send
  for (int burst_num = 0; burst_num < cfg.num_bursts; burst_num++) begin
    for (int packet_num = 0; packet_num < cfg.num_packets; packet_num++) begin
      axis_pkt_t packet;
      bit eob;
      int length;

      // Set EOB and use spp_last for the last packet. If spp_last is not
      // greater than 0, the last packet has the same length as the others.
      if (packet_num == cfg.num_packets-1) begin
        eob    = 1;
        length = (cfg.spp_last > 0) ? cfg.spp_last : cfg.spp;
      end else begin
        eob    = 0;
        length = cfg.spp;
      end

      packet = gen_test_packet(
        .length (length),
        .freq   (cfg.freq),
        .amp    (cfg.amp),
        .eob    (eob),
        .timed  (cfg.timed));
      packets.push_back(packet);
    end
  end

  return packets;
endfunction : generate_bursts


// Write a value to a settings register.
//
//   addr      : Address of the register to write to.
//   value     : Value to write to the register.
//   timestamp : Timestamp to provide with the write. Any value >= 0,
//               including 0, results in a timed write. Set to -1 (or any
//               negative value) if the write should not be timed.
//
task automatic write_reg(
  bit [SR_AWIDTH-1:0] addr,
  bit [SR_DWIDTH-1:0] value,
  longint             timestamp = -1
);
  bit timed;

  // Time 0 is a valid timestamp, so only negative values mean "untimed".
  // This matches test_bursts, which schedules a tune when tune_time >= 0.
  timed = (timestamp >= 0);

  // Present the write for one clock cycle
  @(posedge clk);
  set_stb      <= 1;
  set_addr     <= addr;
  set_data     <= value;
  set_time     <= timed ? timestamp : 'X;
  set_has_time <= timed;

  // Remove the strobe and drive X on the other signals while idle
  @(posedge clk);
  set_stb      <= 0;
  set_addr     <= 'X;
  set_data     <= 'X;
  set_time     <= 'X;
  set_has_time <= 'X;
  @(posedge clk);
endtask : write_reg


// Write a value to the frequency register.
//
//   freq      : Normalized frequency to write to the register. E.g., in the
//               range [-0.5,0.5) or [0,1). Numerically, either works.
//   timestamp : Timestamp to provide with the write. Set to -1 if the write
//               should not be timed.
//
task automatic write_reg_freq(real freq, longint timestamp = -1);
  // Scale the normalized frequency by 2**PHASE_ACCUM_WIDTH to form the
  // register value.
  write_reg(
    .addr      (SR_FREQ_ADDR),
    .value     (freq * (2.0**PHASE_ACCUM_WIDTH)),
    .timestamp (timestamp)
  );
endtask : write_reg_freq


// Write a value to the scale register. The write is always untimed.
//
//   scale : Scaler to write to the register, in the range [-4,4). Values
//           outside [MIN_SCALE, MAX_SCALE] are saturated before writing.
//
task automatic write_reg_scale(real scale);
  // Saturate to the range allowed by the register
  if (scale > MAX_SCALE) scale = MAX_SCALE;
  if (scale < MIN_SCALE) scale = MIN_SCALE;

  // Scale by 2**FRAC_W to form the register value
  write_reg(
    .addr  (SR_SCALE_IQ_ADDR),
    .value (scale * (2.0**FRAC_W))
  );
endtask : write_reg_scale


// Check that the output matches what we would expect. One packet is read
// from the BFM for every input packet and compared to the model's output.
//
//   cfg     : Test configuration
//   packets : The packets that were input to the DUT
//
task automatic verify_output(burst_cfg_t cfg, axis_pkt_queue_t packets);
  axis_pkt_t expected[$];

  expected = generate_expected(cfg, packets);

  for (int pkt_num = 0; pkt_num < packets.size(); pkt_num++) begin
    axis_pkt_t recvd;
    string     msg;

    axis_bfm.get(recvd);

    // An empty string from compare_packets is treated as a match
    msg = compare_packets(recvd, expected[pkt_num]);
    `ASSERT_ERROR(msg == "",
      $sformatf("Error in packet %0d: %s", pkt_num, msg));
  end
endtask : verify_output


// Test a burst (i.e., multiple packets ending with EOB) through the DUT
// using the provided configuration.
//
//   cfg : Burst test configuration to run
//
task automatic test_bursts(burst_cfg_t cfg);
  axis_pkt_t packets[$];

  // Are we doing timed packets? Requesting a tune time forces timed mode.
  cfg.timed = (cfg.timed || cfg.tune_time >= 0);

  // Set the registers
  write_reg_scale(cfg.scale);
  write_reg_freq(cfg.freq_shift);

  // Schedule a timed tune, if requested
  if (cfg.tune_time >= 0) begin
    write_reg_freq(cfg.tune_freq_shift, cfg.tune_time);
  end

  // Wait a bit for the register changes to take effect
  clk_gen.clk_wait_r(10);

  // Generate test packets to send
  packets = generate_bursts(cfg);

  // Send the packets
  for (int pkt_num = 0; pkt_num < packets.size(); pkt_num++) begin
    axis_bfm.put(packets[pkt_num]);
  end

  // Check the results
  verify_output(cfg, packets);
endtask : test_bursts


//---------------------------------------------------------------------------
// Test Procedures
//---------------------------------------------------------------------------

// Run a single directed test: start a named test, push the bursts described
// by cfg through the DUT, then end the test.
//
//   bfm_config : Index of the flow control setting in use (for the test name)
//   cfg        : Burst test configuration to run
//
task automatic run_directed_test(int bfm_config, burst_cfg_t cfg);
  test.start_test($sformatf("Directed Test: bfm_config: %0d, %p", bfm_config, cfg));
  test_bursts(cfg);
  test.end_test();
endtask : run_directed_test


// This performs a few directed tests as a sanity check and to test a few
// corner cases.
task automatic directed_tests();
  // Stall probabilities for each flow control setting, indexed by bfm_config:
  //   0 : No stalls on input or output to DUT
  //   1 : Overflow: Input to DUT faster than output
  //   2 : Underflow: Input to DUT slower than output
  //   3 : Lots of stalls: Input and output stall frequently
  int master_stall [4] = '{ 0, 10, 30, 40 };
  int slave_stall  [4] = '{ 0, 30, 10, 40 };

  burst_cfg_t cfg;

  // Iterate over different flow control settings to exercise different
  // scenarios.
  for (int bfm_config = 0; bfm_config < 4; bfm_config++) begin
    axis_bfm.set_master_stall_prob(master_stall[bfm_config]);
    axis_bfm.set_slave_stall_prob(slave_stall[bfm_config]);

    //-------------------------------
    // Test Basic Configurations
    //-------------------------------

    // Test the default configuration
    cfg = DEFAULT_BURST_CFG;
    run_directed_test(bfm_config, cfg);

    // Test a somewhat arbitrary but different configuration
    cfg = DEFAULT_BURST_CFG;
    cfg.spp         = 97;
    cfg.spp_last    = 33;
    cfg.num_bursts  = 2;
    cfg.num_packets = 3;
    cfg.amp         = 0.5;
    cfg.scale       = 1.25;
    cfg.freq        = 0.23;
    cfg.freq_shift  = 0.17;
    run_directed_test(bfm_config, cfg);

    // Repeat with a single-sample packet (other settings are kept)
    cfg.spp      = 1;
    cfg.spp_last = 1;
    run_directed_test(bfm_config, cfg);

    //-------------------------------
    // Test timed tunes
    //-------------------------------

    cfg = DEFAULT_BURST_CFG;
    cfg.spp             = 135;
    cfg.freq            = 1.0/32.0;
    cfg.num_bursts      = 1;
    cfg.num_packets     = 3;
    cfg.scale           = 0.75;
    cfg.freq_shift      = 0.0;   // Initial frequency shift
    cfg.tune_freq_shift = 0.13;  // New frequency shift

    // Test tuning in the middle of a packet
    cfg.tune_time = current_time + cfg.num_packets*cfg.spp/2;
    run_directed_test(bfm_config, cfg);

    // Test tuning at the end of the first packet
    cfg.tune_time = current_time + cfg.spp-1;
    run_directed_test(bfm_config, cfg);

    // Test tuning at the beginning of a packet
    cfg.tune_time = current_time + cfg.spp;
    run_directed_test(bfm_config, cfg);
  end
endtask : directed_tests


// This generates a randomized configuration and exercises the DUT with that
// configuration. This is repeated num_tests times, with a newly randomized
// configuration each time.
//
//   num_tests : Number of randomized tests to run
//
task automatic random_tests(int num_tests);
  burst_cfg_t cfg;
  int         in_stall, out_stall;

  for (int test_num = 0; test_num < num_tests; test_num++) begin
    // Choose random values for this run. Round the floating point numbers to
    // a smaller number of bits to reduce rounding differences between the
    // testbench and the DUT.
    cfg = DEFAULT_BURST_CFG;
    cfg.spp             = $urandom_range(1, 64);
    cfg.spp_last        = $urandom_range(1, 64);
    cfg.num_packets     = $urandom_range(1, 3);
    cfg.num_bursts      = $urandom_range(1, 2);
    cfg.amp             = round_bits(frand_range(1.0/16.0, 15.0/16.0), 15);
    cfg.freq            = frand(0.5);
    cfg.timed           = $urandom_range(0, 1);
    cfg.scale           = round_bits(frand_range(-4.0, 4.0), 15);
    cfg.freq_shift      = round_bits(frand(0.5), 32);
    cfg.tune_freq_shift = round_bits(frand(0.5), 32);

    // For timed runs, tune at a random sample offset within one burst
    if (cfg.timed) begin
      cfg.tune_time = current_time +
        $urandom_range(0, (cfg.num_packets-1)*cfg.spp + cfg.spp_last - 1);
    end

    in_stall  = $urandom_range(0, 50);
    out_stall = $urandom_range(0, 50);

    // Run the test
    test.start_test($sformatf("Random Test: InStall: %0d, OutStall: %0d, %p",
      in_stall, out_stall, cfg));
    axis_bfm.set_master_stall_prob(in_stall);
    axis_bfm.set_slave_stall_prob(out_stall);
    test_bursts(cfg);
    test.end_test();
  end
endtask : random_tests


//---------------------------------------------------------------------------
// Main Test Process
//---------------------------------------------------------------------------

initial begin : main
  test.start_tb("dds_timed_tb");

  // Start the BFMs running
  axis_bfm.run();

  // Reset, then wait for the falling edge of rst before testing
  clk_gen.reset();
  @(negedge rst);

  //-------------------------------
  // Run Tests
  //-------------------------------

  directed_tests();
  random_tests(200);

  test.end_tb();
end

endmodule


`default_nettype wire